about summary refs log tree commit homepage
path: root/script/public-inbox-cindex
diff options
context:
space:
mode:
Diffstat (limited to 'script/public-inbox-cindex')
-rwxr-xr-x script/public-inbox-cindex 102
1 files changed, 102 insertions, 0 deletions
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
new file mode 100755
index 00000000..dd00623a
--- /dev/null
+++ b/script/public-inbox-cindex
@@ -0,0 +1,102 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12; # enables strict and the :5.12 feature bundle; warnings come from -w above
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $help = <<EOF; # usage text for --help and usage errors; should fit w/o scrolling in 80x24 term:
+usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]...
+usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT
+
+  Create and update search indices for code repos
+
+  -d EXTDIR           use EXTDIR instead of GIT_DIR/public-inbox-cindex
+  --no-fsync          speed up indexing, risk corruption on power outage
+  -L LEVEL            `medium', or `full' (default: medium)
+  --project-list=FILE use a cgit/gitweb-compatible list of projects
+  --update | -u       update previously-indexed code repos with `-d'
+  --jobs=NUM          set or disable parallelization (NUM=0)
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index commit diffs larger than the given size
+  --prune             prune old repos and commits
+  --reindex           reindex previously indexed repos
+  --verbose | -v      increase verbosity (may be repeated)
+
+BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
+See public-inbox-cindex(1) man page for full documentation.
+EOF
+my $opt = { fsync => 1, scan => 1 }; # defaults for negatable flags; --no-scan is hidden (absent from $help)
+GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
+                indexlevel|index-level|L=s join:s@
+                batch_size|batch-size=s max_size|max-size=s
+                include|I=s@ only=s@ all show:s@
+                project-list=s exclude=s@ project-root|r=s
+                git-dir|g=s@
+                sort-parallel=s sort-compress-program=s sort-buffer-size=s
+                d=s update|u scan! prune dry-run|n C=s@ help|h))
+        or die $help; # parsed values are stored in %$opt; on a parse failure die with the usage text
+if ($opt->{help}) { print $help; exit 0 }; # explicit --help: usage on stdout, exit status 0
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
+require IO::Handle; # provides the autoflush method called on STDOUT/STDERR below
+STDOUT->autoflush(1);
+STDERR->autoflush(1);
+$SIG{USR1} = 'IGNORE'; # to be overridden in cidx_sync
+$SIG{PIPE} = 'IGNORE'; # with SIGPIPE ignored, writes to a closed pipe fail with an error instead of killing us
+# project modules are loaded only from here on, after the --help and bad-option exits above, to keep those paths fast
+require PublicInbox::Admin;
+PublicInbox::Admin::do_chdir(delete $opt->{C}); # the -C values are removed from $opt before it is passed on
+my $cfg = $opt->{-pi_cfg} = PublicInbox::Config->new; # also stashed in $opt; NOTE(review): Config is not required here, presumably loaded via Admin - confirm
+my $cidx_dir = $opt->{d}; # -d EXTDIR; left undef when per-repo indices are wanted (see the final if/else)
+PublicInbox::Admin::require_or_die('Xapian');
+PublicInbox::Admin::progress_prepare($opt);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg); # may be false; otherwise a hashref merged into %ENV below
+%ENV = (%ENV, %$env) if $env; # keys from $env override existing environment entries
+
+my @git_dirs; # repos to operate on; stays empty when only show/update/prune was requested
+require PublicInbox::CodeSearchIdx; # unstable internal API
+if (@ARGV) { # positional arguments are not accepted
+        my @g = map { "-g $_" } @ARGV; # spell out the -g form of each stray argument for the error message
+        die <<EOM;
+Specify git directories with `-g' (or --git-dir=): @g
+Or use --project-list=... and --project-root=...
+EOM
+} elsif (defined(my $pl = $opt->{'project-list'})) {
+        my $pfx = $opt->{'project-root'} // die <<EOM;
+PROJECT_ROOT required for --project-list
+EOM
+        $opt->{'git-dir'} and die <<EOM;
+--project-list does not accept additional --git-dir directories
+(@{$opt->{'git-dir'}})
+EOM
+        open my $fh, '<', $pl or die "open($pl): $!\n"; # never closed explicitly; the lexical handle ends with this branch
+        chomp(@git_dirs = <$fh>); # every line becomes an entry as-is; nothing here filters blank lines
+        $pfx .= '/'; # guarantee a separator between the root and each listed path
+        $pfx =~ tr!/!/!s; # squeeze any run of slashes in the prefix down to one
+        substr($_, 0, 0, $pfx) for @git_dirs; # prepend the prefix to each entry in place
+} elsif (my $gd = $opt->{'git-dir'}) {
+        @git_dirs = @$gd;
+} elsif (grep defined, @$opt{qw(show update prune)}) { # these options are allowed through with no git dirs given
+} else {
+        warn "No --git-dir= nor --project-list= + --project-root= specified\n";
+        die $help;
+}
+
+$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs; # overwrite each entry with whatever resolve_git_dir returns
+if (defined $cidx_dir) { # external index: a single index at -d EXTDIR receives every git dir
+        die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/;
+        my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt);
+        @{$cidx->{git_dirs}} = @git_dirs; # may be empty (show/update/prune given without any git dirs)
+        $cidx->cidx_run;
+} elsif (!@git_dirs) { # no -d and no git dirs: nothing can be indexed, so treat it as a usage error
+        die $help
+} else { # no -d: build one index per repo, stored inside that repo
+        die <<EOM if $opt->{update};
+--update requires `-d EXTDIR'
+EOM
+        for my $gd (@git_dirs) { # repos are processed one after another, each with its own index object
+                my $cd = "$gd/public-inbox-cindex"; # index directory lives under the git dir itself
+                my $cidx = PublicInbox::CodeSearchIdx->new($cd, { %$opt }); # each repo gets its own shallow copy of $opt
+                $cidx->{-cidx_internal} = 1; # flag read by CodeSearchIdx (not visible here); presumably marks a per-repo index
+                @{$cidx->{git_dirs}} = ($gd);
+                $cidx->cidx_run;
+        }
+}