about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- Documentation/public-inbox-extindex.pod | 22
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm | 6
-rwxr-xr-x script/public-inbox-extindex | 9
-rw-r--r-- t/extsearch.t | 5
4 files changed, 36 insertions, 6 deletions
diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod
index 2e2e6383..a0fca83c 100644
--- a/Documentation/public-inbox-extindex.pod
+++ b/Documentation/public-inbox-extindex.pod
@@ -40,6 +40,28 @@ C<indexlevel> set to C<basic> and their respective Xapian
 public-inboxes where cross-posting is common, this allows
 significant space savings on Xapian indices.
 
+=item --gc
+
+Perform garbage collection instead of indexing.  Use this if
+inboxes are removed from the extindex, or if messages are
+purged or removed from some inboxes.
+
+=item --reindex
+
+Forces a re-index of all messages in the extindex.  This can be
+used for in-place upgrades and bugfixes while read-only server
+processes are utilizing the index.  Keep in mind this roughly
+doubles the size of the already-large Xapian database.
+
+The extindex locks will be released roughly every 10s to
+allow L<public-inbox-mda(1)> and L<public-inbox-watch(1)>
+processes to write to the extindex.
+
+=item --fast
+
+Used with C<--reindex>, it will only look for new and stale
+entries and not touch already-indexed messages.
+
 =back
 
 =head1 FILES
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 7cc8dd95..20c4cf78 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -811,7 +811,7 @@ sub _reindex_check_unseen ($$$) {
         local $sync->{-regen_fmt} =
                         "$ekey checking unseen %u/".$ibx->over->max."\n";
         ${$sync->{nr}} = 0;
-
+        my $fast = $sync->{-opt}->{fast};
         while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
                 ${$sync->{nr}} = $beg;
                 $beg = $msgs->[-1]->{num} + 1;
@@ -835,7 +835,7 @@ ibx_id = ? AND xnum = ? AND oidbin = ?
                         # the first time around ASAP:
                         if (scalar(@$docids) == 0) {
                                 reindex_unseen($self, $sync, $ibx, $xsmsg);
-                        } else { # already seen, reindex later
+                        } elsif (!$fast) { # already seen, reindex later
                                 for my $r (@$docids) {
                                         $self->{oidx}->eidxq_add($r->[0]);
                                 }
@@ -853,7 +853,7 @@ sub _reindex_check_stale ($$$) {
         my $fetching;
         my $ekey = $ibx->eidx_key;
         local $sync->{-regen_fmt} =
-                        "$ekey check stale/missing %u/".$ibx->over->max."\n";
+                        "$ekey checking stale/missing %u/".$ibx->over->max."\n";
         ${$sync->{nr}} = 0;
         do {
                 if (checkpoint_due($sync)) {
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
index 1572a1d2..c63f5dc2 100755
--- a/script/public-inbox-extindex
+++ b/script/public-inbox-extindex
@@ -18,6 +18,8 @@ usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...]
   --max-size=BYTES    do not index messages larger than the given size
   --gc                perform garbage collection instead of indexing
   --dedupe[=MSGID]    fix prior deduplication errors (may be repeated)
+  --reindex           index previously indexed inboxes
+  --fast              only reindex unseen/stale messages
   --verbose | -v      increase verbosity (may be repeated)
   --dry-run | -n      dry-run on --dedupe
 
@@ -26,7 +28,7 @@ See public-inbox-extindex(1) man page for full documentation.
 EOF
 my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 };
 GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
-                fsync|sync!
+                fsync|sync! fast
                 indexlevel|index-level|L=s max_size|max-size=s
                 batch_size|batch-size=s
                 dedupe:s@ gc commit-interval=i watch scan! dry-run|n
@@ -59,9 +61,10 @@ if ($opt->{gc}) {
 } else {
         @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
 }
-if ($opt->{'dry-run'} && !$opt->{dedupe}) {
+$opt->{'dry-run'} && !$opt->{dedupe} and
         die "E: --dry-run only affects --dedupe\n";
-}
+$opt->{fast} && !$opt->{reindex} and
+        die "E: --fast only affects --reindex\n";
 
 PublicInbox::Admin::require_or_die(qw(-search));
 PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
diff --git a/t/extsearch.t b/t/extsearch.t
index ca586f61..896e2704 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -336,6 +336,11 @@ if ('reindex catches missed messages') {
         $es->{xdb}->reopen;
         $mset = $es->mset("mid:$new->{mid}");
         is($mset->size, 0, 'stale mid gone Xapian');
+
+        ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"],
+                        undef, $opt), '--reindex w/ --fast');
+        ok(!run_script([qw(-extindex --all --fast), "$home/extindex"],
+                        undef, $opt), '--fast alone makes no sense');
 }
 
 if ('reindex catches content bifurcation') {