about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2020-11-29 05:09:35 +0000
committer: Eric Wong <e@80x24.org> 2020-11-29 06:42:00 +0000
commit: ea885bf0f76dc1795dc771667be721ec0fed5482 (patch)
tree: 5323da5926b6aa4c20619e2a05805f581722a389 /script
parent: 33f80a367325f3dac983633cb0a59946205776c2 (diff)
download: public-inbox-ea885bf0f76dc1795dc771667be721ec0fed5482.tar.gz
Inboxes may be removed or newsgroups renamed over time.
Introduce a switch to do garbage collection and eliminate stale
search and xref3 results based on inboxes which remain in the
config file.

This may also fix up stale results left over from any bugs which
may leave stale data around.

This is also useful in case a clumsy BOFH (me :P) is swapping
between several PI_CONFIGs and accidentally indexes a bunch of
inboxes they didn't intend to.
Diffstat (limited to 'script')
-rw-r--r-- script/public-inbox-extindex | 19
1 files changed, 16 insertions, 3 deletions
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
index 20a0737c..17ad59fa 100644
--- a/script/public-inbox-extindex
+++ b/script/public-inbox-extindex
@@ -16,6 +16,7 @@ usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR]
   --jobs=NUM          set or disable parallelization (NUM=0)
   --batch-size=BYTES  flush changes to OS after a given number of bytes
   --max-size=BYTES    do not index messages larger than the given size
+  --gc                perform garbage collection instead of indexing
   --verbose | -v      increase verbosity (may be repeated)
 
 BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
@@ -26,6 +27,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
                 fsync|sync!
                 indexlevel|index-level|L=s max_size|max-size=s
                 batch_size|batch-size=s
+                gc
                 all help|h))
         or die $help;
 if ($opt->{help}) { print $help; exit 0 };
@@ -36,7 +38,13 @@ my $eidx_dir = shift(@ARGV) // die "E: $help";
 local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync
 require PublicInbox::Admin;
 my $cfg = PublicInbox::Config->new;
-my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+my @ibxs;
+if ($opt->{gc}) {
+        die "E: inbox paths must not be specified with --gc\n" if @ARGV;
+        die "E: --all not compatible --gc\n" if $opt->{all};
+} else {
+        @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+}
 PublicInbox::Admin::require_or_die(qw(-search));
 PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
 PublicInbox::Admin::progress_prepare($opt);
@@ -44,5 +52,10 @@ my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
 local %ENV = (%ENV, %$env) if $env;
 require PublicInbox::ExtSearchIdx;
 my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt);
-$eidx->attach_inbox($_) for @ibxs;
-$eidx->eidx_sync($opt);
+if ($opt->{gc}) {
+        $eidx->attach_config($cfg);
+        $eidx->eidx_gc($opt);
+} else {
+        $eidx->attach_inbox($_) for @ibxs;
+        $eidx->eidx_sync($opt);
+}