diff options
author | Eric Wong <e@80x24.org> | 2021-07-06 12:42:02 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-07-06 13:36:54 +0000 |
commit | 8ef622d408d2e4d98ad3aada8466f539c9ac61ba (patch) | |
tree | 5a66df2970f98c9cb591b615d0f94e28a7f9b32c /script | |
parent | f1f2464064af3840f2f1a697b638e5b769f111af (diff) | |
download | public-inbox-8ef622d408d2e4d98ad3aada8466f539c9ac61ba.tar.gz |
This is intended to fix older indices that had deduplication bugs for matching content. It'll also make dealing with future changes to ContentHash easier since that's never guaranteed stable. It also supports --dry-run to print changes only without making them.
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-extindex | 13 |
1 files changed, 10 insertions, 3 deletions
```diff
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
index 771486c4..dcb12e5a 100755
--- a/script/public-inbox-extindex
+++ b/script/public-inbox-extindex
@@ -17,7 +17,9 @@ usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...]
 --batch-size=BYTES flush changes to OS after a given number of bytes
 --max-size=BYTES do not index messages larger than the given size
 --gc perform garbage collection instead of indexing
+ --dedupe fix prior deduplication errors
 --verbose | -v increase verbosity (may be repeated)
+ --dry-run | -n dry-run on --dedupe
 
 BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
 See public-inbox-extindex(1) man page for full documentation.
@@ -27,7 +29,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
 fsync|sync!
 indexlevel|index-level|L=s max_size|max-size=s
 batch_size|batch-size=s
- gc commit-interval=i watch scan!
+ dedupe gc commit-interval=i watch scan! dry-run|n
 all help|h))
 or die $help;
 if ($opt->{help}) { print $help; exit 0 };
@@ -50,11 +52,16 @@ unless (defined $eidx_dir) {
 my @ibxs;
 if ($opt->{gc}) {
 die "E: inbox paths must not be specified with --gc\n" if @ARGV;
- die "E: --all not compatible with --gc\n" if $opt->{all};
- die "E: --watch is not compatible with --gc\n" if $opt->{watch};
+ for my $sw (qw(all watch dry-run dedupe)) {
+ die "E: --$sw is not compatible with --gc\n" if $opt->{$sw};
+ }
 } else {
 @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
 }
+if ($opt->{'dry-run'} && !$opt->{dedupe}) {
+ die "E: --dry-run only affects --dedupe\n";
+}
+
 PublicInbox::Admin::require_or_die(qw(-search));
 PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
 PublicInbox::Admin::progress_prepare($opt);
```