diff options
author | Eric Wong <e@yhbt.net> | 2020-08-20 20:24:56 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-08-20 21:11:24 +0000 |
commit | f344d64066f85dd6737daeb42c94902e1bbfda78 (patch) | |
tree | 9a68a2a657a13ec245cfe360031b601a4d9d0c5c /script | |
parent | f62ddb19552b19f398d56193d7cf20cf20b61a04 (diff) | |
download | public-inbox-f344d64066f85dd6737daeb42c94902e1bbfda78.tar.gz |
Since we no longer read document data from Xapian, allow users to opt out of storing it. This breaks compatibility with previous releases of public-inbox, but gives us a ~1.5% space savings on Xapian storage (and an associated reduction in I/O and page cache pressure).
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-convert | 3 | ||||
-rwxr-xr-x | script/public-inbox-index | 7 | ||||
-rwxr-xr-x | script/public-inbox-init | 8 |
3 files changed, 15 insertions, 3 deletions
diff --git a/script/public-inbox-convert b/script/public-inbox-convert index d655dcc6..4ff198d1 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -77,7 +77,8 @@ if ($old) { die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; require PublicInbox::Admin; -$old->{indexlevel} //= PublicInbox::Admin::detect_indexlevel($old); +my $detected = PublicInbox::Admin::detect_indexlevel($old); +$old->{indexlevel} //= $detected; my $env; if ($opt->{'index'}) { my $mods = {}; diff --git a/script/public-inbox-index b/script/public-inbox-index index 30d24838..9855c67d 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -39,7 +39,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard - all help|?)) + skip-docdata all help|?)) or die "bad command-line args\n$usage"; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; @@ -58,9 +58,11 @@ unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 } my $mods = {}; foreach my $ibx (@ibxs) { + # detect_indexlevel may also set $ibx->{-skip_docdata} + my $detected = PublicInbox::Admin::detect_indexlevel($ibx); # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 
- 'full' : PublicInbox::Admin::detect_indexlevel($ibx)); + 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); } @@ -75,6 +77,7 @@ for my $ibx (@ibxs) { PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); } $ibx->{-no_fsync} = 1 if !$opt->{fsync}; + $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; my $ibx_opt = $opt; if (defined(my $s = $ibx->{lc('indexSequentialShard')})) { diff --git a/script/public-inbox-init b/script/public-inbox-init index b19c2321..037e8e56 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -34,6 +34,7 @@ require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-base'); my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help); +my $skip_docdata; my $ng = ''; my %opts = ( 'V|version=i' => \$version, @@ -42,6 +43,7 @@ my %opts = ( 'skip-artnum=i' => \$skip_artnum, 'j|jobs=i' => \$jobs, 'ng|newsgroup=s' => \$ng, + 'skip-docdata' => \$skip_docdata, 'help|?' => \$show_help, ); my $usage_cb = sub { @@ -177,6 +179,12 @@ if (defined $jobs) { require PublicInbox::InboxWritable; $ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt); +if ($skip_docdata) { + $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb + $ibx->{indexlevel} eq 'basic' and + die "--skip-docdata ignored with --indexlevel=basic\n"; + $ibx->{-skip_docdata} = $skip_docdata; +} $ibx->init_inbox(0, $skip_epoch, $skip_artnum); # needed for git prior to v2.1.0 |