about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-08-20 20:24:56 +0000
committer: Eric Wong <e@yhbt.net> 2020-08-20 21:11:24 +0000
commit: f344d64066f85dd6737daeb42c94902e1bbfda78 (patch)
tree: 9a68a2a657a13ec245cfe360031b601a4d9d0c5c /script
parent: f62ddb19552b19f398d56193d7cf20cf20b61a04 (diff)
download: public-inbox-f344d64066f85dd6737daeb42c94902e1bbfda78.tar.gz
Since we no longer read document data from Xapian, allow users
to opt out of storing it.

This breaks compatibility with previous releases of
public-inbox, but gives us a ~1.5% space savings on Xapian
storage (and associated I/O and page cache pressure reduction).
Diffstat (limited to 'script')
-rwxr-xr-x script/public-inbox-convert 3
-rwxr-xr-x script/public-inbox-index 7
-rwxr-xr-x script/public-inbox-init 8
3 files changed, 15 insertions, 3 deletions
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index d655dcc6..4ff198d1 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -77,7 +77,8 @@ if ($old) {
 die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
 
 require PublicInbox::Admin;
-$old->{indexlevel} //= PublicInbox::Admin::detect_indexlevel($old);
+my $detected = PublicInbox::Admin::detect_indexlevel($old);
+$old->{indexlevel} //= $detected;
 my $env;
 if ($opt->{'index'}) {
         my $mods = {};
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 30d24838..9855c67d 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -39,7 +39,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
                 indexlevel|index-level|L=s max_size|max-size=s
                 batch_size|batch-size=s
                 sequential_shard|seq-shard|sequential-shard
-                all help|?))
+                skip-docdata all help|?))
         or die "bad command-line args\n$usage";
 if ($opt->{help}) { print $help; exit 0 };
 die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
@@ -58,9 +58,11 @@ unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 }
 
 my $mods = {};
 foreach my $ibx (@ibxs) {
+        # detect_indexlevel may also set $ibx->{-skip_docdata}
+        my $detected = PublicInbox::Admin::detect_indexlevel($ibx);
         # XXX: users can shoot themselves in the foot, with opt->{indexlevel}
         $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
-                        'full' : PublicInbox::Admin::detect_indexlevel($ibx));
+                        'full' : $detected);
         PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
 }
 
@@ -75,6 +77,7 @@ for my $ibx (@ibxs) {
                 PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt});
         }
         $ibx->{-no_fsync} = 1 if !$opt->{fsync};
+        $ibx->{-skip_docdata} //= $opt->{'skip-docdata'};
 
         my $ibx_opt = $opt;
         if (defined(my $s = $ibx->{lc('indexSequentialShard')})) {
diff --git a/script/public-inbox-init b/script/public-inbox-init
index b19c2321..037e8e56 100755
--- a/script/public-inbox-init
+++ b/script/public-inbox-init
@@ -34,6 +34,7 @@ require PublicInbox::Admin;
 PublicInbox::Admin::require_or_die('-base');
 
 my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help);
+my $skip_docdata;
 my $ng = '';
 my %opts = (
         'V|version=i' => \$version,
@@ -42,6 +43,7 @@ my %opts = (
         'skip-artnum=i' => \$skip_artnum,
         'j|jobs=i' => \$jobs,
         'ng|newsgroup=s' => \$ng,
+        'skip-docdata' => \$skip_docdata,
         'help|?' => \$show_help,
 );
 my $usage_cb = sub {
@@ -177,6 +179,12 @@ if (defined $jobs) {
 
 require PublicInbox::InboxWritable;
 $ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt);
+if ($skip_docdata) {
+        $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb
+        $ibx->{indexlevel} eq 'basic' and
+                die "--skip-docdata ignored with --indexlevel=basic\n";
+        $ibx->{-skip_docdata} = $skip_docdata;
+}
 $ibx->init_inbox(0, $skip_epoch, $skip_artnum);
 
 # needed for git prior to v2.1.0