about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-29 20:17:19 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-29 20:17:48 +0000
commit e5c2e2588d7ad2243afeabad67b3c951c5b66643 (patch)
tree 76fa5a4687c53a73b0729b3e5268cbc17e6f977d
parent 11a7b5403d3d3dda8266efa374336ca344288cfe (diff)
download public-inbox-e5c2e2588d7ad2243afeabad67b3c951c5b66643.tar.gz
Having multiple Xapian partitions is mostly pointless after
the initial import.  We can compact all the partitions into
one while keeping the skeleton separate.
-rw-r--r-- Documentation/public-inbox-compact.pod 50
-rw-r--r-- MANIFEST 3
-rwxr-xr-x script/public-inbox-compact 94
-rw-r--r-- t/convert-compact.t 57
4 files changed, 204 insertions, 0 deletions
diff --git a/Documentation/public-inbox-compact.pod b/Documentation/public-inbox-compact.pod
new file mode 100644
index 00000000..4a519ce9
--- /dev/null
+++ b/Documentation/public-inbox-compact.pod
@@ -0,0 +1,50 @@
+=head1 NAME
+
+public-inbox-compact - compact Xapian DBs
+
+=head1 SYNOPSIS
+
+        public-inbox-compact INBOX_DIR
+
+=head1 DESCRIPTION
+
+public-inbox-compact is a wrapper for L<xapian-compact(1)>
+designed for "v2" inboxes.  It combines multiple Xapian
+partitions into one to reduce space overhead after an initial
+mass import (using multiple partitions) is done.
+
+It locks the inbox and prevents other processes such as
+L<public-inbox-watch(1)> from writing while it operates.
+
+It also supports "v1" (ssoma) inboxes, although it offers little
+benefit there over running L<xapian-compact(1)> directly.
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+The default config file, normally "~/.public-inbox/config".
+See L<public-inbox-config(5)>
+
+=back
+
+=head1 UPGRADING
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2018 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<xapian-compact(1)>, L<public-inbox-index(1)>
diff --git a/MANIFEST b/MANIFEST
index 1e48d3a9..ce6cd116 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -7,6 +7,7 @@ Documentation/design_notes.txt
 Documentation/design_www.txt
 Documentation/hosted.txt
 Documentation/include.mk
+Documentation/public-inbox-compact.pod
 Documentation/public-inbox-config.pod
 Documentation/public-inbox-convert.pod
 Documentation/public-inbox-daemon.pod
@@ -110,6 +111,7 @@ sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
 sa_config/user/.spamassassin/user_prefs
+script/public-inbox-compact
 script/public-inbox-convert
 script/public-inbox-httpd
 script/public-inbox-index
@@ -137,6 +139,7 @@ t/common.perl
 t/config.t
 t/config_limiter.t
 t/content_id.t
+t/convert-compact.t
 t/emergency.t
 t/fail-bin/spamc
 t/feed.t
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
new file mode 100755
index 00000000..016873d3
--- /dev/null
+++ b/script/public-inbox-compact
@@ -0,0 +1,121 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Compact the Xapian DB(s) of the inbox directory given as the first
+# argument.  v2 inboxes get all numbered partitions merged into one
+# ("0") with "skel" compacted separately; v1 inboxes get their single
+# Xapian DB compacted.  The inbox lock is held from before
+# xapian-compact runs until the result is renamed into place.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::V2Writable;
+use PublicInbox::Search;
+use PublicInbox::Config;
+use Cwd 'abs_path';
+use File::Temp qw(tempdir);
+use File::Path qw(remove_tree);
+use PublicInbox::Spawn qw(spawn);
+my $usage = "Usage: public-inbox-compact REPO_DIR\n";
+my $arg = shift or die $usage;
+
+# Canonicalize the argument so that a relative path (or one containing
+# symlinks) can still match the abs_path() of a configured mainrepo.
+my $dir = abs_path($arg);
+defined $dir or die "E: unable to resolve $arg: $!\n";
+my $config = PublicInbox::Config->new;
+my $ibx;
+$config->each_inbox(sub {
+        # abs_path() may return undef, e.g. if a configured mainrepo is gone
+        my $repo = abs_path($_[0]->{mainrepo});
+        $ibx = $_[0] if defined $repo && $repo eq $dir;
+});
+unless ($ibx) {
+        # not found in the config, fall back to a throwaway inbox object
+        warn "W: $dir not configured in ".
+                PublicInbox::Config::default_file() . "\n";
+        $ibx = {
+                mainrepo => $dir,
+                name => 'ignored',
+                address => [ 'old@example.com' ],
+        };
+        $ibx = PublicInbox::Inbox->new($ibx);
+}
+my $v = ($ibx->{version} || 1);
+if ($v == 2) {
+        require PublicInbox::V2Writable;
+        my $v2w = PublicInbox::V2Writable->new($ibx);
+        my $xap_v = 'xap'.PublicInbox::Search::SCHEMA_VERSION;
+        my $xroot = "$ibx->{mainrepo}/$xap_v";
+        opendir my $dh, $xroot or die "Failed to opendir $xroot: $!\n";
+        $v2w->lock_acquire;
+
+        # the new DBs are built inside mainrepo, next to $xroot;
+        # the rename() calls below cannot cross filesystems
+        my $new = tempdir(CLEANUP => 1, DIR => $ibx->{mainrepo});
+        my @parts;
+        my $skel;
+        while (defined(my $dn = readdir($dh))) {
+                if ($dn =~ /\A\d+\z/) {
+                        push @parts, "$xroot/$dn";
+                } elsif ($dn eq 'skel') {
+                        $skel = "$xroot/$dn";
+                } elsif ($dn eq '.' || $dn eq '..') {
+                } else {
+                        warn "W: skipping unknown Xapian DB: $xroot/$dn\n";
+                }
+        }
+        closedir $dh; # close() is for file handles, not directory handles
+        my %pids;
+        if (@parts) {
+                # merge every numbered partition into a single partition 0
+                my $pid = spawn([ qw(xapian-compact), @parts, "$new/0" ]);
+                defined $pid or die "compact failed: $!\n";
+                $pids{$pid} = 'xapian-compact (parts)';
+        } else {
+                warn "No parts found in $xroot\n";
+        }
+        if (defined $skel) {
+                my $pid = spawn([ qw(xapian-compact), $skel, "$new/skel" ]);
+                defined $pid or die "compact failed: $!\n";
+                $pids{$pid} = 'xapian-compact (skel)';
+        } else {
+                warn "$xroot/skel missing\n";
+        }
+        die "No xapian-compact processes running\n" unless scalar keys %pids;
+
+        # both compactions run in parallel; reap them in whatever order
+        # they finish and give up on the first failure
+        while (scalar keys %pids) {
+                my $pid = waitpid(-1, 0);
+                $pid > 0 or die "waitpid failed: $!\n";
+                my $desc = delete $pids{$pid};
+                defined $desc or next; # not a child spawned above
+                die "$desc failed: $?\n" if $?;
+        }
+
+        # stash the old DBs inside the new directory, then move the new
+        # directory into place; $xroot is missing only between the two
+        rename($xroot, "$new/old") or die "rename $xroot => $new/old: $!\n";
+        rename($new, $xroot) or die "rename $new => $xroot: $!\n";
+        $v2w->lock_release;
+        remove_tree("$xroot/old") or die "failed to remove $xroot/old: $!\n";
+} elsif ($v == 1) {
+        require PublicInbox::Import;
+        my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+        my $xap_v = 'xapian'.PublicInbox::Search::SCHEMA_VERSION;
+        my $v1_root = "$ibx->{mainrepo}/public-inbox";
+        my $old = "$v1_root/$xap_v";
+        -d $old or die "$old does not exist\n";
+        my $new = tempdir(CLEANUP => 1, DIR => $v1_root);
+        $im->lock_acquire;
+        PublicInbox::Import::run_die([ qw(xapian-compact), $old, $new ]);
+        # same swap as the v2 branch: old DB goes inside the new one first
+        rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
+        rename($new, $old) or die "rename $new => $old: $!\n";
+        $im->lock_release;
+        remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+} else {
+        die "Unsupported inbox version: $v\n";
+}
diff --git a/t/convert-compact.t b/t/convert-compact.t
new file mode 100644
index 00000000..922ec9c2
--- /dev/null
+++ b/t/convert-compact.t
@@ -0,0 +1,68 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# End-to-end test: index a one-message v1 inbox, compact it, convert it
+# to v2 and compact that too, then check a single partition remains.
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+my @mods = qw(DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) {
+        eval "require $mod";
+        plan skip_all => "$mod missing for convert-compact.t" if $@;
+}
+# public-inbox-compact runs xapian-compact(1), skip if it is not in PATH
+my @xcompact = grep { -x "$_/xapian-compact" } split(/:/, $ENV{PATH});
+plan skip_all => 'xapian-compact missing for convert-compact.t'
+        unless @xcompact;
+# load at runtime: a compile-time "use" would run before the skip_all
+# checks above and could die when an optional module is missing
+require PublicInbox::V2Writable;
+require PublicInbox::Import;
+my $tmpdir = tempdir('convert-compact-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $ibx = {
+        mainrepo => "$tmpdir/v1",
+        name => 'test-v1',
+        -primary_address => 'test@example.com',
+};
+
+ok(PublicInbox::Import::run_die([qw(git init --bare -q), $ibx->{mainrepo}]),
+        'initialized v1 repo');
+$ibx = PublicInbox::Inbox->new($ibx);
+my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+my $mime = PublicInbox::MIME->create(
+        header => [
+                From => 'a@example.com',
+                To => 'test@example.com',
+                Subject => 'this is a subject',
+                'Message-ID' => '<a-mid@b>',
+                Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+        ],
+        body => "hello world\n",
+);
+ok($im->add($mime), 'added one message');
+$im->done;
+PublicInbox::SearchIdx->new($ibx, 1)->index_sync;
+
+# prefer scripts from blib/script; $rdr maps fds 1 and 2 to log files
+local $ENV{PATH} = "blib/script:$ENV{PATH}";
+open my $err, '>>', "$tmpdir/err.log" or die "open: err.log $!\n";
+open my $out, '>>', "$tmpdir/out.log" or die "open: out.log $!\n";
+my $rdr = { 1 => fileno($out), 2 => fileno($err) };
+
+my $cmd = [ 'public-inbox-compact', $ibx->{mainrepo} ];
+ok(PublicInbox::Import::run_die($cmd, undef, $rdr), 'v1 compact works');
+
+$cmd = [ 'public-inbox-convert', $ibx->{mainrepo}, "$tmpdir/v2" ];
+ok(PublicInbox::Import::run_die($cmd, undef, $rdr), 'convert works');
+
+$cmd = [ 'public-inbox-compact', "$tmpdir/v2" ];
+my $env = { NPROC => 2 };
+ok(PublicInbox::Import::run_die($cmd, $env, $rdr), 'v2 compact works');
+$ibx->{mainrepo} = "$tmpdir/v2";
+my $v2w = PublicInbox::V2Writable->new($ibx);
+is($v2w->{partitions}, 1, "only one partition in compacted repo");
+
+done_testing();