From e5c2e2588d7ad2243afeabad67b3c951c5b66643 Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Thu, 29 Mar 2018 20:17:19 +0000
Subject: public-inbox-compact: new tool for driving xapian-compact

Having multiple Xapian partitions is mostly pointless after the
initial import. We can compact all the partitions into one while
keeping the skeleton separate.
---
 Documentation/public-inbox-compact.pod | 50 ++++++++++++++++++
 MANIFEST                               |  3 ++
 script/public-inbox-compact            | 94 ++++++++++++++++++++++++++++++++++
 t/convert-compact.t                    | 57 +++++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 Documentation/public-inbox-compact.pod
 create mode 100755 script/public-inbox-compact
 create mode 100644 t/convert-compact.t

diff --git a/Documentation/public-inbox-compact.pod b/Documentation/public-inbox-compact.pod
new file mode 100644
index 00000000..4a519ce9
--- /dev/null
+++ b/Documentation/public-inbox-compact.pod
@@ -0,0 +1,50 @@
+=head1 NAME
+
+public-inbox-compact - compact Xapian DBs
+
+=head1 SYNOPSIS
+
+	public-inbox-compact INBOX_DIR
+
+=head1 DESCRIPTION
+
+public-inbox-compact is a wrapper for L<xapian-compact(1)>
+designed for "v2" inboxes. It combines multiple Xapian
+partitions into one to reduce space overhead after an initial
+mass import (using multiple partitions) is done.
+
+It locks the inbox and prevents other processes such as
+L<public-inbox-watch(1)> from writing while it operates.
+
+It also supports "v1" (ssoma) inboxes with limited
+usefulness over L<xapian-compact(1)>
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+The default config file, normally "~/.public-inbox/config".
+See L<public-inbox-config(5)>
+
+=back
+
+=head1 UPGRADING
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2018 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<xapian-compact(1)>, L<public-inbox-index(1)>
diff --git a/MANIFEST b/MANIFEST
index 1e48d3a9..ce6cd116 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -7,6 +7,7 @@ Documentation/design_notes.txt
 Documentation/design_www.txt
 Documentation/hosted.txt
 Documentation/include.mk
+Documentation/public-inbox-compact.pod
 Documentation/public-inbox-config.pod
 Documentation/public-inbox-convert.pod
 Documentation/public-inbox-daemon.pod
@@ -110,6 +111,7 @@ sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
 sa_config/user/.spamassassin/user_prefs
+script/public-inbox-compact
 script/public-inbox-convert
 script/public-inbox-httpd
 script/public-inbox-index
@@ -137,6 +139,7 @@ t/common.perl
 t/config.t
 t/config_limiter.t
 t/content_id.t
+t/convert-compact.t
 t/emergency.t
 t/fail-bin/spamc
 t/feed.t
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
new file mode 100755
index 00000000..016873d3
--- /dev/null
+++ b/script/public-inbox-compact
@@ -0,0 +1,94 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::V2Writable;
+use PublicInbox::Search;
+use PublicInbox::Config;
+use Cwd 'abs_path';
+use File::Temp qw(tempdir);
+use File::Path qw(remove_tree);
+use PublicInbox::Spawn qw(spawn);
+my $usage = "Usage: public-inbox-compact REPO_DIR\n";
+my $dir = shift or die $usage; # inbox directory is the only argument
+my $config = PublicInbox::Config->new;
+my $ibx;
+$config->each_inbox(sub { # compare resolved paths so a relative $dir matches
+	$ibx = $_[0] if abs_path($_[0]->{mainrepo}) eq (abs_path($dir) || $dir)
+});
+unless ($ibx) { # unconfigured: fall back to a placeholder inbox for $dir
+	warn "W: $dir not configured in ".
+		PublicInbox::Config::default_file() . "\n";
+	$ibx = {
+		mainrepo => $dir,
+		name => 'ignored',
+		address => [ 'old@example.com' ],
+	};
+	$ibx = PublicInbox::Inbox->new($ibx);
+}
+my $v = ($ibx->{version} || 1); # inboxes without a version are treated as v1
+if ($v == 2) {
+	require PublicInbox::V2Writable;
+	my $v2w = PublicInbox::V2Writable->new($ibx);
+	my $xap_v = 'xap'.PublicInbox::Search::SCHEMA_VERSION;
+	my $xroot = "$ibx->{mainrepo}/$xap_v";
+	opendir my $dh, $xroot or die "Failed to opendir $xroot: $!\n";
+	$v2w->lock_acquire; # held until the compacted tree is swapped in
+	my $new = tempdir(CLEANUP => 1, DIR => $ibx->{mainrepo});
+	my @parts;
+	my $skel;
+	while (defined(my $dn = readdir($dh))) {
+		if ($dn =~ /\A\d+\z/) { # numbered partition
+			push @parts, "$xroot/$dn";
+		} elsif ($dn eq 'skel') {
+			$skel = "$xroot/$dn";
+		} elsif ($dn eq '.' || $dn eq '..') {
+		} else {
+			warn "W: skipping unknown Xapian DB: $xroot/$dn\n";
+		}
+	}
+	closedir $dh; # directory handles need closedir, not close
+	my %pids; # pid => description of each running xapian-compact
+	if (@parts) { # merge every numbered partition into a single "0"
+		my $pid = spawn([ qw(xapian-compact), @parts, "$new/0" ]);
+		defined $pid or die "compact failed: $!\n"; # errno, not $?
+		$pids{$pid} = 'xapian-compact (parts)';
+	} else {
+		warn "No parts found in $xroot\n";
+	}
+	if (defined $skel) { # skel is compacted on its own, not merged
+		my $pid = spawn([ qw(xapian-compact), $skel, "$new/skel" ]);
+		defined $pid or die "compact failed: $!\n"; # errno, not $?
+		$pids{$pid} = 'xapian-compact (skel)';
+	} else {
+		warn "$xroot/skel missing\n";
+	}
+	die "No xapian-compact processes running\n" unless scalar keys %pids;
+	while (scalar keys %pids) { # reap children; any failure is fatal
+		my $pid = waitpid(-1, 0);
+		my $desc = delete $pids{$pid};
+		die "$desc failed: $?\n" if $?;
+	}
+	rename($xroot, "$new/old") or die "rename $xroot => $new/old: $!\n";
+	rename($new, $xroot) or die "rename $new => $xroot: $!\n";
+	$v2w->lock_release; # previous DBs now live in $xroot/old
+	remove_tree("$xroot/old") or die "failed to remove $xroot/old: $!\n";
+} elsif ($v == 1) {
+	require PublicInbox::Import;
+	my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+	my $xap_v = 'xapian'.PublicInbox::Search::SCHEMA_VERSION;
+	my $v1_root = "$ibx->{mainrepo}/public-inbox";
+	my $old = "$v1_root/$xap_v";
+	-d $old or die "$old does not exist\n";
+	my $new = tempdir(CLEANUP => 1, DIR => $v1_root);
+	$im->lock_acquire; # held across the compaction and both renames
+	PublicInbox::Import::run_die([ qw(xapian-compact), $old, $new ]);
+	rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
+	rename($new, $old) or die "rename $new => $old: $!\n";
+	$im->lock_release; # previous DB now lives in $old/old
+	remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+} else {
+	die "Unsupported inbox version: $v\n";
+}
diff --git a/t/convert-compact.t b/t/convert-compact.t
new file mode 100644
index 00000000..922ec9c2
--- /dev/null
+++ b/t/convert-compact.t
@@ -0,0 +1,57 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+my @mods = qw(DBD::SQLite Search::Xapian); # skip the whole test without these
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for convert-compact.t" if $@;
+}
+use PublicInbox::V2Writable;
+use PublicInbox::Import;
+my $tmpdir = tempdir('convert-compact-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $ibx = {
+	mainrepo => "$tmpdir/v1",
+	name => 'test-v1',
+	-primary_address => 'test@example.com',
+};
+
+ok(PublicInbox::Import::run_die([qw(git init --bare -q), $ibx->{mainrepo}]),
+	'initialized v1 repo');
+$ibx = PublicInbox::Inbox->new($ibx);
+my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+my $mime = PublicInbox::MIME->create(
+	header => [
+		From => 'a@example.com',
+		To => 'test@example.com',
+		Subject => 'this is a subject',
+		'Message-ID' => '<a-mid@b>',
+		Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+	],
+	body => "hello world\n",
+);
+ok($im->add($mime), 'added one message');
+$im->done;
+PublicInbox::SearchIdx->new($ibx, 1)->index_sync;
+local $ENV{PATH} = "blib/script:$ENV{PATH}"; # find the scripts under blib/
+open my $err, '>>', "$tmpdir/err.log" or die "open: err.log $!\n";
+open my $out, '>>', "$tmpdir/out.log" or die "open: out.log $!\n";
+my $rdr = { 1 => fileno($out), 2 => fileno($err) }; # fd 1/2 => the log files
+
+my $cmd = [ 'public-inbox-compact', $ibx->{mainrepo} ];
+ok(PublicInbox::Import::run_die($cmd, undef, $rdr), 'v1 compact works');
+
+my $v2_dir = "$tmpdir/v2"; # where the converted v2 inbox is written
+$cmd = [ 'public-inbox-convert', $ibx->{mainrepo}, $v2_dir ];
+ok(PublicInbox::Import::run_die($cmd, undef, $rdr), 'convert works');
+$cmd = [ 'public-inbox-compact', $v2_dir ];
+ok(PublicInbox::Import::run_die($cmd, { NPROC => 2 }, $rdr),
+	'v2 compact works');
+$ibx->{mainrepo} = $v2_dir; # reuse the inbox object against the v2 tree
+my $v2w = PublicInbox::V2Writable->new($ibx);
+is($v2w->{partitions}, 1, "only one partition in compacted repo");
+
+done_testing();
-- 
cgit v1.2.3-24-ge0c7