From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 241C420605 for ; Thu, 23 May 2019 09:37:09 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 12/26] xapcmd: xcpdb supports compaction Date: Thu, 23 May 2019 09:36:50 +0000 Message-Id: <20190523093704.18367-13-e@80x24.org> In-Reply-To: <20190523093704.18367-1-e@80x24.org> References: <20190523093704.18367-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: To minimize the delay on active inboxes, it's actually ideal to run xapian-compact at the end of the per-partition cpdb process, since the new DB isn't accessible yet and we don't have to deal with lock contention with -mda or -watch processes. The downside is the temporary file overhead required (3x instead of 2x). --- lib/PublicInbox/Xapcmd.pm | 34 ++++++++++++++++++++++++++++++++-- script/public-inbox-xcpdb | 8 ++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index ca74ea0..d2de874 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -8,6 +8,10 @@ use PublicInbox::Over; use File::Temp qw(tempdir); use File::Path qw(remove_tree); +# support testing with dev versions of Xapian which installs +# commands with a version number suffix (e.g. 
"xapian-compact-1.5") +our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact'; + sub commit_changes ($$$) { my ($im, $old, $new) = @_; my @st = stat($old) or die "failed to stat($old): $!\n"; @@ -38,17 +42,23 @@ sub xspawn { } } +sub runnable_or_die ($) { + my ($exe) = @_; + which($exe) or die "$exe not found in PATH\n"; +} + sub run { my ($ibx, $cmd, $env, $opt) = @_; $opt ||= {}; my $dir = $ibx->{mainrepo} or die "no mainrepo in inbox\n"; my $exe = $cmd->[0]; my $pfx = $exe; + runnable_or_die($XAPIAN_COMPACT) if $opt->{compact}; if (ref($exe) eq 'CODE') { $pfx = 'CODE'; require Search::Xapian::WritableDatabase; } else { - which($exe) or die "$exe not found in PATH\n"; + runnable_or_die($exe); } $ibx->umask_prepare; my $old = $ibx->search->xdir(1); @@ -107,11 +117,12 @@ sub cpdb { my ($args, $env, $opt) = @_; my ($old, $new) = @$args; my $src = Search::Xapian::Database->new($old); + my $tmp = $opt->{compact} ? "$new.compact" : $new; # like copydatabase(1), be sure we don't overwrite anything in case # of other bugs: my $creat = Search::Xapian::DB_CREATE(); - my $dst = Search::Xapian::WritableDatabase->new($new, $creat); + my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat); my ($it, $end); do { @@ -140,6 +151,25 @@ sub cpdb { # (and public-inbox does not use those features) }; } while (cpdb_retryable($src, $@)); + + return unless $opt->{compact}; + + $src = $dst = undef; # flushes and closes + + # this is probably the best place to do xapian-compact + # since $dst isn't readable by HTTP or NNTP clients, yet: + my $cmd = [ $XAPIAN_COMPACT, '--no-renumber', $tmp, $new ]; + my $rdr = {}; + foreach my $fd (0..2) { + defined(my $dst = $opt->{$fd}) or next; + $rdr->{$fd} = $dst; + } + my $pid = spawn($cmd, $env, $rdr); + my $r = waitpid($pid, 0); + if ($? || $r != $pid) { + die join(' ', @$cmd)." failed: $? 
(pid=$pid, reaped=$r)\n"; + } + remove_tree($tmp) or die "failed to remove $tmp: $!\n"; } 1; diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index d494991..78d37da 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -2,17 +2,21 @@ # Copyright (C) 2019 all contributors # License: AGPL-3.0+ # xcpdb: Xapian copy database, a wrapper around Xapian's copydatabase(1) +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::InboxWritable; use PublicInbox::Xapcmd; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); my $usage = "Usage: public-inbox-xcpdb INBOX_DIR\n"; +my $opt = {}; +GetOptions($opt, qw(compact)) or die "bad command-line args\n$usage"; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; + my $cmd = [ \&PublicInbox::Xapcmd::cpdb ]; open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n"; -my $rdr = { 1 => fileno($null) }; +$opt->{1} = fileno($null); foreach (@ibxs) { my $ibx = PublicInbox::InboxWritable->new($_); # we rely on --no-renumber to keep docids synched to NNTP - PublicInbox::Xapcmd::run($ibx, $cmd, undef, $rdr); + PublicInbox::Xapcmd::run($ibx, $cmd, undef, $opt); } -- EW