From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 65158205D7 for ; Thu, 23 May 2019 09:37:10 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 15/26] xcpdb: implement progress reporting Date: Thu, 23 May 2019 09:36:53 +0000 Message-Id: <20190523093704.18367-16-e@80x24.org> In-Reply-To: <20190523093704.18367-1-e@80x24.org> References: <20190523093704.18367-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Copying an entire Xapian DB is horribly slow whether it's done via Perl or copydatabase(1). So displaying some progress indication is good for user experience. While we're at it, prefix xapian-compact output, too, since parallel processes end up clobbering each other. 
--- lib/PublicInbox/Xapcmd.pm | 47 +++++++++++++++++++++++++++++++++++---- script/public-inbox-xcpdb | 5 +---- t/indexlevels-mirror.t | 4 ++-- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 4555340..99f0e7c 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -82,9 +82,21 @@ sub prepare_reindex ($$) { } } +sub progress_prepare ($) { + my ($opt) = @_; + if ($opt->{quiet}) { + open my $null, '>', '/dev/null' or + die "failed to open /dev/null: $!\n"; + $opt->{1} = fileno($null); + $opt->{-dev_null} = $null; + } else { + $opt->{-progress} = 1; + } +} + sub run { my ($ibx, $cmd, $env, $opt) = @_; - $opt ||= {}; + progress_prepare($opt ||= {}); my $dir = $ibx->{mainrepo} or die "no mainrepo in inbox\n"; my $exe = $cmd->[0]; my $pfx = $exe; @@ -161,6 +173,8 @@ sub cpdb_retryable ($$) { 0; } +# Like copydatabase(1), this is horribly slow; and it doesn't seem due +# to the overhead of Perl. sub cpdb { my ($args, $env, $opt) = @_; my ($old, $new) = @$args; @@ -172,6 +186,7 @@ sub cpdb { my $creat = Search::Xapian::DB_CREATE(); my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat); my ($it, $end); + my ($pfx, $nr, $tot, $fmt); # progress output do { eval { @@ -181,6 +196,13 @@ sub cpdb { $it = $src->postlist_begin(''); $end = $src->postlist_end(''); + if ($opt->{-progress}) { + $nr = 0; + $pfx = (split('/', $old))[-1].':'; + $tot = $src->get_doccount; + $fmt = "$pfx % ".length($tot)."u/$tot\n"; + warn "$pfx copying $tot documents\n"; + } }; } while (cpdb_retryable($src, $@)); @@ -191,6 +213,9 @@ sub cpdb { my $doc = $src->get_document($docid); $dst->replace_document($docid, $doc); $it->inc; + if ($fmt && !(++$nr & 1023)) { + warn(sprintf($fmt, $nr)); + } } # unlike copydatabase(1), we don't copy spelling @@ -200,10 +225,12 @@ sub cpdb { }; } while (cpdb_retryable($src, $@)); + warn(sprintf($fmt, $nr)) if $fmt; return unless $opt->{compact}; $src = $dst = undef; # flushes 
and closes + warn "$pfx compacting...\n" if $pfx; # this is probably the best place to do xapian-compact # since $dst isn't readable by HTTP or NNTP clients, yet: my $cmd = [ $XAPIAN_COMPACT, '--no-renumber', $tmp, $new ]; @@ -212,10 +239,22 @@ sub cpdb { defined(my $dst = $opt->{$fd}) or next; $rdr->{$fd} = $dst; } + + my ($r, $w); + if ($pfx && pipe($r, $w)) { + $rdr->{1} = fileno($w); + } my $pid = spawn($cmd, $env, $rdr); - my $r = waitpid($pid, 0); - if ($? || $r != $pid) { - die join(' ', @$cmd)." failed: $? (pid=$pid, reaped=$r)\n"; + if ($pfx) { + close $w or die "close: \$w: $!"; + foreach (<$r>) { + s/\r/\r$pfx /g; + warn "$pfx $_"; + } + } + my $rp = waitpid($pid, 0); + if ($? || $rp != $pid) { + die join(' ', @$cmd)." failed: $? (pid=$pid, reaped=$rp)\n"; } remove_tree($tmp) or die "failed to remove $tmp: $!\n"; } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 78d37da..5b66337 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -9,12 +9,9 @@ use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); my $usage = "Usage: public-inbox-xcpdb INBOX_DIR\n"; my $opt = {}; -GetOptions($opt, qw(compact)) or die "bad command-line args\n$usage"; +GetOptions($opt, qw(compact quiet|q)) or die "bad command-line args\n$usage"; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; - my $cmd = [ \&PublicInbox::Xapcmd::cpdb ]; -open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n"; -$opt->{1} = fileno($null); foreach (@ibxs) { my $ibx = PublicInbox::InboxWritable->new($_); # we rely on --no-renumber to keep docids synched to NNTP diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 61053b6..57a776f 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -18,7 +18,7 @@ foreach my $mod (qw(DBD::SQLite)) { my $path = 'blib/script'; my $index = "$path/public-inbox-index"; -my $xcpdb = "$path/public-inbox-xcpdb"; +my @xcpdb = ("$path/public-inbox-xcpdb", 
'-q'); my $mime = PublicInbox::MIME->create( header => [ @@ -110,7 +110,7 @@ sub import_index_incremental { $im->done; if ($level ne 'basic') { - is(system($xcpdb, $mirror), 0, "v$v xcpdb OK"); + is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK"); delete $ro_mirror->{$_} for (qw(over search)); ($nr, $msgs) = $ro_mirror->search->query('m:m@2'); is($nr, 1, "v$v found m\@2 via Xapian on $level"); -- EW