about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-05-23 09:36:53 +0000
committer Eric Wong <e@80x24.org> 2019-05-23 17:43:50 +0000
commit 7a097dce831a9ad3a99e50013138e57a0def514e (patch)
tree 4e0c12f425205168e58bc83c3a4063c53d21ddc9
parent 0c6d38221ec9fbf9d7c7e0329252b10f17ab7a27 (diff)
download public-inbox-7a097dce831a9ad3a99e50013138e57a0def514e.tar.gz
Copying an entire Xapian DB is horribly slow, whether it's done
via Perl or copydatabase(1), so displaying some indication of
progress is good for the user experience.

While we're at it, prefix xapian-compact output, too, since
parallel processes otherwise end up clobbering each other's output.
-rw-r--r-- lib/PublicInbox/Xapcmd.pm 47
-rwxr-xr-x script/public-inbox-xcpdb 5
-rw-r--r-- t/indexlevels-mirror.t 4
3 files changed, 46 insertions, 10 deletions
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4555340a..99f0e7c1 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -82,9 +82,21 @@ sub prepare_reindex ($$) {
         }
 }
 
+sub progress_prepare ($) {
+        my ($opt) = @_;
+        if ($opt->{quiet}) {
+                open my $null, '>', '/dev/null' or
+                        die "failed to open /dev/null: $!\n";
+                $opt->{1} = fileno($null);
+                $opt->{-dev_null} = $null;
+        } else {
+                $opt->{-progress} = 1;
+        }
+}
+
 sub run {
         my ($ibx, $cmd, $env, $opt) = @_;
-        $opt ||= {};
+        progress_prepare($opt ||= {});
         my $dir = $ibx->{mainrepo} or die "no mainrepo in inbox\n";
         my $exe = $cmd->[0];
         my $pfx = $exe;
@@ -161,6 +173,8 @@ sub cpdb_retryable ($$) {
         0;
 }
 
+# Like copydatabase(1), this is horribly slow; and it doesn't seem due
+# to the overhead of Perl.
 sub cpdb {
         my ($args, $env, $opt) = @_;
         my ($old, $new) = @$args;
@@ -172,6 +186,7 @@ sub cpdb {
         my $creat = Search::Xapian::DB_CREATE();
         my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat);
         my ($it, $end);
+        my ($pfx, $nr, $tot, $fmt); # progress output
 
         do {
                 eval {
@@ -181,6 +196,13 @@ sub cpdb {
 
                         $it = $src->postlist_begin('');
                         $end = $src->postlist_end('');
+                        if ($opt->{-progress}) {
+                                $nr = 0;
+                                $pfx = (split('/', $old))[-1].':';
+                                $tot = $src->get_doccount;
+                                $fmt = "$pfx % ".length($tot)."u/$tot\n";
+                                warn "$pfx copying $tot documents\n";
+                        }
                 };
         } while (cpdb_retryable($src, $@));
 
@@ -191,6 +213,9 @@ sub cpdb {
                                 my $doc = $src->get_document($docid);
                                 $dst->replace_document($docid, $doc);
                                 $it->inc;
+                                if ($fmt && !(++$nr & 1023)) {
+                                        warn(sprintf($fmt, $nr));
+                                }
                         }
 
                         # unlike copydatabase(1), we don't copy spelling
@@ -200,10 +225,12 @@ sub cpdb {
                 };
         } while (cpdb_retryable($src, $@));
 
+        warn(sprintf($fmt, $nr)) if $fmt;
         return unless $opt->{compact};
 
         $src = $dst = undef; # flushes and closes
 
+        warn "$pfx compacting...\n" if $pfx;
         # this is probably the best place to do xapian-compact
         # since $dst isn't readable by HTTP or NNTP clients, yet:
         my $cmd = [ $XAPIAN_COMPACT, '--no-renumber', $tmp, $new ];
@@ -212,10 +239,22 @@ sub cpdb {
                 defined(my $dst = $opt->{$fd}) or next;
                 $rdr->{$fd} = $dst;
         }
+
+        my ($r, $w);
+        if ($pfx && pipe($r, $w)) {
+                $rdr->{1} = fileno($w);
+        }
         my $pid = spawn($cmd, $env, $rdr);
-        my $r = waitpid($pid, 0);
-        if ($? || $r != $pid) {
-                die join(' ', @$cmd)." failed: $? (pid=$pid, reaped=$r)\n";
+        if ($pfx) {
+                close $w or die "close: \$w: $!";
+                foreach (<$r>) {
+                        s/\r/\r$pfx /g;
+                        warn "$pfx $_";
+                }
+        }
+        my $rp = waitpid($pid, 0);
+        if ($? || $rp != $pid) {
+                die join(' ', @$cmd)." failed: $? (pid=$pid, reaped=$rp)\n";
         }
         remove_tree($tmp) or die "failed to remove $tmp: $!\n";
 }
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 78d37da2..5b66337b 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -9,12 +9,9 @@ use PublicInbox::Admin;
 PublicInbox::Admin::require_or_die('-search');
 my $usage = "Usage: public-inbox-xcpdb INBOX_DIR\n";
 my $opt = {};
-GetOptions($opt, qw(compact)) or die "bad command-line args\n$usage";
+GetOptions($opt, qw(compact quiet|q)) or die "bad command-line args\n$usage";
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage;
-
 my $cmd = [ \&PublicInbox::Xapcmd::cpdb ];
-open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n";
-$opt->{1} = fileno($null);
 foreach (@ibxs) {
         my $ibx = PublicInbox::InboxWritable->new($_);
         # we rely on --no-renumber to keep docids synched to NNTP
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 61053b66..57a776f7 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -18,7 +18,7 @@ foreach my $mod (qw(DBD::SQLite)) {
 
 my $path = 'blib/script';
 my $index = "$path/public-inbox-index";
-my $xcpdb = "$path/public-inbox-xcpdb";
+my @xcpdb = ("$path/public-inbox-xcpdb", '-q');
 
 my $mime = PublicInbox::MIME->create(
         header => [
@@ -110,7 +110,7 @@ sub import_index_incremental {
         $im->done;
 
         if ($level ne 'basic') {
-                is(system($xcpdb, $mirror), 0, "v$v xcpdb OK");
+                is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK");
                 delete $ro_mirror->{$_} for (qw(over search));
                 ($nr, $msgs) = $ro_mirror->search->query('m:m@2');
                 is($nr, 1, "v$v found m\@2 via Xapian on $level");