about summary refs log tree commit homepage
path: root/lib/PublicInbox/Xapcmd.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-05-23 09:36:52 +0000
committerEric Wong <e@80x24.org>2019-05-23 17:43:50 +0000
commit0c6d38221ec9fbf9d7c7e0329252b10f17ab7a27 (patch)
treeec8826ca0cfa8c63a636cacc4ba35fe7e658975b /lib/PublicInbox/Xapcmd.pm
parenta1c3fb1bbef319a0af8ab63380495984c3a4ee18 (diff)
downloadpublic-inbox-0c6d38221ec9fbf9d7c7e0329252b10f17ab7a27.tar.gz
Copying an entire Xapian DB takes a long time, so update our
reindexing code to support partial reindexing, snapshot the
pre-copydatabase git revisions, perform the lengthy copy,
and do a partial reindex when the copy + renames are done.
Diffstat (limited to 'lib/PublicInbox/Xapcmd.pm')
-rw-r--r--lib/PublicInbox/Xapcmd.pm58
1 files changed, 53 insertions, 5 deletions
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index d2de8743..4555340a 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -5,6 +5,7 @@ use strict;
 use warnings;
 use PublicInbox::Spawn qw(which spawn);
 use PublicInbox::Over;
+use PublicInbox::Search;
 use File::Temp qw(tempdir);
 use File::Path qw(remove_tree);
 
@@ -12,20 +13,33 @@ use File::Path qw(remove_tree);
 # commands with a version number suffix (e.g. "xapian-compact-1.5")
 our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact';
 
-sub commit_changes ($$$) {
-        my ($im, $old, $new) = @_;
+sub commit_changes ($$$$) {
+        my ($ibx, $old, $new, $opt) = @_;
+
+        my $reindex = $opt->{reindex};
+        my $im = $ibx->importer(0);
+        $im->lock_acquire if $reindex;
+
         my @st = stat($old) or die "failed to stat($old): $!\n";
 
         my $over = "$old/over.sqlite3";
         if (-f $over) {
                 $over = PublicInbox::Over->new($over);
                 $over->connect->sqlite_backup_to_file("$new/over.sqlite3");
+                $over = undef;
         }
         rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
         chmod($st[2] & 07777, $new) or die "chmod $old: $!\n";
         rename($new, $old) or die "rename $new => $old: $!\n";
-        $im->lock_release;
         remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+
+        if ($reindex) {
+                $opt->{-skip_lock} = 1;
+                PublicInbox::Admin::index_inbox($ibx, $opt);
+                # implicit lock_release
+        } else {
+                $im->lock_release;
+        }
 }
 
 sub xspawn {
@@ -47,6 +61,27 @@ sub runnable_or_die ($) {
         which($exe) or die "$exe not found in PATH\n";
 }
 
+sub prepare_reindex ($$) {
+        my ($ibx, $reindex) = @_;
+        if ($ibx->{version} == 1) {
+                my $dir = $ibx->search->xdir(1);
+                my $xdb = Search::Xapian::Database->new($dir);
+                if (my $lc = $xdb->get_metadata('last_commit')) {
+                        $reindex->{from} = $lc;
+                }
+        } else { # v2
+                my $v2w = $ibx->importer(0);
+                my $max;
+                $v2w->git_dir_latest(\$max) or return;
+                my $from = $reindex->{from};
+                my $mm = $ibx->mm;
+                my $v = PublicInbox::Search::SCHEMA_VERSION();
+                foreach my $i (0..$max) {
+                        $from->[$i] = $mm->last_commit_xap($v, $i);
+                }
+        }
+}
+
 sub run {
         my ($ibx, $cmd, $env, $opt) = @_;
         $opt ||= {};
@@ -54,8 +89,14 @@ sub run {
         my $exe = $cmd->[0];
         my $pfx = $exe;
         runnable_or_die($XAPIAN_COMPACT) if $opt->{compact};
+
+        my $reindex; # v1:{ from => $x40 }, v2:{ from => [ $x40, $x40, .. ] } }
+        my $from; # per-epoch ranges
+
         if (ref($exe) eq 'CODE') {
                 $pfx = 'CODE';
+                $reindex = $opt->{reindex} = {};
+                $from = $reindex->{from} = [];
                 require Search::Xapian::WritableDatabase;
         } else {
                 runnable_or_die($exe);
@@ -64,7 +105,7 @@ sub run {
         my $old = $ibx->search->xdir(1);
         -d $old or die "$old does not exist\n";
         my $new = tempdir("$pfx-XXXXXXXX", DIR => $dir);
-        my $v = $ibx->{version} || 1;
+        my $v = $ibx->{version} ||= 1;
         my @cmds;
         if ($v == 1) {
                 push @cmds, [@$cmd, $old, $new];
@@ -85,6 +126,13 @@ sub run {
         my $max = $opt->{jobs} || scalar(@cmds);
         $ibx->with_umask(sub {
                 $im->lock_acquire;
+
+                # fine-grained locking if we prepare for reindex
+                if ($reindex) {
+                        prepare_reindex($ibx, $reindex);
+                        $im->lock_release;
+                }
+                delete($ibx->{$_}) for (qw(mm over search)); # cleanup
                 my %pids;
                 while (@cmds) {
                         while (scalar(keys(%pids)) < $max && scalar(@cmds)) {
@@ -98,7 +146,7 @@ sub run {
                                 die join(' ', @$x)." failed: $?\n" if $?;
                         }
                 }
-                commit_changes($im, $old, $new);
+                commit_changes($ibx, $old, $new, $opt);
         });
 }