* [PATCH 14/26] xcpdb: use fine-grained locking
2019-05-23 9:36 7% [PATCH 00/26] xcpdb: ease Xapian DB format migrations Eric Wong
@ 2019-05-23 9:36 6% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2019-05-23 9:36 UTC (permalink / raw)
To: meta
Copying an entire Xapian DB takes a long time, so update our
reindexing code to support partial reindexing, snapshot the
pre-copydatabase git revisions, perform the lengthy copy,
and do a partial reindex when the copy + renames are done.
---
lib/PublicInbox/Admin.pm | 2 +-
lib/PublicInbox/SearchIdx.pm | 10 +++++-
lib/PublicInbox/V2Writable.pm | 21 ++++++++++---
lib/PublicInbox/Xapcmd.pm | 58 ++++++++++++++++++++++++++++++++---
4 files changed, 80 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 94f47ab..34aa312 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -141,7 +141,7 @@ sub index_inbox {
if (ref($ibx) && ($ibx->{version} || 1) == 2) {
eval { require PublicInbox::V2Writable };
die "v2 requirements not met: $@\n" if $@;
- my $v2w = eval {
+ my $v2w = eval { $ibx->importer(0) } || eval {
PublicInbox::V2Writable->new($ibx, {nproc=>$jobs});
};
if (defined $jobs) {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 114420e..0aeeb6b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -542,8 +542,10 @@ sub do_cat_mail {
$@ ? undef : $mime;
}
+# called by public-inbox-index
sub index_sync {
my ($self, $opts) = @_;
+ delete $self->{lock_path} if $opts->{-skip_lock};
$self->{-inbox}->with_umask(sub { $self->_index_sync($opts) })
}
@@ -692,6 +694,12 @@ sub _last_x_commit {
$lx;
}
+sub reindex_from ($$) {
+ my ($reindex, $last_commit) = @_;
+ return $last_commit unless $reindex;
+ ref($reindex) eq 'HASH' ? $reindex->{from} : '';
+}
+
# indexes all unindexed messages (v1 only)
sub _index_sync {
my ($self, $opts) = @_;
@@ -705,7 +713,7 @@ sub _index_sync {
do {
$xlog = undef;
$last_commit = _last_x_commit($self, $mm);
- $lx = $opts->{reindex} ? '' : $last_commit;
+ $lx = reindex_from($opts->{reindex}, $last_commit);
$self->{over}->rollback_lazy;
$self->{over}->disconnect;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 3dd606e..1ee19b2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -238,7 +238,7 @@ sub idx_part {
# idempotent
sub idx_init {
- my ($self) = @_;
+ my ($self, $opt) = @_;
return if $self->{idx_parts};
my $ibx = $self->{-inbox};
@@ -264,7 +264,7 @@ sub idx_init {
my $over = $self->{over};
$ibx->umask_prepare;
$ibx->with_umask(sub {
- $self->lock_acquire;
+ $self->lock_acquire unless ($opt && $opt->{-skip_lock});
$over->create;
# -compact can change partition count while -watch is idle
@@ -924,6 +924,19 @@ sub unindex {
qw(-c gc.reflogExpire=now gc --prune=all)]);
}
+sub index_ranges ($$$) {
+ my ($self, $reindex, $epoch_max) = @_;
+ return last_commits($self, $epoch_max) unless $reindex;
+
+ return [] if ref($reindex) ne 'HASH';
+
+ my $ranges = $reindex->{from}; # arrayref;
+ if (ref($ranges) ne 'ARRAY') {
+ die 'BUG: $reindex->{from} not an ARRAY';
+ }
+ $ranges;
+}
+
# called for public-inbox-index
sub index_sync {
my ($self, $opts) = @_;
@@ -931,10 +944,10 @@ sub index_sync {
my $epoch_max;
my $latest = git_dir_latest($self, \$epoch_max);
return unless defined $latest;
- $self->idx_init; # acquire lock
+ $self->idx_init($opts); # acquire lock
my $mm_tmp = $self->{mm}->tmp_clone;
my $reindex = $opts->{reindex};
- my $ranges = $reindex ? [] : $self->last_commits($epoch_max);
+ my $ranges = index_ranges($self, $reindex, $epoch_max);
my $high = $self->{mm}->num_highwater();
my $regen = $self->index_prepare($opts, $epoch_max, $ranges);
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index d2de874..4555340 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -5,6 +5,7 @@ use strict;
use warnings;
use PublicInbox::Spawn qw(which spawn);
use PublicInbox::Over;
+use PublicInbox::Search;
use File::Temp qw(tempdir);
use File::Path qw(remove_tree);
@@ -12,20 +13,33 @@ use File::Path qw(remove_tree);
# commands with a version number suffix (e.g. "xapian-compact-1.5")
our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact';
-sub commit_changes ($$$) {
- my ($im, $old, $new) = @_;
+sub commit_changes ($$$$) {
+ my ($ibx, $old, $new, $opt) = @_;
+
+ my $reindex = $opt->{reindex};
+ my $im = $ibx->importer(0);
+ $im->lock_acquire if $reindex;
+
my @st = stat($old) or die "failed to stat($old): $!\n";
my $over = "$old/over.sqlite3";
if (-f $over) {
$over = PublicInbox::Over->new($over);
$over->connect->sqlite_backup_to_file("$new/over.sqlite3");
+ $over = undef;
}
rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
chmod($st[2] & 07777, $new) or die "chmod $old: $!\n";
rename($new, $old) or die "rename $new => $old: $!\n";
- $im->lock_release;
remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+
+ if ($reindex) {
+ $opt->{-skip_lock} = 1;
+ PublicInbox::Admin::index_inbox($ibx, $opt);
+ # implicit lock_release
+ } else {
+ $im->lock_release;
+ }
}
sub xspawn {
@@ -47,6 +61,27 @@ sub runnable_or_die ($) {
which($exe) or die "$exe not found in PATH\n";
}
+sub prepare_reindex ($$) {
+ my ($ibx, $reindex) = @_;
+ if ($ibx->{version} == 1) {
+ my $dir = $ibx->search->xdir(1);
+ my $xdb = Search::Xapian::Database->new($dir);
+ if (my $lc = $xdb->get_metadata('last_commit')) {
+ $reindex->{from} = $lc;
+ }
+ } else { # v2
+ my $v2w = $ibx->importer(0);
+ my $max;
+ $v2w->git_dir_latest(\$max) or return;
+ my $from = $reindex->{from};
+ my $mm = $ibx->mm;
+ my $v = PublicInbox::Search::SCHEMA_VERSION();
+ foreach my $i (0..$max) {
+ $from->[$i] = $mm->last_commit_xap($v, $i);
+ }
+ }
+}
+
sub run {
my ($ibx, $cmd, $env, $opt) = @_;
$opt ||= {};
@@ -54,8 +89,14 @@ sub run {
my $exe = $cmd->[0];
my $pfx = $exe;
runnable_or_die($XAPIAN_COMPACT) if $opt->{compact};
+
+ my $reindex; # v1:{ from => $x40 }, v2:{ from => [ $x40, $x40, .. ] } }
+ my $from; # per-epoch ranges
+
if (ref($exe) eq 'CODE') {
$pfx = 'CODE';
+ $reindex = $opt->{reindex} = {};
+ $from = $reindex->{from} = [];
require Search::Xapian::WritableDatabase;
} else {
runnable_or_die($exe);
@@ -64,7 +105,7 @@ sub run {
my $old = $ibx->search->xdir(1);
-d $old or die "$old does not exist\n";
my $new = tempdir("$pfx-XXXXXXXX", DIR => $dir);
- my $v = $ibx->{version} || 1;
+ my $v = $ibx->{version} ||= 1;
my @cmds;
if ($v == 1) {
push @cmds, [@$cmd, $old, $new];
@@ -85,6 +126,13 @@ sub run {
my $max = $opt->{jobs} || scalar(@cmds);
$ibx->with_umask(sub {
$im->lock_acquire;
+
+ # fine-grained locking if we prepare for reindex
+ if ($reindex) {
+ prepare_reindex($ibx, $reindex);
+ $im->lock_release;
+ }
+ delete($ibx->{$_}) for (qw(mm over search)); # cleanup
my %pids;
while (@cmds) {
while (scalar(keys(%pids)) < $max && scalar(@cmds)) {
@@ -98,7 +146,7 @@ sub run {
die join(' ', @$x)." failed: $?\n" if $?;
}
}
- commit_changes($im, $old, $new);
+ commit_changes($ibx, $old, $new, $opt);
});
}
--
EW
^ permalink raw reply related [relevance 6%]
* [PATCH 00/26] xcpdb: ease Xapian DB format migrations
@ 2019-05-23 9:36 7% Eric Wong
2019-05-23 9:36 6% ` [PATCH 14/26] xcpdb: use fine-grained locking Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2019-05-23 9:36 UTC (permalink / raw)
To: meta
I've noticed performance problems in Xapian's old chert
backend, particularly with phrase searches, which seem to be
alleviated by the new glass backend.
Unfortunately, the tool distributed with Xapian for updating DB
formats, copydatabase(1), is extremely slow, and blocking updates
for hours at a time to perform the migration is not acceptable.
(That's right, "copydatabase" is NOT a Postgres command!)
So, I've written "public-inbox-xcpdb" and gotten it to perform
the bulk copy operation without holding inbox.lock and have it
deal gracefully with Xapian DB modifications. xcpdb is still
slow, but I've (finally!) implemented partial reindexing to
allow it to minimize the lock time and not stall -mda or -watch
processes while it is working.
There are a bunch of cleanups along the way, too, and this series
should make future changes to repartition the Xapian DB on existing
v2 inboxes easier.
Eric Wong (26):
t/convert-compact: skip on missing xapian-compact(1)
v1writable: retire in favor of InboxWritable
doc: document the reason for --no-renumber
search: reenable phrase search on non-chert Xapian
xapcmd: new module for wrapping Xapian commands
admin: hoist out resolve_inboxes for -compact and -index
xapcmd: support spawn options
xcpdb: new tool which wraps Xapian's copydatabase(1)
xapcmd: do not cleanup on errors
admin: move index_inbox over
xcpdb: implement using Perl bindings
xapcmd: xcpdb supports compaction
v2writable: hoist out log_range sub for readability
xcpdb: use fine-grained locking
xcpdb: implement progress reporting
xcpdb: cleanup error handling and diagnosis
xapcmd: avoid EXDEV when finalizing changes
doc: xcpdb: update to reflect the current state
xapcmd: use "print STDERR" for progress reporting
xcpdb: show re-indexing progress
xcpdb: remove temporary directories on aborts
compact: reuse infrastructure from xcpdb
xcpdb|compact: support some xapian-compact switches
xapcmd: cleanup on interrupted xcpdb "--compact"
xcpdb|compact: support --jobs/-j flag like gmake(1)
xapcmd: do not reset %SIG until last Xtmpdir is done
Documentation/include.mk | 6 +-
Documentation/public-inbox-v1-format.pod | 4 +
Documentation/public-inbox-v2-format.pod | 4 +
Documentation/public-inbox-xcpdb.pod | 57 ++++
MANIFEST | 4 +-
lib/PublicInbox/Admin.pm | 66 ++++
lib/PublicInbox/InboxWritable.pm | 35 ++-
lib/PublicInbox/Search.pm | 48 +--
lib/PublicInbox/SearchIdx.pm | 34 ++-
lib/PublicInbox/V1Writable.pm | 34 ---
lib/PublicInbox/V2Writable.pm | 109 ++++---
lib/PublicInbox/Xapcmd.pm | 370 +++++++++++++++++++++++
script/public-inbox-compact | 102 +------
script/public-inbox-index | 102 +------
script/public-inbox-init | 13 +-
script/public-inbox-xcpdb | 19 ++
t/cgi.t | 4 +-
t/convert-compact.t | 4 +
t/indexlevels-mirror.t | 27 +-
t/init.t | 4 +-
t/nntpd.t | 15 +-
t/search.t | 1 +
t/v2mirror.t | 1 +
23 files changed, 740 insertions(+), 323 deletions(-)
create mode 100644 Documentation/public-inbox-xcpdb.pod
delete mode 100644 lib/PublicInbox/V1Writable.pm
create mode 100644 lib/PublicInbox/Xapcmd.pm
create mode 100755 script/public-inbox-xcpdb
--
EW
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2019-05-23 9:36 7% [PATCH 00/26] xcpdb: ease Xapian DB format migrations Eric Wong
2019-05-23 9:36 6% ` [PATCH 14/26] xcpdb: use fine-grained locking Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).