user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/3] writing/admin code cleanups lock fix
@ 2020-01-26  1:17 Eric Wong
  2020-01-26  1:17 ` [PATCH 1/3] inbox: add ->version method Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2020-01-26  1:17 UTC (permalink / raw)
  To: meta

I accidentally did some parallel -compact runs and noticed some
confusing (though harmless) errors.  It could be harmful if
-xcpdb is running reshard, though, but I don't do that much.

There's also some cleanups leading up to it since I got tired of
looking at '||' and '//' all over the code.

Eric Wong (3):
  inbox: add ->version method
  search: {version} => {ibx_ver}
  xapcmd: increase scope of lock

 lib/PublicInbox/Admin.pm         |  5 +--
 lib/PublicInbox/AltId.pm         |  2 +-
 lib/PublicInbox/Feed.pm          |  2 +-
 lib/PublicInbox/Inbox.pm         | 11 +++---
 lib/PublicInbox/InboxWritable.pm |  9 ++---
 lib/PublicInbox/Search.pm        |  6 +--
 lib/PublicInbox/SearchIdx.pm     |  6 +--
 lib/PublicInbox/Xapcmd.pm        | 64 +++++++++++++++++---------------
 script/public-inbox-convert      |  2 +-
 scripts/import_slrnspool         |  2 +-
 10 files changed, 56 insertions(+), 53 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] inbox: add ->version method
  2020-01-26  1:17 [PATCH 0/3] writing/admin code cleanups lock fix Eric Wong
@ 2020-01-26  1:17 ` Eric Wong
  2020-01-26  1:17 ` [PATCH 2/3] search: {version} => {ibx_ver} Eric Wong
  2020-01-26  1:17 ` [PATCH 3/3] xapcmd: increase scope of lock Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-01-26  1:17 UTC (permalink / raw)
  To: meta

This allows us to simplify version checking by avoiding
"//" or "||" operators sprinkled around.
---
 lib/PublicInbox/Admin.pm         |  5 ++---
 lib/PublicInbox/AltId.pm         |  2 +-
 lib/PublicInbox/Feed.pm          |  2 +-
 lib/PublicInbox/Inbox.pm         | 11 ++++++-----
 lib/PublicInbox/InboxWritable.pm |  9 ++++-----
 lib/PublicInbox/Search.pm        |  2 +-
 lib/PublicInbox/SearchIdx.pm     |  2 +-
 lib/PublicInbox/Xapcmd.pm        |  5 ++---
 script/public-inbox-convert      |  2 +-
 scripts/import_slrnspool         |  2 +-
 10 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 1f1b133d..2d3e0281 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -84,7 +84,6 @@ sub resolve_inboxes ($;$$) {
 	if ($cfg) {
 		$cfg->each_inbox(sub {
 			my ($ibx) = @_;
-			$ibx->{version} ||= 1;
 			my $path = abs_path($ibx->{inboxdir});
 			if (defined($path)) {
 				$dir2ibx{$path} = $ibx;
@@ -97,7 +96,7 @@ EOF
 	}
 	if ($opt->{all}) {
 		my @all = values %dir2ibx;
-		@all = grep { $_->{version} >= $min_ver } @all;
+		@all = grep { $_->version >= $min_ver } @all;
 		push @ibxs, @all;
 	} else { # directories specified on the command-line
 		my $i = 0;
@@ -189,7 +188,7 @@ invalid indexlevel=$indexlevel (must be `basic', `medium', or `full')
 sub index_inbox {
 	my ($ibx, $im, $opt) = @_;
 	my $jobs = delete $opt->{jobs} if $opt;
-	if (ref($ibx) && ($ibx->{version} || 1) == 2) {
+	if (ref($ibx) && $ibx->version == 2) {
 		eval { require PublicInbox::V2Writable };
 		die "v2 requirements not met: $@\n" if $@;
 		my $v2w = $im // eval { $ibx->importer(0) } || eval {
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6b03d603..5add1ea2 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -30,7 +30,7 @@ sub new {
 	} split(/[&;]/, $query);
 	my $f = $params{file} or die "file: required for $type spec $spec\n";
 	unless (index($f, '/') == 0) {
-		if (($ibx->{version} || 1) == 1) {
+		if ($ibx->version == 1) {
 			$f = "$ibx->{inboxdir}/public-inbox/$f";
 		} else {
 			$f = "$ibx->{inboxdir}/$f";
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index cbf25d46..0bd458c9 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -85,7 +85,7 @@ sub recent_msgs {
 	my $ibx = $ctx->{-inbox};
 	my $max = $ibx->{feedmax};
 	my $qp = $ctx->{qp};
-	my $v = $ibx->{version} || 1;
+	my $v = $ibx->version;
 	if ($v > 2) {
 		die "BUG: unsupported inbox version: $v\n";
 	}
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 07e8b5b7..b76d4e5a 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -125,9 +125,11 @@ sub new {
 	bless $opts, $class;
 }
 
+sub version { $_[0]->{version} // 1 }
+
 sub git_epoch {
 	my ($self, $epoch) = @_;
-	($self->{version} || 1) == 2 or return;
+	$self->version == 2 or return;
 	$self->{"$epoch.git"} ||= eval {
 		my $git_dir = "$self->{inboxdir}/git/$epoch.git";
 		my $g = PublicInbox::Git->new($git_dir);
@@ -141,7 +143,7 @@ sub git {
 	my ($self) = @_;
 	$self->{git} ||= eval {
 		my $git_dir = $self->{inboxdir};
-		$git_dir .= '/all.git' if (($self->{version} || 1) == 2);
+		$git_dir .= '/all.git' if $self->version == 2;
 		my $g = PublicInbox::Git->new($git_dir);
 		$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
 		_cleanup_later($self);
@@ -151,8 +153,7 @@ sub git {
 
 sub max_git_epoch {
 	my ($self) = @_;
-	my $v = $self->{version};
-	return unless defined($v) && $v == 2;
+	return if $self->version < 2;
 	my $cur = $self->{-max_git_epoch};
 	my $changed = git($self)->alternates_changed;
 	if (!defined($cur) || $changed) {
@@ -178,7 +179,7 @@ sub mm {
 		require PublicInbox::Msgmap;
 		_cleanup_later($self);
 		my $dir = $self->{inboxdir};
-		if (($self->{version} || 1) >= 2) {
+		if ($self->version >= 2) {
 			PublicInbox::Msgmap->new_file("$dir/msgmap.sqlite3");
 		} else {
 			PublicInbox::Msgmap->new($dir);
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 228e786c..5b2aeed3 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -24,7 +24,7 @@ sub new {
 	# TODO: maybe stop supporting this
 	if ($creat_opt) { # for { nproc => $N }
 		$self->{-creat_opt} = $creat_opt;
-		init_inbox($self) if ($self->{version} || 1) == 1;
+		init_inbox($self) if $self->version == 1;
 	}
 	$self;
 }
@@ -39,8 +39,7 @@ sub assert_usable_dir {
 sub init_inbox {
 	my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
 	# TODO: honor skip_artnum
-	my $v = $self->{version} || 1;
-	if ($v == 1) {
+	if ($self->version == 1) {
 		my $dir = assert_usable_dir($self);
 		PublicInbox::Import::init_bare($dir);
 	} else {
@@ -51,7 +50,7 @@ sub init_inbox {
 
 sub importer {
 	my ($self, $parallel) = @_;
-	my $v = $self->{version} || 1;
+	my $v = $self->version;
 	if ($v == 2) {
 		eval { require PublicInbox::V2Writable };
 		die "v2 not supported: $@\n" if $@;
@@ -75,7 +74,7 @@ sub filter {
 		# v2 keeps msgmap open, which causes conflicts for filters
 		# such as PublicInbox::Filter::RubyLang which overload msgmap
 		# for a predictable serial number.
-		if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) {
+		if ($im && $self->version >= 2 && $self->{altid}) {
 			$im->done;
 		}
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 5c9dccb5..5e820594 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -198,7 +198,7 @@ sub new {
 	my $self = bless {
 		inboxdir => $ibx->{inboxdir},
 		altid => $ibx->{altid},
-		version => $ibx->{version} // 1,
+		version => $ibx->version,
 	}, $class;
 	my $dir = xdir($self, 1);
 	$self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index cb554912..4e951bbe 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -34,7 +34,7 @@ sub new {
 	ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
 	my $levels = qr/\A(?:full|medium|basic)\z/;
 	my $inboxdir = $ibx->{inboxdir};
-	my $version = $ibx->{version} || 1;
+	my $version = $ibx->version;
 	my $indexlevel = 'full';
 	my $altid = $ibx->{altid};
 	if ($altid) {
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index de2ef5c6..19c6ff07 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -97,7 +97,7 @@ sub runnable_or_die ($) {
 
 sub prepare_reindex ($$$) {
 	my ($ibx, $im, $reindex) = @_;
-	if ($ibx->{version} == 1) {
+	if ($ibx->version == 1) {
 		my $dir = $ibx->search->xdir(1);
 		my $xdb = $PublicInbox::Search::X{Database}->new($dir);
 		if (my $lc = $xdb->get_metadata('last_commit')) {
@@ -173,7 +173,6 @@ sub run {
 	-d $old or die "$old does not exist\n";
 
 	my $tmp = {};
-	my $v = $ibx->{version} ||= 1;
 	my @q;
 	my $reshard = $opt->{reshard};
 	if (defined $reshard && $reshard <= 0) {
@@ -185,7 +184,7 @@ sub run {
 
 	# we want temporary directories to be as deep as possible,
 	# so v2 shards can keep "xap$SCHEMA_VERSION" on a separate FS.
-	if ($v == 1) {
+	if ($ibx->version == 1) {
 		if (defined $reshard) {
 			warn
 "--reshard=$reshard ignored for v1 $ibx->{inboxdir}\n";
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 633c4cf8..56a810eb 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -41,7 +41,7 @@ unless ($old) {
 	$old = PublicInbox::Inbox->new($old);
 }
 $old = PublicInbox::InboxWritable->new($old);
-if (($old->{version} || 1) >= 2) {
+if ($old->version >= 2) {
 	die "Only conversion from v1 inboxes is supported\n";
 }
 my $new = { %$old };
diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
index 1dccb8dd..b913cf32 100755
--- a/scripts/import_slrnspool
+++ b/scripts/import_slrnspool
@@ -26,7 +26,7 @@ my $config = PublicInbox::Config->new;
 my $ibx = $config->lookup($recipient);
 my $git = $ibx->git;
 my $im;
-if (($ibx->{version} || 1) == 2) {
+if ($ibx->version == 2) {
 	require PublicInbox::V2Writable;
 	$im = PublicInbox::V2Writable->new($ibx);
 	$im->{parallel} = 0; # pointless to be parallel for a single message

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] search: {version} => {ibx_ver}
  2020-01-26  1:17 [PATCH 0/3] writing/admin code cleanups lock fix Eric Wong
  2020-01-26  1:17 ` [PATCH 1/3] inbox: add ->version method Eric Wong
@ 2020-01-26  1:17 ` Eric Wong
  2020-01-26  1:17 ` [PATCH 3/3] xapcmd: increase scope of lock Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-01-26  1:17 UTC (permalink / raw)
  To: meta

We don't confuse human readers with the Xapian schema version.
We also want to make it obvious this is the version of the inbox
we're indexing, these are Search or SearchIdx objects, not Inbox
objects.
---
 lib/PublicInbox/Search.pm    | 6 +++---
 lib/PublicInbox/SearchIdx.pm | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 5e820594..a4491ca1 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -148,7 +148,7 @@ chomp @HELP;
 
 sub xdir ($;$) {
 	my ($self, $rdonly) = @_;
-	if ($self->{version} == 1) {
+	if ($self->{ibx_ver} == 1) {
 		"$self->{inboxdir}/public-inbox/xapian" . SCHEMA_VERSION;
 	} else {
 		my $dir = "$self->{inboxdir}/xap" . SCHEMA_VERSION;
@@ -165,7 +165,7 @@ sub _xdb ($) {
 	my $dir = xdir($self, 1);
 	my ($xdb, $slow_phrase);
 	my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
-	if ($self->{version} >= 2) {
+	if ($self->{ibx_ver} >= 2) {
 		foreach my $shard (<$dir/*>) {
 			-d $shard && $shard =~ m!/[0-9]+\z! or next;
 			my $sub = $X{Database}->new($shard);
@@ -198,7 +198,7 @@ sub new {
 	my $self = bless {
 		inboxdir => $ibx->{inboxdir},
 		altid => $ibx->{altid},
-		version => $ibx->version,
+		ibx_ver => $ibx->version,
 	}, $class;
 	my $dir = xdir($self, 1);
 	$self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4e951bbe..4349d127 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -54,7 +54,7 @@ sub new {
 		-inbox => $ibx,
 		git => $ibx->git,
 		-altid => $altid,
-		version => $version,
+		ibx_ver => $version,
 		indexlevel => $indexlevel,
 	}, $class;
 	$ibx->umask_prepare;
@@ -358,7 +358,7 @@ sub add_xapian ($$$$$$) {
 
 sub _msgmap_init ($) {
 	my ($self) = @_;
-	die "BUG: _msgmap_init is only for v1\n" if $self->{version} != 1;
+	die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
 	$self->{mm} //= eval {
 		require PublicInbox::Msgmap;
 		PublicInbox::Msgmap->new($self->{inboxdir}, 1);

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] xapcmd: increase scope of lock
  2020-01-26  1:17 [PATCH 0/3] writing/admin code cleanups lock fix Eric Wong
  2020-01-26  1:17 ` [PATCH 1/3] inbox: add ->version method Eric Wong
  2020-01-26  1:17 ` [PATCH 2/3] search: {version} => {ibx_ver} Eric Wong
@ 2020-01-26  1:17 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-01-26  1:17 UTC (permalink / raw)
  To: meta

The old lock scope was only sufficient for protecting against
concurrent modifications from the common -mda, -watch, or -learn
writers.

It was not sufficient for protecting against parallel -compact
or -xcpdb invocations from eager admins.  Most of the time this
only leads to confusing and misleading warning messages, but
parallel xcpdb --reshard could lead to errors.
---
 lib/PublicInbox/Xapcmd.pm | 59 +++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 19c6ff07..2864b0d4 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -122,7 +122,8 @@ sub same_fs_or_die ($$) {
 }
 
 sub process_queue {
-	my ($queue, $cb, $max, $opt) = @_;
+	my ($queue, $cb, $opt) = @_;
+	my $max = $opt->{jobs} || scalar(@$queue);
 	if ($max <= 1) {
 		while (defined(my $args = shift @$queue)) {
 			$cb->($args, $opt);
@@ -152,36 +153,18 @@ sub setup_signals () {
 	$SIG{HUP} = $SIG{PIPE} = $SIG{TERM} = sub { exit(1) };
 }
 
-sub run {
-	my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact'
-	my $cb = \&${\"PublicInbox::Xapcmd::$task"};
-	PublicInbox::Admin::progress_prepare($opt ||= {});
-	my $dir = $ibx->{inboxdir} or die "no inboxdir in inbox\n";
-	runnable_or_die($XAPIAN_COMPACT) if $opt->{compact};
-	my $reindex; # v1:{ from => $x40 }, v2:{ from => [ $x40, $x40, .. ] } }
-	my $from; # per-epoch ranges
-
-	if (!$opt->{-coarse_lock}) {
-		$reindex = $opt->{reindex} = {};
-		$from = $reindex->{from} = [];
-		require PublicInbox::SearchIdx;
-		PublicInbox::SearchIdx::load_xapian_writable();
-	}
+sub prepare_run {
+	my ($ibx, $opt) = @_;
+	my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
+	my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
 
-	$ibx->umask_prepare;
 	my $old = $ibx->search->xdir(1);
 	-d $old or die "$old does not exist\n";
-
-	my $tmp = {};
-	my @q;
 	my $reshard = $opt->{reshard};
 	if (defined $reshard && $reshard <= 0) {
 		die "--reshard must be a positive number\n";
 	}
 
-	local %SIG = %SIG;
-	setup_signals();
-
 	# we want temporary directories to be as deep as possible,
 	# so v2 shards can keep "xap$SCHEMA_VERSION" on a separate FS.
 	if ($ibx->version == 1) {
@@ -194,7 +177,7 @@ sub run {
 		my $v = PublicInbox::Search::SCHEMA_VERSION();
 		my $wip = File::Temp->newdir("xapian$v-XXXXXXXX", DIR => $dir);
 		$tmp->{$old} = $wip;
-		push @q, [ $old, $wip ];
+		push @queue, [ $old, $wip ];
 	} else {
 		opendir my $dh, $old or die "Failed to opendir $old: $!\n";
 		my @old_shards;
@@ -223,7 +206,7 @@ sub run {
 			my $wip = File::Temp->newdir($tmpl, DIR => $old);
 			same_fs_or_die($old, $wip->dirname);
 			my $cur = "$old/$dn";
-			push @q, [ $src // $cur , $wip ];
+			push @queue, [ $src // $cur , $wip ];
 			$tmp->{$cur} = $wip;
 		}
 		# mark old shards to be unlinked
@@ -231,10 +214,32 @@ sub run {
 			$tmp->{$_} ||= undef for @$src;
 		}
 	}
-	my $max = $opt->{jobs} || scalar(@q);
+	($tmp, \@queue);
+}
+
+sub run {
+	my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact'
+	my $cb = \&${\"PublicInbox::Xapcmd::$task"};
+	PublicInbox::Admin::progress_prepare($opt ||= {});
+	defined(my $dir = $ibx->{inboxdir}) or die "no inboxdir defined\n";
+	-d $dir or die "inboxdir=$dir does not exist\n";
+	runnable_or_die($XAPIAN_COMPACT) if $opt->{compact};
+	my $reindex; # v1:{ from => $x40 }, v2:{ from => [ $x40, $x40, .. ] } }
+
+	if (!$opt->{-coarse_lock}) {
+		$reindex = $opt->{reindex} = {};
+		$reindex->{from} = []; # per-epoch ranges
+		require PublicInbox::SearchIdx;
+		PublicInbox::SearchIdx::load_xapian_writable();
+	}
+
+	local %SIG = %SIG;
+	setup_signals();
+	$ibx->umask_prepare;
 	$ibx->with_umask(sub {
 		my $im = $ibx->importer(0);
 		$im->lock_acquire;
+		my ($tmp, $queue) = prepare_run($ibx, $opt);
 
 		# fine-grained locking if we prepare for reindex
 		if (!$opt->{-coarse_lock}) {
@@ -243,7 +248,7 @@ sub run {
 		}
 
 		$ibx->cleanup;
-		process_queue(\@q, $cb, $max, $opt);
+		process_queue($queue, $cb, $opt);
 		$im->lock_acquire if !$opt->{-coarse_lock};
 		commit_changes($ibx, $im, $tmp, $opt);
 	});

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-01-26  1:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-26  1:17 [PATCH 0/3] writing/admin code cleanups lock fix Eric Wong
2020-01-26  1:17 ` [PATCH 1/3] inbox: add ->version method Eric Wong
2020-01-26  1:17 ` [PATCH 2/3] search: {version} => {ibx_ver} Eric Wong
2020-01-26  1:17 ` [PATCH 3/3] xapcmd: increase scope of lock Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).