user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/5] searchidx: put all shard-related stuff in SearchIdxShard.pm
  2020-08-22  6:06  6% [PATCH 0/5] "mairix -t" workalike for mbox.gz downloads Eric Wong
@ 2020-08-22  6:06  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-08-22  6:06 UTC (permalink / raw)
  To: meta

We'll also rename the /^remote_/ prefix to "shard_", since
remote implies the process is on a different host.  These
methods only pass messages to a child process on the same host
OR perform operations within the same process.
---
 lib/PublicInbox/SearchIdx.pm      | 34 ---------------------------
 lib/PublicInbox/SearchIdxShard.pm | 39 +++++++++++++++++++++++++++----
 lib/PublicInbox/V2Writable.pm     |  8 +++----
 3 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index be46b2b9..098fead7 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -793,40 +793,6 @@ sub DESTROY {
 	$_[0]->{lockfh} = undef;
 }
 
-# remote_* subs are only used by SearchIdxPart
-sub remote_commit {
-	my ($self) = @_;
-	if (my $w = $self->{w}) {
-		print $w "commit\n" or die "failed to write commit: $!";
-	} else {
-		$self->commit_txn_lazy;
-	}
-}
-
-sub remote_close {
-	my ($self) = @_;
-	if (my $w = delete $self->{w}) {
-		my $pid = delete $self->{pid} or die "no process to wait on\n";
-		print $w "close\n" or die "failed to write to pid:$pid: $!\n";
-		close $w or die "failed to close pipe for pid:$pid: $!\n";
-		waitpid($pid, 0) == $pid or die "remote process did not finish";
-		$? == 0 or die ref($self)." pid:$pid exited with: $?";
-	} else {
-		die "transaction in progress $self\n" if $self->{txn};
-		idx_release($self) if $self->{xdb};
-	}
-}
-
-sub remote_remove {
-	my ($self, $oid, $num) = @_;
-	if (my $w = $self->{w}) {
-		# triggers remove_by_oid in a shard
-		print $w "D $oid $num\n" or die "failed to write remove $!";
-	} else {
-		$self->remove_by_oid($oid, $num);
-	}
-}
-
 sub _begin_txn {
 	my ($self) = @_;
 	my $xdb = $self->{xdb} || idx_acquire($self);
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 75521b43..c0f8be89 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -1,7 +1,7 @@
 # Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# used to interface with a single Xapian shard in V2 repos.
+# Internal interface for a single Xapian shard in V2 inboxes.
 # See L<public-inbox-v2-format(5)> for more info on how we shard Xapian
 package PublicInbox::SearchIdxShard;
 use strict;
@@ -47,6 +47,7 @@ sub spawn_worker {
 	close $r or die "failed to close: $!";
 }
 
+# this reads all the writes to $self->{w} from the parent process
 sub shard_worker_loop ($$$$$) {
 	my ($self, $v2w, $r, $shard, $bnote) = @_;
 	$0 = "pi-v2-shard[$shard]";
@@ -87,7 +88,6 @@ sub shard_worker_loop ($$$$$) {
 	$self->worker_done;
 }
 
-# called by V2Writable
 sub index_raw {
 	my ($self, $msgref, $eml, $smsg) = @_;
 	if (my $w = $self->{w}) {
@@ -110,8 +110,7 @@ sub atfork_child {
 	close $_[0]->{w} or die "failed to close write pipe: $!\n";
 }
 
-# called by V2Writable:
-sub remote_barrier {
+sub shard_barrier {
 	my ($self) = @_;
 	if (my $w = $self->{w}) {
 		print $w "barrier\n" or die "failed to print: $!";
@@ -120,4 +119,36 @@ sub remote_barrier {
 	}
 }
 
+sub shard_commit {
+	my ($self) = @_;
+	if (my $w = $self->{w}) {
+		print $w "commit\n" or die "failed to write commit: $!";
+	} else {
+		$self->commit_txn_lazy;
+	}
+}
+
+sub shard_close {
+	my ($self) = @_;
+	if (my $w = delete $self->{w}) {
+		my $pid = delete $self->{pid} or die "no process to wait on\n";
+		print $w "close\n" or die "failed to write to pid:$pid: $!\n";
+		close $w or die "failed to close pipe for pid:$pid: $!\n";
+		waitpid($pid, 0) == $pid or die "remote process did not finish";
+		$? == 0 or die ref($self)." pid:$pid exited with: $?";
+	} else {
+		die "transaction in progress $self\n" if $self->{txn};
+		$self->idx_release if $self->{xdb};
+	}
+}
+
+sub shard_remove {
+	my ($self, $oid, $num) = @_;
+	if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
+		print $w "D $oid $num\n" or die "failed to write remove $!";
+	} else { # same process
+		$self->remove_by_oid($oid, $num);
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 9c200288..0a91a132 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -630,12 +630,12 @@ sub checkpoint ($;$) {
 			my $barrier = $self->barrier_init(scalar @$shards);
 
 			# each shard needs to issue a barrier command
-			$_->remote_barrier for @$shards;
+			$_->shard_barrier for @$shards;
 
 			# wait for each Xapian shard
 			$self->barrier_wait($barrier);
 		} else {
-			$_->remote_commit for @$shards;
+			$_->shard_commit for @$shards;
 		}
 
 		# last_commit is special, don't commit these until
@@ -675,7 +675,7 @@ sub done {
 	my $shards = delete $self->{idx_shards};
 	if ($shards) {
 		for (@$shards) {
-			eval { $_->remote_close };
+			eval { $_->shard_close };
 			$err .= "shard close: $@\n" if $@;
 		}
 	}
@@ -1107,7 +1107,7 @@ sub unindex_oid_remote ($$$) {
 	my @removed = $self->{over}->remove_oid($oid, $mid);
 	for my $num (@removed) {
 		my $idx = idx_shard($self, $num % $self->{shards});
-		$idx->remote_remove($oid, $num);
+		$idx->shard_remove($oid, $num);
 	}
 }
 

^ permalink raw reply related	[relevance 7%]

* [PATCH 0/5] "mairix -t" workalike for mbox.gz downloads
@ 2020-08-22  6:06  6% Eric Wong
  2020-08-22  6:06  7% ` [PATCH 2/5] searchidx: put all shard-related stuff in SearchIdxShard.pm Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-08-22  6:06 UTC (permalink / raw)
  To: meta

Actually, the Xapian aspect of it turned out to be easy once
I learned ->set_collapse_key.

Getting the tests and compatibility with existing (pre-upgrade)
inboxes was more work.

The feature requires "public-inbox-index --reindex" to activate,
but PATCH 5/5 makes it safe to upgrade WWW either before
or after --reindex.  That means BOFHs can upgrade without
regard to ordering.

Tested with w3m, links, and lynx (I actually split out
my lynx fix separately):
  https://public-inbox.org/meta/20200822004125.9458-1-e@80x24.org/

TODO: CLI tool support, HTML interface, JMAP, etc...

Eric Wong (5):
  searchidxshard: clear $msgref buffer properly
  searchidx: put all shard-related stuff in SearchIdxShard.pm
  searchidx: index THREADID in Xapian
  search: support downloading mboxes results with full thread
  mbox: disable "&t" on existing Xapian until full reindex

 Documentation/standards.perl      |  4 +++
 lib/PublicInbox/Mbox.pm           | 54 +++++++++++++++++++++++++------
 lib/PublicInbox/Over.pm           | 31 +++++++++++++++++-
 lib/PublicInbox/OverIdx.pm        | 18 +++++------
 lib/PublicInbox/Search.pm         | 16 +++++++--
 lib/PublicInbox/SearchIdx.pm      | 51 +++++++++--------------------
 lib/PublicInbox/SearchIdxShard.pm | 48 ++++++++++++++++++++++-----
 lib/PublicInbox/SearchQuery.pm    |  8 +++--
 lib/PublicInbox/SearchView.pm     | 30 +++++++++++------
 lib/PublicInbox/Smsg.pm           |  3 +-
 lib/PublicInbox/V2Writable.pm     | 19 ++++++++---
 t/init.t                          |  1 +
 t/over.t                          | 13 ++++----
 t/psgi_search.t                   | 39 ++++++++++++++++++++--
 14 files changed, 244 insertions(+), 91 deletions(-)

^ permalink raw reply	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-08-22  6:06  6% [PATCH 0/5] "mairix -t" workalike for mbox.gz downloads Eric Wong
2020-08-22  6:06  7% ` [PATCH 2/5] searchidx: put all shard-related stuff in SearchIdxShard.pm Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).