user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/9] lei remotes fixes and updates
@ 2021-01-24 11:46 Eric Wong
  2021-01-24 11:46 ` [PATCH 1/9] lei q: limit concurrency to 4 remote connections Eric Wong
                   ` (8 more replies)
  0 siblings, 9 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

Eric Wong (9):
  lei q: limit concurrency to 4 remote connections
  ipc: wq supports arbitrarily large payloads
  ipc: get rid of wq_set_recv_modes
  lei q: disable remote externals if locals exist
  lei q: honor --no-local to force remote searches
  lei_xsearch: use curl -d '' for nginx compatibility
  lei q: fix JSON overview with remote externals
  smsg: make parse_references an object method
  smsg: parse_references: micro-optimization to avoid ++

 lib/PublicInbox/IPC.pm         |  85 +++++++++++++++++----------
 lib/PublicInbox/LEI.pm         |   9 ++-
 lib/PublicInbox/LeiOverview.pm |   2 +-
 lib/PublicInbox/LeiQuery.pm    |  13 ++++-
 lib/PublicInbox/LeiToMail.pm   |   7 +--
 lib/PublicInbox/LeiXSearch.pm  | 101 ++++++++++++++++++---------------
 lib/PublicInbox/OverIdx.pm     |  22 +------
 lib/PublicInbox/SearchIdx.pm   |   2 +-
 lib/PublicInbox/Smsg.pm        |  22 ++++++-
 script/lei                     |  11 ++--
 t/cmd_ipc.t                    |  16 ++++++
 t/ipc.t                        |  21 ++++++-
 t/lei.t                        |   3 +
 13 files changed, 196 insertions(+), 118 deletions(-)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/9] lei q: limit concurrency to 4 remote connections
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 2/9] ipc: wq supports arbitrarily large payloads Eric Wong
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

Unfortunately, this isn't a per-host limit yet, but it
nevertheless reduces load on existing PublicInbox::WWW
instances, since requesting an mboxrd is one of the more
expensive operations.
---
 lib/PublicInbox/LeiQuery.pm   |  2 +-
 lib/PublicInbox/LeiXSearch.pm | 82 +++++++++++++++++++++--------------
 2 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index acab3c2c..a7938e8b 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -26,7 +26,7 @@ sub lei_q {
 		my $cb = $lxs->can('prepare_external');
 		$self->_externals_each($cb, $lxs);
 	}
-	my $xj = $opt->{thread} ? $lxs->locals : ($lxs->remotes + 1);
+	my $xj = $lxs->concurrency($opt);
 	my $ovv = PublicInbox::LeiOverview->new($self) or return;
 	$self->atfork_prepare_wq($lxs);
 	$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index defe5e67..1c093a94 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -182,25 +182,16 @@ sub each_eml { # callback for MboxReader->mboxrd
 }
 
 sub query_remote_mboxrd {
-	my ($self, $lei, $uri) = @_;
+	my ($self, $lei, $uris) = @_;
 	local $0 = "$0 query_remote_mboxrd";
 	my %sig = $lei->atfork_child_wq($self); # keep $self->{5} startq
 	local @SIG{keys %sig} = values %sig;
-	my $opt = $lei->{opt};
-	$uri->query_form(q => $lei->{mset_opt}->{qstr}, x => 'm',
-			$opt->{thread} ? (t => 1) : ());
-	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
+	my ($opt, $env) = @$lei{qw(opt env)};
+	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
+	push(@qform, t => 1) if $opt->{thread};
 	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	$dedupe->prepare_dedupe;
 	my @cmd = qw(curl -XPOST -sSf);
-	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
-	my $tor = $opt->{torsocks} //= 'auto';
-	if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
-			(($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
-		unshift @cmd, 'torsocks';
-	} elsif (PublicInbox::Config::git_bool($tor)) {
-		unshift @cmd, 'torsocks';
-	}
 	my $verbose = $opt->{verbose};
 	push @cmd, '-v' if $verbose;
 	for my $o ($lei->curl_opt) {
@@ -215,25 +206,36 @@ sub query_remote_mboxrd {
 			push @cmd, "--$o";
 		}
 	}
-	push @cmd, $uri->as_string;
-	$lei->err("# @cmd") if $verbose;
-	$? = 0;
-	my $fh = popen_rd(\@cmd, $lei->{env}, { 2 => $lei->{2} });
-	$fh = IO::Uncompress::Gunzip->new($fh);
-	eval {
-		PublicInbox::MboxReader->mboxrd($fh, \&each_eml,
-						$self, $lei, $each_smsg);
-	};
-	return $lei->fail("E: @cmd: $@") if $@;
-	if (($? >> 8) == 22) { # HTTP 404 from curl(1)
-		$uri->query_form(q => $lei->{mset_opt}->{qstr});
-		$lei->err('# no results from '.$uri->as_string);
-	} elsif ($?) {
-		$uri->query_form(q => $lei->{mset_opt}->{qstr});
-		$lei->err('E: '.$uri->as_string);
-		$lei->child_error($?);
+	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
+	my $tor = $opt->{torsocks} //= 'auto';
+	for my $uri (@$uris) {
+		$uri->query_form(@qform);
+		my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
+		my $cmd = [ @cmd, $uri->as_string ];
+		if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+				(($env->{LD_PRELOAD}//'') !~ /torsocks/)) {
+			unshift @$cmd, 'torsocks';
+		} elsif (PublicInbox::Config::git_bool($tor)) {
+			unshift @$cmd, 'torsocks';
+		}
+		$lei->err("# @$cmd") if $verbose;
+		$? = 0;
+		my $fh = popen_rd($cmd, $env, { 2 => $lei->{2} });
+		$fh = IO::Uncompress::Gunzip->new($fh);
+		eval {
+			PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
+							$lei, $each_smsg);
+		};
+		return $lei->fail("E: @$cmd: $@") if $@;
+		if (($? >> 8) == 22) { # HTTP 404 from curl(1)
+			$uri->query_form(q => $lei->{mset_opt}->{qstr});
+			$lei->err('# no results from '.$uri->as_string);
+		} elsif ($?) {
+			$uri->query_form(q => $lei->{mset_opt}->{qstr});
+			$lei->err('E: '.$uri->as_string);
+			$lei->child_error($?);
+		}
 	}
-	undef $each_smsg;
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
@@ -292,6 +294,17 @@ sub do_post_augment {
 	close $au_done; # triggers wait_startq
 }
 
+my $MAX_PER_HOST = 4;
+sub MAX_PER_HOST { $MAX_PER_HOST }
+
+sub concurrency {
+	my ($self, $opt) = @_;
+	my $nl = $opt->{thread} ? locals($self) : 1;
+	my $nr = remotes($self);
+	$nr = $MAX_PER_HOST if $nr > $MAX_PER_HOST;
+	$nl + $nr;
+}
+
 sub start_query { # always runs in main (lei-daemon) process
 	my ($self, $io, $lei) = @_;
 	if ($lei->{opt}->{thread}) {
@@ -301,8 +314,13 @@ sub start_query { # always runs in main (lei-daemon) process
 	} else {
 		$self->wq_do('query_mset', $io, $lei);
 	}
+	my $i = 0;
+	my $q = [];
 	for my $uri (remotes($self)) {
-		$self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
+		push @{$q->[$i++ % $MAX_PER_HOST]}, $uri;
+	}
+	for my $uris (@$q) {
+		$self->wq_do('query_remote_mboxrd', $io, $lei, $uris);
 	}
 	@$io = ();
 }

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/9] ipc: wq supports arbitrarily large payloads
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
  2021-01-24 11:46 ` [PATCH 1/9] lei q: limit concurrency to 4 remote connections Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 3/9] ipc: get rid of wq_set_recv_modes Eric Wong
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

This should not be needed, but somebody using lei could
theoretically create thousands of external URLs and
only have a handful of workers, which means the per-worker
URI list could be large.
---
 lib/PublicInbox/IPC.pm | 82 +++++++++++++++++++++++++++++-------------
 t/cmd_ipc.t            | 16 +++++++++
 t/ipc.t                | 22 +++++++++++-
 3 files changed, 94 insertions(+), 26 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 6efaff38..592efd21 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -8,8 +8,10 @@ use v5.10.1;
 use Carp qw(confess croak);
 use PublicInbox::DS qw(dwaitpid);
 use PublicInbox::Spawn;
-use POSIX qw(WNOHANG);
-use Socket qw(AF_UNIX MSG_EOR);
+use POSIX qw(mkfifo WNOHANG);
+use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM);
+use Errno qw(EMSGSIZE);
+use File::Temp 0.19 (); # 0.19 for ->newdir
 my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough?
 use constant PIPE_BUF => $^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF();
 my $WQ_MAX_WORKERS = 4096;
@@ -245,39 +247,69 @@ sub ipc_sibling_atfork_child {
 	$pid == $$ and die "BUG: $$ ipc_atfork_child called on itself";
 }
 
+sub _recv_and_run {
+	my ($self, $s2, $len, $full_stream) = @_;
+	my @fds = $recv_cmd->($s2, my $buf, $len);
+	my $n = length($buf // '') or return;
+	my @m = @{$self->{-wq_recv_modes} // [qw( +<&= >&= >&= )]};
+	my $nfd = 0;
+	for my $fd (@fds) {
+		my $mode = shift(@m);
+		if (open(my $cmdfh, $mode, $fd)) {
+			$self->{$nfd++} = $cmdfh;
+			$cmdfh->autoflush(1);
+		} else {
+			die "$$ open($mode$fd) (FD:$nfd): $!";
+		}
+	}
+	while ($full_stream && $n < $len) {
+		my $r = sysread($s2, $buf, $len - $n, $n) // croak "read: $!";
+		croak "read EOF after $n/$len bytes" if $r == 0;
+		$n = length($buf);
+	}
+	# Sereal dies on truncated data, Storable returns undef
+	my $args = thaw($buf) // die "thaw error on buffer of size: $n";
+	undef $buf;
+	my $sub = shift @$args;
+	eval { $self->$sub(@$args) };
+	warn "$$ wq_worker: $@" if $@ && ref($@) ne 'PublicInbox::SIGPIPE';
+	delete @$self{0..($nfd-1)};
+	$n;
+}
+
 sub wq_worker_loop ($) {
 	my ($self) = @_;
 	my $len = $self->{wq_req_len} // (4096 * 33);
 	my $s2 = $self->{-wq_s2} // die 'BUG: no -wq_s2';
-	while (1) {
-		my @fds = $recv_cmd->($s2, my $buf, $len) or return; # EOF
-		my @m = @{$self->{-wq_recv_modes} // [qw( +<&= >&= >&= )]};
-		my $nfd = 0;
-		for my $fd (@fds) {
-			my $mode = shift(@m);
-			if (open(my $cmdfh, $mode, $fd)) {
-				$self->{$nfd++} = $cmdfh;
-				$cmdfh->autoflush(1);
-			} else {
-				die "$$ open($mode$fd) (FD:$nfd): $!";
-			}
-		}
-		# Sereal dies on truncated data, Storable returns undef
-		my $args = thaw($buf) //
-			die "thaw error on buffer of size:".length($buf);
-		my $sub = shift @$args;
-		eval { $self->$sub(@$args) };
-		warn "$$ wq_worker: $@" if $@ &&
-					ref($@) ne 'PublicInbox::SIGPIPE';
-		delete @$self{0..($nfd-1)};
-	}
+	1 while (_recv_and_run($self, $s2, $len));
+}
+
+sub do_sock_stream { # via wq_do, for big requests
+	my ($self, $len) = @_;
+	_recv_and_run($self, delete $self->{0}, $len, 1);
 }
 
 sub wq_do { # always async
 	my ($self, $sub, $ios, @args) = @_;
 	if (my $s1 = $self->{-wq_s1}) { # run in worker
 		my $fds = [ map { fileno($_) } @$ios ];
-		$send_cmd->($s1, $fds, freeze([$sub, @args]), MSG_EOR);
+		my $n = $send_cmd->($s1, $fds, freeze([$sub, @args]), MSG_EOR);
+		return if defined($n);
+		croak "sendmsg error: $!" if $! != EMSGSIZE;
+		socketpair(my $r, my $w, AF_UNIX, SOCK_STREAM, 0) or
+			croak "socketpair: $!";
+		my $buf = freeze([$sub, @args]);
+		$n = $send_cmd->($s1, [ fileno($r) ],
+				freeze(['do_sock_stream', length($buf)]),
+				MSG_EOR) // croak "sendmsg: $!";
+		undef $r;
+		$n = $send_cmd->($w, $fds, $buf, 0) // croak "sendmsg: $!";
+		while ($n < length($buf)) {
+			my $x = syswrite($w, $buf, length($buf) - $n, $n) //
+					croak "syswrite: $!";
+			croak "syswrite wrote 0 bytes" if $x == 0;
+			$n += $x;
+		}
 	} else {
 		@$self{0..$#$ios} = @$ios;
 		eval { $self->$sub(@args) };
diff --git a/t/cmd_ipc.t b/t/cmd_ipc.t
index 96510175..84f8fb4d 100644
--- a/t/cmd_ipc.t
+++ b/t/cmd_ipc.t
@@ -82,6 +82,22 @@ my $do_test = sub { SKIP: {
 		@fds = $recv->($s2, $buf, length($src));
 		is(scalar(@fds), 0, 'no FDs received');
 		is($buf, $src, 'recv w/o FDs');
+
+		my $nr = 2 * 1024 * 1024;
+		while (1) {
+			vec(my $vec = '', $nr * 8 - 1, 1) = 1;
+			my $n = $send->($s1, [], $vec, $flag);
+			if (defined($n)) {
+				$n == length($vec) or
+					fail "short send: $n != ".length($vec);
+				diag "sent $nr, retrying with more";
+				$nr += 2 * 1024 * 1024;
+			} else {
+				ok($!{EMSGSIZE}, 'got EMSGSIZE');
+				# diag "$nr bytes hits EMSGSIZE";
+				last;
+			}
+		}
 	}
 } };
 
diff --git a/t/ipc.t b/t/ipc.t
index 22423a78..f25f2491 100644
--- a/t/ipc.t
+++ b/t/ipc.t
@@ -6,11 +6,13 @@ use v5.10.1;
 use Test::More;
 use PublicInbox::TestCommon;
 use Fcntl qw(SEEK_SET);
+use Digest::SHA qw(sha1_hex);
 require_mods(qw(Storable||Sereal));
 require_ok 'PublicInbox::IPC';
 state $once = eval <<'';
 package PublicInbox::IPC;
 use strict;
+use Digest::SHA qw(sha1_hex);
 sub test_array { qw(test array) }
 sub test_scalar { 'scalar' }
 sub test_scalarref { \'scalarref' }
@@ -24,6 +26,11 @@ sub test_write_each_fd {
 		$self->{$fd}->flush;
 	}
 }
+sub test_sha {
+	my ($self, $buf) = @_;
+	print { $self->{1} } sha1_hex($buf), "\n";
+	$self->{1}->flush;
+}
 1;
 
 my $ipc = bless {}, 'PublicInbox::IPC';
@@ -112,7 +119,7 @@ $test->('local');
 $ipc->ipc_worker_stop; # idempotent
 
 # work queues
-$ipc->wq_set_recv_modes(qw( >&= >&= >&= ));
+$ipc->wq_set_recv_modes(qw( +>&= >&= >&= ));
 pipe(my ($ra, $wa)) or BAIL_OUT $!;
 pipe(my ($rb, $wb)) or BAIL_OUT $!;
 pipe(my ($rc, $wc)) or BAIL_OUT $!;
@@ -120,6 +127,10 @@ open my $warn, '+>', undef or BAIL_OUT;
 $warn->autoflush(0);
 local $SIG{__WARN__} = sub { print $warn "PID:$$ ", @_ };
 my @ppids;
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+my $big = do { local $/; <$agpl> } // BAIL_OUT "read: $!";
+close $agpl or BAIL_OUT "close: $!";
+
 for my $t ('local', 'worker', 'worker again') {
 	$ipc->wq_do('test_write_each_fd', [ $wa, $wb, $wc ], 'hello world');
 	my $i = 0;
@@ -130,6 +141,15 @@ for my $t ('local', 'worker', 'worker again') {
 		$i++;
 	}
 	$ipc->wq_do('test_die', [ $wa, $wb, $wc ]);
+	$ipc->wq_do('test_sha', [ $wa, $wb ], 'hello world');
+	is(readline($rb), sha1_hex('hello world')."\n", "SHA small ($t)");
+	{
+		my $bigger = $big x 10;
+		$ipc->wq_do('test_sha', [ $wa, $wb ], $bigger);
+		my $exp = sha1_hex($bigger)."\n";
+		undef $bigger;
+		is(readline($rb), $exp, "SHA big ($t)");
+	}
 	my $ppid = $ipc->wq_workers_start('wq', 1);
 	push(@ppids, $ppid);
 }

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/9] ipc: get rid of wq_set_recv_modes
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
  2021-01-24 11:46 ` [PATCH 1/9] lei q: limit concurrency to 4 remote connections Eric Wong
  2021-01-24 11:46 ` [PATCH 2/9] ipc: wq supports arbitrarily large payloads Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 4/9] lei q: disable remote externals if locals exist Eric Wong
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

Just open every FD as read/write.  Perl (or any non-broken
runtime) won't care and won't attempt to use F_SETFL to alter
file description flags, since attempting to change those would
lead to unpleasant side effects if the file description is
shared with another process.
---
 lib/PublicInbox/IPC.pm        | 11 ++---------
 lib/PublicInbox/LEI.pm        |  7 +++----
 lib/PublicInbox/LeiToMail.pm  |  7 ++-----
 lib/PublicInbox/LeiXSearch.pm |  5 ++---
 script/lei                    | 11 +++++------
 t/ipc.t                       |  1 -
 6 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 592efd21..c52441f7 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -46,11 +46,6 @@ my $send_cmd = PublicInbox::Spawn->can('send_cmd4') // do {
 	PublicInbox::CmdIPC4->can('send_cmd4');
 };
 
-sub wq_set_recv_modes {
-	my ($self, @modes) = @_;
-	$self->{-wq_recv_modes} = \@modes;
-}
-
 sub _get_rec ($) {
 	my ($r) = @_;
 	defined(my $len = <$r>) or return;
@@ -251,15 +246,13 @@ sub _recv_and_run {
 	my ($self, $s2, $len, $full_stream) = @_;
 	my @fds = $recv_cmd->($s2, my $buf, $len);
 	my $n = length($buf // '') or return;
-	my @m = @{$self->{-wq_recv_modes} // [qw( +<&= >&= >&= )]};
 	my $nfd = 0;
 	for my $fd (@fds) {
-		my $mode = shift(@m);
-		if (open(my $cmdfh, $mode, $fd)) {
+		if (open(my $cmdfh, '+<&=', $fd)) {
 			$self->{$nfd++} = $cmdfh;
 			$cmdfh->autoflush(1);
 		} else {
-			die "$$ open($mode$fd) (FD:$nfd): $!";
+			die "$$ open(+<&=$fd) (FD:$nfd): $!";
 		}
 	}
 	while ($full_stream && $n < $len) {
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index a9123c6e..473a28a9 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -765,11 +765,10 @@ sub accept_dispatch { # Listener {post_accept} callback
 		return send($sock, 'timed out waiting to recv FDs', MSG_EOR);
 	my @fds = $recv_cmd->($sock, my $buf, 4096 * 33); # >MAX_ARG_STRLEN
 	if (scalar(@fds) == 4) {
-		my $i = 0;
-		for my $rdr (qw(<&= >&= >&= <&=)) {
+		for my $i (0..3) {
 			my $fd = shift(@fds);
-			open($self->{$i++}, $rdr, $fd) and next;
-			send($sock, "open($rdr$fd) (FD=$i): $!", MSG_EOR);
+			open($self->{$i}, '+<&=', $fd) and next;
+			send($sock, "open(+<&=$fd) (FD=$i): $!", MSG_EOR);
 		}
 	} else {
 		return send($sock, "recv_cmd failed: $!", MSG_EOR);
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 5f38add1..08a1570d 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -227,9 +227,7 @@ sub decompress_src ($$$) {
 
 sub dup_src ($) {
 	my ($in) = @_;
-	# fileno needed because wq_set_recv_modes only used ">&=" for {1}
-	# and Perl blindly trusts that to reject the '+' (readability flag)
-	open my $dup, '+>>&=', fileno($in) or die "dup: $!";
+	open my $dup, '+>>&', $in or die "dup: $!";
 	$dup;
 }
 
@@ -475,8 +473,7 @@ sub write_mail { # via ->wq_do
 
 sub ipc_atfork_prepare {
 	my ($self) = @_;
-	# (done_wr, stdout|mbox, stderr, 3: sock, 4: each_smsg_done_wr)
-	$self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= >&=]);
+	# FDs: (done_wr, stdout|mbox, stderr, 3: sock, 4: each_smsg_done_wr)
 	$self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 1c093a94..c396c597 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -389,9 +389,8 @@ sub ipc_atfork_prepare {
 		require PublicInbox::MboxReader;
 		require IO::Uncompress::Gunzip;
 	}
-	# (0: done_wr, 1: stdout|mbox, 2: stderr,
-	#  3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
-	$self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= +<&= <&=]);
+	# FDS: (0: done_wr, 1: stdout|mbox, 2: stderr,
+	#       3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
 	$self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
 }
 
diff --git a/script/lei b/script/lei
index 8c40bf12..006c1180 100755
--- a/script/lei
+++ b/script/lei
@@ -23,20 +23,19 @@ sub sigchld {
 
 sub exec_cmd {
 	my ($fds, $argc, @argv) = @_;
-	my @m = (*STDIN{IO}, '<&=',  *STDOUT{IO}, '>&=', *STDERR{IO}, '>&=');
+	my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO});
 	my @rdr;
 	for my $fd (@$fds) {
-		my ($old_io, $mode) = splice(@m, 0, 2);
-		open(my $tmpfh, $mode, $fd) or die "open $mode$fd: $!";
-		push @rdr, $old_io, $mode, $tmpfh;
+		open(my $tmpfh, '+<&=', $fd) or die "open +<&=$fd: $!";
+		push @rdr, shift(@old), $tmpfh;
 	}
 	require POSIX; # WNOHANG
 	$SIG{CHLD} = \&sigchld;
 	my $pid = fork // die "fork: $!";
 	if ($pid == 0) {
 		my %env = map { split(/=/, $_, 2) } splice(@argv, $argc);
-		while (my ($old_io, $mode, $tmpfh) = splice(@rdr, 0, 3)) {
-			open $old_io, $mode, $tmpfh or die "open $mode: $!";
+		while (my ($old_io, $tmpfh) = splice(@rdr, 0, 2)) {
+			open $old_io, '+<&', $tmpfh or die "open +<&=: $!";
 		}
 		%ENV = (%ENV, %env);
 		exec(@argv);
diff --git a/t/ipc.t b/t/ipc.t
index f25f2491..5801c760 100644
--- a/t/ipc.t
+++ b/t/ipc.t
@@ -119,7 +119,6 @@ $test->('local');
 $ipc->ipc_worker_stop; # idempotent
 
 # work queues
-$ipc->wq_set_recv_modes(qw( +>&= >&= >&= ));
 pipe(my ($ra, $wa)) or BAIL_OUT $!;
 pipe(my ($rb, $wb)) or BAIL_OUT $!;
 pipe(my ($rc, $wc)) or BAIL_OUT $!;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/9] lei q: disable remote externals if locals exist
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (2 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 3/9] ipc: get rid of wq_set_recv_modes Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 5/9] lei q: honor --no-local to force remote searches Eric Wong
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

--remote must now be explicitly requested if local externals
are present, since users may be offline or on expensive + metered
Internet while traveling.

In the future, --remote will probably default to
caching/memoizing all messages it fetches to increase the
usefulness of --local.
---
 lib/PublicInbox/LEI.pm      | 2 +-
 lib/PublicInbox/LeiQuery.pm | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 473a28a9..378113e8 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -83,7 +83,7 @@ sub _config_path ($) {
 our %CMD = ( # sorted in order of importance/use:
 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
-	sort|s=s reverse|r offset=i remote local! external! pretty mua-cmd=s
+	sort|s=s reverse|r offset=i remote! local! external! pretty mua-cmd=s
 	torsocks=s no-torsocks verbose|v since|after=s until|before=s),
 	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
 
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index a7938e8b..7713902b 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -24,7 +24,9 @@ sub lei_q {
 	# --external is enabled by default, but allow --no-external
 	if ($opt->{external} //= 1) {
 		my $cb = $lxs->can('prepare_external');
-		$self->_externals_each($cb, $lxs);
+		my $ne = $self->_externals_each($cb, $lxs);
+		$opt->{remote} //= $ne == $lxs->remotes;
+		delete($lxs->{remotes}) if !$opt->{remote};
 	}
 	my $xj = $lxs->concurrency($opt);
 	my $ovv = PublicInbox::LeiOverview->new($self) or return;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 5/9] lei q: honor --no-local to force remote searches
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (3 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 4/9] lei q: disable remote externals if locals exist Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 12:31   ` exit codes [was: [PATCH 5/9] lei q: honor --no-local to force remote searches] Eric Wong
  2021-01-24 11:46 ` [PATCH 6/9] lei_xsearch: use curl -d '' for nginx compatibility Eric Wong
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

This can be useful for testing remote behavior, or for
augmenting local results.  It'll also be possible to explicitly
include/exclude externals via CLI switches (once names are
decided).
---
 lib/PublicInbox/LeiQuery.pm   | 9 ++++++++-
 lib/PublicInbox/LeiXSearch.pm | 2 +-
 t/lei.t                       | 3 +++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 7713902b..953d1fc2 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -26,7 +26,14 @@ sub lei_q {
 		my $cb = $lxs->can('prepare_external');
 		my $ne = $self->_externals_each($cb, $lxs);
 		$opt->{remote} //= $ne == $lxs->remotes;
-		delete($lxs->{remotes}) if !$opt->{remote};
+		if ($opt->{'local'}) {
+			delete($lxs->{remotes}) if !$opt->{remote};
+		} else {
+			delete($lxs->{locals});
+		}
+	}
+	unless ($lxs->locals || $lxs->remotes) {
+		return $self->fail('no local or remote inboxes to search');
 	}
 	my $xj = $lxs->concurrency($opt);
 	my $ovv = PublicInbox::LeiOverview->new($self) or return;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index c396c597..0417db24 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -311,7 +311,7 @@ sub start_query { # always runs in main (lei-daemon) process
 		for my $ibxish (locals($self)) {
 			$self->wq_do('query_thread_mset', $io, $lei, $ibxish);
 		}
-	} else {
+	} elsif (locals($self)) {
 		$self->wq_do('query_mset', $io, $lei);
 	}
 	my $i = 0;
diff --git a/t/lei.t b/t/lei.t
index 60ca75c5..3fd1d1fe 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -277,6 +277,9 @@ my $test_external = sub {
 	}
 	ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
 			'fails if mbox format unspecified');
+	ok(!$lei->(qw(q --no-local s:see)), '--no-local');
+	is($? >> 8, 1, 'proper exit code');
+	like($err, qr/no local or remote.+? to search/, 'no inbox');
 	my %e = (
 		TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/',
 		TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))],

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 6/9] lei_xsearch: use curl -d '' for nginx compatibility
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (4 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 5/9] lei q: honor --no-local to force remote searches Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 7/9] lei q: fix JSON overview with remote externals Eric Wong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta; +Cc: Kyle Meyer

It appears Content-Length and/or Content-Type headers are
required by nginx with POST requests.

varnish alone doesn't have this requirement and my (perhaps
lossy) reading of RFC 2616, 7230, 7231 didn't note this, either.

In any case, we must support nginx even if it's overly strict.

Reported-By: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87v9bmswkh.fsf@kyleam.com/
---
 lib/PublicInbox/LeiXSearch.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 0417db24..c6ff5679 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -191,7 +191,7 @@ sub query_remote_mboxrd {
 	push(@qform, t => 1) if $opt->{thread};
 	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	$dedupe->prepare_dedupe;
-	my @cmd = qw(curl -XPOST -sSf);
+	my @cmd = (qw(curl -sSf -d), '');
 	my $verbose = $opt->{verbose};
 	push @cmd, '-v' if $verbose;
 	for my $o ($lei->curl_opt) {

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 7/9] lei q: fix JSON overview with remote externals
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (5 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 6/9] lei_xsearch: use curl -d '' for nginx compatibility Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 12:37   ` Eric Wong
  2021-01-24 11:46 ` [PATCH 8/9] smsg: make parse_references an object method Eric Wong
  2021-01-24 11:46 ` [PATCH 9/9] smsg: parse_references: micro-optimization Eric Wong
  8 siblings, 1 reply; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

We can't (and don't need to) repeatedly get the $each_smsg
callback for each URI since that clobbers {ovv_buf} before
it can be output.

I initially thought this was a dedupe-related bug and
moved the dedupe code into the $each_smsg callback to
minimize differences.  Nevertheless it's a nice code
reduction.

I also thought it was related to incomplete smsg info,
so {references} is now filled in correctly for dedupe.
---
 lib/PublicInbox/LeiOverview.pm |  2 +-
 lib/PublicInbox/LeiXSearch.pm  | 15 +++++----------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 49538a60..928d66cb 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -209,7 +209,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$json->ascii(1) if $lei->{opt}->{ascii};
 	}
 	my $l2m = $lei->{l2m};
-	if ($l2m && $ibxish->can('scheme')) { # remote https?:// mboxrd
+	if ($l2m && !$ibxish) { # remote https?:// mboxrd
 		delete $l2m->{-wq_s1};
 		my $g2m = $l2m->can('git_to_mail');
 		my $wcb = $l2m->write_cb($lei);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index c6ff5679..2bedf000 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -15,6 +15,7 @@ use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
 use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::MID qw(mids);
 
 sub new {
 	my ($class) = @_;
@@ -120,8 +121,6 @@ sub query_thread_mset { # for --thread
 	my $mo = { %{$lei->{mset_opt}} };
 	my $mset;
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish);
-	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
-	$dedupe->prepare_dedupe;
 	do {
 		$mset = $srch->mset($mo->{qstr}, $mo);
 		my $ids = $srch->mset_to_artnums($mset, $mo);
@@ -132,7 +131,6 @@ sub query_thread_mset { # for --thread
 			for my $n (@{$ctx->{xids}}) {
 				my $smsg = $over->get_art($n) or next;
 				wait_startq($startq) if $startq;
-				next if $dedupe->is_smsg_dup($smsg);
 				my $mitem = delete $n2item{$smsg->{num}};
 				$each_smsg->($smsg, $mitem);
 			}
@@ -155,14 +153,11 @@ sub query_mset { # non-parallel for non-"--thread" users
 		attach_external($self, $loc);
 	}
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
-	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
-	$dedupe->prepare_dedupe;
 	do {
 		$mset = $self->mset($mo->{qstr}, $mo);
 		for my $mitem ($mset->items) {
 			my $smsg = smsg_for($self, $mitem) or next;
 			wait_startq($startq) if $startq;
-			next if $dedupe->is_smsg_dup($smsg);
 			$each_smsg->($smsg, $mitem);
 		}
 	} while (_mset_more($mset, $mo));
@@ -174,10 +169,10 @@ sub each_eml { # callback for MboxReader->mboxrd
 	my ($eml, $self, $lei, $each_smsg) = @_;
 	my $smsg = bless {}, 'PublicInbox::Smsg';
 	$smsg->populate($eml);
+	PublicInbox::OverIdx::parse_references($smsg, $eml, mids($eml));
 	$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
 	delete @$smsg{qw(From Subject -ds -ts)};
 	if (my $startq = delete($self->{5})) { wait_startq($startq) }
-	return if !$lei->{l2m} && $lei->{dedupe}->is_smsg_dup($smsg);
 	$each_smsg->($smsg, undef, $eml);
 }
 
@@ -189,8 +184,6 @@ sub query_remote_mboxrd {
 	my ($opt, $env) = @$lei{qw(opt env)};
 	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
 	push(@qform, t => 1) if $opt->{thread};
-	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
-	$dedupe->prepare_dedupe;
 	my @cmd = (qw(curl -sSf -d), '');
 	my $verbose = $opt->{verbose};
 	push @cmd, '-v' if $verbose;
@@ -208,9 +201,9 @@ sub query_remote_mboxrd {
 	}
 	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
 	my $tor = $opt->{torsocks} //= 'auto';
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uris->[0]);
 	for my $uri (@$uris) {
 		$uri->query_form(@qform);
-		my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
 		my $cmd = [ @cmd, $uri->as_string ];
 		if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
 				(($env->{LD_PRELOAD}//'') !~ /torsocks/)) {
@@ -236,6 +229,7 @@ sub query_remote_mboxrd {
 			$lei->child_error($?);
 		}
 	}
+	undef $each_smsg;
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
@@ -387,6 +381,7 @@ sub ipc_atfork_prepare {
 	my ($self) = @_;
 	if (exists $self->{remotes}) {
 		require PublicInbox::MboxReader;
+		require PublicInbox::OverIdx; # parse_references
 		require IO::Uncompress::Gunzip;
 	}
 	# FDS: (0: done_wr, 1: stdout|mbox, 2: stderr,

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 8/9] smsg: make parse_references an object method
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (6 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 7/9] lei q: fix JSON overview with remote externals Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  2021-01-24 11:46 ` [PATCH 9/9] smsg: parse_references: micro-optimization Eric Wong
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

Having parse_references in OverIdx was awkward and Smsg is
a better place for it.
---
 lib/PublicInbox/LeiXSearch.pm |  3 +--
 lib/PublicInbox/OverIdx.pm    | 22 +---------------------
 lib/PublicInbox/SearchIdx.pm  |  2 +-
 lib/PublicInbox/Smsg.pm       | 22 +++++++++++++++++++++-
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2bedf000..841257c1 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -169,7 +169,7 @@ sub each_eml { # callback for MboxReader->mboxrd
 	my ($eml, $self, $lei, $each_smsg) = @_;
 	my $smsg = bless {}, 'PublicInbox::Smsg';
 	$smsg->populate($eml);
-	PublicInbox::OverIdx::parse_references($smsg, $eml, mids($eml));
+	$smsg->parse_references($eml, mids($eml));
 	$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
 	delete @$smsg{qw(From Subject -ds -ts)};
 	if (my $startq = delete($self->{5})) { wait_startq($startq) }
@@ -381,7 +381,6 @@ sub ipc_atfork_prepare {
 	my ($self) = @_;
 	if (exists $self->{remotes}) {
 		require PublicInbox::MboxReader;
-		require PublicInbox::OverIdx; # parse_references
 		require IO::Uncompress::Gunzip;
 	}
 	# FDS: (0: done_wr, 1: stdout|mbox, 2: stderr,
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index e606dcf5..985c5473 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -243,26 +243,6 @@ sub link_refs {
 	$tid;
 }
 
-sub parse_references ($$$) {
-	my ($smsg, $hdr, $mids) = @_;
-	my $refs = references($hdr);
-	push(@$refs, @$mids) if scalar(@$mids) > 1;
-	return $refs if scalar(@$refs) == 0;
-
-	# prevent circular references here:
-	my %seen = ( $smsg->{mid} => 1 );
-	my @keep;
-	foreach my $ref (@$refs) {
-		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
-			warn "References: <$ref> too long, ignoring\n";
-			next;
-		}
-		push(@keep, $ref) unless $seen{$ref}++;
-	}
-	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
-	\@keep;
-}
-
 # normalize subjects so they are suitable as pathnames for URLs
 # XXX: consider for removal
 sub subject_path ($) {
@@ -283,7 +263,7 @@ sub add_overview {
 	my ($self, $eml, $smsg) = @_;
 	$smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
 	my $mids = mids_for_index($eml);
-	my $refs = parse_references($smsg, $eml, $mids);
+	my $refs = $smsg->parse_references($eml, $mids);
 	$mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid};
 	$smsg->{mid} //= '';
 	my $subj = $smsg->{subject};
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 7f7b980d..826302de 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -380,7 +380,7 @@ sub eml2doc ($$$;$) {
 	if (!$self->{-skip_docdata}) {
 		# WWW doesn't need {to} or {cc}, only NNTP
 		$smsg->{to} = $smsg->{cc} = '';
-		PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
+		$smsg->parse_references($eml, $mids);
 		my $data = $smsg->to_doc_data;
 		$doc->set_data($data);
 	}
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index c6ff7f52..2b72e8b5 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -12,7 +12,7 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 
@@ -69,6 +69,26 @@ sub psgi_cull ($) {
 	$self;
 }
 
+sub parse_references ($$$) {
+	my ($smsg, $hdr, $mids) = @_;
+	my $refs = references($hdr);
+	push(@$refs, @$mids) if scalar(@$mids) > 1;
+	return $refs if scalar(@$refs) == 0;
+
+	# prevent circular references here:
+	my %seen = ( $smsg->{mid} => 1 );
+	my @keep;
+	foreach my $ref (@$refs) {
+		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+			warn "References: <$ref> too long, ignoring\n";
+			next;
+		}
+		push(@keep, $ref) unless $seen{$ref}++;
+	}
+	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+	\@keep;
+}
+
 # used for v2, Import and v1 non-SQLite WWW code paths
 sub populate {
 	my ($self, $hdr, $sync) = @_;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 9/9] smsg: parse_references: micro-optimization
  2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
                   ` (7 preceding siblings ...)
  2021-01-24 11:46 ` [PATCH 8/9] smsg: make parse_references an object method Eric Wong
@ 2021-01-24 11:46 ` Eric Wong
  8 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 11:46 UTC (permalink / raw)
  To: meta

With Perl 5.10+, we can rely on the defined-or-assignment (//=)
operator to avoid repeatedly rewriting an SV.

This may not provide a measurable difference here, but
it's more consistent with current style where things like
commit a05445fb400108e60ede7d377cf3b26a0392eb24
("config: config_fh_parse: micro-optimize") provide a measurable
improvement.
---
 lib/PublicInbox/Smsg.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 2b72e8b5..b4cc2ecb 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -83,7 +83,7 @@ sub parse_references ($$$) {
 			warn "References: <$ref> too long, ignoring\n";
 			next;
 		}
-		push(@keep, $ref) unless $seen{$ref}++;
+		$seen{$ref} //= push(@keep, $ref);
 	}
 	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
 	\@keep;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* exit codes [was: [PATCH 5/9] lei q: honor --no-local to force remote searches]
  2021-01-24 11:46 ` [PATCH 5/9] lei q: honor --no-local to force remote searches Eric Wong
@ 2021-01-24 12:31   ` Eric Wong
  0 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 12:31 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> --- a/t/lei.t
> +++ b/t/lei.t
> @@ -277,6 +277,9 @@ my $test_external = sub {
>  	}
>  	ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
>  			'fails if mbox format unspecified');
> +	ok(!$lei->(qw(q --no-local s:see)), '--no-local');
> +	is($? >> 8, 1, 'proper exit code');

I'm wondering if BSD EX_* constants from sysexits.h make sense
for lei and if users will care, but keep in mind git doesn't use
them.  But they may conflict with curl(1) exit codes which we
propagate back to the user.

public-inbox-mda uses those codes because MTAs understand them;
but I'm not sure if lei will be invoked by MTAs via procmail
and whatnot...

sysexits.ph is distributed with Debian and FreeBSD Perl,
but not CentOS7.  However, the codes are stable across
architectures and OSes (AFAIK), unlike syscall numbers.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/9] lei q: fix JSON overview with remote externals
  2021-01-24 11:46 ` [PATCH 7/9] lei q: fix JSON overview with remote externals Eric Wong
@ 2021-01-24 12:37   ` Eric Wong
  0 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2021-01-24 12:37 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> --- a/lib/PublicInbox/LeiOverview.pm
> +++ b/lib/PublicInbox/LeiOverview.pm
> @@ -209,7 +209,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
>  		$json->ascii(1) if $lei->{opt}->{ascii};
>  	}
>  	my $l2m = $lei->{l2m};
> -	if ($l2m && $ibxish->can('scheme')) { # remote https?:// mboxrd
> +	if ($l2m && !$ibxish) { # remote https?:// mboxrd

I made this change last, thus necessitating changes to callers:

> --- a/lib/PublicInbox/LeiXSearch.pm
> +++ b/lib/PublicInbox/LeiXSearch.pm

> @@ -208,9 +201,9 @@ sub query_remote_mboxrd {
>  	}
>  	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
>  	my $tor = $opt->{torsocks} //= 'auto';
> +	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uris->[0]);

Will squash this in, otherwise --remote --no-local won't work:

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 841257c1..fb608d00 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -201,7 +201,7 @@ sub query_remote_mboxrd {
 	}
 	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
 	my $tor = $opt->{torsocks} //= 'auto';
-	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uris->[0]);
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
 	for my $uri (@$uris) {
 		$uri->query_form(@qform);
 		my $cmd = [ @cmd, $uri->as_string ];

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-01-24 12:37 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
2021-01-24 11:46 ` [PATCH 1/9] lei q: limit concurrency to 4 remote connections Eric Wong
2021-01-24 11:46 ` [PATCH 2/9] ipc: wq supports arbitrarily large payloads Eric Wong
2021-01-24 11:46 ` [PATCH 3/9] ipc: get rid of wq_set_recv_modes Eric Wong
2021-01-24 11:46 ` [PATCH 4/9] lei q: disable remote externals if locals exist Eric Wong
2021-01-24 11:46 ` [PATCH 5/9] lei q: honor --no-local to force remote searches Eric Wong
2021-01-24 12:31   ` exit codes [was: [PATCH 5/9] lei q: honor --no-local to force remote searches] Eric Wong
2021-01-24 11:46 ` [PATCH 6/9] lei_xsearch: use curl -d '' for nginx compatibility Eric Wong
2021-01-24 11:46 ` [PATCH 7/9] lei q: fix JSON overview with remote externals Eric Wong
2021-01-24 12:37   ` Eric Wong
2021-01-24 11:46 ` [PATCH 8/9] smsg: make parse_references an object method Eric Wong
2021-01-24 11:46 ` [PATCH 9/9] smsg: parse_references: micro-optimization Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).