user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 05/11] lei: propagate curl errors, improve internal consistency
Date: Tue,  2 Feb 2021 22:11:37 -1000	[thread overview]
Message-ID: <20210203081143.24424-6-e@80x24.org> (raw)
In-Reply-To: <20210203081143.24424-1-e@80x24.org>

IO::Uncompress::Gunzip seems to be losing $? when closing
PublicInbox::ProcessPipe.  To workaround this, do a synchronous
waitpid ourselves to force proper $? reporting update tests to
use the new --only feature for testing invalid URLs.

This improves internal code consistency by having {pkt_op}
parse the same ASCII-only protocol script/lei understands.

We no longer pass {sock} to worker processes at all,
further reducing FD pressure on per-user limits.
---
 lib/PublicInbox/LEI.pm         | 15 ++++++++-------
 lib/PublicInbox/LeiOverview.pm |  2 --
 lib/PublicInbox/LeiXSearch.pm  | 16 +++++++---------
 lib/PublicInbox/PktOp.pm       | 15 +++++++++++----
 t/lei.t                        | 29 ++++++++++++++++-------------
 5 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 9b4d4e0b..05a39cad 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -285,8 +285,8 @@ sub x_it ($$) {
 	# make sure client sees stdout before exit
 	$self->{1}->autoflush(1) if $self->{1};
 	dump_and_clear_log();
-	if (my $sock = $self->{sock}) {
-		send($sock, "x_it $code", MSG_EOR);
+	if (my $s = $self->{pkt_op} // $self->{sock}) {
+		send($s, "x_it $code", MSG_EOR);
 	} elsif ($self->{oneshot}) {
 		# don't want to end up using $? from child processes
 		for my $f (qw(lxs l2m)) {
@@ -339,9 +339,10 @@ sub puts ($;@) { out(shift, map { "$_\n" } @_) }
 
 sub child_error { # passes non-fatal curl exit codes to user
 	my ($self, $child_error) = @_; # child_error is $?
-	if (my $sock = $self->{sock}) { # send to lei(1) client
-		send($sock, "child_error $child_error", MSG_EOR);
-	} elsif ($self->{oneshot}) {
+	if (my $s = $self->{pkt_op} // $self->{sock}) {
+		# send to the parent lei-daemon or to lei(1) client
+		send($s, "child_error $child_error", MSG_EOR);
+	} elsif (!$PublicInbox::DS::in_loop) {
 		$self->{child_error} = $child_error;
 	} # else noop if client disconnected
 }
@@ -420,9 +421,9 @@ sub atfork_parent_wq {
 		$lei->{$f} = $wq->deep_clone($tmp);
 	}
 	$self->{env} = $env;
-	delete @$lei{qw(3 -lei_store cfg old_1 pgr lxs)}; # keep l2m
+	delete @$lei{qw(sock 3 -lei_store cfg old_1 pgr lxs)}; # keep l2m
 	my @io = (delete(@$lei{qw(0 1 2)}),
-			io_extract($lei, qw(sock pkt_op startq)));
+			io_extract($lei, qw(pkt_op startq)));
 	my $l2m = $lei->{l2m};
 	if ($l2m && $l2m != $wq) { # $wq == lxs
 		if (my $wq_s1 = $l2m->{-wq_s1}) {
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 366af8b2..88034ada 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -216,9 +216,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 			$wcb->(undef, $smsg, $eml);
 		};
 	} elsif ($l2m && $l2m->{-wq_s1}) {
-		my $sock = delete $lei->{sock}; # lei2mail doesn't need it
 		my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
-		$lei->{sock} = $sock if $sock;
 		# $io[0] becomes a notification pipe that triggers EOF
 		# in this wq worker when all outstanding ->write_mail
 		# calls are complete
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 23a9c020..d33064bb 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -113,8 +113,7 @@ sub mset_progress {
 	if ($lei->{pkt_op}) { # called via pkt_op/pkt_do from workers
 		pkt_do($lei->{pkt_op}, 'mset_progress', @_);
 	} else { # single lei-daemon consumer
-		my @args = ref($_[-1]) eq 'ARRAY' ? @{$_[-1]} : @_;
-		my ($desc, $mset_size, $mset_total_est) = @args;
+		my ($desc, $mset_size, $mset_total_est) = @_;
 		$lei->{-mset_total} += $mset_size;
 		$lei->err("# $desc $mset_size/$mset_total_est");
 	}
@@ -264,14 +263,11 @@ sub query_remote_mboxrd {
 		shift(@$cmd) if !$cmd->[0];
 
 		$lei->err("# @$cmd") if $verbose;
-		$? = 0;
-		my $fh = popen_rd($cmd, $env, $rdr);
+		my ($fh, $pid) = popen_rd($cmd, $env, $rdr);
 		$fh = IO::Uncompress::Gunzip->new($fh);
-		eval {
-			PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
-							$lei, $each_smsg);
-		};
-		return $lei->fail("E: @$cmd: $@") if $@;
+		PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
+						$lei, $each_smsg);
+		waitpid($pid, 0) == $pid or die "BUG: waitpid (curl): $!";
 		if ($? == 0) {
 			my $nr = $lei->{-nr_remote_eml};
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
@@ -420,6 +416,8 @@ sub do_query {
 		'.' => [ \&do_post_augment, $lei, $zpipe, $au_done ],
 		'' => [ \&query_done, $lei ],
 		'mset_progress' => [ \&mset_progress, $lei ],
+		'x_it' => [ $lei->can('x_it'), $lei ],
+		'child_error' => [ $lei->can('child_error'), $lei ],
 	};
 	(my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops);
 	my ($lei_ipc, @io) = $lei->atfork_parent_wq($self);
diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm
index 40c7262a..10d76da0 100644
--- a/lib/PublicInbox/PktOp.pm
+++ b/lib/PublicInbox/PktOp.pm
@@ -4,8 +4,7 @@
 # op dispatch socket, reads a message, runs a sub
 # There may be multiple producers, but (for now) only one consumer
 # Used for lei_xsearch and maybe other things
-# "literal" => [ sub, @operands ]
-# /regexp/ => [ sub, @operands ]
+# "command" => [ $sub, @fixed_operands ]
 package PublicInbox::PktOp;
 use strict;
 use v5.10.1;
@@ -57,11 +56,19 @@ sub event_step {
 			$self->close;
 			die "recv: $!";
 		}
-		my ($cmd, $pargs) = split(/\0/, $msg, 2);
+		my ($cmd, @pargs);
+		if (index($msg, "\0") > 0) {
+			($cmd, my $pargs) = split(/\0/, $msg, 2);
+			@pargs = @{ipc_thaw($pargs)};
+		} else {
+			# for compatibility with the script/lei in client mode,
+			# it doesn't load Sereal||Storable for startup speed
+			($cmd, @pargs) = split(/ /, $msg);
+		}
 		my $op = $self->{ops}->{$cmd //= $msg};
 		die "BUG: unknown message: `$cmd'" unless $op;
 		my ($sub, @args) = @$op;
-		$sub->(@args, $pargs ? ipc_thaw($pargs) : ());
+		$sub->(@args, @pargs);
 		return $self->close if $msg eq ''; # close on EOF
 	} while (1);
 }
diff --git a/t/lei.t b/t/lei.t
index 33f47ae4..461669a8 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -14,6 +14,7 @@ require_mods(qw(json DBD::SQLite Search::Xapian));
 my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
 my ($home, $for_destroy) = tmpdir();
 my $err_filter;
+my $curl = which('curl');
 my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
 	http://czquwvybam4bgbro.onion/meta/
 	http://ou63pmih66umazou.onion/meta/);
@@ -39,7 +40,7 @@ local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
 local $ENV{HOME} = $home;
 local $ENV{FOO} = 'BAR';
 mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
-my $home_trash = [ "$home/.local", "$home/.config" ];
+my $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ];
 my $cleanup = sub { rmtree([@$home_trash, @_]) };
 my $config_file = "$home/.config/lei/config";
 my $store_dir = "$home/.local/share/lei";
@@ -162,26 +163,19 @@ my $setup_publicinboxes = sub {
 my $test_external_remote = sub {
 	my ($url, $k) = @_;
 SKIP: {
-	my $nr = 4;
+	my $nr = 5;
 	skip "$k unset", $nr if !$url;
-	which('curl') or skip 'no curl', $nr;
+	$curl or skip 'no curl', $nr;
 	which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!;
-	$lei->('ls-external');
-	for my $e (split(/^/ms, $out)) {
-		$e =~ s/\s+boost.*//s;
-		$lei->('forget-external', '-q', $e) or
-			fail "error forgetting $e: $err"
-	}
-	$lei->('add-external', $url);
 	my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
-	ok($lei->('q', '-q', "m:$mid"), "query $url");
+	my @cmd = ('q', '--only', $url, '-q', "m:$mid");
+	ok($lei->(@cmd), "query $url");
 	is($err, '', "no errors on $url");
 	my $res = $json->decode($out);
 	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
-	ok($lei->('q', '-q', "m:$mid", 'd:..20101002'), 'no results, no error');
+	ok($lei->(@cmd, 'd:..20101002'), 'no results, no error');
 	is($err, '', 'no output on 404, matching local FS behavior');
 	is($out, "[null]\n", 'got null results');
-	$lei->('forget-external', $url);
 } # /SKIP
 }; # /sub
 
@@ -355,12 +349,21 @@ my $test_completion = sub {
 	}
 };
 
+my $test_fail = sub {
+	$lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m));
+	is($? >> 8, 3, 'got curl exit for bogus URL');
+	$lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m -o), "$home/junk");
+	is($? >> 8, 3, 'got curl exit for bogus URL with Maildir');
+	is($out, '', 'no output');
+};
+
 my $test_lei_common = sub {
 	$test_help->();
 	$test_config->();
 	$test_init->();
 	$test_external->();
 	$test_completion->();
+	$test_fail->();
 };
 
 if ($ENV{TEST_LEI_ONESHOT}) {

  parent reply	other threads:[~2021-02-03  8:11 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-03  8:11 [PATCH 00/11] lei q --stdin, shortcut names, etc Eric Wong
2021-02-03  8:11 ` [PATCH 01/11] lei: reduce FD pressure from lei2mail worker Eric Wong
2021-02-03  8:11 ` [PATCH 02/11] lei: further reduce lei2mail FD pressure Eric Wong
2021-02-03  8:11 ` [PATCH 03/11] pkt_op: rely on DS::in_loop global Eric Wong
2021-02-03  8:11 ` [PATCH 04/11] lei: err: avoid uninitialized variable warnings Eric Wong
2021-02-03  8:11 ` Eric Wong [this message]
2021-02-03  8:11 ` [PATCH 06/11] lei q: -I/--exclude/--only support globs and basenames Eric Wong
2021-02-03  8:11 ` [PATCH 07/11] lei: complete basenames for include|exclude|only Eric Wong
2021-02-03  8:11 ` [PATCH 08/11] lei: help starts pager Eric Wong
2021-02-03  8:11 ` [PATCH 09/11] lei add-external: completion for existing URL basenames Eric Wong
2021-02-03  8:11 ` [PATCH 10/11] lei: use sleep(1) loop for infinite sleep Eric Wong
2021-02-03  8:11 ` [PATCH 11/11] lei q: support reading queries from stdin Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210203081143.24424-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).