user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/5] lei q: demangle and quiet curl output
Date: Sun, 24 Jan 2021 17:18:55 -0800	[thread overview]
Message-ID: <20210125011857.563-4-e@80x24.org> (raw)
In-Reply-To: <20210125011857.563-1-e@80x24.org>

curl(1) writes to stderr one byte at a time (presumably for the
progress bar).  The result is unreadable on my terminal when
parallel processes are writing error messages at the same time.

So instead, we'll capture curl's stderr to a temporary file and
run 'tail -f' on it if --verbose is enabled.

Since HTTP 404s from non-existent results are a common response,
we'll ignore them and stay silent, matching the behavior of local
searches.
---
 lib/PublicInbox/LeiXSearch.pm | 45 ++++++++++++++++++++++++++---------
 t/lei.t                       |  2 +-
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index fb608d00..68be8ada 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -14,8 +14,9 @@ use PublicInbox::Import;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spawn qw(popen_rd spawn);
 use PublicInbox::MID qw(mids);
+use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR);
 
 sub new {
 	my ($class) = @_;
@@ -176,6 +177,13 @@ sub each_eml { # callback for MboxReader->mboxrd
 	$each_smsg->($smsg, undef, $eml);
 }
 
+# PublicInbox::OnDestroy callback
+sub kill_reap {
+	my ($pid) = @_;
+	kill('KILL', $pid); # spawn() blocks other signals
+	waitpid($pid, 0);
+}
+
 sub query_remote_mboxrd {
 	my ($self, $lei, $uris) = @_;
 	local $0 = "$0 query_remote_mboxrd";
@@ -186,7 +194,20 @@ sub query_remote_mboxrd {
 	push(@qform, t => 1) if $opt->{thread};
 	my @cmd = (qw(curl -sSf -d), '');
 	my $verbose = $opt->{verbose};
-	push @cmd, '-v' if $verbose;
+	my $reap;
+	my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
+	fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!";
+	my $rdr = { 2 => $cerr };
+	my $coff = 0;
+	if ($verbose) {
+		# spawn a process to force line-buffering, otherwise curl
+		# will write 1 character at-a-time and parallel outputs
+		# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
+		push @cmd, '-v';
+		my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
+		my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
+		$reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid);
+	}
 	for my $o ($lei->curl_opt) {
 		$o =~ s/\|[a-z0-9]\b//i; # remove single char short option
 		if ($o =~ s/=[is]@\z//) {
@@ -213,21 +234,23 @@ sub query_remote_mboxrd {
 		}
 		$lei->err("# @$cmd") if $verbose;
 		$? = 0;
-		my $fh = popen_rd($cmd, $env, { 2 => $lei->{2} });
+		my $fh = popen_rd($cmd, $env, $rdr);
 		$fh = IO::Uncompress::Gunzip->new($fh);
 		eval {
 			PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
 							$lei, $each_smsg);
 		};
 		return $lei->fail("E: @$cmd: $@") if $@;
-		if (($? >> 8) == 22) { # HTTP 404 from curl(1)
-			$uri->query_form(q => $lei->{mset_opt}->{qstr});
-			$lei->err('# no results from '.$uri->as_string);
-		} elsif ($?) {
-			$uri->query_form(q => $lei->{mset_opt}->{qstr});
-			$lei->err('E: '.$uri->as_string);
-			$lei->child_error($?);
-		}
+		next unless $?;
+		seek($cerr, $coff, SEEK_SET) or warn "seek(curl stderr): $!\n";
+		my $e = do { local $/; <$cerr> } //
+				die "read(curl stderr): $!\n";
+		$coff += length($e);
+		next if (($? >> 8) == 22 && $e =~ /\b404\b/);
+		$lei->child_error($?);
+		$uri->query_form(q => $lei->{mset_opt}->{qstr});
+		# --verbose already showed the error via tail(1)
+		$lei->err("E: $uri \$?=$?\n", $verbose ? () : $e);
 	}
 	undef $each_smsg;
 	$lei->{ovv}->ovv_atexit_child($lei);
diff --git a/t/lei.t b/t/lei.t
index f826a966..69338257 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -179,7 +179,7 @@ SKIP: {
 	my $res = $json->decode($out);
 	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
 	ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
-	like($err, qr/404/, 'noted 404');
+	is($err, '', 'no output on 404, matching local FS behavior');
 	is($out, "[null]\n", 'got null results');
 	$lei->('forget-external', $url);
 } # /SKIP

  parent reply	other threads:[~2021-01-25  1:18 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-25  1:18 [PATCH 0/5] lei: more fixes and usability enhancement Eric Wong
2021-01-25  1:18 ` [PATCH 1/5] lei: reinstate JSON smsg output deduplication Eric Wong
2021-01-25  1:18 ` [PATCH 2/5] lei q: drop "oid" output format Eric Wong
2021-01-25  1:18 ` Eric Wong [this message]
2021-01-25  1:18 ` [PATCH 4/5] lei q: reject remotes early if curl(1) is missing Eric Wong
2021-01-25  1:18 ` [PATCH 5/5] lei q: continue remote search if torsocks(1) " Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210125011857.563-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).