user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/18] treewide: update read_all to avoid eof|close checks
Date: Mon, 13 Nov 2023 13:15:41 +0000	[thread overview]
Message-ID: <20231113131551.843230-9-e@80x24.org> (raw)
In-Reply-To: <20231113131551.843230-1-e@80x24.org>

read_all can be expanded to support FIFOs/pipes/sockets where
read-until-EOF behavior is desired.  We can also rely on
wantarray to support splitting on EOL markers, but it's
hard-coded to support only `$/ eq "\n"' since (AFAIK)
it's the only way we use the wantarray form `readline'.
---
 lib/PublicInbox/CodeSearchIdx.pm |  3 +--
 lib/PublicInbox/Gcf2.pm          |  3 +--
 lib/PublicInbox/IO.pm            | 18 +++++++++++++-----
 lib/PublicInbox/LeiInput.pm      | 10 ++++------
 lib/PublicInbox/LeiMirror.pm     | 10 ++++------
 lib/PublicInbox/LeiToMail.pm     |  3 +--
 lib/PublicInbox/Spawn.pm         |  4 +---
 lib/PublicInbox/TestCommon.pm    |  6 +++---
 script/public-inbox-learn        |  2 +-
 script/public-inbox-mda          |  2 +-
 script/public-inbox-purge        |  2 +-
 11 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index ffd443e1..3601176c 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -627,8 +627,7 @@ sub index_repo { # run_git cb
 	my $repo = delete $git->{-repo} or return index_next($self);
 	my $roots_fh = delete $repo->{roots_fh} // die 'BUG: no {roots_fh}';
 	seek($roots_fh, 0, SEEK_SET);
-	chomp(my @roots = <$roots_fh>);
-	$roots_fh = eof($roots_fh) | close $roots_fh; # detect readline errors
+	chomp(my @roots = PublicInbox::IO::read_all $roots_fh);
 	if (!@roots) {
 		warn("E: $git->{git_dir} has no root commits\n");
 		return index_next($self);
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 5490049d..e0219b55 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -81,8 +81,7 @@ sub add_alt ($$) {
 	#
 	# See https://bugs.debian.org/975607
 	if (open(my $fh, '<', "$objdir/info/alternates")) {
-		chomp(my @abs_alt = grep(m!^/!, <$fh>));
-		$fh = eof($fh) | close $fh; # detect readline errors
+		chomp(my @abs_alt = grep m!^/!, PublicInbox::IO::read_all $fh);
 		$gcf2->add_alternate($_) for @abs_alt;
 	}
 	$gcf2->add_alternate($objdir);
diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm
index 0d303500..11ce9be1 100644
--- a/lib/PublicInbox/IO.pm
+++ b/lib/PublicInbox/IO.pm
@@ -62,13 +62,21 @@ sub poll_in ($;$) {
 	IO::Poll::_poll($_[1] // -1, fileno($_[0]), my $ev = POLLIN);
 }
 
-sub read_all ($;$$) {
+sub read_all ($;$$$) { # pass $len=0 to read until EOF for :utf8 handles
 	use autodie qw(read);
-	my ($io, $len, $bref) = @_;
+	my ($io, $len, $bref, $off) = @_;
 	$bref //= \(my $buf);
-	my $r = read($io, $$bref, $len //= -s $io);
-	croak("read($io) ($r != $len)") if $len != $r;
-	$$bref;
+	$off //= 0;
+	my $r = 0;
+	if (my $left = $len //= -s $io) { # known size (binmode :raw/:unix)
+		do { # retry for binmode :unix
+			$r = read($io, $$bref, $left, $off += $r) or croak(
+				"read($io) premature EOF ($left/$len remain)");
+		} while ($left -= $r);
+	} else { # read until EOF
+		while (($r = read($io, $$bref, 65536, $off += $r))) {}
+	}
+	wantarray ? split(/^/sm, $$bref) : $$bref
 }
 
 sub try_cat ($) {
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 68c3c459..daba9a8e 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -79,12 +79,10 @@ sub input_net_cb { # imap_each, nntp_each cb
 sub input_fh {
 	my ($self, $ifmt, $fh, $name, @args) = @_;
 	if ($ifmt eq 'eml') {
-		my $buf = do { local $/; <$fh> };
-		my $ok = defined($buf) ? 1 : 0;
-		++$ok if eof($fh);
-		++$ok if $fh->close;
-		$ok == 3 or return $self->{lei}->child_error($?, <<"");
-error reading $name: $! (\$?=$?)
+		my $buf = eval { PublicInbox::IO::read_all $fh, 0 };
+		my $e = $@;
+		return $self->{lei}->child_error($?, <<"") if !$fh->close || $e;
+error reading $name: $! (\$?=$?) (\$@=$e)
 
 		PublicInbox::Eml::strip_from($buf);
 
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 84266d03..e048a807 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -307,10 +307,9 @@ sub fgrp_update {
 	my $dstfh = delete $fgrp->{dstfh} or return;
 	seek($srcfh, 0, SEEK_SET);
 	seek($dstfh, 0, SEEK_SET);
-	my %src = map { chomp; split(/\0/) } (<$srcfh>);
-	my %dst = map { chomp; split(/\0/) } (<$dstfh>);
-	$srcfh = eof($srcfh) | close $srcfh; # detects readline errors
-	$dstfh = eof($dstfh) | close $dstfh; # ditto
+	my %src = map { chomp; split /\0/ } PublicInbox::IO::read_all $srcfh;
+	my %dst = map { chomp; split /\0/ } PublicInbox::IO::read_all $dstfh;
+	$srcfh = $dstfh = undef;
 	my $w = start_update_ref($fgrp) or return;
 	my $lei = $fgrp->{lei};
 	my $ndel;
@@ -508,8 +507,7 @@ EOM
 		my $l = File::Spec->abs2rel($alt, File::Spec->rel2abs($o));
 		open my $fh, '+>>', my $f = "$o/info/alternates";
 		seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
-		my $seen = grep(/\A\Q$l\E\n/, <$fh>); # error check with eof
-		eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+		my $seen = grep /\A\Q$l\E\n/, PublicInbox::IO::read_all $fh;
 		print $fh "$l\n" if !$seen;
 		close $fh;
 	}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 0d2f586a..2928be45 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -687,8 +687,7 @@ sub _pre_augment_v2 {
 	my $d = "$lei->{ale}->{git}->{git_dir}/objects";
 	open my $fh, '+>>', my $f = "$dir/git/0.git/objects/info/alternates";
 	seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
-	my $seen = grep(/\A\Q$d\E\n/, <$fh>);
-	eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+	my $seen = grep /\A\Q$d\E\n/, PublicInbox::IO::read_all $fh;
 	print $fh "$d\n" if !$seen;
 	close $fh;
 }
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 8cc4dfaf..849438bc 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -396,14 +396,12 @@ sub popen_wr {
 
 sub read_out_err ($) {
 	my ($opt) = @_;
-	local $/;
 	for my $fd (1, 2) { # read stdout/stderr
 		my $fh = delete($opt->{"fh.$fd"}) // next;
 		seek($fh, 0, SEEK_SET);
 		my $dst = $opt->{$fd};
 		$dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY';
-		$$dst .= <$fh>;
-		$fh = eof($fh) | close $fh; # detects readline errors
+		PublicInbox::IO::read_all $fh, 0, $dst, length($$dst);
 	}
 }
 
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index b84886a0..caf709c2 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -48,16 +48,16 @@ sub require_bsd (;$) {
 
 sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) }
 
-sub read_all ($;$$) {
+sub read_all ($;$$$) {
 	require PublicInbox::IO;
-	PublicInbox::IO::read_all($_[0], $_[1], $_[2])
+	PublicInbox::IO::read_all($_[0], $_[1], $_[2], $_[3])
 }
 
 sub eml_load ($) {
 	my ($path, $cb) = @_;
 	open(my $fh, '<', $path);
 	require PublicInbox::Eml;
-	PublicInbox::Eml->new(\(read_all($fh)));
+	PublicInbox::Eml->new(\(scalar read_all $fh));
 }
 
 sub tmpdir (;$) {
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 6a1bc890..a955cdf6 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -42,7 +42,7 @@ local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
 PublicInbox::Import::load_config($pi_cfg);
 my $err;
 my $mime = PublicInbox::Eml->new(do{
-	defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+	my $data = PublicInbox::IO::read_all \*STDIN;
 	PublicInbox::Eml::strip_from($data);
 
 	if ($train ne 'rm') {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index b2e0908d..b463b07b 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -44,7 +44,7 @@ use PublicInbox::Spamcheck;
 # in case there's bugs in our code or user error.
 my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
 $ems = PublicInbox::Emergency->new($emergency);
-my $str = do { local $/; <STDIN> };
+my $str = PublicInbox::IO::read_all \*STDIN;
 PublicInbox::Eml::strip_from($str);
 $ems->prepare(\$str);
 my $eml = PublicInbox::Eml->new(\$str);
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 8f9b0b16..618cfec4 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -33,7 +33,7 @@ PublicInbox::Admin::do_chdir(delete $opt->{C});
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
 PublicInbox::AdminEdit::check_editable(\@ibxs);
 
-defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+my $data = PublicInbox::IO::read_all \*STDIN;
 PublicInbox::Eml::strip_from($data);
 my $n_purged = 0;
 

  parent reply	other threads:[~2023-11-13 13:15 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-13 13:15 [PATCH 00/18] cindex: some --associate work Eric Wong
2023-11-13 13:15 ` [PATCH 01/18] cindex: check `say' errors w/ close or ->flush Eric Wong
2023-11-13 13:15 ` [PATCH 02/18] tmpfile: check `stat' errors, use autodie for unlink Eric Wong
2023-11-13 13:15 ` [PATCH 03/18] cindex: use `local' for pipes between processes Eric Wong
2023-11-13 13:15 ` [PATCH 04/18] xap_helper_cxx: use write_file helper Eric Wong
2023-11-13 13:15 ` [PATCH 05/18] xap_helper_cxx: make the build process ccache-friendly Eric Wong
2023-11-13 13:15 ` [PATCH 06/18] xap_helper_cxx: use -pipe by default in CXXFLAGS Eric Wong
2023-11-13 13:15 ` [PATCH 07/18] xap_client: spawn C++ xap_helper directly Eric Wong
2023-11-13 13:15 ` Eric Wong [this message]
2023-11-13 13:15 ` [PATCH 09/18] spawn: don't append to scalarrefs on stdout/stderr Eric Wong
2023-11-13 13:15 ` [PATCH 10/18] cindex: imply --all with --associate w/o -I/--only Eric Wong
2023-11-13 13:15 ` [PATCH 11/18] cindex: delay associate until prune+indexing finish Eric Wong
2023-11-13 13:15 ` [PATCH 12/18] xap_helper: Perl dump_ibx respects `-m MAX' Eric Wong
2023-11-13 13:15 ` [PATCH 13/18] cidx_xap_helper_aux: complain about truncated inputs Eric Wong
2023-11-13 13:15 ` [PATCH 14/18] xap_helper: stricter and harsher error handling Eric Wong
2023-11-13 13:15 ` [PATCH 15/18] xap_helper: better variable naming for key buffer Eric Wong
2023-11-13 13:15 ` [PATCH 16/18] cindex: do not guess integer maximum for Xapian Eric Wong
2023-11-13 13:15 ` [PATCH 17/18] cindex: rename associate-max => window Eric Wong
2023-11-13 13:15 ` [PATCH 18/18] cindex: support --associate-aggressive shortcut Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231113131551.843230-9-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).