user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 08/18] treewide: update read_all to avoid eof|close checks
  2023-11-13 13:15  7% [PATCH 00/18] cindex: some --associate work Eric Wong
@ 2023-11-13 13:15  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-13 13:15 UTC (permalink / raw)
  To: meta

read_all can be expanded to support FIFOs/pipes/sockets where
read-until-EOF behavior is desired.  We can also rely on
wantarray to support splitting on EOL markers, but it's
hard-coded to support only `$/ eq "\n"' since (AFAIK)
it's the only way we use the wantarray form `readline'.
---
 lib/PublicInbox/CodeSearchIdx.pm |  3 +--
 lib/PublicInbox/Gcf2.pm          |  3 +--
 lib/PublicInbox/IO.pm            | 18 +++++++++++++-----
 lib/PublicInbox/LeiInput.pm      | 10 ++++------
 lib/PublicInbox/LeiMirror.pm     | 10 ++++------
 lib/PublicInbox/LeiToMail.pm     |  3 +--
 lib/PublicInbox/Spawn.pm         |  4 +---
 lib/PublicInbox/TestCommon.pm    |  6 +++---
 script/public-inbox-learn        |  2 +-
 script/public-inbox-mda          |  2 +-
 script/public-inbox-purge        |  2 +-
 11 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index ffd443e1..3601176c 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -627,8 +627,7 @@ sub index_repo { # run_git cb
 	my $repo = delete $git->{-repo} or return index_next($self);
 	my $roots_fh = delete $repo->{roots_fh} // die 'BUG: no {roots_fh}';
 	seek($roots_fh, 0, SEEK_SET);
-	chomp(my @roots = <$roots_fh>);
-	$roots_fh = eof($roots_fh) | close $roots_fh; # detect readline errors
+	chomp(my @roots = PublicInbox::IO::read_all $roots_fh);
 	if (!@roots) {
 		warn("E: $git->{git_dir} has no root commits\n");
 		return index_next($self);
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 5490049d..e0219b55 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -81,8 +81,7 @@ sub add_alt ($$) {
 	#
 	# See https://bugs.debian.org/975607
 	if (open(my $fh, '<', "$objdir/info/alternates")) {
-		chomp(my @abs_alt = grep(m!^/!, <$fh>));
-		$fh = eof($fh) | close $fh; # detect readline errors
+		chomp(my @abs_alt = grep m!^/!, PublicInbox::IO::read_all $fh);
 		$gcf2->add_alternate($_) for @abs_alt;
 	}
 	$gcf2->add_alternate($objdir);
diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm
index 0d303500..11ce9be1 100644
--- a/lib/PublicInbox/IO.pm
+++ b/lib/PublicInbox/IO.pm
@@ -62,13 +62,21 @@ sub poll_in ($;$) {
 	IO::Poll::_poll($_[1] // -1, fileno($_[0]), my $ev = POLLIN);
 }
 
-sub read_all ($;$$) {
+sub read_all ($;$$$) { # pass $len=0 to read until EOF for :utf8 handles
 	use autodie qw(read);
-	my ($io, $len, $bref) = @_;
+	my ($io, $len, $bref, $off) = @_;
 	$bref //= \(my $buf);
-	my $r = read($io, $$bref, $len //= -s $io);
-	croak("read($io) ($r != $len)") if $len != $r;
-	$$bref;
+	$off //= 0;
+	my $r = 0;
+	if (my $left = $len //= -s $io) { # known size (binmode :raw/:unix)
+		do { # retry for binmode :unix
+			$r = read($io, $$bref, $left, $off += $r) or croak(
+				"read($io) premature EOF ($left/$len remain)");
+		} while ($left -= $r);
+	} else { # read until EOF
+		while (($r = read($io, $$bref, 65536, $off += $r))) {}
+	}
+	wantarray ? split(/^/sm, $$bref) : $$bref
 }
 
 sub try_cat ($) {
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 68c3c459..daba9a8e 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -79,12 +79,10 @@ sub input_net_cb { # imap_each, nntp_each cb
 sub input_fh {
 	my ($self, $ifmt, $fh, $name, @args) = @_;
 	if ($ifmt eq 'eml') {
-		my $buf = do { local $/; <$fh> };
-		my $ok = defined($buf) ? 1 : 0;
-		++$ok if eof($fh);
-		++$ok if $fh->close;
-		$ok == 3 or return $self->{lei}->child_error($?, <<"");
-error reading $name: $! (\$?=$?)
+		my $buf = eval { PublicInbox::IO::read_all $fh, 0 };
+		my $e = $@;
+		return $self->{lei}->child_error($?, <<"") if !$fh->close || $e;
+error reading $name: $! (\$?=$?) (\$@=$e)
 
 		PublicInbox::Eml::strip_from($buf);
 
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 84266d03..e048a807 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -307,10 +307,9 @@ sub fgrp_update {
 	my $dstfh = delete $fgrp->{dstfh} or return;
 	seek($srcfh, 0, SEEK_SET);
 	seek($dstfh, 0, SEEK_SET);
-	my %src = map { chomp; split(/\0/) } (<$srcfh>);
-	my %dst = map { chomp; split(/\0/) } (<$dstfh>);
-	$srcfh = eof($srcfh) | close $srcfh; # detects readline errors
-	$dstfh = eof($dstfh) | close $dstfh; # ditto
+	my %src = map { chomp; split /\0/ } PublicInbox::IO::read_all $srcfh;
+	my %dst = map { chomp; split /\0/ } PublicInbox::IO::read_all $dstfh;
+	$srcfh = $dstfh = undef;
 	my $w = start_update_ref($fgrp) or return;
 	my $lei = $fgrp->{lei};
 	my $ndel;
@@ -508,8 +507,7 @@ EOM
 		my $l = File::Spec->abs2rel($alt, File::Spec->rel2abs($o));
 		open my $fh, '+>>', my $f = "$o/info/alternates";
 		seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
-		my $seen = grep(/\A\Q$l\E\n/, <$fh>); # error check with eof
-		eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+		my $seen = grep /\A\Q$l\E\n/, PublicInbox::IO::read_all $fh;
 		print $fh "$l\n" if !$seen;
 		close $fh;
 	}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 0d2f586a..2928be45 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -687,8 +687,7 @@ sub _pre_augment_v2 {
 	my $d = "$lei->{ale}->{git}->{git_dir}/objects";
 	open my $fh, '+>>', my $f = "$dir/git/0.git/objects/info/alternates";
 	seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
-	my $seen = grep(/\A\Q$d\E\n/, <$fh>);
-	eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+	my $seen = grep /\A\Q$d\E\n/, PublicInbox::IO::read_all $fh;
 	print $fh "$d\n" if !$seen;
 	close $fh;
 }
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 8cc4dfaf..849438bc 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -396,14 +396,12 @@ sub popen_wr {
 
 sub read_out_err ($) {
 	my ($opt) = @_;
-	local $/;
 	for my $fd (1, 2) { # read stdout/stderr
 		my $fh = delete($opt->{"fh.$fd"}) // next;
 		seek($fh, 0, SEEK_SET);
 		my $dst = $opt->{$fd};
 		$dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY';
-		$$dst .= <$fh>;
-		$fh = eof($fh) | close $fh; # detects readline errors
+		PublicInbox::IO::read_all $fh, 0, $dst, length($$dst);
 	}
 }
 
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index b84886a0..caf709c2 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -48,16 +48,16 @@ sub require_bsd (;$) {
 
 sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) }
 
-sub read_all ($;$$) {
+sub read_all ($;$$$) {
 	require PublicInbox::IO;
-	PublicInbox::IO::read_all($_[0], $_[1], $_[2])
+	PublicInbox::IO::read_all($_[0], $_[1], $_[2], $_[3])
 }
 
 sub eml_load ($) {
 	my ($path, $cb) = @_;
 	open(my $fh, '<', $path);
 	require PublicInbox::Eml;
-	PublicInbox::Eml->new(\(read_all($fh)));
+	PublicInbox::Eml->new(\(scalar read_all $fh));
 }
 
 sub tmpdir (;$) {
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 6a1bc890..a955cdf6 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -42,7 +42,7 @@ local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
 PublicInbox::Import::load_config($pi_cfg);
 my $err;
 my $mime = PublicInbox::Eml->new(do{
-	defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+	my $data = PublicInbox::IO::read_all \*STDIN;
 	PublicInbox::Eml::strip_from($data);
 
 	if ($train ne 'rm') {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index b2e0908d..b463b07b 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -44,7 +44,7 @@ use PublicInbox::Spamcheck;
 # in case there's bugs in our code or user error.
 my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
 $ems = PublicInbox::Emergency->new($emergency);
-my $str = do { local $/; <STDIN> };
+my $str = PublicInbox::IO::read_all \*STDIN;
 PublicInbox::Eml::strip_from($str);
 $ems->prepare(\$str);
 my $eml = PublicInbox::Eml->new(\$str);
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 8f9b0b16..618cfec4 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -33,7 +33,7 @@ PublicInbox::Admin::do_chdir(delete $opt->{C});
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
 PublicInbox::AdminEdit::check_editable(\@ibxs);
 
-defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+my $data = PublicInbox::IO::read_all \*STDIN;
 PublicInbox::Eml::strip_from($data);
 my $n_purged = 0;
 

^ permalink raw reply related	[relevance 4%]

* [PATCH 00/18] cindex: some --associate work
@ 2023-11-13 13:15  7% Eric Wong
  2023-11-13 13:15  4% ` [PATCH 08/18] treewide: update read_all to avoid eof|close checks Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-13 13:15 UTC (permalink / raw)
  To: meta

Still very much in flux, but some treewide cleanups in there...

And I've been wondering if "join" is a better word than
"associate" to denote the relationship between inboxes
and coderepos.

But "join" (even if we use join(1) internally) probably
implies strict relationships, whereas our current "associate"
is always going to be fuzzy due to patchids being fuzzy
and blobs OIDs being abbreviated in patches.

I'm also thinking about moving --associate-* CLI switches
into suboptions (e.g. what getsubopt(3) supports), so:

	--associate=aggressive,prefixes=patchid+dfblob

But Perl doesn't ship with getsubopt(3) emulation
out-of-the-box

Eric Wong (18):
  cindex: check `say' errors w/ close or ->flush
  tmpfile: check `stat' errors, use autodie for unlink
  cindex: use `local' for pipes between processes
  xap_helper_cxx: use write_file helper
  xap_helper_cxx: make the build process ccache-friendly
  xap_helper_cxx: use -pipe by default in CXXFLAGS
  xap_client: spawn C++ xap_helper directly
  treewide: update read_all to avoid eof|close checks
  spawn: don't append to scalarrefs on stdout/stderr
  cindex: imply --all with --associate w/o -I/--only
  cindex: delay associate until prune+indexing finish
  xap_helper: Perl dump_ibx respects `-m MAX'
  cidx_xap_helper_aux: complain about truncated inputs
  xap_helper: stricter and harsher error handling
  xap_helper: better variable naming for key buffer
  cindex: do not guess integer maximum for Xapian
  cindex: rename associate-max => window
  cindex: support --associate-aggressive shortcut

 lib/PublicInbox/CidxComm.pm         |   6 +-
 lib/PublicInbox/CidxXapHelperAux.pm |   6 +-
 lib/PublicInbox/CodeSearchIdx.pm    | 122 ++++++++++-----
 lib/PublicInbox/Gcf2.pm             |   3 +-
 lib/PublicInbox/IO.pm               |  18 ++-
 lib/PublicInbox/LeiInput.pm         |  10 +-
 lib/PublicInbox/LeiMirror.pm        |  10 +-
 lib/PublicInbox/LeiToMail.pm        |   3 +-
 lib/PublicInbox/Spawn.pm            |   4 +-
 lib/PublicInbox/TestCommon.pm       |   6 +-
 lib/PublicInbox/Tmpfile.pm          |  10 +-
 lib/PublicInbox/XapClient.pm        |  28 ++--
 lib/PublicInbox/XapHelper.pm        |  30 ++--
 lib/PublicInbox/XapHelperCxx.pm     |  55 +++----
 lib/PublicInbox/xap_helper.h        | 233 ++++++++++++----------------
 script/public-inbox-cindex          |   3 +-
 script/public-inbox-learn           |   2 +-
 script/public-inbox-mda             |   2 +-
 script/public-inbox-purge           |   2 +-
 t/spawn.t                           |   2 +-
 t/xap_helper.t                      |  27 ++--
 21 files changed, 287 insertions(+), 295 deletions(-)

Yay, less code!

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-13 13:15  7% [PATCH 00/18] cindex: some --associate work Eric Wong
2023-11-13 13:15  4% ` [PATCH 08/18] treewide: update read_all to avoid eof|close checks Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).