* [PATCH 08/18] treewide: update read_all to avoid eof|close checks
2023-11-13 13:15 7% [PATCH 00/18] cindex: some --associate work Eric Wong
@ 2023-11-13 13:15 4% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-13 13:15 UTC (permalink / raw)
To: meta
read_all can be expanded to support FIFOs/pipes/sockets where
read-until-EOF behavior is desired. We can also rely on
wantarray to support splitting on EOL markers, but it's
hard-coded to support only `$/ eq "\n"' since (AFAIK)
it's the only way we use the wantarray form `readline'.
---
lib/PublicInbox/CodeSearchIdx.pm | 3 +--
lib/PublicInbox/Gcf2.pm | 3 +--
lib/PublicInbox/IO.pm | 18 +++++++++++++-----
lib/PublicInbox/LeiInput.pm | 10 ++++------
lib/PublicInbox/LeiMirror.pm | 10 ++++------
lib/PublicInbox/LeiToMail.pm | 3 +--
lib/PublicInbox/Spawn.pm | 4 +---
lib/PublicInbox/TestCommon.pm | 6 +++---
script/public-inbox-learn | 2 +-
script/public-inbox-mda | 2 +-
script/public-inbox-purge | 2 +-
11 files changed, 31 insertions(+), 32 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index ffd443e1..3601176c 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -627,8 +627,7 @@ sub index_repo { # run_git cb
my $repo = delete $git->{-repo} or return index_next($self);
my $roots_fh = delete $repo->{roots_fh} // die 'BUG: no {roots_fh}';
seek($roots_fh, 0, SEEK_SET);
- chomp(my @roots = <$roots_fh>);
- $roots_fh = eof($roots_fh) | close $roots_fh; # detect readline errors
+ chomp(my @roots = PublicInbox::IO::read_all $roots_fh);
if (!@roots) {
warn("E: $git->{git_dir} has no root commits\n");
return index_next($self);
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 5490049d..e0219b55 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -81,8 +81,7 @@ sub add_alt ($$) {
#
# See https://bugs.debian.org/975607
if (open(my $fh, '<', "$objdir/info/alternates")) {
- chomp(my @abs_alt = grep(m!^/!, <$fh>));
- $fh = eof($fh) | close $fh; # detect readline errors
+ chomp(my @abs_alt = grep m!^/!, PublicInbox::IO::read_all $fh);
$gcf2->add_alternate($_) for @abs_alt;
}
$gcf2->add_alternate($objdir);
diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm
index 0d303500..11ce9be1 100644
--- a/lib/PublicInbox/IO.pm
+++ b/lib/PublicInbox/IO.pm
@@ -62,13 +62,21 @@ sub poll_in ($;$) {
IO::Poll::_poll($_[1] // -1, fileno($_[0]), my $ev = POLLIN);
}
-sub read_all ($;$$) {
+sub read_all ($;$$$) { # pass $len=0 to read until EOF for :utf8 handles
use autodie qw(read);
- my ($io, $len, $bref) = @_;
+ my ($io, $len, $bref, $off) = @_;
$bref //= \(my $buf);
- my $r = read($io, $$bref, $len //= -s $io);
- croak("read($io) ($r != $len)") if $len != $r;
- $$bref;
+ $off //= 0;
+ my $r = 0;
+ if (my $left = $len //= -s $io) { # known size (binmode :raw/:unix)
+ do { # retry for binmode :unix
+ $r = read($io, $$bref, $left, $off += $r) or croak(
+ "read($io) premature EOF ($left/$len remain)");
+ } while ($left -= $r);
+ } else { # read until EOF
+ while (($r = read($io, $$bref, 65536, $off += $r))) {}
+ }
+ wantarray ? split(/^/sm, $$bref) : $$bref
}
sub try_cat ($) {
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 68c3c459..daba9a8e 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -79,12 +79,10 @@ sub input_net_cb { # imap_each, nntp_each cb
sub input_fh {
my ($self, $ifmt, $fh, $name, @args) = @_;
if ($ifmt eq 'eml') {
- my $buf = do { local $/; <$fh> };
- my $ok = defined($buf) ? 1 : 0;
- ++$ok if eof($fh);
- ++$ok if $fh->close;
- $ok == 3 or return $self->{lei}->child_error($?, <<"");
-error reading $name: $! (\$?=$?)
+ my $buf = eval { PublicInbox::IO::read_all $fh, 0 };
+ my $e = $@;
+ return $self->{lei}->child_error($?, <<"") if !$fh->close || $e;
+error reading $name: $! (\$?=$?) (\$@=$e)
PublicInbox::Eml::strip_from($buf);
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 84266d03..e048a807 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -307,10 +307,9 @@ sub fgrp_update {
my $dstfh = delete $fgrp->{dstfh} or return;
seek($srcfh, 0, SEEK_SET);
seek($dstfh, 0, SEEK_SET);
- my %src = map { chomp; split(/\0/) } (<$srcfh>);
- my %dst = map { chomp; split(/\0/) } (<$dstfh>);
- $srcfh = eof($srcfh) | close $srcfh; # detects readline errors
- $dstfh = eof($dstfh) | close $dstfh; # ditto
+ my %src = map { chomp; split /\0/ } PublicInbox::IO::read_all $srcfh;
+ my %dst = map { chomp; split /\0/ } PublicInbox::IO::read_all $dstfh;
+ $srcfh = $dstfh = undef;
my $w = start_update_ref($fgrp) or return;
my $lei = $fgrp->{lei};
my $ndel;
@@ -508,8 +507,7 @@ EOM
my $l = File::Spec->abs2rel($alt, File::Spec->rel2abs($o));
open my $fh, '+>>', my $f = "$o/info/alternates";
seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
- my $seen = grep(/\A\Q$l\E\n/, <$fh>); # error check with eof
- eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+ my $seen = grep /\A\Q$l\E\n/, PublicInbox::IO::read_all $fh;
print $fh "$l\n" if !$seen;
close $fh;
}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 0d2f586a..2928be45 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -687,8 +687,7 @@ sub _pre_augment_v2 {
my $d = "$lei->{ale}->{git}->{git_dir}/objects";
open my $fh, '+>>', my $f = "$dir/git/0.git/objects/info/alternates";
seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
- my $seen = grep(/\A\Q$d\E\n/, <$fh>);
- eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline
+ my $seen = grep /\A\Q$d\E\n/, PublicInbox::IO::read_all $fh;
print $fh "$d\n" if !$seen;
close $fh;
}
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 8cc4dfaf..849438bc 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -396,14 +396,12 @@ sub popen_wr {
sub read_out_err ($) {
my ($opt) = @_;
- local $/;
for my $fd (1, 2) { # read stdout/stderr
my $fh = delete($opt->{"fh.$fd"}) // next;
seek($fh, 0, SEEK_SET);
my $dst = $opt->{$fd};
$dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY';
- $$dst .= <$fh>;
- $fh = eof($fh) | close $fh; # detects readline errors
+ PublicInbox::IO::read_all $fh, 0, $dst, length($$dst);
}
}
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index b84886a0..caf709c2 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -48,16 +48,16 @@ sub require_bsd (;$) {
sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) }
-sub read_all ($;$$) {
+sub read_all ($;$$$) {
require PublicInbox::IO;
- PublicInbox::IO::read_all($_[0], $_[1], $_[2])
+ PublicInbox::IO::read_all($_[0], $_[1], $_[2], $_[3])
}
sub eml_load ($) {
my ($path, $cb) = @_;
open(my $fh, '<', $path);
require PublicInbox::Eml;
- PublicInbox::Eml->new(\(read_all($fh)));
+ PublicInbox::Eml->new(\(scalar read_all $fh));
}
sub tmpdir (;$) {
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 6a1bc890..a955cdf6 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -42,7 +42,7 @@ local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
PublicInbox::Import::load_config($pi_cfg);
my $err;
my $mime = PublicInbox::Eml->new(do{
- defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+ my $data = PublicInbox::IO::read_all \*STDIN;
PublicInbox::Eml::strip_from($data);
if ($train ne 'rm') {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index b2e0908d..b463b07b 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -44,7 +44,7 @@ use PublicInbox::Spamcheck;
# in case there's bugs in our code or user error.
my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
$ems = PublicInbox::Emergency->new($emergency);
-my $str = do { local $/; <STDIN> };
+my $str = PublicInbox::IO::read_all \*STDIN;
PublicInbox::Eml::strip_from($str);
$ems->prepare(\$str);
my $eml = PublicInbox::Eml->new(\$str);
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 8f9b0b16..618cfec4 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -33,7 +33,7 @@ PublicInbox::Admin::do_chdir(delete $opt->{C});
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
PublicInbox::AdminEdit::check_editable(\@ibxs);
-defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
+my $data = PublicInbox::IO::read_all \*STDIN;
PublicInbox::Eml::strip_from($data);
my $n_purged = 0;
^ permalink raw reply related [relevance 4%]
* [PATCH 00/18] cindex: some --associate work
@ 2023-11-13 13:15 7% Eric Wong
2023-11-13 13:15 4% ` [PATCH 08/18] treewide: update read_all to avoid eof|close checks Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-13 13:15 UTC (permalink / raw)
To: meta
Still very much in flux, but some treewide cleanups in there...
And I've been wondering if "join" is a better word than
"associate" to denote the relationship between inboxes
and coderepos.
But "join" (even if we use join(1) internally) probably
implies strict relationships, whereas our current "associate"
is always going to be fuzzy due to patchids being fuzzy
and blobs OIDs being abbreviated in patches.
I'm also thinking about moving --associate-* CLI switches
into suboptions (e.g. what getsubopt(3) supports), so:
--associate=aggressive,prefixes=patchid+dfblob
But Perl doesn't ship with getsubopt(3) emulation
out-of-the-box
Eric Wong (18):
cindex: check `say' errors w/ close or ->flush
tmpfile: check `stat' errors, use autodie for unlink
cindex: use `local' for pipes between processes
xap_helper_cxx: use write_file helper
xap_helper_cxx: make the build process ccache-friendly
xap_helper_cxx: use -pipe by default in CXXFLAGS
xap_client: spawn C++ xap_helper directly
treewide: update read_all to avoid eof|close checks
spawn: don't append to scalarrefs on stdout/stderr
cindex: imply --all with --associate w/o -I/--only
cindex: delay associate until prune+indexing finish
xap_helper: Perl dump_ibx respects `-m MAX'
cidx_xap_helper_aux: complain about truncated inputs
xap_helper: stricter and harsher error handling
xap_helper: better variable naming for key buffer
cindex: do not guess integer maximum for Xapian
cindex: rename associate-max => window
cindex: support --associate-aggressive shortcut
lib/PublicInbox/CidxComm.pm | 6 +-
lib/PublicInbox/CidxXapHelperAux.pm | 6 +-
lib/PublicInbox/CodeSearchIdx.pm | 122 ++++++++++-----
lib/PublicInbox/Gcf2.pm | 3 +-
lib/PublicInbox/IO.pm | 18 ++-
lib/PublicInbox/LeiInput.pm | 10 +-
lib/PublicInbox/LeiMirror.pm | 10 +-
lib/PublicInbox/LeiToMail.pm | 3 +-
lib/PublicInbox/Spawn.pm | 4 +-
lib/PublicInbox/TestCommon.pm | 6 +-
lib/PublicInbox/Tmpfile.pm | 10 +-
lib/PublicInbox/XapClient.pm | 28 ++--
lib/PublicInbox/XapHelper.pm | 30 ++--
lib/PublicInbox/XapHelperCxx.pm | 55 +++----
lib/PublicInbox/xap_helper.h | 233 ++++++++++++----------------
script/public-inbox-cindex | 3 +-
script/public-inbox-learn | 2 +-
script/public-inbox-mda | 2 +-
script/public-inbox-purge | 2 +-
t/spawn.t | 2 +-
t/xap_helper.t | 27 ++--
21 files changed, 287 insertions(+), 295 deletions(-)
Yay, less code!
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-13 13:15 7% [PATCH 00/18] cindex: some --associate work Eric Wong
2023-11-13 13:15 4% ` [PATCH 08/18] treewide: update read_all to avoid eof|close checks Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).