From 5c94a55c24a17c8250cf80d78246851c0a7c4087 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:35 +0000 Subject: git: allow async_cat to pass arg to callback This allows callers to avoid allocating several KB for for every call to ->async_cat. --- lib/PublicInbox/Git.pm | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 90595840..af3a5712 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -148,16 +148,18 @@ sub read_cat_in_full ($$$) { sub _cat_async_step ($$$) { my ($self, $inflight, $in) = @_; - my $cb = shift @$inflight or die 'BUG: inflight empty'; + my $pair = shift @$inflight or die 'BUG: inflight empty'; + my ($cb, $arg) = @$pair; local $/ = "\n"; my $head = $in->getline; - return eval { $cb->(undef) } if $head =~ / missing$/; + $head =~ / missing$/ and return + eval { $cb->(undef, undef, undef, undef, $arg) }; $head =~ /^([0-9a-f]{40}) (\S+) ([0-9]+)$/ or fail($self, "Unexpected result from async git cat-file: $head"); my ($oid_hex, $type, $size) = ($1, $2, $3 + 0); my $bref = read_cat_in_full($self, $in, $size); - eval { $cb->($bref, $oid_hex, $type, $size) }; + eval { $cb->($bref, $oid_hex, $type, $size, $arg) }; } sub cat_async_wait ($) { @@ -319,15 +321,15 @@ sub cat_async_begin { $self->{inflight} = []; } -sub cat_async ($$$) { - my ($self, $oid, $cb) = @_; +sub cat_async ($$$;$) { + my ($self, $oid, $cb, $arg) = @_; my $inflight = $self->{inflight} or die 'BUG: not in async'; if (scalar(@$inflight) >= MAX_INFLIGHT) { _cat_async_step($self, $inflight, $self->{in}); } $self->{out}->print($oid, "\n") or fail($self, "write error: $!"); - push @$inflight, $cb; + push(@$inflight, [ $cb, $arg ]); } sub commit_title ($$) { -- cgit v1.2.3-24-ge0c7 From bfa40d0c4a970e5af0eae0441920a491c4f6fa1d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:36 +0000 Subject: httpd/async: support passing arg to callbacks Another step towards removing anonymous subs to eliminate a possible source of memory leaks and high memory use. --- lib/PublicInbox/HTTPD/Async.pm | 8 +++++--- lib/PublicInbox/Qspawn.pm | 4 ++-- lib/PublicInbox/SolverGit.pm | 16 +++++----------- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index d5628ee8..d182c118 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -10,7 +10,7 @@ package PublicInbox::HTTPD::Async; use strict; use warnings; use base qw(PublicInbox::DS); -use fields qw(cb end); +use fields qw(cb arg end end_arg); use Errno qw(EAGAIN); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); @@ -18,12 +18,12 @@ use PublicInbox::Syscall qw(EPOLLIN EPOLLET); # $io is a read-only pipe ($rpipe) for now, but may be a # bidirectional socket in the future. sub new { - my ($class, $io, $cb, $end) = @_; + my ($class, $io, $cb, $arg, $end, $end_arg) = @_; # no $io? call $cb at the top of the next event loop to # avoid recursion: unless (defined($io)) { - PublicInbox::DS::requeue($cb); + PublicInbox::DS::requeue($cb ? $cb : $arg); die '$end unsupported w/o $io' if $end; return; } @@ -32,7 +32,9 @@ sub new { IO::Handle::blocking($io, 0); $self->SUPER::new($io, EPOLLIN | EPOLLET); $self->{cb} = $cb; # initial read callback, later replaced by main_cb + $self->{arg} = $arg; # arg for $cb $self->{end} = $end; # like END {}, but only for this object + $self->{end_arg} = $end_arg; # arg for $end $self; } diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 651fa390..c2856609 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -185,7 +185,7 @@ reread: ($rpipe) = @_; # popen_rd result if ($async) { # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) - $async = $async->($rpipe, $cb, $end); + $async = $async->($rpipe, $cb, undef, $end); # $cb will call ->async_pass or ->close } else { # generic PSGI $cb->() while $qx; @@ -297,7 +297,7 @@ sub psgi_return { ($rpipe) = @_; if ($async) { # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) - $async = $async->($rpipe, $cb, $end); + $async = $async->($rpipe, $cb, undef, $end); # $cb will call ->async_pass or ->close } else { # generic PSGI $cb->() while $rd_hdr; diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index eea59b6d..b3fc5bef 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -327,7 +327,7 @@ sub do_finish ($$) { $user_cb->(undef); } -sub do_step ($) { +sub event_step ($) { my ($self) = @_; eval { # step 1: resolve blobs to patches in the todo queue @@ -363,18 +363,13 @@ sub do_step ($) { } } -sub step_cb ($) { - my ($self) = @_; - sub { do_step($self) }; -} - sub next_step ($) { my ($self) = @_; # if outside of public-inbox-httpd, caller is expected to be - # looping step_cb, anyways + # looping event_step, anyways my $async = $self->{psgi_env}->{'pi-httpd.async'} or return; # PublicInbox::HTTPD::Async->new - $async->(undef, step_cb($self)); + $async->(undef, undef, $self); } sub mark_found ($$$) { @@ -598,12 +593,11 @@ sub solve ($$$$$) { $self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXXXXXX", TMPDIR => 1); dbg($self, "solving $oid_want ..."); - my $step_cb = step_cb($self); if (my $async = $env->{'pi-httpd.async'}) { # PublicInbox::HTTPD::Async->new - $async->(undef, $step_cb); + $async->(undef, undef, $self); } else { - $step_cb->() while $self->{user_cb}; + event_step($self) while $self->{user_cb}; } } -- cgit v1.2.3-24-ge0c7 From 2b8dff053f61a15cd2987fdab8e60c2e4b6aa819 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:37 +0000 Subject: qspawn: remove some anonymous subs for psgi_qx By passing a user-supplied arg to $qx_cb, we can eliminate the callers' need to capture on-stack variables with a closure. This saves several kilobytes of memory allocation at the expense of some extra hash table lookups in user-supplied callbacks. It also reduces the risk of memory leaks by eliminating a common source of circular references. --- lib/PublicInbox/Qspawn.pm | 4 +- lib/PublicInbox/SolverGit.pm | 88 +++++++++++++++++++++++++------------------- lib/PublicInbox/ViewVCS.pm | 33 ++++++++++------- 3 files changed, 71 insertions(+), 54 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index c2856609..22603ca7 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -155,13 +155,13 @@ sub start { # $env is the PSGI env. As with ``/qx; only use this when output is small # and safe to slurp. sub psgi_qx { - my ($self, $env, $limiter, $qx_cb) = @_; + my ($self, $env, $limiter, $qx_cb, $cb_arg) = @_; my $scalar = ''; open(my $qx, '+>', \$scalar) or die; # PerlIO::scalar my $end = sub { my $err = $_[0]; # $! log_err($env, "psgi_qx: $err") if defined($err); - finish($self, $env, sub { $qx_cb->(\$scalar) }); + finish($self, $env, sub { $qx_cb->(\$scalar, $cb_arg) }); $qx = undef; }; my $rpipe; # comes from popen_rd diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index b3fc5bef..fd43940f 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -221,6 +221,16 @@ sub find_extract_diffs ($$$) { @di ? \@di : undef; } +sub update_index_result ($$) { + my ($bref, $self) = @_; + my ($qsp, $msg) = delete @$self{qw(-qsp -msg)}; + if (my $err = $qsp->{err}) { + ERR($self, "git update-index error: $err"); + } + dbg($self, $msg); + next_step($self); # onto do_git_apply +} + sub prepare_index ($) { my ($self) = @_; my $patches = $self->{patches}; @@ -248,15 +258,10 @@ sub prepare_index ($) { my $rdr = { 0 => fileno($in), -hold => $in }; my $cmd = [ qw(git update-index -z --index-info) ]; my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr); - $qsp->psgi_qx($self->{psgi_env}, undef, sub { - my ($bref) = @_; - if (my $err = $qsp->{err}) { - ERR($self, "git update-index error: $err"); - } - dbg($self, "index prepared:\n" . - "$mode_a $oid_full\t" . git_quote($path_a)); - next_step($self); # onto do_git_apply - }); + $path_a = git_quote($path_a); + $self->{-qsp} = $qsp; + $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a"; + $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self); } # pure Perl "git init" @@ -383,8 +388,9 @@ sub mark_found ($$$) { } } -sub parse_ls_files ($$$$) { - my ($self, $qsp, $bref, $di) = @_; +sub parse_ls_files ($$) { + my ($self, $bref) = @_; + my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)}; if (my $err = $qsp->{err}) { die "git ls-files error: $err"; } @@ -410,15 +416,10 @@ sub parse_ls_files ($$$$) { next_step($self); # onto the next patch } -sub start_ls_files ($$) { - my ($self, $di) = @_; - my $cmd = [qw(git ls-files -s -z)]; - my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}); - $qsp->psgi_qx($self->{psgi_env}, undef, sub { - my ($bref) = @_; - eval { parse_ls_files($self, $qsp, $bref, $di) }; - ERR($self, $@) if $@; - }); +sub ls_files_result { + my ($bref, $self) = @_; + eval { parse_ls_files($self, $bref) }; + ERR($self, $@) if $@; } sub oids_same_ish ($$) { @@ -438,6 +439,31 @@ sub skip_identical ($$$) { } } +sub apply_result ($$) { + my ($bref, $self) = @_; + my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)}; + dbg($self, $$bref); + my $patches = $self->{patches}; + if (my $err = $qsp->{err}) { + my $msg = "git apply error: $err"; + my $nxt = $patches->[0]; + if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) { + dbg($self, $msg); + dbg($self, 'trying '.di_url($self, $nxt)); + } else { + ERR($self, $msg); + } + } else { + skip_identical($self, $patches, $di->{oid_b}); + } + + my @cmd = qw(git ls-files -s -z); + $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}); + $self->{-cur_di} = $di; + $self->{-qsp} = $qsp; + $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self); +} + sub do_git_apply ($) { my ($self) = @_; my $dn = $self->{tmp}->dirname; @@ -465,24 +491,9 @@ sub do_git_apply ($) { my $rdr = { 2 => 1 }; my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $rdr); - $qsp->psgi_qx($self->{psgi_env}, undef, sub { - my ($bref) = @_; - dbg($self, $$bref); - if (my $err = $qsp->{err}) { - my $msg = "git apply error: $err"; - my $nxt = $patches->[0]; - if ($nxt && oids_same_ish($nxt->{oid_b}, $prv_oid_b)) { - dbg($self, $msg); - dbg($self, 'trying '.di_url($self, $nxt)); - } else { - ERR($self, $msg); - } - } else { - skip_identical($self, $patches, $di->{oid_b}); - } - eval { start_ls_files($self, $di) }; - ERR($self, $@) if $@; - }); + $self->{-cur_di} = $di; + $self->{-qsp} = $qsp; + $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self); } sub di_url ($$) { @@ -564,6 +575,7 @@ sub new { bless { gits => $ibx->{-repo_objs}, user_cb => $user_cb, + # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks # TODO: config option for searching related inboxes inboxes => [ $ibx ], diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 842c873c..886e10cb 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -73,6 +73,22 @@ sub stream_large_blob ($$$$) { }); } +sub show_other_result ($$) { + my ($bref, $ctx) = @_; + my ($qsp, $logref) = delete @$ctx{qw(-qsp -logref)}; + if (my $err = $qsp->{err}) { + utf8::decode($$err); + $$logref .= "git show error: $err"; + return html_page($ctx, 500, $logref); + } + my $l = PublicInbox::Linkify->new; + utf8::decode($$bref); + $l->linkify_1($$bref); + $$bref = '
'. $l->linkify_2(ascii_html($$bref));
+	$$bref .= '

' . $$logref; + html_page($ctx, 200, $bref); +} + sub show_other ($$$$) { my ($ctx, $res, $logref, $fn) = @_; my ($git, $oid, $type, $size) = @$res; @@ -84,20 +100,9 @@ sub show_other ($$$$) { qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ]; my $qsp = PublicInbox::Qspawn->new($cmd); my $env = $ctx->{env}; - $qsp->psgi_qx($env, undef, sub { - my ($bref) = @_; - if (my $err = $qsp->{err}) { - utf8::decode($$err); - $$logref .= "git show error: $err"; - return html_page($ctx, 500, $logref); - } - my $l = PublicInbox::Linkify->new; - utf8::decode($$bref); - $l->linkify_1($$bref); - $$bref = '
'. $l->linkify_2(ascii_html($$bref));
-		$$bref .= '

' . $$logref; - html_page($ctx, 200, $bref); - }); + $ctx->{-qsp} = $qsp; + $ctx->{-logref} = $logref; + $qsp->psgi_qx($env, undef, \&show_other_result, $ctx); } sub solve_result { -- cgit v1.2.3-24-ge0c7 From 5c887bfc4ccc6fd79e29d89ee8edfccd5cd9685b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:38 +0000 Subject: qspawn: disambiguate command vs PSGI env Make things easier-to-follow and paves the way for future work to reduce dependencies on anonymous subs capturing local variables. --- lib/PublicInbox/Qspawn.pm | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 22603ca7..ba980e73 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -36,17 +36,17 @@ my $def_limiter; # declares a command to spawn (but does not spawn it). # $cmd is the command to spawn -# $env is the environ for the child process +# $cmd_env is the environ for the child process (not PSGI env) # $opt can include redirects and perhaps other process spawning options sub new ($$$;) { - my ($class, $cmd, $env, $opt) = @_; - bless { args => [ $cmd, $env, $opt ] }, $class; + my ($class, $cmd, $cmd_env, $opt) = @_; + bless { args => [ $cmd, $cmd_env, $opt ] }, $class; } sub _do_spawn { my ($self, $start_cb, $limiter) = @_; my $err; - my ($cmd, $env, $opts) = @{$self->{args}}; + my ($cmd, $cmd_env, $opts) = @{$self->{args}}; my %opts = %{$opts || {}}; $self->{limiter} = $limiter; foreach my $k (PublicInbox::Spawn::RLIMITS()) { @@ -55,7 +55,7 @@ sub _do_spawn { } } - ($self->{rpipe}, $self->{pid}) = popen_rd($cmd, $env, \%opts); + ($self->{rpipe}, $self->{pid}) = popen_rd($cmd, $cmd_env, \%opts); # drop any IO handles opt was holding open via $opt->{hold} # No need to hold onto the descriptor once the child process has it. @@ -94,7 +94,7 @@ sub waitpid_err ($$) { $err = "W: waitpid($xpid, 0) => $pid: $!"; } # else should not be called with pid == 0 - my $env = delete $self->{env}; + my $env = delete $self->{psgi_env}; # done, spawn whatever's in the queue my $limiter = $self->{limiter}; @@ -118,9 +118,8 @@ sub waitpid_err ($$) { } sub do_waitpid ($;$$) { - my ($self, $env, $fin_cb) = @_; + my ($self, $fin_cb) = @_; my $pid = $self->{pid}; - $self->{env} = $env; $self->{fin_cb} = $fin_cb; # PublicInbox::DS may not be loaded eval { PublicInbox::DS::dwaitpid($pid, \&waitpid_err, $self) }; @@ -132,10 +131,10 @@ sub do_waitpid ($;$$) { } } -sub finish ($;$$) { - my ($self, $env, $fin_cb) = @_; +sub finish ($;$) { + my ($self, $fin_cb) = @_; if (delete $self->{rpipe}) { - do_waitpid($self, $env, $fin_cb); + do_waitpid($self, $fin_cb); } elsif ($fin_cb) { eval { $fin_cb->() }; } @@ -156,12 +155,13 @@ sub start { # and safe to slurp. sub psgi_qx { my ($self, $env, $limiter, $qx_cb, $cb_arg) = @_; + $self->{psgi_env} = $env; my $scalar = ''; open(my $qx, '+>', \$scalar) or die; # PerlIO::scalar my $end = sub { my $err = $_[0]; # $! log_err($env, "psgi_qx: $err") if defined($err); - finish($self, $env, sub { $qx_cb->(\$scalar, $cb_arg) }); + finish($self, sub { $qx_cb->(\$scalar, $cb_arg) }); $qx = undef; }; my $rpipe; # comes from popen_rd @@ -230,11 +230,12 @@ sub filter_fh ($$) { # immediately (or streamed via ->getline (pull-based)). sub psgi_return { my ($self, $env, $limiter, $parse_hdr) = @_; + $self->{psgi_env} = $env; my ($fh, $rpipe); my $end = sub { my $err = $_[0]; # $! log_err($env, "psgi_return: $err") if defined($err); - finish($self, $env); + finish($self); $fh->close if $fh; # async-only }; -- cgit v1.2.3-24-ge0c7 From dd57a7d007bf756d856fe3d2b414657ebf009941 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:39 +0000 Subject: qspawn: replace anonymous $end callbacks w/ event_step This will tie into the DS event loop if that's used, but event_step an be called directly without relying on the event loop from Apache or other HTTP servers (or PSGI tests). --- lib/PublicInbox/GetlineBody.pm | 8 ++-- lib/PublicInbox/HTTPD/Async.pm | 16 ++++---- lib/PublicInbox/Qspawn.pm | 90 ++++++++++++++++++++++-------------------- 3 files changed, 61 insertions(+), 53 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm index f8cdd1b7..750a8c53 100644 --- a/lib/PublicInbox/GetlineBody.pm +++ b/lib/PublicInbox/GetlineBody.pm @@ -13,10 +13,11 @@ use strict; use warnings; sub new { - my ($class, $rpipe, $end, $buf, $filter) = @_; + my ($class, $rpipe, $end, $end_arg, $buf, $filter) = @_; bless { rpipe => $rpipe, end => $end, + end_arg => $end_arg, buf => $buf, filter => $filter || 0, }, $class; @@ -40,10 +41,9 @@ sub getline { sub close { my ($self) = @_; - my $rpipe = delete $self->{rpipe}; + my ($rpipe, $end, $end_arg) = delete @$self{qw(rpipe end end_arg)}; close $rpipe if $rpipe; - my $end = delete $self->{end}; - $end->() if $end; + $end->($end_arg) if $end; } 1; diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index d182c118..8956f719 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -10,7 +10,7 @@ package PublicInbox::HTTPD::Async; use strict; use warnings; use base qw(PublicInbox::DS); -use fields qw(cb arg end end_arg); +use fields qw(cb arg end_obj); use Errno qw(EAGAIN); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); @@ -18,13 +18,13 @@ use PublicInbox::Syscall qw(EPOLLIN EPOLLET); # $io is a read-only pipe ($rpipe) for now, but may be a # bidirectional socket in the future. sub new { - my ($class, $io, $cb, $arg, $end, $end_arg) = @_; + my ($class, $io, $cb, $arg, $end_obj) = @_; # no $io? call $cb at the top of the next event loop to # avoid recursion: unless (defined($io)) { PublicInbox::DS::requeue($cb ? $cb : $arg); - die '$end unsupported w/o $io' if $end; + die '$end_obj unsupported w/o $io' if $end_obj; return; } @@ -33,8 +33,7 @@ sub new { $self->SUPER::new($io, EPOLLIN | EPOLLET); $self->{cb} = $cb; # initial read callback, later replaced by main_cb $self->{arg} = $arg; # arg for $cb - $self->{end} = $end; # like END {}, but only for this object - $self->{end_arg} = $end_arg; # arg for $end + $self->{end_obj} = $end_obj; # like END{}, can ->event_step $self; } @@ -98,8 +97,11 @@ sub close { $self->SUPER::close; # DS::close # we defer this to the next timer loop since close is deferred - if (my $end = delete $self->{end}) { - PublicInbox::DS::requeue($end); + if (my $end_obj = delete $self->{end_obj}) { + # this calls $end_obj->event_step + # (likely PublicInbox::Qspawn::event_step, + # NOT PublicInbox::HTTPD::Async::event_step) + PublicInbox::DS::requeue($end_obj); } } diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index ba980e73..6cb28b9a 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -94,7 +94,8 @@ sub waitpid_err ($$) { $err = "W: waitpid($xpid, 0) => $pid: $!"; } # else should not be called with pid == 0 - my $env = delete $self->{psgi_env}; + my ($env, $qx_cb, $qx_arg, $qx_buf) = + delete @$self{qw(psgi_env qx_cb qx_arg qx_buf)}; # done, spawn whatever's in the queue my $limiter = $self->{limiter}; @@ -112,15 +113,12 @@ sub waitpid_err ($$) { log_err($env, join(' ', @{$self->{args}}) . ": $err"); } } - if (my $fin_cb = delete $self->{fin_cb}) { - eval { $fin_cb->() } - } + eval { $qx_cb->($qx_buf, $qx_arg) } if $qx_cb; } -sub do_waitpid ($;$$) { - my ($self, $fin_cb) = @_; +sub do_waitpid ($) { + my ($self) = @_; my $pid = $self->{pid}; - $self->{fin_cb} = $fin_cb; # PublicInbox::DS may not be loaded eval { PublicInbox::DS::dwaitpid($pid, \&waitpid_err, $self) }; # done if we're running in PublicInbox::DS::EventLoop @@ -131,12 +129,14 @@ sub do_waitpid ($;$$) { } } -sub finish ($;$) { - my ($self, $fin_cb) = @_; +sub finish ($) { + my ($self) = @_; if (delete $self->{rpipe}) { - do_waitpid($self, $fin_cb); - } elsif ($fin_cb) { - eval { $fin_cb->() }; + do_waitpid($self); + } else { + my ($env, $qx_cb, $qx_arg, $qx_buf) = + delete @$self{qw(psgi_env qx_cb qx_arg qx_buf)}; + eval { $qx_cb->($qx_buf, $qx_arg) } if $qx_cb; } } @@ -154,16 +154,14 @@ sub start { # $env is the PSGI env. As with ``/qx; only use this when output is small # and safe to slurp. sub psgi_qx { - my ($self, $env, $limiter, $qx_cb, $cb_arg) = @_; + my ($self, $env, $limiter, $qx_cb, $qx_arg) = @_; $self->{psgi_env} = $env; - my $scalar = ''; - open(my $qx, '+>', \$scalar) or die; # PerlIO::scalar - my $end = sub { - my $err = $_[0]; # $! - log_err($env, "psgi_qx: $err") if defined($err); - finish($self, sub { $qx_cb->(\$scalar, $cb_arg) }); - $qx = undef; - }; + my $qx_buf = ''; + open(my $qx_fh, '+>', \$qx_buf) or die; # PerlIO::scalar + $self->{qx_cb} = $qx_cb; + $self->{qx_arg} = $qx_arg; + $self->{qx_fh} = $qx_fh; + $self->{qx_buf} = \$qx_buf; my $rpipe; # comes from popen_rd my $async = $env->{'pi-httpd.async'}; my $cb = sub { @@ -171,24 +169,24 @@ sub psgi_qx { reread: $r = sysread($rpipe, $buf, 65536); if ($async) { - $async->async_pass($env->{'psgix.io'}, $qx, \$buf); + $async->async_pass($env->{'psgix.io'}, $qx_fh, \$buf); } elsif (defined $r) { - $r ? $qx->write($buf) : $end->(); + $r ? $qx_fh->write($buf) : event_step($self, undef); } else { return if $! == EAGAIN; # try again when notified goto reread if $! == EINTR; - $end->($!); + event_step($self, $!); } }; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); $self->start($limiter, sub { # start_cb, may run later, much later... ($rpipe) = @_; # popen_rd result if ($async) { - # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) - $async = $async->($rpipe, $cb, undef, $end); + # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end_obj) + $async = $async->($rpipe, $cb, undef, $self); # $cb will call ->async_pass or ->close } else { # generic PSGI - $cb->() while $qx; + $cb->() while $self->{qx_fh}; } }); } @@ -206,6 +204,17 @@ sub filter_fh ($$) { }); } +# this is called on pipe EOF to reap the process, may be called +# via PublicInbox::DS event loop OR via GetlineBody for generic +# PSGI servers. +sub event_step { + my ($self, $err) = @_; # $err: $! + log_err($self->{psgi_env}, "psgi_{return,qx} $err") if defined($err); + finish($self); + my ($fh, $qx_fh) = delete(@$self{qw(fh qx_fh)}); + $fh->close if $fh; # async-only (psgi_return) +} + # Used for streaming the stdout of one process as a PSGI response. # # $env is the PSGI env. @@ -231,14 +240,7 @@ sub filter_fh ($$) { sub psgi_return { my ($self, $env, $limiter, $parse_hdr) = @_; $self->{psgi_env} = $env; - my ($fh, $rpipe); - my $end = sub { - my $err = $_[0]; # $! - log_err($env, "psgi_return: $err") if defined($err); - finish($self); - $fh->close if $fh; # async-only - }; - + my $rpipe; my $buf = ''; my $rd_hdr = sub { # typically used for reading CGI headers @@ -271,21 +273,24 @@ sub psgi_return { my $filter = delete $env->{'qspawn.filter'}; if (scalar(@$r) == 3) { # error if ($async) { - $async->close; # calls rpipe->close and $end + # calls rpipe->close && ->event_step + $async->close; } else { $rpipe->close; - $end->(); + event_step($self); } $wcb->($r); } elsif ($async) { # done reading headers, handoff to read body - $fh = $wcb->($r); # scalar @$r == 2 + my $fh = $wcb->($r); # scalar @$r == 2 $fh = filter_fh($fh, $filter) if $filter; + $self->{fh} = $fh; $async->async_pass($env->{'psgix.io'}, $fh, \$buf); } else { # for synchronous PSGI servers require PublicInbox::GetlineBody; - $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, - $buf, $filter); + $r->[2] = PublicInbox::GetlineBody->new($rpipe, + \&event_step, $self, + $buf, $filter); $wcb->($r); } @@ -297,8 +302,9 @@ sub psgi_return { my $start_cb = sub { # may run later, much later... ($rpipe) = @_; if ($async) { - # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end) - $async = $async->($rpipe, $cb, undef, $end); + # PublicInbox::HTTPD::Async->new($rpipe, $cb, $cb_arg, + # $end_obj) + $async = $async->($rpipe, $cb, undef, $self); # $cb will call ->async_pass or ->close } else { # generic PSGI $cb->() while $rd_hdr; -- cgit v1.2.3-24-ge0c7 From e0e53f21b15d40841f184f22223896cf7cf0b8d8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:40 +0000 Subject: msg_iter: provide means to stop using anonymous subs And remove the last anonymous sub in SolverGit itself. --- lib/PublicInbox/MsgIter.pm | 8 ++++---- lib/PublicInbox/SolverGit.pm | 19 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index 6453d9f1..cdd78b39 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -12,8 +12,8 @@ use PublicInbox::MIME; # Like Email::MIME::walk_parts, but this is: # * non-recursive # * passes depth and indices to the iterator callback -sub msg_iter ($$) { - my ($mime, $cb) = @_; +sub msg_iter ($$;$) { + my ($mime, $cb, $cb_arg) = @_; my @parts = $mime->subparts; if (@parts) { my $i = 0; @@ -27,11 +27,11 @@ sub msg_iter ($$) { @sub = map { [ $_, $depth, @idx, ++$i ] } @sub; @parts = (@sub, @parts); } else { - $cb->($p); + $cb->($p, $cb_arg); } } } else { - $cb->([$mime, 0, 0]); + $cb->([$mime, 0, 0], $cb_arg); } } diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index fd43940f..9266fb08 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -93,8 +93,9 @@ sub solve_existing ($$) { scalar(@ambiguous) ? \@ambiguous : undef; } -sub extract_diff ($$$$$) { - my ($self, $p, $re, $ibx, $smsg) = @_; +sub extract_diff ($$) { + my ($p, $arg) = @_; + my ($self, $diffs, $re, $ibx, $smsg) = @$arg; my ($part) = @$p; # ignore $depth and @idx; my $hdr_lines; # diff --git a/... b/... my $tmp; @@ -170,7 +171,7 @@ sub extract_diff ($$$$$) { } return undef unless $tmp; close $tmp or die "close(tmp): $!"; - $di; + push @$diffs, $di; } sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! } @@ -209,16 +210,14 @@ sub find_extract_diffs ($$$) { my $msgs = $srch->query($q, { relevance => 1 }); my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: ([0-9]+))?/; - - my @di; + my $diffs = []; foreach my $smsg (@$msgs) { $ibx->smsg_mime($smsg) or next; - msg_iter(delete($smsg->{mime}), sub { - my $di = extract_diff($self, $_[0], $re, $ibx, $smsg); - push @di, $di if defined($di); - }); + my $mime = delete $smsg->{mime}; + msg_iter($mime, \&extract_diff, + [$self, $diffs, $re, $ibx, $smsg]); } - @di ? \@di : undef; + @$diffs ? $diffs : undef; } sub update_index_result ($$) { -- cgit v1.2.3-24-ge0c7 From 2131a3cd0a1cad6f7d6a2c6439676f9f2a039ff7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:41 +0000 Subject: qspawn: reduce local vars, de-anonymize rd_hdr rd_hdr() now becomes a named subroutine instead of a per-call local variable, so kilobytes of memory will not have to be allocated for it on every ->psgi_return call. --- lib/PublicInbox/Qspawn.pm | 79 ++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 6cb28b9a..1985dccd 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -162,12 +162,11 @@ sub psgi_qx { $self->{qx_arg} = $qx_arg; $self->{qx_fh} = $qx_fh; $self->{qx_buf} = \$qx_buf; - my $rpipe; # comes from popen_rd my $async = $env->{'pi-httpd.async'}; my $cb = sub { my ($r, $buf); reread: - $r = sysread($rpipe, $buf, 65536); + $r = sysread($self->{rpipe}, $buf, 65536); if ($async) { $async->async_pass($env->{'psgix.io'}, $qx_fh, \$buf); } elsif (defined $r) { @@ -180,10 +179,9 @@ reread: }; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); $self->start($limiter, sub { # start_cb, may run later, much later... - ($rpipe) = @_; # popen_rd result if ($async) { - # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end_obj) - $async = $async->($rpipe, $cb, undef, $self); + # PublicInbox::HTTPD::Async->new(rpipe, $cb, $end_obj) + $async = $async->($self->{rpipe}, $cb, undef, $self); # $cb will call ->async_pass or ->close } else { # generic PSGI $cb->() while $self->{qx_fh}; @@ -215,6 +213,32 @@ sub event_step { $fh->close if $fh; # async-only (psgi_return) } +sub rd_hdr ($) { + my ($self) = @_; + # typically used for reading CGI headers + # we must loop until EAGAIN for EPOLLET in HTTPD/Async.pm + # We also need to check EINTR for generic PSGI servers. + my $ret; + my $total_rd = 0; + my $hdr_buf = $self->{hdr_buf}; + do { + my $r = sysread($self->{rpipe}, $$hdr_buf, 4096, + length($$hdr_buf)); + if (defined($r)) { + $total_rd += $r; + $ret = $self->{parse_hdr}->($total_rd, $hdr_buf); + } else { + # caller should notify us when it's ready: + return if $! == EAGAIN; + next if $! == EINTR; # immediate retry + log_err($self->{psgi_env}, "error reading header: $!"); + $ret = [ 500, [], [ "Internal error\n" ] ]; + } + } until (defined $ret); + delete $self->{parse_hdr}; # done parsing headers + $ret; +} + # Used for streaming the stdout of one process as a PSGI response. # # $env is the PSGI env. @@ -240,43 +264,20 @@ sub event_step { sub psgi_return { my ($self, $env, $limiter, $parse_hdr) = @_; $self->{psgi_env} = $env; - my $rpipe; - my $buf = ''; - my $rd_hdr = sub { - # typically used for reading CGI headers - # we must loop until EAGAIN for EPOLLET in HTTPD/Async.pm - # We also need to check EINTR for generic PSGI servers. - my $ret; - my $total_rd = 0; - do { - my $r = sysread($rpipe, $buf, 4096, length($buf)); - if (defined($r)) { - $total_rd += $r; - $ret = $parse_hdr->($r ? $total_rd : 0, \$buf); - } else { - # caller should notify us when it's ready: - return if $! == EAGAIN; - next if $! == EINTR; # immediate retry - log_err($env, "error reading header: $!"); - $ret = [ 500, [], [ "Internal error\n" ] ]; - } - } until (defined $ret); - $ret; - }; - + $self->{hdr_buf} = \(my $hdr_buf = ''); + $self->{parse_hdr} = $parse_hdr; my $wcb = delete $env->{'qspawn.wcb'}; # or PSGI server supplies it my $async = $env->{'pi-httpd.async'}; my $cb = sub { - my $r = $rd_hdr->() or return; - $rd_hdr = undef; # done reading headers + my $r = rd_hdr($self) or return; my $filter = delete $env->{'qspawn.filter'}; if (scalar(@$r) == 3) { # error if ($async) { # calls rpipe->close && ->event_step $async->close; } else { - $rpipe->close; + $self->{rpipe}->close; event_step($self); } $wcb->($r); @@ -285,12 +286,13 @@ sub psgi_return { my $fh = $wcb->($r); # scalar @$r == 2 $fh = filter_fh($fh, $filter) if $filter; $self->{fh} = $fh; - $async->async_pass($env->{'psgix.io'}, $fh, \$buf); + $async->async_pass($env->{'psgix.io'}, $fh, + delete($self->{hdr_buf})); } else { # for synchronous PSGI servers require PublicInbox::GetlineBody; - $r->[2] = PublicInbox::GetlineBody->new($rpipe, + $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe}, \&event_step, $self, - $buf, $filter); + ${$self->{hdr_buf}}, $filter); $wcb->($r); } @@ -300,14 +302,13 @@ sub psgi_return { }; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); my $start_cb = sub { # may run later, much later... - ($rpipe) = @_; if ($async) { - # PublicInbox::HTTPD::Async->new($rpipe, $cb, $cb_arg, + # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, # $end_obj) - $async = $async->($rpipe, $cb, undef, $self); + $async = $async->($self->{rpipe}, $cb, undef, $self); # $cb will call ->async_pass or ->close } else { # generic PSGI - $cb->() while $rd_hdr; + $cb->() while $self->{parse_hdr}; } }; -- cgit v1.2.3-24-ge0c7 From b7fbffd1f8c125566b944e37a21ab0747728bdc1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:42 +0000 Subject: httpd/async: get rid of ephemeral main_cb Cheaper to use up two hash table slots than creating a new sub. --- lib/PublicInbox/HTTPD/Async.pm | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index 8956f719..f4de2719 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -10,7 +10,7 @@ package PublicInbox::HTTPD::Async; use strict; use warnings; use base qw(PublicInbox::DS); -use fields qw(cb arg end_obj); +use fields qw(http fh cb arg end_obj); use Errno qw(EAGAIN); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); @@ -31,21 +31,24 @@ sub new { my $self = fields::new($class); IO::Handle::blocking($io, 0); $self->SUPER::new($io, EPOLLIN | EPOLLET); - $self->{cb} = $cb; # initial read callback, later replaced by main_cb + $self->{cb} = $cb; # initial read callback $self->{arg} = $arg; # arg for $cb $self->{end_obj} = $end_obj; # like END{}, can ->event_step $self; } -sub main_cb ($$) { - my ($http, $fh) = @_; - sub { - my ($self) = @_; +sub event_step { + my ($self) = @_; + if (my $cb = delete $self->{cb}) { + # this may call async_pass when headers are done + $cb->(delete $self->{arg}); + } elsif (my $sock = $self->{sock}) { + my $http = $self->{http}; # $self->{sock} is a read pipe for git-http-backend or cgit # and 65536 is the default Linux pipe size - my $r = sysread($self->{sock}, my $buf, 65536); + my $r = sysread($sock, my $buf, 65536); if ($r) { - $fh->write($buf); # may call $http->close + $self->{fh}->write($buf); # may call $http->close if ($http->{sock}) { # !closed $self->requeue; # let other clients get some work done, too @@ -57,11 +60,11 @@ sub main_cb ($$) { return; # EPOLLET means we'll be notified } - # Done! Error handling will happen in $fh->close - # called by the {end} handler + # Done! Error handling will happen in $self->{fh}->close + # called by end_obj->event_step handler delete $http->{forward}; - $self->close; # queues ->{end} to be called - } + $self->close; # queues end_obj->event_step to be called + } # else { # we may've been requeued but closed by $http } # once this is called, all data we read is passed to the @@ -79,21 +82,16 @@ sub async_pass { # calls after this may use much memory: $$bref = undef; - # replace the header read callback with the main one - my $cb = $self->{cb} = main_cb($http, $fh); - $cb->($self); # either hit EAGAIN or ->requeue to keep EPOLLET happy -} + $self->{http} = $http; + $self->{fh} = $fh; -sub event_step { - # {cb} may be undef after ->requeue due to $http->close happening - my $cb = $_[0]->{cb} or return; - $cb->(@_); + # either hit EAGAIN or ->requeue to keep EPOLLET happy + event_step($self); } -# may be called as $forward->close in PublicInbox::HTTP or EOF (main_cb) +# may be called as $forward->close in PublicInbox::HTTP or EOF (event_step) sub close { my $self = $_[0]; - delete $self->{cb}; $self->SUPER::close; # DS::close # we defer this to the next timer loop since close is deferred -- cgit v1.2.3-24-ge0c7 From e61e5fcff30370f355d8aeea6e47c06f3606e994 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:43 +0000 Subject: qspawn: psgi_return: initial cb can be named We can take advantage of HTTPD::Async being able to pass user-supplied args to callbacks to get rid of one (of many) anonymous subs in the code path. --- lib/PublicInbox/Qspawn.pm | 83 ++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 40 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 1985dccd..0967bcfa 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -239,6 +239,42 @@ sub rd_hdr ($) { $ret; } +sub psgi_return_init_cb { + my ($self) = @_; + my $r = rd_hdr($self) or return; + my $env = $self->{psgi_env}; + my $filter = delete $env->{'qspawn.filter'}; + my $wcb = delete $env->{'qspawn.wcb'}; + my $async = delete $self->{async}; + if (scalar(@$r) == 3) { # error + if ($async) { + # calls rpipe->close && ->event_step + $async->close; + } else { + $self->{rpipe}->close; + event_step($self); + } + $wcb->($r); + } elsif ($async) { + # done reading headers, handoff to read body + my $fh = $wcb->($r); # scalar @$r == 2 + $fh = filter_fh($fh, $filter) if $filter; + $self->{fh} = $fh; + $async->async_pass($env->{'psgix.io'}, $fh, + delete($self->{hdr_buf})); + } else { # for synchronous PSGI servers + require PublicInbox::GetlineBody; + $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe}, + \&event_step, $self, + ${$self->{hdr_buf}}, $filter); + $wcb->($r); + } + + # Workaround a leak under Perl 5.16.3 when combined with + # Plack::Middleware::Deflater: + $wcb = undef; +} + # Used for streaming the stdout of one process as a PSGI response. # # $env is the PSGI env. @@ -266,62 +302,29 @@ sub psgi_return { $self->{psgi_env} = $env; $self->{hdr_buf} = \(my $hdr_buf = ''); $self->{parse_hdr} = $parse_hdr; - my $wcb = delete $env->{'qspawn.wcb'}; # or PSGI server supplies it - my $async = $env->{'pi-httpd.async'}; - - my $cb = sub { - my $r = rd_hdr($self) or return; - my $filter = delete $env->{'qspawn.filter'}; - if (scalar(@$r) == 3) { # error - if ($async) { - # calls rpipe->close && ->event_step - $async->close; - } else { - $self->{rpipe}->close; - event_step($self); - } - $wcb->($r); - } elsif ($async) { - # done reading headers, handoff to read body - my $fh = $wcb->($r); # scalar @$r == 2 - $fh = filter_fh($fh, $filter) if $filter; - $self->{fh} = $fh; - $async->async_pass($env->{'psgix.io'}, $fh, - delete($self->{hdr_buf})); - } else { # for synchronous PSGI servers - require PublicInbox::GetlineBody; - $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe}, - \&event_step, $self, - ${$self->{hdr_buf}}, $filter); - $wcb->($r); - } - - # Workaround a leak under Perl 5.16.3 when combined with - # Plack::Middleware::Deflater: - $wcb = undef; - }; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); my $start_cb = sub { # may run later, much later... - if ($async) { + if (my $async = $env->{'pi-httpd.async'}) { # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, # $end_obj) - $async = $async->($self->{rpipe}, $cb, undef, $self); - # $cb will call ->async_pass or ->close + $self->{async} = $async->($self->{rpipe}, + \&psgi_return_init_cb, $self, + $self); } else { # generic PSGI - $cb->() while $self->{parse_hdr}; + psgi_return_init_cb($self) while $self->{parse_hdr}; } }; # the caller already captured the PSGI write callback from # the PSGI server, so we can call ->start, here: - return $self->start($limiter, $start_cb) if $wcb; + return $self->start($limiter, $start_cb) if $env->{'qspawn.wcb'}; # the caller will return this sub to the PSGI server, so # it can set the response callback (that is, for PublicInbox::HTTP, # the chunked_wcb or identity_wcb callback), but other HTTP servers # are supported: sub { - ($wcb) = @_; + $self->{psgi_env}->{'qspawn.wcb'} = $_[0]; $self->start($limiter, $start_cb); }; } -- cgit v1.2.3-24-ge0c7 From 574cf9098d09c7c2e10c28cf577321161ddecd61 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:44 +0000 Subject: qspawn: psgi_return_start: hoist out from psgi_return Instead of just passing the rpipe to the start_cb, pass the entire qspawn ref to start_cb. Update existing callers to avoid circular refs. --- lib/PublicInbox/Qspawn.pm | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 0967bcfa..33e20147 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -66,7 +66,7 @@ sub _do_spawn { } else { $self->{err} = $!; } - $start_cb->($self->{rpipe}); + $start_cb->($self); } sub child_err ($) { @@ -140,7 +140,7 @@ sub finish ($) { } } -sub start { +sub start ($$$) { my ($self, $limiter, $start_cb) = @_; if ($limiter->{running} < $limiter->{max}) { _do_spawn($self, $start_cb, $limiter); @@ -275,6 +275,17 @@ sub psgi_return_init_cb { $wcb = undef; } +sub psgi_return_start { # may run later, much later... + my ($self) = @_; + if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { + # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, $end_obj) + $self->{async} = $async->($self->{rpipe}, + \&psgi_return_init_cb, $self, $self); + } else { # generic PSGI + psgi_return_init_cb($self) while $self->{parse_hdr}; + } +} + # Used for streaming the stdout of one process as a PSGI response. # # $env is the PSGI env. @@ -303,30 +314,20 @@ sub psgi_return { $self->{hdr_buf} = \(my $hdr_buf = ''); $self->{parse_hdr} = $parse_hdr; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); - my $start_cb = sub { # may run later, much later... - if (my $async = $env->{'pi-httpd.async'}) { - # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, - # $end_obj) - $self->{async} = $async->($self->{rpipe}, - \&psgi_return_init_cb, $self, - $self); - } else { # generic PSGI - psgi_return_init_cb($self) while $self->{parse_hdr}; - } - }; # the caller already captured the PSGI write callback from # the PSGI server, so we can call ->start, here: - return $self->start($limiter, $start_cb) if $env->{'qspawn.wcb'}; + $env->{'qspawn.wcb'} and + return start($self, $limiter, \&psgi_return_start); # the caller will return this sub to the PSGI server, so - # it can set the response callback (that is, for PublicInbox::HTTP, - # the chunked_wcb or identity_wcb callback), but other HTTP servers - # are supported: + # it can set the response callback (that is, for + # PublicInbox::HTTP, the chunked_wcb or identity_wcb callback), + # but other HTTP servers are supported: sub { - $self->{psgi_env}->{'qspawn.wcb'} = $_[0]; - $self->start($limiter, $start_cb); - }; + $env->{'qspawn.wcb'} = $_[0]; + start($self, $limiter, \&psgi_return_start); + } } package PublicInbox::Qspawn::Limiter; -- cgit v1.2.3-24-ge0c7 From 1971bbe4cd599b3d9583084145266369525bfca2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:45 +0000 Subject: qspawn: psgi_qx: eliminate anonymous subs We can follow what we did in psgi_return to make psgi_qx allocate less memory on each call. --- lib/PublicInbox/Qspawn.pm | 56 +++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 33e20147..9e161234 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -149,6 +149,37 @@ sub start ($$$) { } } +sub psgi_qx_init_cb { + my ($self) = @_; + my $async = delete $self->{async}; + my ($r, $buf); + my $qx_fh = $self->{qx_fh}; +reread: + $r = sysread($self->{rpipe}, $buf, 65536); + if ($async) { + $async->async_pass($self->{psgi_env}->{'psgix.io'}, + $qx_fh, \$buf); + } elsif (defined $r) { + $r ? $qx_fh->write($buf) : event_step($self, undef); + } else { + return if $! == EAGAIN; # try again when notified + goto reread if $! == EINTR; + event_step($self, $!); + } +} + +sub psgi_qx_start { + my ($self) = @_; + if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { + # PublicInbox::HTTPD::Async->new(rpipe, $cb, cb_arg, $end_obj) + $self->{async} = $async->($self->{rpipe}, + \&psgi_qx_init_cb, $self, $self); + # init_cb will call ->async_pass or ->close + } else { # generic PSGI + psgi_qx_init_cb($self) while $self->{qx_fh}; + } +} + # Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with # the stdout of the given command when done; but respects the given limiter # $env is the PSGI env. As with ``/qx; only use this when output is small @@ -162,31 +193,8 @@ sub psgi_qx { $self->{qx_arg} = $qx_arg; $self->{qx_fh} = $qx_fh; $self->{qx_buf} = \$qx_buf; - my $async = $env->{'pi-httpd.async'}; - my $cb = sub { - my ($r, $buf); -reread: - $r = sysread($self->{rpipe}, $buf, 65536); - if ($async) { - $async->async_pass($env->{'psgix.io'}, $qx_fh, \$buf); - } elsif (defined $r) { - $r ? $qx_fh->write($buf) : event_step($self, undef); - } else { - return if $! == EAGAIN; # try again when notified - goto reread if $! == EINTR; - event_step($self, $!); - } - }; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); - $self->start($limiter, sub { # start_cb, may run later, much later... - if ($async) { - # PublicInbox::HTTPD::Async->new(rpipe, $cb, $end_obj) - $async = $async->($self->{rpipe}, $cb, undef, $self); - # $cb will call ->async_pass or ->close - } else { # generic PSGI - $cb->() while $self->{qx_fh}; - } - }); + start($self, $limiter, \&psgi_qx_start); } # create a filter for "push"-based streaming PSGI writes used by HTTPD::Async -- cgit v1.2.3-24-ge0c7 From 3eba4cbd05b348622e772889b06a5994ba69d157 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:46 +0000 Subject: qspawn: drop "qspawn.filter" support, for now This feature was added in preparation for future changes that have yet to materialize after nearly 3 years. We can re-add it if needed in the future. --- lib/PublicInbox/GetlineBody.pm | 6 +++--- lib/PublicInbox/Qspawn.pm | 21 +-------------------- 2 files changed, 4 insertions(+), 23 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm index 750a8c53..bcabc04a 100644 --- a/lib/PublicInbox/GetlineBody.pm +++ b/lib/PublicInbox/GetlineBody.pm @@ -13,13 +13,13 @@ use strict; use warnings; sub new { - my ($class, $rpipe, $end, $end_arg, $buf, $filter) = @_; + my ($class, $rpipe, $end, $end_arg, $buf) = @_; bless { rpipe => $rpipe, end => $end, end_arg => $end_arg, buf => $buf, - filter => $filter || 0, + filter => 0, }, $class; } @@ -36,7 +36,7 @@ sub getline { my $buf = delete $self->{buf}; # initial buffer $buf = $self->{rpipe}->getline unless defined $buf; $self->{filter} = -1 unless defined $buf; # set EOF for next call - $filter ? $filter->($buf) : $buf; + $buf; } sub close { diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index 9e161234..d1a34bea 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -27,7 +27,6 @@ package PublicInbox::Qspawn; use strict; use warnings; use PublicInbox::Spawn qw(popen_rd); -require Plack::Util; # n.b.: we get EAGAIN with public-inbox-httpd, and EINTR on other PSGI servers use Errno qw(EAGAIN EINTR); @@ -197,19 +196,6 @@ sub psgi_qx { start($self, $limiter, \&psgi_qx_start); } -# create a filter for "push"-based streaming PSGI writes used by HTTPD::Async -sub filter_fh ($$) { - my ($fh, $filter) = @_; - Plack::Util::inline_object( - close => sub { - $fh->write($filter->(undef)); - $fh->close; - }, - write => sub { - $fh->write($filter->($_[0])); - }); -} - # this is called on pipe EOF to reap the process, may be called # via PublicInbox::DS event loop OR via GetlineBody for generic # PSGI servers. @@ -251,7 +237,6 @@ sub psgi_return_init_cb { my ($self) = @_; my $r = rd_hdr($self) or return; my $env = $self->{psgi_env}; - my $filter = delete $env->{'qspawn.filter'}; my $wcb = delete $env->{'qspawn.wcb'}; my $async = delete $self->{async}; if (scalar(@$r) == 3) { # error @@ -266,7 +251,6 @@ sub psgi_return_init_cb { } elsif ($async) { # done reading headers, handoff to read body my $fh = $wcb->($r); # scalar @$r == 2 - $fh = filter_fh($fh, $filter) if $filter; $self->{fh} = $fh; $async->async_pass($env->{'psgix.io'}, $fh, delete($self->{hdr_buf})); @@ -274,7 +258,7 @@ sub psgi_return_init_cb { require PublicInbox::GetlineBody; $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe}, \&event_step, $self, - ${$self->{hdr_buf}}, $filter); + ${$self->{hdr_buf}}); $wcb->($r); } @@ -304,9 +288,6 @@ sub psgi_return_start { # may run later, much later... # psgi_return will return an anonymous # sub for the PSGI server to call # -# $env->{'qspawn.filter'} - filter callback, receives a string as input, -# undef on EOF -# # $limiter - the Limiter object to use (uses the def_limiter if not given) # # $parse_hdr - Initial read function; often for parsing CGI header output. -- cgit v1.2.3-24-ge0c7 From a7949988f7f8690c868d2150fe3000fcf6a6d5f4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:47 +0000 Subject: qspawn: psgi_return: allow non-anon parse_hdr callback Callers can supply an arg to parse_hdr, now, eliminating the need for closures to capture local variables. --- lib/PublicInbox/Cgit.pm | 12 ++++++---- lib/PublicInbox/GitHTTPBackend.pm | 12 ++++++---- lib/PublicInbox/Qspawn.pm | 7 +++--- lib/PublicInbox/ViewVCS.pm | 48 +++++++++++++++++++++------------------ 4 files changed, 44 insertions(+), 35 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 353f4162..094f146e 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -95,6 +95,12 @@ my @PASS_ENV = qw( ); # XXX: cgit filters may care about more variables... +sub cgit_parse_hdr { # {parse_hdr} for Qspawn + my ($r, $bref) = @_; + my $res = parse_cgi_headers($r, $bref) or return; # incomplete + $res; +} + sub call { my ($self, $env) = @_; my $path_info = $env->{PATH_INFO}; @@ -123,11 +129,7 @@ sub call { my $rdr = input_prepare($env) or return r(500); my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr); my $limiter = $self->{pi_config}->limiter('-cgit'); - $qsp->psgi_return($env, $limiter, sub { - my ($r, $bref) = @_; - my $res = parse_cgi_headers($r, $bref) or return; # incomplete - $res; - }); + $qsp->psgi_return($env, $limiter, \&cgit_parse_hdr); } 1; diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index ec8e6516..537a1947 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -184,6 +184,12 @@ sub prepare_range { ($code, $len); } +sub git_parse_hdr { # {parse_hdr} for Qspawn + my ($r, $bref, $dumb_args) = @_; + my $res = parse_cgi_headers($r, $bref) or return; # incomplete + $res->[0] == 403 ? serve_dumb(@$dumb_args) : $res; +} + # returns undef if 403 so it falls back to dumb HTTP sub serve_smart { my ($env, $git, $path) = @_; @@ -204,11 +210,7 @@ sub serve_smart { $env{PATH_TRANSLATED} = "$git->{git_dir}/$path"; my $rdr = input_prepare($env) or return r(500); my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr); - $qsp->psgi_return($env, $limiter, sub { # parse_hdr - my ($r, $bref) = @_; - my $res = parse_cgi_headers($r, $bref) or return; # incomplete - $res->[0] == 403 ? serve_dumb($env, $git, $path) : $res; - }); + $qsp->psgi_return($env, $limiter, \&git_parse_hdr, [$env, $git, $path]); } sub input_prepare { diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index d1a34bea..1a2b70e7 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -215,12 +215,13 @@ sub rd_hdr ($) { my $ret; my $total_rd = 0; my $hdr_buf = $self->{hdr_buf}; + my ($ph_cb, $ph_arg) = @{$self->{parse_hdr}}; do { my $r = sysread($self->{rpipe}, $$hdr_buf, 4096, length($$hdr_buf)); if (defined($r)) { $total_rd += $r; - $ret = $self->{parse_hdr}->($total_rd, $hdr_buf); + $ret = $ph_cb->($total_rd, $hdr_buf, $ph_arg); } else { # caller should notify us when it's ready: return if $! == EAGAIN; @@ -298,10 +299,10 @@ sub psgi_return_start { # may run later, much later... # psgix.io. 3-element arrays means the body is available # immediately (or streamed via ->getline (pull-based)). sub psgi_return { - my ($self, $env, $limiter, $parse_hdr) = @_; + my ($self, $env, $limiter, $parse_hdr, $hdr_arg) = @_; $self->{psgi_env} = $env; $self->{hdr_buf} = \(my $hdr_buf = ''); - $self->{parse_hdr} = $parse_hdr; + $self->{parse_hdr} = [ $parse_hdr, $hdr_arg ]; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); # the caller already captured the PSGI write callback from diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 886e10cb..7618b198 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -42,35 +42,39 @@ sub html_page ($$$) { $wcb ? $wcb->($res) : $res; } +sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn + my ($r, $bref, $ctx) = @_; + my ($res, $logref) = delete @$ctx{qw(-res -logref)}; + my ($git, $oid, $type, $size, $di) = @$res; + my @cl = ('Content-Length', $size); + if (!defined $r) { # error + html_page($ctx, 500, $logref); + } elsif (index($$bref, "\0") >= 0) { + [200, [qw(Content-Type application/octet-stream), @cl] ]; + } else { + my $n = bytes::length($$bref); + if ($n >= $BIN_DETECT || $n == $size) { + return [200, [ 'Content-Type', + 'text/plain; charset=UTF-8', @cl ] ]; + } + if ($r == 0) { + warn "premature EOF on $oid $$logref\n"; + return html_page($ctx, 500, $logref); + } + undef; # bref keeps growing + } +} + sub stream_large_blob ($$$$) { my ($ctx, $res, $logref, $fn) = @_; + $ctx->{-logref} = $logref; + $ctx->{-res} = $res; my ($git, $oid, $type, $size, $di) = @$res; my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid]; my $qsp = PublicInbox::Qspawn->new($cmd); - my @cl = ('Content-Length', $size); my $env = $ctx->{env}; - $env->{'public-inbox.tmpgit'} = $git; # for {-tmp}/File::Temp::Dir $env->{'qspawn.wcb'} = delete $ctx->{-wcb}; - $qsp->psgi_return($env, undef, sub { - my ($r, $bref) = @_; - if (!defined $r) { # error - html_page($ctx, 500, $logref); - } elsif (index($$bref, "\0") >= 0) { - my $ct = 'application/octet-stream'; - [200, ['Content-Type', $ct, @cl ] ]; - } else { - my $n = bytes::length($$bref); - if ($n >= $BIN_DETECT || $n == $size) { - my $ct = 'text/plain; charset=UTF-8'; - return [200, ['Content-Type', $ct, @cl] ]; - } - if ($r == 0) { - warn "premature EOF on $oid $$logref\n"; - return html_page($ctx, 500, $logref); - } - undef; # bref keeps growing - } - }); + $qsp->psgi_return($env, undef, \&stream_blob_parse_hdr, $ctx); } sub show_other_result ($$) { -- cgit v1.2.3-24-ge0c7 From 07d0e2d336d4697c3284fe3dd59dae0583984e23 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:48 +0000 Subject: githttpbackend: split out wwwstatic Make it easier to share code between our GitHTTPBackend and Cgit packages, for now, and possibly other packages in the future. We can avoid inline_object and anonymous subs at the same time, reducing per-request memory overhead. --- lib/PublicInbox/Cgit.pm | 6 +-- lib/PublicInbox/GitHTTPBackend.pm | 91 ++------------------------------- lib/PublicInbox/WwwStatic.pm | 105 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 91 deletions(-) create mode 100644 lib/PublicInbox/WwwStatic.pm (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 094f146e..68da9178 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -13,9 +13,9 @@ use PublicInbox::GitHTTPBackend; *input_prepare = *PublicInbox::GitHTTPBackend::input_prepare; *parse_cgi_headers = *PublicInbox::GitHTTPBackend::parse_cgi_headers; *serve = *PublicInbox::GitHTTPBackend::serve; -*static_result = *PublicInbox::GitHTTPBackend::static_result; use warnings; use PublicInbox::Qspawn; +use PublicInbox::WwwStatic; use Plack::MIME; sub locate_cgit ($) { @@ -115,8 +115,8 @@ sub call { } elsif ($path_info =~ m!$self->{static}! && defined($cgit_data = $self->{cgit_data})) { my $f = $1; - my $type = Plack::MIME->mime_type($f); - return static_result($env, [], $cgit_data.$f, $type); + return PublicInbox::WwwStatic::response($env, [], $cgit_data.$f, + Plack::MIME->mime_type($f)); } my $cgi_env = { PATH_INFO => $path_info }; diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 537a1947..b7640d42 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -10,9 +10,9 @@ use Fcntl qw(:seek); use IO::Handle; use HTTP::Date qw(time2str); use HTTP::Status qw(status_message); -use Plack::Util; use PublicInbox::Qspawn; use PublicInbox::Tmpfile; +use PublicInbox::WwwStatic; # 32 is same as the git-daemon connection limit my $default_limiter = PublicInbox::Qspawn::Limiter->new(32); @@ -66,12 +66,6 @@ sub err ($@) { $env->{'psgi.errors'}->print(@msg, "\n"); } -sub drop_client ($) { - if (my $io = $_[0]->{'psgix.io'}) { - $io->close; # this is PublicInbox::DS::close - } -} - my $prev = 0; my $exp; sub cache_one_year { @@ -81,44 +75,6 @@ sub cache_one_year { 'Cache-Control', 'public, max-age=31536000'; } -sub static_result ($$$$) { - my ($env, $h, $f, $type) = @_; - return r(404) unless -f $f && -r _; # just in case it's a FIFO :P - - # TODO: If-Modified-Since and Last-Modified? - open my $in, '<', $f or return r(404); - my $size = -s $in; - my $len = $size; - my $code = 200; - push @$h, 'Content-Type', $type; - if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { - ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size); - if ($code == 416) { - push @$h, 'Content-Range', "bytes */$size"; - return [ 416, $h, [] ]; - } - } - push @$h, 'Content-Length', $len; - my $n = 65536; - [ $code, $h, Plack::Util::inline_object(close => sub { close $in }, - getline => sub { - return if $len == 0; - $n = $len if $len < $n; - my $r = sysread($in, my $buf, $n); - if (!defined $r) { - err($env, "$f read error: $!"); - } elsif ($r <= 0) { - err($env, "$f EOF with $len bytes left"); - } else { - $len -= $r; - $n = 8192; - return $buf; - } - drop_client($env); - return; - })] -} - sub serve_dumb { my ($env, $git, $path) = @_; @@ -139,49 +95,8 @@ sub serve_dumb { } else { return r(404); } - - static_result($env, $h, "$git->{git_dir}/$path", $type); -} - -sub prepare_range { - my ($env, $in, $h, $beg, $end, $size) = @_; - my $code = 200; - my $len = $size; - if ($beg eq '') { - if ($end ne '') { # "bytes=-$end" => last N bytes - $beg = $size - $end; - $beg = 0 if $beg < 0; - $end = $size - 1; - $code = 206; - } else { - $code = 416; - } - } else { - if ($beg > $size) { - $code = 416; - } elsif ($end eq '' || $end >= $size) { - $end = $size - 1; - $code = 206; - } elsif ($end < $size) { - $code = 206; - } else { - $code = 416; - } - } - if ($code == 206) { - $len = $end - $beg + 1; - if ($len <= 0) { - $code = 416; - } else { - sysseek($in, $beg, SEEK_SET) or return [ 500, [], [] ]; - push @$h, qw(Accept-Ranges bytes Content-Range); - push @$h, "bytes $beg-$end/$size"; - - # FIXME: Plack::Middleware::Deflater bug? - $env->{'psgix.no-compress'} = 1; - } - } - ($code, $len); + $path = "$git->{git_dir}/$path"; + PublicInbox::WwwStatic::response($env, $h, $path, $type) // r(404); } sub git_parse_hdr { # {parse_hdr} for Qspawn diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm new file mode 100644 index 00000000..76e50c78 --- /dev/null +++ b/lib/PublicInbox/WwwStatic.pm @@ -0,0 +1,105 @@ +# Copyright (C) 2016-2019 all contributors +# License: AGPL-3.0+ + +package PublicInbox::WwwStatic; +use strict; +use Fcntl qw(:seek); + +sub prepare_range { + my ($env, $in, $h, $beg, $end, $size) = @_; + my $code = 200; + my $len = $size; + if ($beg eq '') { + if ($end ne '') { # "bytes=-$end" => last N bytes + $beg = $size - $end; + $beg = 0 if $beg < 0; + $end = $size - 1; + $code = 206; + } else { + $code = 416; + } + } else { + if ($beg > $size) { + $code = 416; + } elsif ($end eq '' || $end >= $size) { + $end = $size - 1; + $code = 206; + } elsif ($end < $size) { + $code = 206; + } else { + $code = 416; + } + } + if ($code == 206) { + $len = $end - $beg + 1; + if ($len <= 0) { + $code = 416; + } else { + sysseek($in, $beg, SEEK_SET) or return [ 500, [], [] ]; + push @$h, qw(Accept-Ranges bytes Content-Range); + push @$h, "bytes $beg-$end/$size"; + + # FIXME: Plack::Middleware::Deflater bug? + $env->{'psgix.no-compress'} = 1; + } + } + ($code, $len); +} + +sub response { + my ($env, $h, $path, $type) = @_; + return unless -f $path && -r _; # just in case it's a FIFO :P + + # TODO: If-Modified-Since and Last-Modified? + open my $in, '<', $path or return; + my $size = -s $in; + my $len = $size; + my $code = 200; + push @$h, 'Content-Type', $type; + if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { + ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size); + if ($code == 416) { + push @$h, 'Content-Range', "bytes */$size"; + return [ 416, $h, [] ]; + } + } + push @$h, 'Content-Length', $len; + my $body = bless { + initial_rd => 65536, + len => $len, + in => $in, + path => $path, + env => $env, + }, __PACKAGE__; + [ $code, $h, $body ]; +} + +# called by PSGI servers: +sub getline { + my ($self) = @_; + my $len = $self->{len}; + return if $len == 0; + my $n = delete($self->{initial_rd}) // 8192; + $n = $len if $len < $n; + my $r = sysread($self->{in}, my $buf, $n); + if (!defined $r) { + $self->{env}->{'psgi.errors'}->print( + "$self->{path} read error: $!\n"); + } elsif ($r > 0) { # success! + $self->{len} = $len - $r; + return $buf; + } else { + $self->{env}->{'psgi.errors'}->print( + "$self->{path} EOF with $len bytes left\n"); + } + + # drop the client on error + if (my $io = $self->{env}->{'psgix.io'}) { + $io->close; # this is PublicInbox::DS::close + } + undef; +} + +sub close {} # noop, just let everything go out-of-scope + +1; -- cgit v1.2.3-24-ge0c7 From db748229054d52cdcc95843db5d827b49defeaff Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:49 +0000 Subject: www: lazy load Plack::Util cgit users won't need Plack::Util, here. --- lib/PublicInbox/WWW.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 4b7177c1..251979d5 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -15,7 +15,6 @@ use 5.010_001; use strict; use warnings; use bytes (); # only for bytes::length -use Plack::Util; use PublicInbox::Config; use PublicInbox::Hval; use URI::Escape qw(uri_unescape); @@ -472,6 +471,7 @@ sub cgit { require PublicInbox::Cgit; PublicInbox::Cgit->new($pi_config); } else { + require Plack::Util; Plack::Util::inline_object(call => sub { r404() }); } } -- cgit v1.2.3-24-ge0c7 From 07c51ae017dae0ea221ba756e9c6ec46f6a587de Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:50 +0000 Subject: mboxgz: pass $ctx to callback to avoid anon subs Another place where we can rid ourselves of most anonymous subs by passing the $ctx arg to the callback. --- lib/PublicInbox/Mbox.pm | 132 +++++++++++++++++++++++++--------------------- lib/PublicInbox/MboxGz.pm | 2 +- 2 files changed, 73 insertions(+), 61 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index c50a118b..3fdda5ea 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -134,28 +134,30 @@ sub msg_body ($) { $_[0] .= "\n"; } +sub thread_cb { + my ($ctx) = @_; + my $msgs = $ctx->{msgs}; + while (1) { + if (my $smsg = shift @$msgs) { + return $smsg; + } + # refill result set + $ctx->{msgs} = $msgs = $ctx->{over}->get_thread($ctx->{mid}, + $ctx->{prev}); + return unless @$msgs; + $ctx->{prev} = $msgs->[-1]; + } +} + sub thread_mbox { my ($ctx, $over, $sfx) = @_; eval { require PublicInbox::MboxGz }; - return sub { need_gzip(@_) } if $@; - my $mid = $ctx->{mid}; - my $msgs = $over->get_thread($mid, {}); + return need_gzip() if $@; + my $msgs = $ctx->{msgs} = $over->get_thread($ctx->{mid}, {}); return [404, [qw(Content-Type text/plain)], []] if !@$msgs; - my $prev = $msgs->[-1]; - my $i = 0; - my $cb = sub { - while (1) { - if (my $smsg = $msgs->[$i++]) { - return $smsg; - } - # refill result set - $msgs = $over->get_thread($mid, $prev); - return unless @$msgs; - $prev = $msgs->[-1]; - $i = 0; - } - }; - PublicInbox::MboxGz->response($ctx, $cb, $msgs->[0]->subject); + $ctx->{prev} = $msgs->[-1]; + $ctx->{over} = $over; # bump refcnt + PublicInbox::MboxGz->response($ctx, \&thread_cb, $msgs->[0]->subject); } sub emit_range { @@ -170,72 +172,82 @@ sub emit_range { mbox_all($ctx, $query); } +sub all_ids_cb { + my ($ctx) = @_; + my $ids = $ctx->{ids}; + do { + while ((my $num = shift @$ids)) { + my $smsg = $ctx->{over}->get_art($num) or next; + return $smsg; + } + $ctx->{ids} = $ids = $ctx->{mm}->ids_after(\($ctx->{prev})); + } while (@$ids); +} + sub mbox_all_ids { my ($ctx) = @_; - my $prev = 0; my $ibx = $ctx->{-inbox}; - my $ids = $ibx->mm->ids_after(\$prev) or return + my $prev = 0; + my $mm = $ctx->{mm} = $ibx->mm; + my $ids = $mm->ids_after(\$prev) or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; - my $i = 0; - my $over = $ibx->over or + $ctx->{over} = $ibx->over or return PublicInbox::WWW::need($ctx, 'Overview'); - my $cb = sub { - do { - while ((my $num = $ids->[$i++])) { - my $smsg = $over->get_art($num) or next; - return $smsg; - } - $ids = $ibx->mm->ids_after(\$prev); - $i = 0; - } while (@$ids); - undef; - }; - return PublicInbox::MboxGz->response($ctx, $cb, 'all'); + $ctx->{ids} = $ids; + $ctx->{prev} = $prev; + return PublicInbox::MboxGz->response($ctx, \&all_ids_cb, 'all'); +} + +sub results_cb { + my ($ctx) = @_; + my $mset = $ctx->{mset}; + my $srch = $ctx->{srch}; + while (1) { + while (my $mi = (($mset->items)[$ctx->{iter}++])) { + my $doc = $mi->get_document; + my $smsg = $srch->retry_reopen(sub { + PublicInbox::SearchMsg->load_doc($doc); + }) or next; + return $smsg; + } + # refill result set + $mset = $ctx->{mset} = $srch->query($ctx->{query}, + $ctx->{qopts}); + my $size = $mset->size or return; + $ctx->{qopts}->{offset} += $size; + $ctx->{iter} = 0; + } } sub mbox_all { my ($ctx, $query) = @_; eval { require PublicInbox::MboxGz }; - return sub { need_gzip(@_) } if $@; + return need_gzip() if $@; return mbox_all_ids($ctx) if $query eq ''; - my $opts = { mset => 2 }; - my $srch = $ctx->{-inbox}->search or + my $qopts = $ctx->{qopts} = { mset => 2 }; + my $srch = $ctx->{srch} = $ctx->{-inbox}->search or return PublicInbox::WWW::need($ctx, 'Search');; - my $mset = $srch->query($query, $opts); - $opts->{offset} = $mset->size or + my $mset = $ctx->{mset} = $srch->query($query, $qopts); + $qopts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; - my $i = 0; - my $cb = sub { # called by MboxGz->getline - while (1) { - while (my $mi = (($mset->items)[$i++])) { - my $doc = $mi->get_document; - my $smsg = $srch->retry_reopen(sub { - PublicInbox::SearchMsg->load_doc($doc); - }) or next; - return $smsg; - } - # refill result set - $mset = $srch->query($query, $opts); - my $size = $mset->size or return; - $opts->{offset} += $size; - $i = 0; - } - }; - PublicInbox::MboxGz->response($ctx, $cb, 'results-'.$query); + $ctx->{iter} = 0; + $ctx->{query} = $query; + PublicInbox::MboxGz->response($ctx, \&results_cb, 'results-'.$query); } sub need_gzip { - my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]); my $title = 'gzipped mbox not available'; - $fh->write(<$title
$title
 The administrator needs to install the Compress::Raw::Zlib Perl module
 to support gzipped mboxes.
 Return to index
EOF - $fh->close; + + [501,[qw(Content-Type text/html Content-Length), bytes::length($body)], + [ $body ] ]; } 1; diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index ef560426..2b873451 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -42,7 +42,7 @@ sub getline { my $ctx = $self->{ctx} or return; my $gz = $self->{gz}; my $buf = delete($self->{buf}); - while (my $smsg = $self->{cb}->()) { + while (my $smsg = $self->{cb}->($ctx)) { my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; my $h = Email::Simple->new($mref)->header_obj; -- cgit v1.2.3-24-ge0c7 From 8b192073ae9e940ec066811aaa5618a1601f0565 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:51 +0000 Subject: feed: avoid anonymous subs WwwStream already passes the WWW $ctx to the user-supplied callback, and it's a trivial change for WwwAtomStream to do the same. Callers in Feed.pm can now take advantage of that to save a few kilobytes of memory on every response. --- lib/PublicInbox/Feed.pm | 51 +++++++++++++++++++--------------------- lib/PublicInbox/WwwAtomStream.pm | 2 +- 2 files changed, 25 insertions(+), 28 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 6d659759..cbf25d46 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -10,32 +10,26 @@ use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::SearchMsg; # this loads w/o Search::Xapian +sub generate_i { + my ($ctx) = @_; + while (my $smsg = shift @{$ctx->{msgs}}) { + $ctx->{-inbox}->smsg_mime($smsg) and return $smsg; + } +} + # main function sub generate { my ($ctx) = @_; - my $msgs = recent_msgs($ctx); + my $msgs = $ctx->{msgs} = recent_msgs($ctx); return _no_thread() unless @$msgs; - - my $ibx = $ctx->{-inbox}; - PublicInbox::WwwAtomStream->response($ctx, 200, sub { - while (my $smsg = shift @$msgs) { - $ibx->smsg_mime($smsg) and return $smsg; - } - }); + PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_thread_atom { my ($ctx) = @_; - my $mid = $ctx->{mid}; - my $ibx = $ctx->{-inbox}; - my $msgs = $ibx->over->get_thread($mid); + my $msgs = $ctx->{msgs} = $ctx->{-inbox}->over->get_thread($ctx->{mid}); return _no_thread() unless @$msgs; - - PublicInbox::WwwAtomStream->response($ctx, 200, sub { - while (my $smsg = shift @$msgs) { - $ibx->smsg_mime($smsg) and return $smsg; - } - }); + PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_html_index { @@ -56,9 +50,20 @@ sub generate_html_index { [ "Redirecting to $url\n" ] ]; } +sub new_html_i { + my ($nr, $ctx) = @_; + my $msgs = $ctx->{msgs}; + while (my $smsg = shift @$msgs) { + my $m = $ctx->{-inbox}->smsg_mime($smsg) or next; + my $more = scalar @$msgs; + return PublicInbox::View::index_entry($m, $ctx, $more); + } + PublicInbox::View::pagination_footer($ctx, './new.html'); +} + sub new_html { my ($ctx) = @_; - my $msgs = recent_msgs($ctx); + my $msgs = $ctx->{msgs} = recent_msgs($ctx); if (!@$msgs) { return [404, ['Content-Type', 'text/plain'], ["No messages, yet\n"] ]; @@ -66,15 +71,7 @@ sub new_html { $ctx->{-html_tip} = '
';
 	$ctx->{-upfx} = '';
 	$ctx->{-hr} = 1;
-	my $ibx = $ctx->{-inbox};
-	PublicInbox::WwwStream->response($ctx, 200, sub {
-		while (my $smsg = shift @$msgs) {
-			my $m = $ibx->smsg_mime($smsg) or next;
-			my $more = scalar @$msgs;
-			return PublicInbox::View::index_entry($m, $ctx, $more);
-		}
-		PublicInbox::View::pagination_footer($ctx, './new.html');
-	});
+	PublicInbox::WwwStream->response($ctx, 200, \&new_html_i);
 }
 
 # private subs
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 83984d37..84060cfa 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -36,7 +36,7 @@ sub response {
 sub getline {
 	my ($self) = @_;
 	if (my $middle = $self->{cb}) {
-		my $smsg = $middle->();
+		my $smsg = $middle->($self->{ctx});
 		return feed_entry($self, $smsg) if $smsg;
 	}
 	delete $self->{cb} ? '' : undef;
-- 
cgit v1.2.3-24-ge0c7


From 4bffd23fdd592c356c9b3d56b202d178a12fec43 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:50:52 +0000
Subject: config: each_inbox: pass user arg to callback

Another place where we can replace anonymous subs with named
subs by passing a user-supplied arg.
---
 lib/PublicInbox/Cgit.pm       |  2 +-
 lib/PublicInbox/Config.pm     | 11 +++++-----
 lib/PublicInbox/ExtMsg.pm     | 48 ++++++++++++++++++++++++-------------------
 lib/PublicInbox/NewsWWW.pm    | 16 ++++++++-------
 lib/PublicInbox/WwwListing.pm | 37 +++++++++++++++++++--------------
 5 files changed, 65 insertions(+), 49 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index 68da9178..ab4065bd 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -63,7 +63,7 @@ sub new {
 		pi_config => $pi_config,
 	}, $class;
 
-	$pi_config->each_inbox(sub {}); # fill in -code_repos mapped to inboxes
+	$pi_config->fill_all; # fill in -code_repos mapped to inboxes
 
 	# some cgit repos may not be mapped to inboxes, so ensure those exist:
 	my $code_repos = $pi_config->{-code_repos};
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index bdde3dbc..8ecf549d 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -63,12 +63,13 @@ sub new {
 	$self;
 }
 
-sub _fill_all ($) { each_inbox($_[0], sub {}) }
+sub noop {}
+sub fill_all ($) { each_inbox($_[0], \&noop) }
 
 sub _lookup_fill ($$$) {
 	my ($self, $cache, $key) = @_;
 	$self->{$cache}->{$key} // do {
-		_fill_all($self);
+		fill_all($self);
 		$self->{$cache}->{$key};
 	}
 }
@@ -89,12 +90,12 @@ sub lookup_name ($$) {
 }
 
 sub each_inbox {
-	my ($self, $cb) = @_;
+	my ($self, $cb, $arg) = @_;
 	# may auto-vivify if config file is non-existent:
 	foreach my $section (@{$self->{-section_order}}) {
 		next if $section !~ m!\Apublicinbox\.([^/]+)\z!;
 		my $ibx = lookup_name($self, $1) or next;
-		$cb->($ibx);
+		$cb->($ibx, $arg);
 	}
 }
 
@@ -417,7 +418,7 @@ sub _fill {
 	if ($ibx->{obfuscate}) {
 		$ibx->{-no_obfuscate} = $self->{-no_obfuscate};
 		$ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
-		_fill_all($self); # noop to populate -no_obfuscate
+		fill_all($self); # noop to populate -no_obfuscate
 	}
 
 	if (my $ibx_code_repos = $ibx->{coderepo}) {
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 47f00b5e..0138d373 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -74,33 +74,39 @@ sub search_partial ($$) {
 	}
 }
 
+sub ext_msg_i {
+	my ($other, $arg) = @_;
+	my ($cur, $mid, $ibxs, $found) = @$arg;
+
+	return if $other->{name} eq $cur->{name} || !$other->base_url;
+
+	my $mm = $other->mm or return;
+
+	# try to find the URL with Msgmap to avoid forking
+	my $num = $mm->num_for($mid);
+	if (defined $num) {
+		push @$found, $other;
+	} else {
+		# no point in trying the fork fallback if we
+		# know Xapian is up-to-date but missing the
+		# message in the current repo
+		push @$ibxs, $other;
+	}
+}
+
 sub ext_msg {
 	my ($ctx) = @_;
 	my $cur = $ctx->{-inbox};
 	my $mid = $ctx->{mid};
 
 	eval { require PublicInbox::Msgmap };
-	my (@ibx, @found);
-
-	$ctx->{www}->{pi_config}->each_inbox(sub {
-		my ($other) = @_;
-		return if $other->{name} eq $cur->{name} || !$other->base_url;
-
-		my $mm = $other->mm or return;
-
-		# try to find the URL with Msgmap to avoid forking
-		my $num = $mm->num_for($mid);
-		if (defined $num) {
-			push @found, $other;
-		} else {
-			# no point in trying the fork fallback if we
-			# know Xapian is up-to-date but missing the
-			# message in the current repo
-			push @ibx, $other;
-		}
-	});
+	my $ibxs = [];
+	my $found = [];
+	my $arg = [ $cur, $mid, $ibxs, $found ];
+
+	$ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $arg);
 
-	return exact($ctx, \@found, $mid) if @found;
+	return exact($ctx, $found, $mid) if @$found;
 
 	# fall back to partial MID matching
 	my @partial;
@@ -114,7 +120,7 @@ sub ext_msg {
 
 	# can't find a partial match in current inbox, try the others:
 	if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) {
-		foreach my $ibx (@ibx) {
+		foreach my $ibx (@$ibxs) {
 			$srch = $ibx->search or next;
 			$mids = search_partial($srch, $mid) or next;
 			$n_partial += scalar(@$mids);
diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm
index 80bb4886..ee11a089 100644
--- a/lib/PublicInbox/NewsWWW.pm
+++ b/lib/PublicInbox/NewsWWW.pm
@@ -24,16 +24,19 @@ sub redirect ($$) {
 	  [ "Redirecting to $url\n" ] ]
 }
 
-sub try_inbox ($$) {
-	my ($ibx, $mid) = @_;
+sub try_inbox {
+	my ($ibx, $arg) = @_;
+	return if scalar(@$arg) > 1;
+
 	# do not pass $env since HTTP_HOST may differ
 	my $url = $ibx->base_url or return;
 
+	my ($mid) = @$arg;
 	eval { $ibx->mm->num_for($mid) } or return;
 
 	# 302 since the same message may show up on
 	# multiple inboxes and inboxes can be added/reordered
-	redirect(302, $url .= mid_escape($mid) . '/');
+	$arg->[1] = redirect(302, $url .= mid_escape($mid) . '/');
 }
 
 sub call {
@@ -70,10 +73,9 @@ sub call {
 	}
 
 	foreach my $mid (@try) {
-		$pi_config->each_inbox(sub {
-			$res ||= try_inbox($_[0], $mid);
-		});
-		last if defined $res;
+		my $arg = [ $mid ];
+		$pi_config->each_inbox(\&try_inbox, $arg);
+		defined($res = $arg->[1]) and last;
 	}
 	$res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
 }
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index e19ae8a1..7995b315 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -16,29 +16,36 @@ require Digest::SHA;
 require File::Spec;
 *try_cat = \&PublicInbox::Inbox::try_cat;
 
+sub list_all_i {
+	my ($ibx, $arg) = @_;
+	my ($list, $hide_key) = @$arg;
+	push @$list, $ibx unless $ibx->{-hide}->{$hide_key};
+}
+
 sub list_all ($$$) {
 	my ($self, $env, $hide_key) = @_;
-	my @list;
-	$self->{pi_config}->each_inbox(sub {
-		my ($ibx) = @_;
-		push @list, $ibx unless $ibx->{-hide}->{$hide_key};
-	});
-	\@list;
+	my $list = [];
+	$self->{pi_config}->each_inbox(\&list_all_i, [ $list, $hide_key ]);
+	$list;
+}
+
+sub list_match_domain_i {
+	my ($ibx, $arg) = @_;
+	my ($list, $hide_key, $re) = @$arg;
+	if (!$ibx->{-hide}->{$hide_key} && $ibx->{url} =~ $re) {
+		push @$list, $ibx;
+	}
 }
 
 sub list_match_domain ($$$) {
 	my ($self, $env, $hide_key) = @_;
-	my @list;
+	my $list = [];
 	my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME};
 	$host =~ s/:[0-9]+\z//;
-	my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i;
-	$self->{pi_config}->each_inbox(sub {
-		my ($ibx) = @_;
-		if (!$ibx->{-hide}->{$hide_key} && $ibx->{url} =~ $re) {
-			push @list, $ibx;
-		}
-	});
-	\@list;
+	my $arg = [ $list, $hide_key,
+		qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i ];
+	$self->{pi_config}->each_inbox(\&list_match_domain_i, $arg);
+	$list;
 }
 
 sub list_404 ($$) { [] }
-- 
cgit v1.2.3-24-ge0c7


From dd5b6a982381327caabaa8942f6f82aa503a044e Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:50:53 +0000
Subject: view: avoid anon sub in stream_thread

WwwStream already passes the WWW $ctx to the callback sub, so we
don't need to create a new sub every call to capture local variables
for the callback.
---
 lib/PublicInbox/View.pm | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0b102638..1ce6ba85 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -397,12 +397,29 @@ sub thread_index_entry {
 	$beg . '
' . index_entry($smsg, $ctx, 0) . '
' . $end; } +sub stream_thread_i { # PublicInbox::WwwStream::getline callback + my ($nr, $ctx) = @_; + return unless exists($ctx->{dst}); + my $q = $ctx->{-queue}; + while (@$q) { + my $level = shift @$q; + my $node = shift @$q or next; + my $cl = $level + 1; + unshift @$q, map { ($cl, $_) } @{$node->{children}}; + if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) { + return thread_index_entry($ctx, $level, $smsg); + } else { + return ghost_index_entry($ctx, $level, $node); + } + } + join('', thread_adj_level($ctx, 0)) . ${delete $ctx->{dst}}; # skel +} + sub stream_thread ($$) { my ($rootset, $ctx) = @_; my $ibx = $ctx->{-inbox}; my @q = map { (0, $_) } @$rootset; - my $level; - my $smsg; + my ($smsg, $level); while (@q) { $level = shift @q; my $node = shift @q or next; @@ -415,25 +432,8 @@ sub stream_thread ($$) { $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; $ctx->{-title_html} = ascii_html($smsg->subject); $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg); - $smsg = undef; - PublicInbox::WwwStream->response($ctx, 200, sub { - return unless $ctx; - while (@q) { - $level = shift @q; - my $node = shift @q or next; - my $cl = $level + 1; - unshift @q, map { ($cl, $_) } @{$node->{children}}; - if ($smsg = $ibx->smsg_mime($node->{smsg})) { - return thread_index_entry($ctx, $level, $smsg); - } else { - return ghost_index_entry($ctx, $level, $node); - } - } - my $ret = join('', thread_adj_level($ctx, 0)); - $ret .= ${$ctx->{dst}}; # skel - $ctx = undef; - $ret; - }); + $ctx->{-queue} = \@q; + PublicInbox::WwwStream->response($ctx, 200, \&stream_thread_i); } sub thread_html { -- cgit v1.2.3-24-ge0c7 From 1f53f1bf532a390f130c2029168cf90706595570 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:54 +0000 Subject: view: msg_html: stop using an anonymous sub Stash 5 local variables into the WWW $ctx hash table instead of allocating several kilobytes for an anonymous sub. --- lib/PublicInbox/View.pm | 57 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 1ce6ba85..1e2d3b55 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -24,37 +24,44 @@ use constant INDENT => ' '; use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; +sub msg_html_i { + my ($nr, $ctx) = @_; + my $more = $ctx->{more}; + if ($nr == 1) { + # $more cannot be true w/o $smsg being defined: + my $upfx = $more ? '../'.mid_escape($ctx->{smsg}->mid).'/' : ''; + $ctx->{tip} . + multipart_text_as_html($ctx->{mime}, $upfx, $ctx) . + '

' + } elsif ($more && @$more) { + ++$ctx->{end_nr}; + msg_html_more($ctx, $more, $nr); + } elsif ($nr == $ctx->{end_nr}) { + # fake an EOF if generating the footer fails; + # we want to at least show the message if something + # here crashes: + eval { + my $hdr = delete($ctx->{mime})->header_obj; + '
' . html_footer($hdr, 1, $ctx) .
+			'
' . msg_reply($ctx, $hdr) + }; + } else { + undef + } +} + # public functions: (unstable) sub msg_html { my ($ctx, $mime, $more, $smsg) = @_; - my $hdr = $mime->header_obj; my $ibx = $ctx->{-inbox}; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; - my $tip = _msg_html_prepare($hdr, $ctx, $more, 0); - my $end = 2; - PublicInbox::WwwStream->response($ctx, 200, sub { - my ($nr, undef) = @_; - if ($nr == 1) { - # $more cannot be true w/o $smsg being defined: - my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : ''; - $tip . multipart_text_as_html($mime, $upfx, $ctx) . - '
' - } elsif ($more && @$more) { - ++$end; - msg_html_more($ctx, $more, $nr); - } elsif ($nr == $end) { - # fake an EOF if generating the footer fails; - # we want to at least show the message if something - # here crashes: - eval { - '
' . html_footer($hdr, 1, $ctx) .
-				'
' . msg_reply($ctx, $hdr) - }; - } else { - undef - } - }); + $ctx->{tip} = _msg_html_prepare($mime->header_obj, $ctx, $more, 0); + $ctx->{more} = $more; + $ctx->{end_nr} = 2; + $ctx->{smsg} = $smsg; + $ctx->{mime} = $mime; + PublicInbox::WwwStream->response($ctx, 200, \&msg_html_i); } sub msg_page { -- cgit v1.2.3-24-ge0c7 From a77caf8cd92fcc13d90f7e8b68da6e87008241f8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:55 +0000 Subject: contentid: no anonymous sub msg_iter now passes a user specified arg into the supplied callback, so we can use that to pass the Digest object into the \&content_dig_i callback. --- lib/PublicInbox/ContentId.pm | 53 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm index 9d9be417..eb937a0e 100644 --- a/lib/PublicInbox/ContentId.pm +++ b/lib/PublicInbox/ContentId.pm @@ -25,6 +25,33 @@ sub digest_addr ($$$) { $dig->add("$h\0$v\0"); } +sub content_dig_i { + my ($dig) = $_[1]; + my ($part, $depth, @idx) = @{$_[0]}; + $dig->add("\0$depth:".join('.', @idx)."\0"); + my $fn = $part->filename; + if (defined $fn) { + utf8::encode($fn); + $dig->add("fn\0$fn\0"); + } + my @d = $part->header('Content-Description'); + foreach my $d (@d) { + utf8::encode($d); + $dig->add("d\0$d\0"); + } + $dig->add("b\0"); + my $ct = $part->content_type || 'text/plain'; + my ($s, undef) = msg_part_text($part, $ct); + if (defined $s) { + $s =~ s/\r\n/\n/gs; + $s =~ s/\s*\z//s; + utf8::encode($s); + } else { + $s = $part->body; + } + $dig->add($s); +} + sub content_digest ($) { my ($mime) = @_; my $dig = Digest::SHA->new(256); @@ -65,31 +92,7 @@ sub content_digest ($) { my @v = $hdr->header($h); digest_addr($dig, $h, $_) foreach @v; } - msg_iter($mime, sub { - my ($part, $depth, @idx) = @{$_[0]}; - $dig->add("\0$depth:".join('.', @idx)."\0"); - my $fn = $part->filename; - if (defined $fn) { - utf8::encode($fn); - $dig->add("fn\0$fn\0"); - } - my @d = $part->header('Content-Description'); - foreach my $d (@d) { - utf8::encode($d); - $dig->add("d\0$d\0"); - } - $dig->add("b\0"); - my $ct = $part->content_type || 'text/plain'; - my ($s, undef) = msg_part_text($part, $ct); - if (defined $s) { - $s =~ s/\r\n/\n/gs; - $s =~ s/\s*\z//s; - utf8::encode($s); - } else { - $s = $part->body; - } - $dig->add($s); - }); + msg_iter($mime, \&content_dig_i, $dig); $dig; } -- cgit v1.2.3-24-ge0c7 From ecd50473d81ae1ca7273e9f446139fa5a224b099 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:56 +0000 Subject: wwwtext: avoid anonymous sub in response We can pass arbitrary local variables via WWW $ctx, so just pass that into the one-off _do_linkify sub which already exists. --- lib/PublicInbox/WwwText.pm | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 1c7b92bd..2e4aeec0 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -53,27 +53,25 @@ sub get_text { # Follow git commit message conventions, # first line is the Subject/title my ($title) = ($txt =~ /\A([^\n]*)/s); - _do_linkify($txt); + $ctx->{txt} = \$txt; $ctx->{-title_html} = ascii_html($title); - my $nslash = ($key =~ tr!/!/!); $ctx->{-upfx} = '../../../' . ('../' x $nslash); - - PublicInbox::WwwStream->response($ctx, $code, sub { - my ($nr, undef) = @_; - $nr == 1 ? '
'.$txt.'
' : undef - }); + PublicInbox::WwwStream->response($ctx, $code, \&_do_linkify); } sub _do_linkify { + my ($nr, $ctx) = @_; + return unless $nr == 1; my $l = PublicInbox::Linkify->new; - $l->linkify_1($_[0]); + my $txt = delete $ctx->{txt}; + $l->linkify_1($$txt); if ($hl) { - $hl->do_hl_text(\($_[0])); + $hl->do_hl_text($txt); } else { - $_[0] = ascii_html($_[0]); + $$txt = ascii_html($$txt); } - $_[0] = $l->linkify_2($_[0]); + '
' . $l->linkify_2($$txt) . '
'; } sub _srch_prefix ($$) { -- cgit v1.2.3-24-ge0c7 From 15b14846a2fceff10d409bd1fd6d9a73a841df97 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:57 +0000 Subject: searchview: pass named subs to Www*Stream Both WwwStream and WwwAtomStream ->response pass the WWW $ctx to the callback nowadays, so we can pass named subs to them. --- lib/PublicInbox/SearchView.pm | 55 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 7afb0754..6aa815ca 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -275,8 +275,7 @@ sub get_pct ($) { sub mset_thread { my ($ctx, $mset, $q) = @_; my %pct; - my $ibx = $ctx->{-inbox}; - my $msgs = $ibx->search->retry_reopen(sub { [ map { + my $msgs = $ctx->{-inbox}->search->retry_reopen(sub { [ map { my $i = $_; my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document); $pct{$smsg->mid} = get_pct($i); @@ -303,19 +302,21 @@ sub mset_thread { *PublicInbox::View::pre_thread); @$msgs = reverse @$msgs if $r; - sub { - return unless $msgs; - my $smsg; - while (my $m = pop @$msgs) { - $smsg = $ibx->smsg_mime($m) and last; - } - if ($smsg) { - return PublicInbox::View::index_entry($smsg, $ctx, - scalar @$msgs); - } - $msgs = undef; - $skel .= "\n"; - }; + $ctx->{msgs} = $msgs; + \&mset_thread_i; +} + +# callback for PublicInbox::WwwStream::getline +sub mset_thread_i { + my ($nr, $ctx) = @_; + my $msgs = $ctx->{msgs} or return; + while (my $smsg = pop @$msgs) { + $ctx->{-inbox}->smsg_mime($smsg) or next; + return PublicInbox::View::index_entry($smsg, $ctx, + scalar @$msgs); + } + my ($skel) = delete @$ctx{qw(dst msgs)}; + $$skel .= "\n"; } sub ctx_prepare { @@ -337,17 +338,19 @@ sub ctx_prepare { sub adump { my ($cb, $mset, $q, $ctx) = @_; - my $ibx = $ctx->{-inbox}; - my @items = $mset->items; - $ctx->{search_query} = $q; - my $srch = $ibx->search; - PublicInbox::WwwAtomStream->response($ctx, 200, sub { - while (my $x = shift @items) { - $x = load_doc_retry($srch, $x); - $x = $ibx->smsg_mime($x) and return $x; - } - return undef; - }); + $ctx->{items} = [ $mset->items ]; + $ctx->{search_query} = $q; # used by WwwAtomStream::atom_header + $ctx->{srch} = $ctx->{-inbox}->search; + PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i); +} + +# callback for PublicInbox::WwwAtomStream::getline +sub adump_i { + my ($ctx) = @_; + while (my $mi = shift @{$ctx->{items}}) { + my $smsg = load_doc_retry($ctx->{srch}, $mi) or next; + $ctx->{-inbox}->smsg_mime($smsg) and return $smsg; + } } package PublicInbox::SearchQuery; -- cgit v1.2.3-24-ge0c7 From 2d2b53538f1121e98ddc0652e2749bb90ea97769 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 25 Dec 2019 07:50:58 +0000 Subject: view: thread_html: pass named sub to WwwStream We can pass everything we need into the WWW $ctx to avoid allocating kilobytes of memory for an anonymous sub for every $MESSAGE_ID/t/ request. --- lib/PublicInbox/View.pm | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 1e2d3b55..5c64441a 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -484,17 +484,19 @@ sub thread_html { return missing_thread($ctx) unless $smsg; $ctx->{-title_html} = ascii_html($smsg->subject); $ctx->{-html_tip} = '
'.index_entry($smsg, $ctx, scalar @$msgs);
-	$smsg = undef;
-	PublicInbox::WwwStream->response($ctx, 200, sub {
-		return unless $msgs;
-		$smsg = undef;
-		while (my $m = shift @$msgs) {
-			$smsg = $ibx->smsg_mime($m) and last;
-		}
-		return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
-		$msgs = undef;
-		$skel;
-	});
+	$ctx->{msgs} = $msgs;
+	PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
+}
+
+sub thread_html_i { # PublicInbox::WwwStream::getline callback
+	my ($nr, $ctx) = @_;
+	my $msgs = $ctx->{msgs} or return;
+	while (my $smsg = shift @$msgs) {
+		$ctx->{-inbox}->smsg_mime($smsg) or next;
+		return index_entry($smsg, $ctx, scalar @$msgs);
+	}
+	my ($skel) = delete @$ctx{qw(dst msgs)};
+	$$skel;
 }
 
 sub multipart_text_as_html {
-- 
cgit v1.2.3-24-ge0c7


From 9b5bf24a115346ef6095f4c613b4579fde112714 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:50:59 +0000
Subject: searchview: remove anonymous sub when sorting threads by relevance

We don't need to return a closure or have a separate hash
for sorting threads by relevance.  Instead, we can stuff
the relevance {pct} into the SearchMsg object itself and
use that.

Note: upon reviewing this code, the sort-by-relevance seems
bogus as it only considers the relevance of the topmost message.
Instead, it would make more sense to the user to sort by the
highest relevance of all messages in that particular thread.
---
 lib/PublicInbox/SearchView.pm | 17 +++++++----------
 lib/PublicInbox/View.pm       | 11 +++++------
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6aa815ca..8cffdedb 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -256,12 +256,10 @@ sub search_nav_bot {
 }
 
 sub sort_relevance {
-	my ($pct) = @_;
-	sub {
-		[ sort { (eval { $pct->{$b->topmost->{id}} } || 0)
-				<=>
-			(eval { $pct->{$a->topmost->{id}} } || 0)
-	} @{$_[0]} ] };
+	[ sort {
+		(eval { $b->topmost->{smsg}->{pct} } // 0) <=>
+		(eval { $a->topmost->{smsg}->{pct} } // 0)
+	} @{$_[0]} ]
 }
 
 sub get_pct ($) {
@@ -274,16 +272,15 @@ sub get_pct ($) {
 
 sub mset_thread {
 	my ($ctx, $mset, $q) = @_;
-	my %pct;
 	my $msgs = $ctx->{-inbox}->search->retry_reopen(sub { [ map {
 		my $i = $_;
 		my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
-		$pct{$smsg->mid} = get_pct($i);
+		$smsg->{pct} = get_pct($i);
 		$smsg;
 	} ($mset->items) ]});
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
-		$r ? sort_relevance(\%pct) : \&PublicInbox::View::sort_ds,
+		$r ? \&sort_relevance : \&PublicInbox::View::sort_ds,
 		$ctx);
 	my $skel = search_nav_bot($mset, $q). "
";
 	$ctx->{-upfx} = '';
@@ -291,7 +288,7 @@ sub mset_thread {
 	$ctx->{cur_level} = 0;
 	$ctx->{dst} = \$skel;
 	$ctx->{mapping} = {};
-	$ctx->{pct} = \%pct;
+	$ctx->{searchview} = 1;
 	$ctx->{prev_attr} = '';
 	$ctx->{prev_level} = 0;
 	$ctx->{s_nr} = scalar(@$msgs).'+ results';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 5c64441a..6f827754 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -279,8 +279,8 @@ sub index_entry {
 		" reply";
 
 	my $hr;
-	if (my $pct = $ctx->{pct}) { # used by SearchView.pm
-		$rv .= "\t[relevance $pct->{$mid_raw}%]";
+	if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
+		$rv .= "\t[relevance $pct%]";
 		$hr = 1;
 	} elsif ($mapping) {
 		my $nested = 'nested';
@@ -961,9 +961,8 @@ sub skel_dump {
 
 	my $d = fmt_ts($smsg->{ds});
 	my $unmatched; # if lazy-loaded by SearchThread::Msg::visible()
-	if (my $pct = $ctx->{pct}) {
-		$pct = $pct->{$smsg->{mid}};
-		if (defined $pct) {
+	if (exists $ctx->{searchview}) {
+		if (defined(my $pct = $smsg->{pct})) {
 			$d .= (sprintf(' % 2u', $pct) . '%');
 		} else {
 			$unmatched = 1;
@@ -1031,7 +1030,7 @@ sub _skel_ghost {
 
 	my $mid = $node->{id};
 	my $d = '     [not found] ';
-	$d .= '    '  if exists $ctx->{pct};
+	$d .= '    '  if exists $ctx->{searchview};
 	$d .= indent_for($level) . th_pfx($level);
 	my $upfx = $ctx->{-upfx};
 	my $m = PublicInbox::Hval->new_msgid($mid);
-- 
cgit v1.2.3-24-ge0c7


From 9bdd81dc16ba6511eb767bb3c1902bb7cb562ed7 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:51:00 +0000
Subject: view: msg_iter calls add_body_text directly

No need to waste several kilobytes creating an anonymous sub for
every invocation of msg_iter.
---
 lib/PublicInbox/View.pm | 53 ++++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 6f827754..c38a1289 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -269,8 +269,10 @@ sub index_entry {
 	$rv .= "\n";
 
 	# scan through all parts, looking for displayable text
-	my $ibx = $ctx->{-inbox};
-	msg_iter($mime, sub { $rv .= add_text_body($mhref, $ctx, $_[0]) });
+	$ctx->{mhref} = $mhref;
+	$ctx->{rv} = \$rv;
+	msg_iter($mime, \&add_text_body, $ctx);
+	delete $ctx->{rv};
 
 	# add the footer
 	$rv .= "\n^ ".
@@ -500,12 +502,13 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 }
 
 sub multipart_text_as_html {
-	my ($mime, $upfx, $ctx) = @_;
-	my $rv = "";
+	my ($mime, $mhref, $ctx) = @_;
+	$ctx->{mhref} = $mhref;
+	$ctx->{rv} = \(my $rv = '');
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { $rv .= add_text_body($upfx, $ctx, $_[0]) });
-	$rv;
+	msg_iter($mime, \&add_text_body, $ctx);
+	${delete $ctx->{rv}};
 }
 
 sub flush_quote {
@@ -523,7 +526,7 @@ sub flush_quote {
 }
 
 sub attach_link ($$$$;$) {
-	my ($upfx, $ct, $p, $fn, $err) = @_;
+	my ($ctx, $ct, $p, $fn, $err) = @_;
 	my ($part, $depth, @idx) = @$p;
 	my $nl = $idx[-1] > 1 ? "\n" : '';
 	my $idx = join('.', @idx);
@@ -544,29 +547,29 @@ sub attach_link ($$$$;$) {
 	} else {
 		$sfn = 'a.bin';
 	}
-	my $ret = qq($nl);
+	my $rv = $ctx->{rv};
+	$$rv .= qq($nl{mhref}$idx-$sfn">);
 	if ($err) {
-		$ret .=
-"[-- Warning: decoded text below may be mangled --]\n";
+		$$rv .= "[-- Warning: decoded text below may be mangled --]\n";
 	}
-	$ret .= "[-- Attachment #$idx: ";
+	$$rv .= "[-- Attachment #$idx: ";
 	my $ts = "Type: $ct, Size: $size bytes";
 	$desc = ascii_html($desc);
-	$ret .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
-	$ret .= "\n";
+	$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
+	$$rv .= "\n";
+	undef;
 }
 
-sub add_text_body {
-	my ($upfx, $ctx, $p) = @_;
+sub add_text_body { # callback for msg_iter
+	my ($p, $ctx) = @_;
+	my $upfx = $ctx->{mhref};
 	my $ibx = $ctx->{-inbox};
-	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
 	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
 	my ($part, $depth, @idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
 	my ($s, $err) = msg_part_text($part, $ct);
-
-	return attach_link($upfx, $ct, $p, $fn) unless defined $s;
+	return attach_link($ctx, $ct, $p, $fn) unless defined $s;
 
 	# makes no difference to browsers, and don't screw up filename
 	# link generation in diffs with the extra '%0D'
@@ -607,29 +610,29 @@ sub add_text_body {
 	# split off quoted and unquoted blocks:
 	my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
 	$s = '';
+	my $rv = $ctx->{rv};
 	if (defined($fn) || $depth > 0 || $err) {
 		# badly-encoded message with $err? tell the world about it!
-		$s .= attach_link($upfx, $ct, $p, $fn, $err);
-		$s .= "\n";
+		attach_link($ctx, $ct, $p, $fn, $err);
+		$$rv .= "\n";
 	}
 	my $l = PublicInbox::Linkify->new;
 	foreach my $cur (@sections) {
 		if ($cur =~ /\A>/) {
-			flush_quote(\$s, $l, \$cur);
+			flush_quote($rv, $l, \$cur);
 		} elsif ($diff) {
 			@$diff = split(/^/m, $cur);
 			$cur = undef;
-			flush_diff(\$s, $ctx, $l);
+			flush_diff($rv, $ctx, $l);
 		} else {
 			# regular lines, OK
 			$l->linkify_1($cur);
-			$s .= $l->linkify_2(ascii_html($cur));
+			$$rv .= $l->linkify_2(ascii_html($cur));
 			$cur = undef;
 		}
 	}
 
-	obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
-	$s;
+	obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate};
 }
 
 sub _msg_html_prepare {
-- 
cgit v1.2.3-24-ge0c7


From 40f250660b42caa69f5533da5501f8d3f31f30ac Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:51:01 +0000
Subject: wwwattach: avoid anonymous sub for msg_iter

We can pass arguments to msg_iter for msg_iter to pass
to our user-supplied callback, now.
---
 lib/PublicInbox/WwwAttach.pm | 49 ++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 2de56804..cda1c6c8 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -10,34 +10,39 @@ use Email::MIME::ContentType qw(parse_content_type);
 use PublicInbox::MIME;
 use PublicInbox::MsgIter;
 
+sub get_attach_i { # msg_iter callback
+	my ($part, $depth, @idx) = @{$_[0]};
+	my $res = $_[1];
+	return if join('.', @idx) ne $res->[3]; # $idx
+	$res->[0] = 200;
+	my $ct = $part->content_type;
+	$ct = parse_content_type($ct) if $ct;
+
+	# discrete == type, we remain Debian wheezy-compatible
+	if ($ct && (($ct->{discrete} || '') eq 'text')) {
+		# display all text as text/plain:
+		my $cset = $ct->{attributes}->{charset};
+		if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
+			$res->[1]->[1] .= qq(; charset=$cset);
+		}
+	} else { # TODO: allow user to configure safe types
+		$res->[1]->[1] = 'application/octet-stream';
+	}
+	$part = $part->body;
+	push @{$res->[1]}, 'Content-Length', bytes::length($part);
+	$res->[2]->[0] = $part;
+}
+
 # /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME
 sub get_attach ($$$) {
 	my ($ctx, $idx, $fn) = @_;
 	my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
 	my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
 	$mime = PublicInbox::MIME->new($mime);
-	msg_iter($mime, sub {
-		my ($part, $depth, @idx) = @{$_[0]};
-		return if join('.', @idx) ne $idx;
-		$res->[0] = 200;
-		my $ct = $part->content_type;
-		$ct = parse_content_type($ct) if $ct;
-
-		# discrete == type, we remain Debian wheezy-compatible
-		if ($ct && (($ct->{discrete} || '') eq 'text')) {
-			# display all text as text/plain:
-			my $cset = $ct->{attributes}->{charset};
-			if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
-				$res->[1]->[1] .= qq(; charset=$cset);
-			}
-		} else { # TODO: allow user to configure safe types
-			$res->[1]->[1] = 'application/octet-stream';
-		}
-		$part = $part->body;
-		push @{$res->[1]}, 'Content-Length', bytes::length($part);
-		$res->[2]->[0] = $part;
-	});
-	$res;
+	$res->[3] = $idx;
+	msg_iter($mime, \&get_attach_i, $res);
+	pop @$res; # cleanup before letting PSGI server see it
+	$res
 }
 
 1;
-- 
cgit v1.2.3-24-ge0c7


From bc23e3dc4b252dc7e9b6f3824afe74d85456b689 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:51:02 +0000
Subject: viewvcs: avoid anonymous sub for HTML response

No need to create a new sub for every HTML page we render
with our VCS viewer.
---
 lib/PublicInbox/ViewVCS.pm | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 7618b198..a6dbb9a9 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -31,14 +31,17 @@ my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
 our $MAX_SIZE = 1024 * 1024; # TODO: configurable
 my $BIN_DETECT = 8000; # same as git
 
+sub html_i { # WwwStream::getline callback
+	my ($nr, $ctx) =  @_;
+	$nr == 1 ? ${delete $ctx->{rv}} : undef;
+}
+
 sub html_page ($$$) {
 	my ($ctx, $code, $strref) = @_;
 	my $wcb = delete $ctx->{-wcb};
 	$ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
-	my $res = PublicInbox::WwwStream->response($ctx, $code, sub {
-		my ($nr, undef) =  @_;
-		$nr == 1 ? $$strref : undef;
-	});
+	$ctx->{rv} = $strref;
+	my $res = PublicInbox::WwwStream->response($ctx, $code, \&html_i);
 	$wcb ? $wcb->($res) : $res;
 }
 
-- 
cgit v1.2.3-24-ge0c7


From b316f7541ec263fd04ea50530a1d500f43773299 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:51:03 +0000
Subject: solvergit: allow passing arg to user-supplied callback

This allows us to get rid of the requirement to capture
on-stack variables with an anonymous sub, as illustrated
with the update to viewvcs to take advantage of this.

v2: fix error handling for missing OIDs
---
 lib/PublicInbox/SolverGit.pm | 38 +++++++++++++++++++++-----------------
 lib/PublicInbox/ViewVCS.pm   | 19 ++++++++++---------
 2 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 9266fb08..17a43060 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -55,11 +55,16 @@ sub dbg ($$) {
 	print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
 }
 
+sub done ($$) {
+	my ($self, $res) = @_;
+	my $ucb = delete($self->{user_cb}) or return;
+	$ucb->($res, $self->{uarg});
+}
+
 sub ERR ($$) {
 	my ($self, $err) = @_;
 	print { $self->{out} } $err, "\n";
-	my $ucb = delete($self->{user_cb});
-	eval { $ucb->($err) } if $ucb;
+	eval { done($self, $err) };
 	die $err;
 }
 
@@ -311,24 +316,23 @@ sub extract_old_mode ($) {
 	'100644';
 }
 
-sub do_finish ($$) {
-	my ($self, $user_cb) = @_;
-	my $found = $self->{found};
-	my $oid_want = $self->{oid_want};
+sub do_finish ($) {
+	my ($self) = @_;
+	my ($found, $oid_want) = @$self{qw(found oid_want)};
 	if (my $exists = $found->{$oid_want}) {
-		return $user_cb->($exists);
+		return done($self, $exists);
 	}
 
 	# let git disambiguate if oid_want was too short,
 	# but long enough to be unambiguous:
 	my $tmp_git = $self->{tmp_git};
 	if (my @res = $tmp_git->check($oid_want)) {
-		return $user_cb->($found->{$res[0]});
+		return done($self, $found->{$res[0]});
 	}
 	if (my $err = $tmp_git->last_check_err) {
 		dbg($self, $err);
 	}
-	$user_cb->(undef);
+	done($self, undef);
 }
 
 sub event_step ($) {
@@ -352,8 +356,8 @@ sub event_step ($) {
 		# our result: (which may be undef)
 		# Other steps may call user_cb to terminate prematurely
 		# on error
-		} elsif (my $user_cb = delete($self->{user_cb})) {
-			do_finish($self, $user_cb);
+		} elsif (exists $self->{user_cb}) {
+			do_finish($self);
 		} else {
 			die 'about to call user_cb twice'; # Oops :x
 		}
@@ -362,8 +366,7 @@ sub event_step ($) {
 	if ($err) {
 		$err =~ s/^\s*Exception:\s*//; # bad word to show users :P
 		dbg($self, "E: $err");
-		my $ucb = delete($self->{user_cb});
-		eval { $ucb->($err) } if $ucb;
+		eval { done($self, $err) };
 	}
 }
 
@@ -524,7 +527,7 @@ sub resolve_patch ($$) {
 			join("\n", $found_git->pub_urls($self->{psgi_env})));
 
 		if ($cur_want eq $self->{oid_want} || $type ne 'blob') {
-			eval { delete($self->{user_cb})->($existing) };
+			eval { done($self, $existing) };
 			die "E: $@" if $@;
 			return;
 		}
@@ -562,18 +565,19 @@ sub resolve_patch ($$) {
 	}
 
 	dbg($self, "could not find $cur_want");
-	eval { delete($self->{user_cb})->(undef) }; # not found! :<
+	eval { done($self, undef) };
 	die "E: $@" if $@;
 }
 
 # this API is designed to avoid creating self-referential structures;
 # so user_cb never references the SolverGit object
 sub new {
-	my ($class, $ibx, $user_cb) = @_;
+	my ($class, $ibx, $user_cb, $uarg) = @_;
 
 	bless {
 		gits => $ibx->{-repo_objs},
 		user_cb => $user_cb,
+		uarg => $uarg,
 		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
 
 		# TODO: config option for searching related inboxes
@@ -591,7 +595,7 @@ sub solve ($$$$$) {
 
 	# should we even get here? Probably not, but somebody
 	# could be manually typing URLs:
-	return (delete $self->{user_cb})->(undef) if $oid_want =~ /\A0+\z/;
+	return done($self, undef) if $oid_want =~ /\A0+\z/;
 
 	$self->{oid_want} = $oid_want;
 	$self->{out} = $out;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index a6dbb9a9..ead8c2b4 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -112,8 +112,10 @@ sub show_other ($$$$) {
 	$qsp->psgi_qx($env, undef, \&show_other_result, $ctx);
 }
 
+# user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg)
 sub solve_result {
-	my ($ctx, $res, $log, $hints, $fn) = @_;
+	my ($res, $ctx) = @_;
+	my ($log, $hints, $fn) = delete @$ctx{qw(log hints fn)};
 
 	unless (seek($log, 0, 0)) {
 		$ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n");
@@ -192,21 +194,20 @@ sub solve_result {
 sub show ($$;$) {
 	my ($ctx, $oid_b, $fn) = @_;
 	my $qp = $ctx->{qp};
-	my $hints = {};
+	my $hints = $ctx->{hints} = {};
 	while (my ($from, $to) = each %QP_MAP) {
 		defined(my $v = $qp->{$from}) or next;
 		$hints->{$to} = $v;
 	}
 
-	my $log = tmpfile("solve.$oid_b");
-	my $solver = PublicInbox::SolverGit->new($ctx->{-inbox}, sub {
-		solve_result($ctx, $_[0], $log, $hints, $fn);
-	});
-
-	# PSGI server will call this and give us a callback
+	$ctx->{'log'} = tmpfile("solve.$oid_b");
+	$ctx->{fn} = $fn;
+	my $solver = PublicInbox::SolverGit->new($ctx->{-inbox},
+						\&solve_result, $ctx);
+	# PSGI server will call this immediately and give us a callback (-wcb)
 	sub {
 		$ctx->{-wcb} = $_[0]; # HTTP write callback
-		$solver->solve($ctx->{env}, $log, $oid_b, $hints);
+		$solver->solve($ctx->{env}, $ctx->{log}, $oid_b, $hints);
 	};
 }
 
-- 
cgit v1.2.3-24-ge0c7


From f2364c5765f0692d2f1e82b61804359a38f3fdfc Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 25 Dec 2019 07:51:04 +0000
Subject: search: retry_reopen passes user arg to callback

This allows callers to pass named (not anonymous) subs.

Update all retry_reopen callers to use this feature, and
fix some places where we failed to use retry_reopen :x
---
 lib/PublicInbox/ExtMsg.pm     | 12 ++++++------
 lib/PublicInbox/Mbox.pm       |  7 +++----
 lib/PublicInbox/Search.pm     | 16 +++++++---------
 lib/PublicInbox/SearchMsg.pm  |  9 +++++----
 lib/PublicInbox/SearchView.pm | 34 +++++++++++++++-------------------
 5 files changed, 36 insertions(+), 42 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 0138d373..0f3e392d 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -29,6 +29,10 @@ our @EXT_URL = map { ascii_html($_) } (
 
 sub PARTIAL_MAX () { 100 }
 
+sub mids_from_mset { # Search::retry_reopen callback
+	[ map { PublicInbox::SearchMsg::from_mitem($_)->mid } $_[0]->items ];
+}
+
 sub search_partial ($$) {
 	my ($srch, $mid) = @_;
 	return if length($mid) < $MIN_PARTIAL_LEN;
@@ -65,12 +69,8 @@ sub search_partial ($$) {
 		# Search::Xapian::QueryParserError or even:
 		# "something terrible happened at ../Search/Xapian/Enquire.pm"
 		my $mset = eval { $srch->query($m, $opt) } or next;
-
-		my @mids = map {
-			my $doc = $_->get_document;
-			PublicInbox::SearchMsg->load_doc($doc)->mid;
-		} $mset->items;
-		return \@mids if scalar(@mids);
+		my $mids = $srch->retry_reopen(\&mids_from_mset, $mset);
+		return $mids if scalar(@$mids);
 	}
 }
 
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 3fdda5ea..1f9ac6ec 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -12,6 +12,7 @@ use strict;
 use warnings;
 use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
+use PublicInbox::SearchMsg;
 use Email::Simple;
 use Email::MIME::Encode;
 
@@ -204,10 +205,8 @@ sub results_cb {
 	my $srch = $ctx->{srch};
 	while (1) {
 		while (my $mi = (($mset->items)[$ctx->{iter}++])) {
-			my $doc = $mi->get_document;
-			my $smsg = $srch->retry_reopen(sub {
-				PublicInbox::SearchMsg->load_doc($doc);
-			}) or next;
+			my $smsg = PublicInbox::SearchMsg::from_mitem($mi,
+								$srch) or next;
 			return $smsg;
 		}
 		# refill result set
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 65c6ee83..eb1a1446 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -231,15 +231,15 @@ sub query {
 }
 
 sub retry_reopen {
-	my ($self, $cb) = @_;
+	my ($self, $cb, $arg) = @_;
 	for my $i (1..10) {
 		if (wantarray) {
 			my @ret;
-			eval { @ret = $cb->() };
+			eval { @ret = $cb->($arg) };
 			return @ret unless $@;
 		} else {
 			my $ret;
-			eval { $ret = $cb->() };
+			eval { $ret = $cb->($arg) };
 			return $ret unless $@;
 		}
 		# Exception: The revision being read has been discarded -
@@ -259,11 +259,11 @@ sub retry_reopen {
 
 sub _do_enquire {
 	my ($self, $query, $opts) = @_;
-	retry_reopen($self, sub { _enquire_once($self, $query, $opts) });
+	retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
 }
 
-sub _enquire_once {
-	my ($self, $query, $opts) = @_;
+sub _enquire_once { # retry_reopen callback
+	my ($self, $query, $opts) = @{$_[0]};
 	my $xdb = xdb($self);
 	my $enquire = $X{Enquire}->new($xdb);
 	$enquire->set_query($query);
@@ -281,9 +281,7 @@ sub _enquire_once {
 	my $limit = $opts->{limit} || 50;
 	my $mset = $enquire->get_mset($offset, $limit);
 	return $mset if $opts->{mset};
-	my @msgs = map {
-		PublicInbox::SearchMsg->load_doc($_->get_document);
-	} $mset->items;
+	my @msgs = map { PublicInbox::SearchMsg::from_mitem($_) } $mset->items;
 	return \@msgs unless wantarray;
 
 	($mset->get_matches_estimated, \@msgs)
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 53882f73..ea54ba8a 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -92,10 +92,11 @@ sub psgi_cull ($) {
 }
 
 # Only called by PSGI interface, not NNTP
-sub load_doc {
-	my ($class, $doc) = @_;
-	my $self = bless {}, $class;
-	psgi_cull(load_expand($self, $doc));
+sub from_mitem {
+	my ($mitem, $srch) = @_;
+	return $srch->retry_reopen(\&from_mitem, $mitem) if $srch;
+	my $self = bless {}, __PACKAGE__;
+	psgi_cull(load_expand($self, $mitem->get_document));
 }
 
 # :bytes and :lines metadata in RFC 3977
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 8cffdedb..6587d37f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -88,17 +88,6 @@ retry:
 	PublicInbox::WwwStream->response($ctx, $code, $cb);
 }
 
-# allow undef for individual doc loads...
-sub load_doc_retry {
-	my ($srch, $mitem) = @_;
-
-	eval {
-		$srch->retry_reopen(sub {
-			PublicInbox::SearchMsg->load_doc($mitem->get_document)
-		});
-	}
-}
-
 # display non-nested search results similar to what users expect from
 # regular WWW search engines:
 sub mset_summary {
@@ -114,7 +103,7 @@ sub mset_summary {
 	foreach my $m ($mset->items) {
 		my $rank = sprintf("%${pad}d", $m->get_rank + 1);
 		my $pct = get_pct($m);
-		my $smsg = load_doc_retry($srch, $m);
+		my $smsg = PublicInbox::SearchMsg::from_mitem($m, $srch);
 		unless ($smsg) {
 			eval {
 				$m = "$m ".$m->get_docid . " expired\n";
@@ -270,14 +259,19 @@ sub get_pct ($) {
 	$n > 99 ? 99 : $n;
 }
 
+sub load_msgs {
+	my ($mset) = @_;
+	[ map {
+		my $mi = $_;
+		my $smsg = PublicInbox::SearchMsg::from_mitem($mi);
+		$smsg->{pct} = get_pct($mi);
+		$smsg;
+	} ($mset->items) ]
+}
+
 sub mset_thread {
 	my ($ctx, $mset, $q) = @_;
-	my $msgs = $ctx->{-inbox}->search->retry_reopen(sub { [ map {
-		my $i = $_;
-		my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
-		$smsg->{pct} = get_pct($i);
-		$smsg;
-	} ($mset->items) ]});
+	my $msgs = $ctx->{-inbox}->search->retry_reopen(\&load_msgs, $mset);
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
 		$r ? \&sort_relevance : \&PublicInbox::View::sort_ds,
@@ -345,7 +339,9 @@ sub adump {
 sub adump_i {
 	my ($ctx) = @_;
 	while (my $mi = shift @{$ctx->{items}}) {
-		my $smsg = load_doc_retry($ctx->{srch}, $mi) or next;
+		my $smsg = eval {
+			PublicInbox::SearchMsg::from_mitem($mi, $ctx->{srch});
+		} or next;
 		$ctx->{-inbox}->smsg_mime($smsg) and return $smsg;
 	}
 }
-- 
cgit v1.2.3-24-ge0c7