From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id CFB451F669 for ; Sat, 10 Sep 2022 08:18:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1662797936; bh=gm1nj2DVRH7Tdq1859PAu5adC0U6cz0VAKz1R3uqWfQ=; h=From:To:Subject:Date:In-Reply-To:References:From; b=YtcC8ih0CYR+9sWCKKzrFSdieKIXa8Ykq+wMQdA4hFFW19R9SoFtSZOPNbYasejMS QBjcsp6QYh3asQ7lhhEX/HK5o2nQ008cJfnSMTtnCLxw+wZ8N3zUF9pWVcHRMGXiuh kqfBaWPth37Fvdp7tgBEP6NU8pEnzoXnI/AnDBss= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 28/38] www: use PerlIO::scalar (zfh) for buffering Date: Sat, 10 Sep 2022 08:17:19 +0000 Message-Id: <20220910081729.2011934-29-e@80x24.org> In-Reply-To: <20220910081729.2011934-1-e@80x24.org> References: <20220910081729.2011934-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Calling Compress::Raw::Zlib::deflate is fairly expensive. Relying on the `.=' (concat) operator inside ->zadd operator is faster, but the method dispatch overhead is noticeable compared to the original code where we had bare `.=' littered throughout. Fortunately, `print' and `say' with the PerlIO::scalar IO layer appears to offer better performance without high method dispatch overhead. This doesn't allow us to save as much memory as I originally hoped, but does allow us to rely less on concat operators in other places and just pass a list of args to `print' and `say' as a appropriate. This does reduce scratchpad use, however, allowing for large memory savings, and we still ->deflate every single $eml. --- Documentation/mknews.perl | 3 +- lib/PublicInbox/GzipFilter.pm | 41 ++++++++++--------- lib/PublicInbox/Mbox.pm | 2 +- lib/PublicInbox/SearchView.pm | 4 +- lib/PublicInbox/View.pm | 58 +++++++++++++-------------- lib/PublicInbox/ViewDiff.pm | 69 ++++++++++++++++---------------- lib/PublicInbox/WwwAtomStream.pm | 8 ++-- lib/PublicInbox/WwwStream.pm | 7 ++-- 8 files changed, 96 insertions(+), 96 deletions(-) diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl index 1936cea7..68866f44 100755 --- a/Documentation/mknews.perl +++ b/Documentation/mknews.perl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # Generates NEWS, NEWS.atom, and NEWS.html files using release emails # this uses unstable internal APIs of public-inbox, and this script @@ -46,6 +46,7 @@ if ($dst eq 'NEWS') { ibx => $ibx, -upfx => "$base_url/", -hr => 1, + zfh => $out, }; if ($dst eq 'NEWS.html') { html_start($out, $ctx); diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index 1f11acb8..848370ce 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -127,38 +127,39 @@ sub write { http_out($_[0])->write(translate($_[0], $_[1])); } -sub zadd { - my $self = shift; - $self->{pbuf} .= $_ for @_; # perl internal pad memory use here +sub zfh { + $_[0]->{zfh} // do { + open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or + die "open: $!"; + $_[0]->{zfh} + }; } # similar to ->translate; use this when we're sure we know we have # more data to buffer after this sub zmore { - my $self = shift; # $_[1] => input + my $self = shift; + my $zfh = delete $self->{zfh}; + if (@_ > 1 || $zfh) { + print { $zfh // zfh($self) } @_; + @_ = (delete $self->{pbuf}); + delete $self->{zfh}; + }; http_out($self); - my $x; - defined($x = delete($self->{pbuf})) and unshift(@_, $x); - for (@_) { - ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or - die "gzip->deflate: $x"; - } - undef; + my $err; + ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or + die "gzip->deflate: $err"; } # flushes and returns the final bit of gzipped data sub zflush ($;@) { my $self = shift; # $_[1..Inf] => final input (optional) + zmore($self, @_) if scalar(@_) || $self->{zfh}; + # not a bug, recursing on DS->write failure + my $gz = delete $self->{gz} // return ''; + my $err; my $zbuf = delete $self->{zbuf}; - my $gz = delete $self->{gz}; - my $x; - defined($x = delete($self->{pbuf})) and unshift(@_, $x); - for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here: - ($x = $gz->deflate($_, $zbuf)) == Z_OK or - die "gzip->deflate: $x"; - } - $gz // return ''; # not a bug, recursing on DS->write failure - ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x"; + ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err"; $zbuf; } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index cfe34d9c..2ef8ff2b 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -20,7 +20,7 @@ sub getline { my $ibx = $ctx->{ibx}; my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return; my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}); - $ctx->zadd(msg_hdr($ctx, $eml)); + $ctx->zmore(msg_hdr($ctx, $eml)); if ($n) { $ctx->translate(msg_body($eml)); } else { # last message diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index b18947ee..8932c73d 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -331,10 +331,10 @@ sub mset_thread { # callback for PublicInbox::WwwStream::getline sub mset_thread_i { my ($ctx, $eml) = @_; - $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip}; + print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip}; $eml and return PublicInbox::View::eml_entry($ctx, $eml); my $smsg = shift @{$ctx->{msgs}} or - $ctx->zmore(${delete($ctx->{skel})}); + print { $ctx->zfh } ${delete($ctx->{skel})}; $smsg; } diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 85dc3bd8..0753c06e 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -40,7 +40,7 @@ sub msg_page_i { "../${\mid_href($smsg->{mid})}/" : ''; if (_msg_page_prepare($eml, $ctx)) { $eml->each_part(\&add_text_body, $ctx, 1); - $ctx->zadd('
'); + print { $ctx->{zfh} } '
'; } html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg}; ''; # XXX TODO cleanup @@ -58,7 +58,7 @@ sub no_over_html ($) { PublicInbox::WwwStream::init($ctx); if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html} $eml->each_part(\&add_text_body, $ctx, 1); - $ctx->zadd('
'); + print { $ctx->{zfh} } '
'; } html_footer($ctx, $eml); $ctx->html_done; @@ -240,12 +240,11 @@ sub eml_entry { my $html = ascii_html($irt); $rv .= qq(In-Reply-To: <$html>\n) } - $rv .= "\n"; + say { $ctx->zfh } $rv; # scan through all parts, looking for displayable text $ctx->{mhref} = $mhref; $ctx->{changed_href} = "#e$id"; # for diffstat "files? changed," - $ctx->zadd($rv); # XXX $rv is small, reuse below $eml->each_part(\&add_text_body, $ctx, 1); # expensive # add the footer @@ -386,8 +385,8 @@ sub pre_thread { # walk_thread callback sub thread_eml_entry { my ($ctx, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $ctx->{level}); - $ctx->zadd($beg.'
');
-	eml_entry($ctx, $eml) . '
' . $end; + print { $ctx->zfh } $beg, '
';
+	print { $ctx->{zfh} } eml_entry($ctx, $eml), '
', $end; } sub next_in_queue ($$) { @@ -414,15 +413,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback if (!$ghost_ok) { # first non-ghost $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->zadd($ctx->html_top); + print { $ctx->zfh } $ctx->html_top; } return $smsg; } # buffer the ghost entry and loop - $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg)); + print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg) } else { # all done - $ctx->zadd(join('', thread_adj_level($ctx, 0))); - $ctx->zadd(${delete($ctx->{skel})}); + print { $ctx->zfh } thread_adj_level($ctx, 0), + ${delete($ctx->{skel})}; return; } } @@ -491,7 +490,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback my $smsg = $ctx->{smsg}; if (exists $ctx->{-html_tip}) { $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->zadd($ctx->html_top); + print { $ctx->zfh } $ctx->html_top; } return eml_entry($ctx, $eml); } else { @@ -499,7 +498,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback return $smsg if exists($smsg->{blob}); } my $skel = delete($ctx->{skel}) or return; # all done - $ctx->zadd($$skel); + print { $ctx->zfh } $$skel; undef; } } @@ -560,8 +559,9 @@ sub add_text_body { # callback for each_part my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; my ($s, $err) = msg_part_text($part, $ct); - $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // ''); - my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : ''; + my $zfh = $ctx->zfh; + $s // return print $zfh (attach_link($ctx, $ct, $p, $fn) // ''); + say $zfh submsg_hdr($ctx, $part) if $part->{is_submsg}; # makes no difference to browsers, and don't screw up filename # link generation in diffs with the extra '%0D' @@ -609,21 +609,19 @@ sub add_text_body { # callback for each_part undef $s; # free memory if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) { # badly-encoded message with $err? tell the world about it! - $buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n"; + say $zfh attach_link($ctx, $ct, $p, $fn, $err); } delete $part->{bdy}; # save memory - $ctx->zadd($buf); - undef $buf; for my $cur (@sections) { # $cur may be huge if ($cur =~ /\A>/) { # we use a here to allow users to specify # their own color for quoted text - $ctx->zadd(qq(), - $l->to_html($cur), ''); + print $zfh qq(), + $l->to_html($cur), ''; } elsif ($diff) { flush_diff($ctx, \$cur); } else { # regular lines, OK - $ctx->zadd($l->to_html($cur)); + print $zfh $l->to_html($cur); } undef $cur; # free memory } @@ -685,10 +683,10 @@ sub _msg_page_prepare { $hbuf .= qq[Message-ID: <$x> (raw)\n]; } if (!$nr) { # first (and only) message, common case - $ctx->zadd($ctx->html_top, $hbuf); + print { $ctx->zfh } $ctx->html_top, $hbuf; } else { delete $ctx->{-title_html}; - $ctx->zadd($ctx->{-html_tip}, $hbuf); + print { $ctx->zfh } $ctx->{-html_tip}, $hbuf; } $ctx->{-linkify} //= PublicInbox::Linkify->new; $hbuf = ''; @@ -699,7 +697,7 @@ sub _msg_page_prepare { $hbuf .= "$h: $_\n" for ($eml->header_raw($h)); } $ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML - $ctx->zadd($hbuf); + print { $ctx->{zfh} } $hbuf; $hbuf = ''; } my @irt = $eml->header_raw('In-Reply-To'); @@ -717,7 +715,7 @@ sub _msg_page_prepare { $hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs; } $ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML - $ctx->zadd($hbuf .= "\n"); + say { $ctx->{zfh} } $hbuf; 1; } @@ -771,7 +769,7 @@ sub thread_skel ($$$) { sub html_footer { my ($ctx, $hdr) = @_; my $upfx = '../'; - my ($related, $skel); + my (@related, $skel); my $foot = '
';
 	my $qry = delete $ctx->{-qry};
 	if ($qry && $ctx->{ibx}->isrch) {
@@ -786,7 +784,7 @@ sub html_footer {
 		$q = wrap('', '', $q);
 		my $rows = ($q =~ tr/\n/\n/) + 1;
 		$q = ascii_html($q);
-		$related = <
find likely ancestor, descendant, or conflicting patches for {ibx}->over) {
 		my $t = ts2str($ctx->{-t_max});
 		my $t_fmt = fmt_ts($ctx->{-t_max});
-		my $fallback = $related ? "\t" : "\t";
+		my $fallback = @related ? "\t" : "\t";
 		$skel = <~$t_fmt UTC|latest);
 	}
-	$foot .= qq(reply);
 	# $skel may be big for big threads, don't append it to $foot
-	$skel .= '
' . ($related // ''); - $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr)); + print { $ctx->zfh } $foot, qq(reply), + $skel, '
', @related, + msg_reply($ctx, $hdr); } sub ghost_parent { diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 95b615dc..5d23881b 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -67,8 +67,8 @@ sub oid ($$$) { } # returns true if diffstat anchor written, false otherwise -sub anchor0 ($$$$) { - my ($dst, $ctx, $fn, $rest) = @_; +sub anchor0 ($$$) { + my ($ctx, $fn, $rest) = @_; my $orig = $fn; @@ -84,15 +84,12 @@ sub anchor0 ($$$$) { # long filenames will require us to check in anchor1() push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!; - if (defined(my $attr = to_attr($ctx->{-apfx}.$fn))) { - $ctx->{-anchors}->{$attr} = 1; - my $spaces = ($orig =~ s/( +)\z//) ? $1 : ''; - $$dst .= " " . - ascii_html($orig) . '' . $spaces . + my $attr = to_attr($ctx->{-apfx}.$fn) // return; + $ctx->{-anchors}->{$attr} = 1; + my $spaces = ($orig =~ s/( +)\z//) ? $1 : ''; + print { $ctx->{zfh} } " ", + ascii_html($orig), '', $spaces, $ctx->{-linkify}->to_html($rest); - return 1; - } - undef; } # returns "diff --git" anchor destination, undef otherwise @@ -156,33 +153,34 @@ sub diff_header ($$$) { warn "BUG? <$$x> had no ^index line"; } $$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems; - $ctx->zadd(qq($$x)); + print { $ctx->{zfh} } qq(), $$x, ''; $dctx; } sub diff_before_or_after ($$) { my ($ctx, $x) = @_; - if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" + if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" # \$1 # diffstat lines: ((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+) (\x20[0-9]+\x20files?\x20)changed,([^\n]+\n) (.*?)\z/msx) { # notes, commit message, etc my @x = ($5, $4, $3, $2, $1); + undef $$x; my $lnk = $ctx->{-linkify}; - $$x = $lnk->to_html(pop @x); # uninteresting prefix - for my $l (split(/^/m, pop(@x))) { # per-file diffstat lines + my $zfh = $ctx->{zfh}; + print $zfh $lnk->to_html(pop @x); # $1 uninteresting prefix + for my $l (split(/^/m, pop(@x))) { # $2 per-file stat lines $l =~ /^ (.+)( +\| .*\z)/s and - anchor0($x, $ctx, $1, $2) and next; - $$x .= $lnk->to_html($l); + anchor0($ctx, $1, $2) and next; + print $zfh $lnk->to_html($l); } - $$x .= pop @x; # $3 /^ \d+ files? / my $ch = $ctx->{changed_href} // '#related'; - $$x .= qq(changed,); - $$x .= ascii_html(pop @x); # $4: insertions/deletions - # notes, commit message, etc - $ctx->zadd($$x .= $lnk->to_html(pop @x)); + print $zfh pop(@x), # $3 /^ \d+ files? / + qq(changed,), + ascii_html(pop @x), # insertions/deletions + $lnk->to_html(@x); # notes, commit message, etc } else { - $ctx->zadd($ctx->{-linkify}->to_html($$x)); + print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x); } } @@ -195,6 +193,7 @@ sub flush_diff ($$) { my $lnk = $ctx->{-linkify}; my $dctx; # {}, keys: Q, oid_a, oid_b + my $zfh = $ctx->zfh; while (defined(my $x = shift @top)) { if (scalar(@top) >= 4 && @@ -202,7 +201,7 @@ sub flush_diff ($$) { $top[0] =~ $IS_OID) { $dctx = diff_header(\$x, $ctx, \@top); } elsif ($dctx) { - my $after = ''; + open(my $afh, '>>', \(my $after='')) or die "open: $!"; # Quiet "Complex regular subexpression recursion limit" # warning. Perl will truncate matches upon hitting @@ -218,25 +217,25 @@ sub flush_diff ($$) { (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { - $after .= $s; + print $afh $s; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { - $ctx->zadd(qq() . - diff_hunk($dctx, $1, $2) . - $lnk->to_html($s) . - ''); + print $zfh qq(), + diff_hunk($dctx, $1, $2), + $lnk->to_html($s), + ''; } elsif ($s =~ /\A\+/) { # $s may be huge - $ctx->zadd(qq(), + print $zfh qq(), $lnk->to_html($s), - ''); + ''; } elsif ($s =~ /\A-- $/sm) { # email sig starts $dctx = undef; - $after .= $s; + print $afh $s; } elsif ($s =~ /\A-/) { # $s may be huge - $ctx->zadd(qq(), - $lnk->to_html($s), - ''); + print $zfh qq(), + $lnk->to_html($s), + ''; } else { # $s may be huge - $ctx->zadd($lnk->to_html($s)); + print $zfh $lnk->to_html($s); } } diff_before_or_after($ctx, \$after) if !$dctx; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 1c7ae881..33da3244 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -146,15 +146,15 @@ sub feed_entry { my $name = ascii_html(join(', ', PublicInbox::Address::names($from))); $email = ascii_html($email // $ctx->{ibx}->{-primary_address}); - $ctx->zadd( - (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''). + print { $ctx->zfh } + (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''), "$name$email" . "$title$updated" . - qq(). + qq() . "$uuid$irt" . qq{} . qq{} . - qq()); + qq(); $ctx->{mhref} = $href; $ctx->{changed_href} = "${href}#related"; $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1); diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 2a318e5e..77b6f9c2 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -181,11 +181,12 @@ sub html_oneshot ($$;@) { 'Content-Length' => undef ]; bless $ctx, __PACKAGE__; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env}); + my @top; $ctx->{base_url} // do { - $ctx->zadd(html_top($ctx)); + @top = html_top($ctx); $ctx->{base_url} = base_url($ctx); }; - my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx)); + my $bdy = $ctx->zflush(@top, @_[2..$#_], _html_end($ctx)); $res_hdr->[3] = length($bdy); [ $code, $res_hdr, [ $bdy ] ] } @@ -216,7 +217,7 @@ sub html_init { my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8']; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env}); bless $ctx, __PACKAGE__; - $ctx->zadd(html_top($ctx)); + print { $ctx->zfh } html_top($ctx); } 1;