From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9282C1F667 for ; Sat, 10 Sep 2022 08:18:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1662797936; bh=GC/tGcKwUf3p/E6gS14OJszC1NnfmIiZvXWchnyXyVY=; h=From:To:Subject:Date:In-Reply-To:References:From; b=wcWrMX3b7MUE6Gxpe6Q5rCjsjb/dESboe/33MmGiAYkmckiIg95Kk4QiRUfo2ZZ/l wpUOusPuIHF+XPsVtZxvUXnGU8WM1pZD4RxH1EulhLwRv4zPDYg9Y83ouMdN0gNhA5 q1Uy8vRaxKU07NE3sIxvi/X/0gfzDMOaLuxt10lE= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 27/38] www: switch to zadd for the majority of buffering Date: Sat, 10 Sep 2022 08:17:18 +0000 Message-Id: <20220910081729.2011934-28-e@80x24.org> In-Reply-To: <20220910081729.2011934-1-e@80x24.org> References: <20220910081729.2011934-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This allows us to focus string concatenations in one place to allow Perl internal scratchpad optimizations to reuse memory. Calling Compress::Raw::Zlib::deflate repeatedly proves too expensive in terms of CPU cycles. --- lib/PublicInbox/GzipFilter.pm | 22 +++++++++++------- lib/PublicInbox/Mbox.pm | 2 +- lib/PublicInbox/SearchView.pm | 2 +- lib/PublicInbox/View.pm | 40 ++++++++++++++++---------------- lib/PublicInbox/ViewDiff.pm | 14 +++++------ lib/PublicInbox/WwwAtomStream.pm | 2 +- lib/PublicInbox/WwwStream.pm | 4 ++-- 7 files changed, 46 insertions(+), 40 deletions(-) diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index eb0046ce..1f11acb8 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -127,15 +127,21 @@ sub write { http_out($_[0])->write(translate($_[0], $_[1])); } +sub zadd { + my $self = shift; + $self->{pbuf} .= $_ for @_; # perl internal pad memory use here +} + # similar to ->translate; use this when we're sure we know we have # more data to buffer after this sub zmore { my $self = shift; # $_[1] => input http_out($self); - my $err; + my $x; + defined($x = delete($self->{pbuf})) and unshift(@_, $x); for (@_) { - $err = $self->{gz}->deflate($_, $self->{zbuf}); - die "gzip->deflate: $err" if $err != Z_OK; + ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or + die "gzip->deflate: $x"; } undef; } @@ -145,14 +151,14 @@ sub zflush ($;@) { my $self = shift; # $_[1..Inf] => final input (optional) my $zbuf = delete $self->{zbuf}; my $gz = delete $self->{gz}; - my $err; + my $x; + defined($x = delete($self->{pbuf})) and unshift(@_, $x); for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here: - $err = $gz->deflate($_, $zbuf); - die "gzip->deflate: $err" if $err != Z_OK; + ($x = $gz->deflate($_, $zbuf)) == Z_OK or + die "gzip->deflate: $x"; } $gz // return ''; # not a bug, recursing on DS->write failure - $err = $gz->flush($zbuf); - die "gzip->flush: $err" if $err != Z_OK; + ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x"; $zbuf; } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 2ef8ff2b..cfe34d9c 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -20,7 +20,7 @@ sub getline { my $ibx = $ctx->{ibx}; my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return; my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}); - $ctx->zmore(msg_hdr($ctx, $eml)); + $ctx->zadd(msg_hdr($ctx, $eml)); if ($n) { $ctx->translate(msg_body($eml)); } else { # last message diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index e0404e5f..b18947ee 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -331,7 +331,7 @@ sub mset_thread { # callback for PublicInbox::WwwStream::getline sub mset_thread_i { my ($ctx, $eml) = @_; - $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip}; + $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip}; $eml and return PublicInbox::View::eml_entry($ctx, $eml); my $smsg = shift @{$ctx->{msgs}} or $ctx->zmore(${delete($ctx->{skel})}); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 630f1e42..85dc3bd8 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -40,10 +40,10 @@ sub msg_page_i { "../${\mid_href($smsg->{mid})}/" : ''; if (_msg_page_prepare($eml, $ctx)) { $eml->each_part(\&add_text_body, $ctx, 1); - $ctx->zmore('
'); + $ctx->zadd('
'); } html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg}; - \''; # XXX TODO cleanup + ''; # XXX TODO cleanup } else { # called by WwwStream::async_next or getline $ctx->{smsg}; # may be undef } @@ -58,7 +58,7 @@ sub no_over_html ($) { PublicInbox::WwwStream::init($ctx); if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html} $eml->each_part(\&add_text_body, $ctx, 1); - $ctx->zmore('
'); + $ctx->zadd('
'); } html_footer($ctx, $eml); $ctx->html_done; @@ -245,7 +245,7 @@ sub eml_entry { # scan through all parts, looking for displayable text $ctx->{mhref} = $mhref; $ctx->{changed_href} = "#e$id"; # for diffstat "files? changed," - $ctx->zmore($rv); # XXX $rv is small, reuse below + $ctx->zadd($rv); # XXX $rv is small, reuse below $eml->each_part(\&add_text_body, $ctx, 1); # expensive # add the footer @@ -386,7 +386,7 @@ sub pre_thread { # walk_thread callback sub thread_eml_entry { my ($ctx, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $ctx->{level}); - $ctx->zmore($beg.'
');
+	$ctx->zadd($beg.'
');
 	eml_entry($ctx, $eml) . '
' . $end; } @@ -414,15 +414,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback if (!$ghost_ok) { # first non-ghost $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->zmore($ctx->html_top); + $ctx->zadd($ctx->html_top); } return $smsg; } # buffer the ghost entry and loop - $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg)); + $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg)); } else { # all done - $ctx->zmore(join('', thread_adj_level($ctx, 0))); - $ctx->zmore(${delete($ctx->{skel})}); + $ctx->zadd(join('', thread_adj_level($ctx, 0))); + $ctx->zadd(${delete($ctx->{skel})}); return; } } @@ -491,7 +491,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback my $smsg = $ctx->{smsg}; if (exists $ctx->{-html_tip}) { $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->zmore($ctx->html_top); + $ctx->zadd($ctx->html_top); } return eml_entry($ctx, $eml); } else { @@ -499,7 +499,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback return $smsg if exists($smsg->{blob}); } my $skel = delete($ctx->{skel}) or return; # all done - $ctx->zmore($$skel); + $ctx->zadd($$skel); undef; } } @@ -560,7 +560,7 @@ sub add_text_body { # callback for each_part my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; my ($s, $err) = msg_part_text($part, $ct); - $s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // ''); + $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // ''); my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : ''; # makes no difference to browsers, and don't screw up filename @@ -612,18 +612,18 @@ sub add_text_body { # callback for each_part $buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n"; } delete $part->{bdy}; # save memory - $ctx->zmore($buf); + $ctx->zadd($buf); undef $buf; for my $cur (@sections) { # $cur may be huge if ($cur =~ /\A>/) { # we use a here to allow users to specify # their own color for quoted text - $ctx->zmore(qq(), + $ctx->zadd(qq(), $l->to_html($cur), ''); } elsif ($diff) { flush_diff($ctx, \$cur); } else { # regular lines, OK - $ctx->zmore($l->to_html($cur)); + $ctx->zadd($l->to_html($cur)); } undef $cur; # free memory } @@ -685,10 +685,10 @@ sub _msg_page_prepare { $hbuf .= qq[Message-ID: <$x> (raw)\n]; } if (!$nr) { # first (and only) message, common case - $ctx->zmore($ctx->html_top, $hbuf); + $ctx->zadd($ctx->html_top, $hbuf); } else { delete $ctx->{-title_html}; - $ctx->zmore($ctx->{-html_tip}, $hbuf); + $ctx->zadd($ctx->{-html_tip}, $hbuf); } $ctx->{-linkify} //= PublicInbox::Linkify->new; $hbuf = ''; @@ -699,7 +699,7 @@ sub _msg_page_prepare { $hbuf .= "$h: $_\n" for ($eml->header_raw($h)); } $ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML - $ctx->zmore($hbuf); + $ctx->zadd($hbuf); $hbuf = ''; } my @irt = $eml->header_raw('In-Reply-To'); @@ -717,7 +717,7 @@ sub _msg_page_prepare { $hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs; } $ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML - $ctx->zmore($hbuf .= "\n"); + $ctx->zadd($hbuf .= "\n"); 1; } @@ -837,7 +837,7 @@ EOF $foot .= qq(reply); # $skel may be big for big threads, don't append it to $foot $skel .= '
' . ($related // ''); - $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr)); + $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr)); } sub ghost_parent { diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 36601910..95b615dc 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -156,7 +156,7 @@ sub diff_header ($$$) { warn "BUG? <$$x> had no ^index line"; } $$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems; - $ctx->zmore(qq($$x)); + $ctx->zadd(qq($$x)); $dctx; } @@ -180,9 +180,9 @@ sub diff_before_or_after ($$) { $$x .= qq(changed,); $$x .= ascii_html(pop @x); # $4: insertions/deletions # notes, commit message, etc - $ctx->zmore($$x .= $lnk->to_html(pop @x)); + $ctx->zadd($$x .= $lnk->to_html(pop @x)); } else { - $ctx->zmore($ctx->{-linkify}->to_html($$x)); + $ctx->zadd($ctx->{-linkify}->to_html($$x)); } } @@ -220,23 +220,23 @@ sub flush_diff ($$) { if (!defined($dctx)) { $after .= $s; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { - $ctx->zmore(qq() . + $ctx->zadd(qq() . diff_hunk($dctx, $1, $2) . $lnk->to_html($s) . ''); } elsif ($s =~ /\A\+/) { # $s may be huge - $ctx->zmore(qq(), + $ctx->zadd(qq(), $lnk->to_html($s), ''); } elsif ($s =~ /\A-- $/sm) { # email sig starts $dctx = undef; $after .= $s; } elsif ($s =~ /\A-/) { # $s may be huge - $ctx->zmore(qq(), + $ctx->zadd(qq(), $lnk->to_html($s), ''); } else { # $s may be huge - $ctx->zmore($lnk->to_html($s)); + $ctx->zadd($lnk->to_html($s)); } } diff_before_or_after($ctx, \$after) if !$dctx; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index cdfbf393..1c7ae881 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -146,7 +146,7 @@ sub feed_entry { my $name = ascii_html(join(', ', PublicInbox::Address::names($from))); $email = ascii_html($email // $ctx->{ibx}->{-primary_address}); - $ctx->zmore( + $ctx->zadd( (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''). "$name$email" . "$title$updated" . diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index c23668a4..2a318e5e 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -182,7 +182,7 @@ sub html_oneshot ($$;@) { bless $ctx, __PACKAGE__; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env}); $ctx->{base_url} // do { - $ctx->zmore(html_top($ctx)); + $ctx->zadd(html_top($ctx)); $ctx->{base_url} = base_url($ctx); }; my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx)); @@ -216,7 +216,7 @@ sub html_init { my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8']; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env}); bless $ctx, __PACKAGE__; - $ctx->zmore(html_top($ctx)); + $ctx->zadd(html_top($ctx)); } 1;