* [PATCH 27/38] www: switch to zadd for the majority of buffering
2022-09-10 8:16 7% [PATCH 00/38] www: reduce memory usage Eric Wong
@ 2022-09-10 8:17 5% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This allows us to focus string concatenations in one place to
allow Perl internal scratchpad optimizations to reuse memory.
Calling Compress::Raw::Zlib::deflate repeatedly proves too
expensive in terms of CPU cycles.
---
lib/PublicInbox/GzipFilter.pm | 22 +++++++++++-------
lib/PublicInbox/Mbox.pm | 2 +-
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/View.pm | 40 ++++++++++++++++----------------
lib/PublicInbox/ViewDiff.pm | 14 +++++------
lib/PublicInbox/WwwAtomStream.pm | 2 +-
lib/PublicInbox/WwwStream.pm | 4 ++--
7 files changed, 46 insertions(+), 40 deletions(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index eb0046ce..1f11acb8 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -127,15 +127,21 @@ sub write {
http_out($_[0])->write(translate($_[0], $_[1]));
}
+sub zadd {
+ my $self = shift;
+ $self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+}
+
# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
my $self = shift; # $_[1] => input
http_out($self);
- my $err;
+ my $x;
+ defined($x = delete($self->{pbuf})) and unshift(@_, $x);
for (@_) {
- $err = $self->{gz}->deflate($_, $self->{zbuf});
- die "gzip->deflate: $err" if $err != Z_OK;
+ ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
+ die "gzip->deflate: $x";
}
undef;
}
@@ -145,14 +151,14 @@ sub zflush ($;@) {
my $self = shift; # $_[1..Inf] => final input (optional)
my $zbuf = delete $self->{zbuf};
my $gz = delete $self->{gz};
- my $err;
+ my $x;
+ defined($x = delete($self->{pbuf})) and unshift(@_, $x);
for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
- $err = $gz->deflate($_, $zbuf);
- die "gzip->deflate: $err" if $err != Z_OK;
+ ($x = $gz->deflate($_, $zbuf)) == Z_OK or
+ die "gzip->deflate: $x";
}
$gz // return ''; # not a bug, recursing on DS->write failure
- $err = $gz->flush($zbuf);
- die "gzip->flush: $err" if $err != Z_OK;
+ ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
$zbuf;
}
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 2ef8ff2b..cfe34d9c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -20,7 +20,7 @@ sub getline {
my $ibx = $ctx->{ibx};
my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zmore(msg_hdr($ctx, $eml));
+ $ctx->zadd(msg_hdr($ctx, $eml));
if ($n) {
$ctx->translate(msg_body($eml));
} else { # last message
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e0404e5f..b18947ee 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -331,7 +331,7 @@ sub mset_thread {
# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
my ($ctx, $eml) = @_;
- $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+ $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip};
$eml and return PublicInbox::View::eml_entry($ctx, $eml);
my $smsg = shift @{$ctx->{msgs}} or
$ctx->zmore(${delete($ctx->{skel})});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 630f1e42..85dc3bd8 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -40,10 +40,10 @@ sub msg_page_i {
"../${\mid_href($smsg->{mid})}/" : '';
if (_msg_page_prepare($eml, $ctx)) {
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zmore('</pre><hr>');
+ $ctx->zadd('</pre><hr>');
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
- \''; # XXX TODO cleanup
+ ''; # XXX TODO cleanup
} else { # called by WwwStream::async_next or getline
$ctx->{smsg}; # may be undef
}
@@ -58,7 +58,7 @@ sub no_over_html ($) {
PublicInbox::WwwStream::init($ctx);
if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zmore('</pre><hr>');
+ $ctx->zadd('</pre><hr>');
}
html_footer($ctx, $eml);
$ctx->html_done;
@@ -245,7 +245,7 @@ sub eml_entry {
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
- $ctx->zmore($rv); # XXX $rv is small, reuse below
+ $ctx->zadd($rv); # XXX $rv is small, reuse below
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
# add the footer
@@ -386,7 +386,7 @@ sub pre_thread { # walk_thread callback
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
- $ctx->zmore($beg.'<pre>');
+ $ctx->zadd($beg.'<pre>');
eml_entry($ctx, $eml) . '</pre>' . $end;
}
@@ -414,15 +414,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
if (!$ghost_ok) { # first non-ghost
$ctx->{-title_html} =
ascii_html($smsg->{subject});
- $ctx->zmore($ctx->html_top);
+ $ctx->zadd($ctx->html_top);
}
return $smsg;
}
# buffer the ghost entry and loop
- $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg));
+ $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg));
} else { # all done
- $ctx->zmore(join('', thread_adj_level($ctx, 0)));
- $ctx->zmore(${delete($ctx->{skel})});
+ $ctx->zadd(join('', thread_adj_level($ctx, 0)));
+ $ctx->zadd(${delete($ctx->{skel})});
return;
}
}
@@ -491,7 +491,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
my $smsg = $ctx->{smsg};
if (exists $ctx->{-html_tip}) {
$ctx->{-title_html} = ascii_html($smsg->{subject});
- $ctx->zmore($ctx->html_top);
+ $ctx->zadd($ctx->html_top);
}
return eml_entry($ctx, $eml);
} else {
@@ -499,7 +499,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
return $smsg if exists($smsg->{blob});
}
my $skel = delete($ctx->{skel}) or return; # all done
- $ctx->zmore($$skel);
+ $ctx->zadd($$skel);
undef;
}
}
@@ -560,7 +560,7 @@ sub add_text_body { # callback for each_part
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
my ($s, $err) = msg_part_text($part, $ct);
- $s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // '');
+ $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // '');
my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
# makes no difference to browsers, and don't screw up filename
@@ -612,18 +612,18 @@ sub add_text_body { # callback for each_part
$buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
}
delete $part->{bdy}; # save memory
- $ctx->zmore($buf);
+ $ctx->zadd($buf);
undef $buf;
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
# their own color for quoted text
- $ctx->zmore(qq(<span\nclass="q">),
+ $ctx->zadd(qq(<span\nclass="q">),
$l->to_html($cur), '</span>');
} elsif ($diff) {
flush_diff($ctx, \$cur);
} else { # regular lines, OK
- $ctx->zmore($l->to_html($cur));
+ $ctx->zadd($l->to_html($cur));
}
undef $cur; # free memory
}
@@ -685,10 +685,10 @@ sub _msg_page_prepare {
$hbuf .= qq[Message-ID: <$x> (<a href="raw">raw</a>)\n];
}
if (!$nr) { # first (and only) message, common case
- $ctx->zmore($ctx->html_top, $hbuf);
+ $ctx->zadd($ctx->html_top, $hbuf);
} else {
delete $ctx->{-title_html};
- $ctx->zmore($ctx->{-html_tip}, $hbuf);
+ $ctx->zadd($ctx->{-html_tip}, $hbuf);
}
$ctx->{-linkify} //= PublicInbox::Linkify->new;
$hbuf = '';
@@ -699,7 +699,7 @@ sub _msg_page_prepare {
$hbuf .= "$h: $_\n" for ($eml->header_raw($h));
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
- $ctx->zmore($hbuf);
+ $ctx->zadd($hbuf);
$hbuf = '';
}
my @irt = $eml->header_raw('In-Reply-To');
@@ -717,7 +717,7 @@ sub _msg_page_prepare {
$hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
- $ctx->zmore($hbuf .= "\n");
+ $ctx->zadd($hbuf .= "\n");
1;
}
@@ -837,7 +837,7 @@ EOF
$foot .= qq(<a\nhref="#R">reply</a>);
# $skel may be big for big threads, don't append it to $foot
$skel .= '</pre>' . ($related // '');
- $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr));
+ $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr));
}
sub ghost_parent {
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 36601910..95b615dc 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -156,7 +156,7 @@ sub diff_header ($$$) {
warn "BUG? <$$x> had no ^index line";
}
$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
- $ctx->zmore(qq(<span\nclass="head">$$x</span>));
+ $ctx->zadd(qq(<span\nclass="head">$$x</span>));
$dctx;
}
@@ -180,9 +180,9 @@ sub diff_before_or_after ($$) {
$$x .= qq(<a href="$ch">changed</a>,);
$$x .= ascii_html(pop @x); # $4: insertions/deletions
# notes, commit message, etc
- $ctx->zmore($$x .= $lnk->to_html(pop @x));
+ $ctx->zadd($$x .= $lnk->to_html(pop @x));
} else {
- $ctx->zmore($ctx->{-linkify}->to_html($$x));
+ $ctx->zadd($ctx->{-linkify}->to_html($$x));
}
}
@@ -220,23 +220,23 @@ sub flush_diff ($$) {
if (!defined($dctx)) {
$after .= $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
- $ctx->zmore(qq(<span\nclass="hunk">) .
+ $ctx->zadd(qq(<span\nclass="hunk">) .
diff_hunk($dctx, $1, $2) .
$lnk->to_html($s) .
'</span>');
} elsif ($s =~ /\A\+/) { # $s may be huge
- $ctx->zmore(qq(<span\nclass="add">),
+ $ctx->zadd(qq(<span\nclass="add">),
$lnk->to_html($s),
'</span>');
} elsif ($s =~ /\A-- $/sm) { # email sig starts
$dctx = undef;
$after .= $s;
} elsif ($s =~ /\A-/) { # $s may be huge
- $ctx->zmore(qq(<span\nclass="del">),
+ $ctx->zadd(qq(<span\nclass="del">),
$lnk->to_html($s),
'</span>');
} else { # $s may be huge
- $ctx->zmore($lnk->to_html($s));
+ $ctx->zadd($lnk->to_html($s));
}
}
diff_before_or_after($ctx, \$after) if !$dctx;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index cdfbf393..1c7ae881 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,7 +146,7 @@ sub feed_entry {
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
- $ctx->zmore(
+ $ctx->zadd(
(delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
"<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$updated" .
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c23668a4..2a318e5e 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -182,7 +182,7 @@ sub html_oneshot ($$;@) {
bless $ctx, __PACKAGE__;
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
$ctx->{base_url} // do {
- $ctx->zmore(html_top($ctx));
+ $ctx->zadd(html_top($ctx));
$ctx->{base_url} = base_url($ctx);
};
my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx));
@@ -216,7 +216,7 @@ sub html_init {
my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
bless $ctx, __PACKAGE__;
- $ctx->zmore(html_top($ctx));
+ $ctx->zadd(html_top($ctx));
}
1;
^ permalink raw reply related [relevance 5%]
* [PATCH 00/38] www: reduce memory usage
@ 2022-09-10 8:16 7% Eric Wong
2022-09-10 8:17 5% ` [PATCH 27/38] www: switch to zadd for the majority of buffering Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
I'm over the moon with this series since this drops dozens of
megabytes of scratchpad use while providing tiny speedups along
the way. For me, that's a 10-15% reduction in memory use under
public-inbox-netd w/ mwrap-perl[1] overhead.
This scratchpad use has been bothering me for a long time
(since I fixed all the other leaks, including one in the core
Encode module).
There's more coming, of course, but this series is big enough
and has shown good results on https://yhbt.net/lore/
It also provides a good pattern/guidance going forward
on how to efficiently implement future features.
I actually started out in this series trying to buffer
everything using gzip to avoid space-wasting uncompressed
strings living in memory. Unfortunately,
Compress::Raw::Zlib::deflate calls proved too expensive to call
frequently for short strings.
Going back to `.=' ops via a ->zadd method brought back some of
the speed while consolidating the scratchpad to a single place;
but I didn't like the performance regression.
I kept those detours in the history presented here since I
figure it's worth showing.
Finally, relying on PerlIO::scalar with print|say ops proved to
be the fastest since OO ->method dispatch overhead can be avoided
and there's no scratchpad use at all from these, either.
As before, we still call C:R:Z:deflate after every full message
and flush to the socket periodically.
I may even consider using PerlIO::gzip in the future, but that's
a non-standard module. However, I definitely took inspiration
from it since I saw that it would buffer uncompressed data into
memory before compressing it.
There are also a few small simplifications and speedups I noticed
along the way, and several other bugfixes I posted independently
while working on this series.
[1] I used https://80x24.org/mwrap-perl.git to check malloc use
Eric Wong (38):
xt: fold perf-obfuscate into perf-msgview, future-proof
www: gzip_filter: implicitly flush {obuf} on zmore/zflush
view: rework single message page to compress earlier
www_atom_stream: require 200 response
www_stream: aresponse assumes 200, too
www_text: reduce parameter passing for response header
viewvcs: use shorter and simpler ctx->html_done
www_listing: consolidate some ->zmore dispatches
www_listing: avoid unnecessary work for common cases
www: viewdiff: use return value for diff_hunk
view: simplify _parent_headers
view: eml_entry: reduce manipulation of ctx->{obuf}
gzip_filter: ->translate can reuse zmore/zflush
view: remove multipart_text_as_html
view: reduce subroutine calls for submsg_hdr
view: attach_link: reduce obuf manipulation
viewdiff: reuse existing string in diff_before_or_after
view: _th_index_lite: avoid one s///, improve symmetry
view: _th_index_lite: use `//' defined-or op
view: reduce ascii_html calls and {obuf} use
view: html_footer: golf out a few lines
view: html_footer: remove obuf dependency
view: html_footer: avoid escaping " in a few places
viewdiff: diff_hunk: shorten conditionals, slightly
view: switch a few things to ctx->zmore
www: drop {obuf} use entirely, for now
www: switch to zadd for the majority of buffering
www: use PerlIO::scalar (zfh) for buffering
viewdiff: diff_before_or_after: avoid extra capture
viewdiff: diff_header: shorten function, slightly
www_static: switch to `print $zfh', and optimize
httpd/async: describe which ->write subs it can call
translate: support multiple buffer args
gzip_filter: write: use multi-arg translate
feed: new_html_i: switch from zmore to `print $zfh'
mbox*: use multi-arg ->translate and ->write
www_listing: switch to `print $zfh'
viewvcs: switch to `print $zfh'
Documentation/mknews.perl | 3 +-
MANIFEST | 1 -
lib/PublicInbox/CompressNoop.pm | 4 +-
lib/PublicInbox/Feed.pm | 12 +-
lib/PublicInbox/GzipFilter.pm | 62 +++---
lib/PublicInbox/HTTPD/Async.pm | 9 +-
lib/PublicInbox/Mbox.pm | 11 +-
lib/PublicInbox/MboxGz.pm | 3 +-
lib/PublicInbox/SearchView.pm | 8 +-
lib/PublicInbox/View.pm | 312 ++++++++++++-------------------
lib/PublicInbox/ViewDiff.pm | 115 +++++-------
lib/PublicInbox/ViewVCS.pm | 17 +-
lib/PublicInbox/WwwAtomStream.pm | 19 +-
lib/PublicInbox/WwwListing.pm | 40 ++--
lib/PublicInbox/WwwStatic.pm | 32 ++--
lib/PublicInbox/WwwStream.pm | 23 ++-
lib/PublicInbox/WwwText.pm | 35 ++--
t/psgi_v2.t | 4 +-
xt/perf-msgview.t | 10 +-
xt/perf-obfuscate.t | 66 -------
20 files changed, 320 insertions(+), 466 deletions(-)
delete mode 100644 xt/perf-obfuscate.t
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2022-09-10 8:16 7% [PATCH 00/38] www: reduce memory usage Eric Wong
2022-09-10 8:17 5% ` [PATCH 27/38] www: switch to zadd for the majority of buffering Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).