user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 26/38] www: drop {obuf} use entirely, for now
  2022-09-10  8:16  7% [PATCH 00/38] www: reduce memory usage Eric Wong
@ 2022-09-10  8:17  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2022-09-10  8:17 UTC (permalink / raw)
  To: meta

This may help us identify hot spots and reduce pad space
as needed.
---
 lib/PublicInbox/GzipFilter.pm    | 13 ++++++-------
 lib/PublicInbox/View.pm          | 21 ++++++++-------------
 lib/PublicInbox/ViewVCS.pm       |  3 ++-
 lib/PublicInbox/WwwAtomStream.pm |  8 ++++----
 4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 77d570b6..eb0046ce 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -130,11 +130,11 @@ sub write {
 # similar to ->translate; use this when we're sure we know we have
 # more data to buffer after this
 sub zmore {
-	my $self = $_[0]; # $_[1] => input
+	my $self = shift; # $_[1] => input
 	http_out($self);
 	my $err;
-	for (delete $self->{obuf}, @_[1..$#_]) {
-		$err = $self->{gz}->deflate($_ // next, $self->{zbuf});
+	for (@_) {
+		$err = $self->{gz}->deflate($_, $self->{zbuf});
 		die "gzip->deflate: $err" if $err != Z_OK;
 	}
 	undef;
@@ -142,13 +142,12 @@ sub zmore {
 
 # flushes and returns the final bit of gzipped data
 sub zflush ($;@) {
-	my $self = $_[0]; # $_[1..Inf] => final input (optional)
+	my $self = shift; # $_[1..Inf] => final input (optional)
 	my $zbuf = delete $self->{zbuf};
 	my $gz = delete $self->{gz};
 	my $err;
-	# it's a bug iff $gz is undef w/ $obuf or $_[1..]
-	for (delete $self->{obuf}, @_[1..$#_]) {
-		$err = $gz->deflate($_ // next, $zbuf);
+	for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
+		$err = $gz->deflate($_, $zbuf);
 		die "gzip->deflate: $err" if $err != Z_OK;
 	}
 	$gz // return ''; # not a bug, recursing on DS->write failure
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3dbf8bac..630f1e42 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -43,7 +43,7 @@ sub msg_page_i {
 			$ctx->zmore('</pre><hr>');
 		}
 		html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
-		delete($ctx->{obuf}) // \'';
+		\''; # XXX TODO cleanup
 	} else { # called by WwwStream::async_next or getline
 		$ctx->{smsg}; # may be undef
 	}
@@ -245,9 +245,8 @@ sub eml_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
-	$ctx->{obuf} = \$rv;
+	$ctx->zmore($rv); # XXX $rv is small, reuse below
 	$eml->each_part(\&add_text_body, $ctx, 1); # expensive
-	$ctx->zmore; # TODO: remove once add_text_body is updated
 
 	# add the footer
 	$rv = "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -560,13 +559,9 @@ sub add_text_body { # callback for each_part
 	my ($part, $depth, $idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
-	my $rv = $ctx->{obuf} //= \(my $obuf = '');
 	my ($s, $err) = msg_part_text($part, $ct);
-	$s // return $$rv .= (attach_link($ctx, $ct, $p, $fn) // '');
-	if ($part->{is_submsg}) {
-		$$rv .= submsg_hdr($ctx, $part);
-		$$rv .= "\n";
-	}
+	$s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // '');
+	my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
 
 	# makes no difference to browsers, and don't screw up filename
 	# link generation in diffs with the extra '%0D'
@@ -614,10 +609,11 @@ sub add_text_body { # callback for each_part
 	undef $s; # free memory
 	if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
 		# badly-encoded message with $err? tell the world about it!
-		$$rv .= attach_link($ctx, $ct, $p, $fn, $err);
-		$$rv .= "\n";
+		$buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
 	}
 	delete $part->{bdy}; # save memory
+	$ctx->zmore($buf);
+	undef $buf;
 	for my $cur (@sections) { # $cur may be huge
 		if ($cur =~ /\A>/) {
 			# we use a <span> here to allow users to specify
@@ -722,7 +718,6 @@ sub _msg_page_prepare {
 	}
 	$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
 	$ctx->zmore($hbuf .= "\n");
-	${$ctx->{obuf}} = ''; # TODO remove
 	1;
 }
 
@@ -842,7 +837,7 @@ EOF
 	$foot .= qq(<a\nhref="#R">reply</a>);
 	# $skel may be big for big threads, don't append it to $foot
 	$skel .= '</pre>' . ($related // '');
-	$ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr)); # flushes obuf
+	$ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr));
 }
 
 sub ghost_parent {
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index d3ac1a7d..57ab378d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -205,7 +205,8 @@ EOM
 		$ctx->zmore("---\n patch is too large to show\n");
 	} else { # prepare flush_diff:
 		read($fh, $x, -s _);
-		$ctx->{obuf} = \$bdy;
+		$ctx->zmore($bdy);
+		undef $bdy;
 		$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
 		$x =~ s/\r?\n/\n/gs;
 		$ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 09c79a8a..cdfbf393 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,15 +146,15 @@ sub feed_entry {
 	my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
 	$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
 
-	my $s = delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '';
-	$s .= "<entry><author><name>$name</name><email>$email</email>" .
+	$ctx->zmore(
+		(delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
+		"<entry><author><name>$name</name><email>$email</email>" .
 		"</author>$title$updated" .
 		qq(<link\nhref="$href"/>).
 		"<id>$uuid</id>$irt" .
 		qq{<content\ntype="xhtml">} .
 		qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
-		qq(<pre\nstyle="white-space:pre-wrap">);
-	$ctx->{obuf} = \$s;
+		qq(<pre\nstyle="white-space:pre-wrap">));
 	$ctx->{mhref} = $href;
 	$ctx->{changed_href} = "${href}#related";
 	$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);

^ permalink raw reply related	[relevance 6%]

* [PATCH 00/38] www: reduce memory usage
@ 2022-09-10  8:16  7% Eric Wong
  2022-09-10  8:17  6% ` [PATCH 26/38] www: drop {obuf} use entirely, for now Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2022-09-10  8:16 UTC (permalink / raw)
  To: meta

I'm over the moon with this series since this drops dozens of
megabytes of scratchpad use while providing tiny speedups along
the way.  For me, that's a 10-15% reduction in memory use under
public-inbox-netd w/ mwrap-perl[1] overhead.

This scratchpad use has been bothering me for a long time
(since I fixed all the other leaks, including one in the core
Encode module).

There's more coming, of course, but this series is big enough
and has shown good results on https://yhbt.net/lore/

It also provides a good pattern/guidance going forward
on how to efficiently implement future features.

I actually started out in this series trying to buffer
everything using gzip to avoid space-wasting uncompressed
strings living in memory.  Unfortunately,
Compress::Raw::Zlib::deflate proved too expensive to call
frequently for short strings.

Going back to `.=' ops via a ->zadd method brought back some of
the speed while consolidating the scratchpad to a single place;
but I didn't like the performance regression.

I kept those detours in the history presented here since I
figure they're worth showing.

Finally relying on PerlIO::scalar with print|say ops proved to
be the fastest since OO ->method dispatch overhead can be avoided
and there's no scratchpad use at all from these, either.

As before, we still call C:R:Z:deflate after every full message
and flush to the socket periodically.

I may even consider using PerlIO::gzip in the future, but that's
a non-standard module.  However, I definitely took inspiration
from it since I saw that it would buffer uncompressed data into
memory before compressing it.

There are also a few small simplifications and speedups I noticed
along the way, and several other bugfixes I posted independently
while working on this series.

[1] I used https://80x24.org/mwrap-perl.git to check malloc use

Eric Wong (38):
  xt: fold perf-obfuscate into perf-msgview, future-proof
  www: gzip_filter: implicitly flush {obuf} on zmore/zflush
  view: rework single message page to compress earlier
  www_atom_stream: require 200 response
  www_stream: aresponse assumes 200, too
  www_text: reduce parameter passing for response header
  viewvcs: use shorter and simpler ctx->html_done
  www_listing: consolidate some ->zmore dispatches
  www_listing: avoid unnecessary work for common cases
  www: viewdiff: use return value for diff_hunk
  view: simplify _parent_headers
  view: eml_entry: reduce manipulation of ctx->{obuf}
  gzip_filter: ->translate can reuse zmore/zflush
  view: remove multipart_text_as_html
  view: reduce subroutine calls for submsg_hdr
  view: attach_link: reduce obuf manipulation
  viewdiff: reuse existing string in diff_before_or_after
  view: _th_index_lite: avoid one s///, improve symmetry
  view: _th_index_lite: use `//' defined-or op
  view: reduce ascii_html calls and {obuf} use
  view: html_footer: golf out a few lines
  view: html_footer: remove obuf dependency
  view: html_footer: avoid escaping " in a few places
  viewdiff: diff_hunk: shorten conditionals, slightly
  view: switch a few things to ctx->zmore
  www: drop {obuf} use entirely, for now
  www: switch to zadd for the majority of buffering
  www: use PerlIO::scalar (zfh) for buffering
  viewdiff: diff_before_or_after: avoid extra capture
  viewdiff: diff_header: shorten function, slightly
  www_static: switch to `print $zfh', and optimize
  httpd/async: describe which ->write subs it can call
  translate: support multiple buffer args
  gzip_filter: write: use multi-arg translate
  feed: new_html_i: switch from zmore to `print $zfh'
  mbox*: use multi-arg ->translate and ->write
  www_listing: switch to `print $zfh'
  viewvcs: switch to `print $zfh'

 Documentation/mknews.perl        |   3 +-
 MANIFEST                         |   1 -
 lib/PublicInbox/CompressNoop.pm  |   4 +-
 lib/PublicInbox/Feed.pm          |  12 +-
 lib/PublicInbox/GzipFilter.pm    |  62 +++---
 lib/PublicInbox/HTTPD/Async.pm   |   9 +-
 lib/PublicInbox/Mbox.pm          |  11 +-
 lib/PublicInbox/MboxGz.pm        |   3 +-
 lib/PublicInbox/SearchView.pm    |   8 +-
 lib/PublicInbox/View.pm          | 312 ++++++++++++-------------------
 lib/PublicInbox/ViewDiff.pm      | 115 +++++-------
 lib/PublicInbox/ViewVCS.pm       |  17 +-
 lib/PublicInbox/WwwAtomStream.pm |  19 +-
 lib/PublicInbox/WwwListing.pm    |  40 ++--
 lib/PublicInbox/WwwStatic.pm     |  32 ++--
 lib/PublicInbox/WwwStream.pm     |  23 ++-
 lib/PublicInbox/WwwText.pm       |  35 ++--
 t/psgi_v2.t                      |   4 +-
 xt/perf-msgview.t                |  10 +-
 xt/perf-obfuscate.t              |  66 -------
 20 files changed, 320 insertions(+), 466 deletions(-)
 delete mode 100644 xt/perf-obfuscate.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2022-09-10  8:16  7% [PATCH 00/38] www: reduce memory usage Eric Wong
2022-09-10  8:17  6% ` [PATCH 26/38] www: drop {obuf} use entirely, for now Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).