user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/2] www: memory reductions for multipart
@ 2020-01-11  6:28  7% Eric Wong
  2020-01-11  6:28  6% ` [PATCH 2/2] www: discard multipart parent on iteration Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-01-11  6:28 UTC (permalink / raw)
  To: meta

Multipart messages in Email::MIME still cost us a lot of memory,
but this makes things less bad (not "good", just "less bad" :P)

Eric Wong (2):
  xt/mem-msgview.t: change to test one multipart message
  www: discard multipart parent on iteration

 lib/PublicInbox/MsgIter.pm       |  5 +++--
 lib/PublicInbox/SolverGit.pm     |  5 ++---
 lib/PublicInbox/View.pm          |  6 +++---
 lib/PublicInbox/WwwAtomStream.pm |  4 ++--
 lib/PublicInbox/WwwAttach.pm     |  2 +-
 xt/mem-msgview.t                 | 32 ++++++++++++++++++++++----------
 6 files changed, 33 insertions(+), 21 deletions(-)

^ permalink raw reply	[relevance 7%]

* [PATCH 2/2] www: discard multipart parent on iteration
  2020-01-11  6:28  7% [PATCH 0/2] www: memory reductions for multipart Eric Wong
@ 2020-01-11  6:28  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-01-11  6:28 UTC (permalink / raw)
  To: meta

We're often iterating through messages while writing to another
buffer in our WWW interface, causing memory usage to multiply.
Since we know we won't need to keep the MIME object around in
some cases, we can tell msg_iter to clobber the on-stack
variable while it operates on subparts of multipart messages.

With xt/mem-msgview.t switched to multipart from the previous
commit, this shows a 13 MB memory reduction on that test.
---
 lib/PublicInbox/MsgIter.pm       | 5 +++--
 lib/PublicInbox/SolverGit.pm     | 5 ++---
 lib/PublicInbox/View.pm          | 6 +++---
 lib/PublicInbox/WwwAtomStream.pm | 4 ++--
 lib/PublicInbox/WwwAttach.pm     | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index cdd78b39..f238954b 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -12,10 +12,11 @@ use PublicInbox::MIME;
 # Like Email::MIME::walk_parts, but this is:
 # * non-recursive
 # * passes depth and indices to the iterator callback
-sub msg_iter ($$;$) {
-	my ($mime, $cb, $cb_arg) = @_;
+sub msg_iter ($$;$$) {
+	my ($mime, $cb, $cb_arg, $do_undef) = @_;
 	my @parts = $mime->subparts;
 	if (@parts) {
+		$mime = $_[0] = undef if $do_undef; # saves some memory
 		my $i = 0;
 		@parts = map { [ $_, 1, ++$i ] } @parts;
 		while (my $p = shift @parts) {
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 8629f0da..b48e8ac4 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -233,9 +233,8 @@ sub find_extract_diffs ($$$) {
 	my $diffs = [];
 	foreach my $smsg (@$msgs) {
 		$ibx->smsg_mime($smsg) or next;
-		my $mime = delete $smsg->{mime};
-		msg_iter($mime, \&extract_diff,
-				[$self, $diffs, $pre, $post, $ibx, $smsg]);
+		msg_iter(delete $smsg->{mime}, \&extract_diff,
+				[$self, $diffs, $pre, $post, $ibx, $smsg], 1);
 	}
 	@$diffs ? $diffs : undef;
 }
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 405da2a9..d88b34da 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -275,7 +275,7 @@ sub index_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{rv} = \$rv;
-	msg_iter($mime, \&add_text_body, $ctx);
+	msg_iter($mime, \&add_text_body, $ctx, 1);
 	delete $ctx->{rv};
 
 	# add the footer
@@ -506,12 +506,12 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 }
 
 sub multipart_text_as_html {
-	my ($mime, $mhref, $ctx) = @_;
+	my (undef, $mhref, $ctx) = @_; # $mime = $_[0]
 	$ctx->{mhref} = $mhref;
 	$ctx->{rv} = \(my $rv = '');
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, \&add_text_body, $ctx);
+	msg_iter($_[0], \&add_text_body, $ctx, 1);
 	${delete $ctx->{rv}};
 }
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 9430dd97..9ec1383d 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -101,9 +101,9 @@ sub atom_header {
 sub feed_entry {
 	my ($self, $smsg) = @_;
 	my $ctx = $self->{ctx};
-	my $mime = $smsg->{mime};
+	my $mid = $smsg->mid; # may extract Message-ID from {mime}
+	my $mime = delete $smsg->{mime};
 	my $hdr = $mime->header_obj;
-	my $mid = $smsg->mid;
 	my $irt = PublicInbox::View::in_reply_to($hdr);
 	my $uuid = to_uuid($mid);
 	my $base = $ctx->{feed_base_url};
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index cda1c6c8..92f47e49 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -40,7 +40,7 @@ sub get_attach ($$$) {
 	my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
 	$mime = PublicInbox::MIME->new($mime);
 	$res->[3] = $idx;
-	msg_iter($mime, \&get_attach_i, $res);
+	msg_iter($mime, \&get_attach_i, $res, 1);
 	pop @$res; # cleanup before letting PSGI server see it
 	$res
 }

^ permalink raw reply related	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-01-11  6:28  7% [PATCH 0/2] www: memory reductions for multipart Eric Wong
2020-01-11  6:28  6% ` [PATCH 2/2] www: discard multipart parent on iteration Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).