user/dev discussion of public-inbox itself
 help / color / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/4] mbox: use Email::Simple->new to do in-place modifications
Date: Thu, 27 Jun 2019 22:51:46 +0000
Message-ID: <20190627225148.9657-3-e@80x24.org> (raw)
In-Reply-To: <20190627225148.9657-1-e@80x24.org>

Email::Simple->new will split the head from the body in-place,
and we can avoid using Email::Simple::body.  This saves us from
holding an extra copy of the message in memory, and saves us
around 30MB when operating on ~30MB messages.
---
 lib/PublicInbox/Mbox.pm | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 15200d3a..1bf71c60 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -38,17 +38,18 @@ sub mb_stream {
 # called by PSGI server as body response
 sub getline {
 	my ($more) = @_; # self
-	my ($ctx, $id, $prev, $next, $cur) = @$more;
-	if ($cur) { # first
+	my ($ctx, $id, $prev, $next, $cur, $mref) = @$more;
+	if ($mref) { # first
 		pop @$more;
-		return msg_str($ctx, $cur);
+		pop @$more;
+		return msg_str($ctx, $cur, $mref);
 	}
 	$cur = $next or return;
 	my $ibx = $ctx->{-inbox};
 	$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
 	@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
-	my $mref = $ibx->msg_by_smsg($cur) or return;
-	msg_str($ctx, Email::Simple->new($mref));
+	$mref = $ibx->msg_by_smsg($cur) or return;
+	msg_str($ctx, Email::Simple->new($mref), $mref);
 }
 
 sub close {} # noop
@@ -57,18 +58,17 @@ sub emit_raw {
 	my ($ctx) = @_;
 	my $mid = $ctx->{mid};
 	my $ibx = $ctx->{-inbox};
-	my $first;
-	my $more;
+	my ($first, $mref, $more);
 	if (my $over = $ibx->over) {
 		my ($id, $prev);
 		my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return;
-		my $mref = $ibx->msg_by_smsg($smsg) or return;
+		$mref = $ibx->msg_by_smsg($smsg) or return;
 		$first = Email::Simple->new($mref);
 		my $next = $over->next_by_mid($mid, \$id, \$prev);
 		# $more is for ->getline
-		$more = [ $ctx, $id, $prev, $next, $first ] if $next;
+		$more = [ $ctx, $id, $prev, $next, $first, $mref ] if $next;
 	} else {
-		my $mref = $ibx->msg_by_mid($mid) or return;
+		$mref = $ibx->msg_by_mid($mid) or return;
 		$first = Email::Simple->new($mref);
 	}
 	return unless defined $first;
@@ -83,11 +83,12 @@ sub emit_raw {
 		$fn .= '.txt';
 	}
 	push @hdr, 'Content-Disposition', "inline; filename=$fn";
-	[ 200, \@hdr, $more ? mb_stream($more) : [ msg_str($ctx, $first) ] ];
+	[ 200, \@hdr,
+		$more ? mb_stream($more) : [ msg_str($ctx, $first, $mref) ] ];
 }
 
 sub msg_str {
-	my ($ctx, $simple, $mid) = @_; # Email::Simple object
+	my ($ctx, $simple, $mref, $mid) = @_; # simple - Email::Simple object
 	my $header_obj = $simple->header_obj;
 
 	# drop potentially confusing headers, ssoma already should've dropped
@@ -104,7 +105,7 @@ sub msg_str {
 		'List-Archive', "<$base>",
 		'List-Post', "<mailto:$ibx->{-primary_address}>",
 	);
-	my $crlf = $simple->crlf;
+	my $crlf = $header_obj->crlf;
 	my $buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970\n" .
 			$header_obj->as_string;
 	for (my $i = 0; $i < @append; $i += 2) {
@@ -123,9 +124,8 @@ sub msg_str {
 
 	# mboxrd quoting style
 	# ref: http://www.qmail.org/man/man5/mbox.html
-	my $body = $simple->body;
-	$body =~ s/^(>*From )/>$1/gm;
-	$buf .= $body;
+	$$mref =~ s/^(>*From )/>$1/gm;
+	$buf .= $$mref;
 	$buf .= "\n";
 }
 
@@ -268,9 +268,9 @@ sub getline {
 	my ($self) = @_;
 	my $ctx = $self->{ctx} or return;
 	while (my $smsg = $self->{cb}->()) {
-		my $msg = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
-		$msg = Email::Simple->new($msg);
-		$self->{gz}->write(PublicInbox::Mbox::msg_str($ctx, $msg,
+		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+		my $s = Email::Simple->new($mref);
+		$self->{gz}->write(PublicInbox::Mbox::msg_str($ctx, $s, $mref,
 				$smsg->{mid}));
 		my $bref = $self->{buf};
 		if (length($$bref) >= 8192) {
-- 
EW


  parent reply index

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-27 22:51 [PATCH 0/4] www|nntp: optimize uses of Email::Simple Eric Wong
2019-06-27 22:51 ` [PATCH 1/4] nntp: rework and simplify art_lookup response Eric Wong
2019-06-27 22:51 ` Eric Wong [this message]
2019-06-27 22:51 ` [PATCH 3/4] mbox: split header and body processing Eric Wong
2019-06-27 22:51 ` [PATCH 4/4] nntp: reduce syscalls for ARTICLE and BODY Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190627225148.9657-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox