From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/4] mbox: use Email::Simple->new to do in-place modifications
Date: Thu, 27 Jun 2019 22:51:46 +0000 [thread overview]
Message-ID: <20190627225148.9657-3-e@80x24.org> (raw)
In-Reply-To: <20190627225148.9657-1-e@80x24.org>
Email::Simple->new will split the head from the body in-place,
and we can avoid using Email::Simple::body. This saves us from
holding an extra copy of the message in memory, which saves
roughly 30MB when operating on ~30MB messages.
---
lib/PublicInbox/Mbox.pm | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 15200d3a..1bf71c60 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -38,17 +38,18 @@ sub mb_stream {
# called by PSGI server as body response
sub getline {
my ($more) = @_; # self
- my ($ctx, $id, $prev, $next, $cur) = @$more;
- if ($cur) { # first
+ my ($ctx, $id, $prev, $next, $cur, $mref) = @$more;
+ if ($mref) { # first
pop @$more;
- return msg_str($ctx, $cur);
+ pop @$more;
+ return msg_str($ctx, $cur, $mref);
}
$cur = $next or return;
my $ibx = $ctx->{-inbox};
$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
- my $mref = $ibx->msg_by_smsg($cur) or return;
- msg_str($ctx, Email::Simple->new($mref));
+ $mref = $ibx->msg_by_smsg($cur) or return;
+ msg_str($ctx, Email::Simple->new($mref), $mref);
}
sub close {} # noop
@@ -57,18 +58,17 @@ sub emit_raw {
my ($ctx) = @_;
my $mid = $ctx->{mid};
my $ibx = $ctx->{-inbox};
- my $first;
- my $more;
+ my ($first, $mref, $more);
if (my $over = $ibx->over) {
my ($id, $prev);
my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return;
- my $mref = $ibx->msg_by_smsg($smsg) or return;
+ $mref = $ibx->msg_by_smsg($smsg) or return;
$first = Email::Simple->new($mref);
my $next = $over->next_by_mid($mid, \$id, \$prev);
# $more is for ->getline
- $more = [ $ctx, $id, $prev, $next, $first ] if $next;
+ $more = [ $ctx, $id, $prev, $next, $first, $mref ] if $next;
} else {
- my $mref = $ibx->msg_by_mid($mid) or return;
+ $mref = $ibx->msg_by_mid($mid) or return;
$first = Email::Simple->new($mref);
}
return unless defined $first;
@@ -83,11 +83,12 @@ sub emit_raw {
$fn .= '.txt';
}
push @hdr, 'Content-Disposition', "inline; filename=$fn";
- [ 200, \@hdr, $more ? mb_stream($more) : [ msg_str($ctx, $first) ] ];
+ [ 200, \@hdr,
+ $more ? mb_stream($more) : [ msg_str($ctx, $first, $mref) ] ];
}
sub msg_str {
- my ($ctx, $simple, $mid) = @_; # Email::Simple object
+ my ($ctx, $simple, $mref, $mid) = @_; # simple - Email::Simple object
my $header_obj = $simple->header_obj;
# drop potentially confusing headers, ssoma already should've dropped
@@ -104,7 +105,7 @@ sub msg_str {
'List-Archive', "<$base>",
'List-Post', "<mailto:$ibx->{-primary_address}>",
);
- my $crlf = $simple->crlf;
+ my $crlf = $header_obj->crlf;
my $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970\n" .
$header_obj->as_string;
for (my $i = 0; $i < @append; $i += 2) {
@@ -123,9 +124,8 @@ sub msg_str {
# mboxrd quoting style
# ref: http://www.qmail.org/man/man5/mbox.html
- my $body = $simple->body;
- $body =~ s/^(>*From )/>$1/gm;
- $buf .= $body;
+ $$mref =~ s/^(>*From )/>$1/gm;
+ $buf .= $$mref;
$buf .= "\n";
}
@@ -268,9 +268,9 @@ sub getline {
my ($self) = @_;
my $ctx = $self->{ctx} or return;
while (my $smsg = $self->{cb}->()) {
- my $msg = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
- $msg = Email::Simple->new($msg);
- $self->{gz}->write(PublicInbox::Mbox::msg_str($ctx, $msg,
+ my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+ my $s = Email::Simple->new($mref);
+ $self->{gz}->write(PublicInbox::Mbox::msg_str($ctx, $s, $mref,
$smsg->{mid}));
my $bref = $self->{buf};
if (length($$bref) >= 8192) {
--
EW
next prev parent reply other threads:[~2019-06-27 22:51 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-27 22:51 [PATCH 0/4] www|nntp: optimize uses of Email::Simple Eric Wong
2019-06-27 22:51 ` [PATCH 1/4] nntp: rework and simplify art_lookup response Eric Wong
2019-06-27 22:51 ` Eric Wong [this message]
2019-06-27 22:51 ` [PATCH 3/4] mbox: split header and body processing Eric Wong
2019-06-27 22:51 ` [PATCH 4/4] nntp: reduce syscalls for ARTICLE and BODY Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190627225148.9657-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).