From 0c586dc64b3b6642a894e125d09df446667a4079 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 18 Apr 2020 03:38:53 +0000 Subject: reduce scope of mbox From_ line removal It's unnecessary overhead for anything which does Email::MIME parsing. It was never done for v2 indexing, even though v1->v2 conversions did NOT remove those From_ lines. There was never a need to remove From_ lines in the v1 SearchIdx paths, either. Hitting a /$INBOX_URL/$MSGID/T/ endpoint with an 18 message thread reveals a ~0.5% speed improvement. This will become more apparent when we have a faster MIME parser. --- lib/PublicInbox/Inbox.pm | 8 ++------ lib/PublicInbox/Mbox.pm | 7 +++++-- lib/PublicInbox/NNTP.pm | 2 ++ lib/PublicInbox/SearchIdx.pm | 2 -- t/psgi_v2.t | 28 ++++++++++++++++++---------- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4bd82989..186eb420 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -311,9 +311,7 @@ sub nntp_usable { # for v1 users w/o SQLite only sub msg_by_path ($$;$) { my ($self, $path, $ref) = @_; - my $str = git($self)->cat_file('HEAD:'.$path, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file('HEAD:'.$path, $ref); } sub msg_by_smsg ($$;$) { @@ -324,9 +322,7 @@ sub msg_by_smsg ($$;$) { return unless defined $smsg; defined(my $blob = $smsg->{blob}) or return; - my $str = git($self)->cat_file($blob, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file($blob, $ref); } sub smsg_mime { diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 16de1a72..9995140c 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -106,8 +106,11 @@ sub msg_hdr ($$;$) { 'List-Post', "{-primary_address}>", ); my $crlf = $header_obj->crlf; - my $buf = 'From mboxrd@z Thu Jan 1 00:00:00 1970' . $crlf . 
- $header_obj->as_string; + my $buf = $header_obj->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf; + for (my $i = 0; $i < @append; $i += 2) { my $k = $append[$i]; my $v = $append[$i + 1]; diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index d1f75f6f..c79f198b 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -506,6 +506,8 @@ sub set_art { sub msg_hdr_write ($$$) { my ($self, $hdr, $body_follows) = @_; $hdr = $hdr->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; utf8::encode($hdr); $hdr =~ s/(?cat_file($blob, $sizeref) or die "BUG: $blob not found in $git->{git_dir}"; - # fixup bugs from import: - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; PublicInbox::MIME->new($str); } diff --git a/t/psgi_v2.t b/t/psgi_v2.t index c4f80869..57017de1 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -26,16 +26,16 @@ my $new_mid; my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; -my $mime = PublicInbox::MIME->create( - header => [ - From => 'a@example.com', - To => 'test@example.com', - Subject => 'this is a subject', - 'Message-ID' => '', - Date => 'Fri, 02 Oct 1993 00:00:00 +0000', - ], - body => "hello world\n", -); +my $mime = PublicInbox::MIME->new(<<'EOF'); +From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993 +From: a@example.com +To: test@example.com +Subject: this is a subject +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +hello world +EOF ok($im->add($mime), 'added one message'); $mime->body_set("hello world!\n"); @@ -48,6 +48,10 @@ my $mids = mids($mime->header_obj); $new_mid = $mids->[1]; $im->done; +my $msg = $ibx->msg_by_mid('a-mid@b'); +like($$msg, qr/\AFrom oldbug/s, + '"From_" line stored to test old bug workaround'); + my $cfgpfx = "publicinbox.v2test"; my $cfg = <{-primary_address} @@ -63,6 +67,7 @@ test_psgi(sub { 
$www->call(@_) }, sub { 'got v2 description missing message'); $res = $cb->(GET('/v2test/a-mid@b/raw')); $raw = $res->content; + unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); @from_ = ($raw =~ m/^From /mg); @@ -123,6 +128,7 @@ test_psgi(sub { $www->call(@_) }, sub { my $out; my $in = $res->content; my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in t.mbox.gz'); like($out, qr/^hello world!$/m, 'got second in t.mbox.gz'); like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz'); @@ -133,6 +139,7 @@ test_psgi(sub { $www->call(@_) }, sub { $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in mbox POST'); like($out, qr/^hello world!$/m, 'got second in mbox POST'); like($out, qr/^hello ghosts$/m, 'got third in mbox POST'); @@ -143,6 +150,7 @@ test_psgi(sub { $www->call(@_) }, sub { $res = $cb->(GET('/v2test/all.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in all.mbox'); like($out, qr/^hello world!$/m, 'got second in all.mbox'); like($out, qr/^hello ghosts$/m, 'got third in all.mbox'); -- cgit v1.2.3-24-ge0c7