* [PATCH 8/8] reduce scope of mbox From_ line removal
2020-04-18 3:38 7% [PATCH 0/8] some small yak shaving things Eric Wong
@ 2020-04-18 3:38 5% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-04-18 3:38 UTC (permalink / raw)
To: meta
It's unnecessary overhead for anything which does Email::MIME
parsing. It was never done for v2 indexing, even though v1->v2
conversions did NOT remove those From_ lines. There was never a
need to remove From_ lines in the v1 SearchIdx paths, either.
Hitting a /$INBOX_URL/$MSGID/T/ endpoint with an 18 message
thread reveals a ~0.5% speed improvement. This will become
more apparent when we have a faster MIME parser.
---
lib/PublicInbox/Inbox.pm | 8 ++------
lib/PublicInbox/Mbox.pm | 7 +++++--
lib/PublicInbox/NNTP.pm | 2 ++
lib/PublicInbox/SearchIdx.pm | 2 --
t/psgi_v2.t | 28 ++++++++++++++++++----------
5 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4bd82989..186eb420 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -311,9 +311,7 @@ sub nntp_usable {
# for v1 users w/o SQLite only
sub msg_by_path ($$;$) {
my ($self, $path, $ref) = @_;
- my $str = git($self)->cat_file('HEAD:'.$path, $ref);
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
- $str;
+ git($self)->cat_file('HEAD:'.$path, $ref);
}
sub msg_by_smsg ($$;$) {
@@ -324,9 +322,7 @@ sub msg_by_smsg ($$;$) {
return unless defined $smsg;
defined(my $blob = $smsg->{blob}) or return;
- my $str = git($self)->cat_file($blob, $ref);
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
- $str;
+ git($self)->cat_file($blob, $ref);
}
sub smsg_mime {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 16de1a72..9995140c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -106,8 +106,11 @@ sub msg_hdr ($$;$) {
'List-Post', "<mailto:$ibx->{-primary_address}>",
);
my $crlf = $header_obj->crlf;
- my $buf = 'From mboxrd@z Thu Jan 1 00:00:00 1970' . $crlf .
- $header_obj->as_string;
+ my $buf = $header_obj->as_string;
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+ $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf;
+
for (my $i = 0; $i < @append; $i += 2) {
my $k = $append[$i];
my $v = $append[$i + 1];
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index d1f75f6f..c79f198b 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -506,6 +506,8 @@ sub set_art {
sub msg_hdr_write ($$$) {
my ($self, $hdr, $body_follows) = @_;
$hdr = $hdr->as_string;
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
utf8::encode($hdr);
$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index d1290dc2..579b85e3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -553,8 +553,6 @@ sub do_cat_mail {
my ($git, $blob, $sizeref) = @_;
my $str = $git->cat_file($blob, $sizeref) or
die "BUG: $blob not found in $git->{git_dir}";
- # fixup bugs from import:
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
PublicInbox::MIME->new($str);
}
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index c4f80869..57017de1 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -26,16 +26,16 @@ my $new_mid;
my $im = PublicInbox::V2Writable->new($ibx, 1);
$im->{parallel} = 0;
-my $mime = PublicInbox::MIME->create(
- header => [
- From => 'a@example.com',
- To => 'test@example.com',
- Subject => 'this is a subject',
- 'Message-ID' => '<a-mid@b>',
- Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
- ],
- body => "hello world\n",
-);
+my $mime = PublicInbox::MIME->new(<<'EOF');
+From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993
+From: a@example.com
+To: test@example.com
+Subject: this is a subject
+Message-ID: <a-mid@b>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+hello world
+EOF
ok($im->add($mime), 'added one message');
$mime->body_set("hello world!\n");
@@ -48,6 +48,10 @@ my $mids = mids($mime->header_obj);
$new_mid = $mids->[1];
$im->done;
+my $msg = $ibx->msg_by_mid('a-mid@b');
+like($$msg, qr/\AFrom oldbug/s,
+ '"From_" line stored to test old bug workaround');
+
my $cfgpfx = "publicinbox.v2test";
my $cfg = <<EOF;
$cfgpfx.address=$ibx->{-primary_address}
@@ -63,6 +67,7 @@ test_psgi(sub { $www->call(@_) }, sub {
'got v2 description missing message');
$res = $cb->(GET('/v2test/a-mid@b/raw'));
$raw = $res->content;
+ unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($raw, qr/^hello world$/m, 'got first message');
like($raw, qr/^hello world!$/m, 'got second message');
@from_ = ($raw =~ m/^From /mg);
@@ -123,6 +128,7 @@ test_psgi(sub { $www->call(@_) }, sub {
my $out;
my $in = $res->content;
my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
@@ -133,6 +139,7 @@ test_psgi(sub { $www->call(@_) }, sub {
$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
$in = $res->content;
$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in mbox POST');
like($out, qr/^hello world!$/m, 'got second in mbox POST');
like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
@@ -143,6 +150,7 @@ test_psgi(sub { $www->call(@_) }, sub {
$res = $cb->(GET('/v2test/all.mbox.gz'));
$in = $res->content;
$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in all.mbox');
like($out, qr/^hello world!$/m, 'got second in all.mbox');
like($out, qr/^hello ghosts$/m, 'got third in all.mbox');
^ permalink raw reply related [relevance 5%]
* [PATCH 0/8] some small yak shaving things
@ 2020-04-18 3:38 7% Eric Wong
2020-04-18 3:38 5% ` [PATCH 8/8] reduce scope of mbox From_ line removal Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-04-18 3:38 UTC (permalink / raw)
To: meta
Eric Wong (8):
inboxwritable: mime_from_path: reuse in more places
searchidx: die on cat-file failures
inbox: do not memoize description or cloneurl if missing
inbox: replace `eval {}' with `do {}' where appropriate
favor `do {}' over `eval {}' for localized slurp
wwwatomstream: move {emit_header} field to $self
mbox: use per-message line-ending for From_ line
reduce scope of mbox From_ line removal
Documentation/mknews.perl | 2 +-
MANIFEST | 4 ++--
lib/PublicInbox/Inbox.pm | 27 ++++++++++++------------
lib/PublicInbox/InboxWritable.pm | 4 ++--
lib/PublicInbox/Mbox.pm | 7 +++++--
lib/PublicInbox/NNTP.pm | 2 ++
lib/PublicInbox/SearchIdx.pm | 14 +++++--------
lib/PublicInbox/WatchMaildir.pm | 6 +++---
lib/PublicInbox/WwwAtomStream.pm | 5 ++---
script/public-inbox-edit | 5 ++---
script/public-inbox-learn | 6 +++---
script/public-inbox-mda | 2 +-
script/public-inbox-purge | 2 +-
scripts/import_maildir | 4 ++--
scripts/import_slrnspool | 2 +-
scripts/slrnspool2maildir | 2 +-
scripts/ssoma-replay | 5 +----
t/inbox.t | 19 +++++++++++++++++
t/{iso-2202-jp.mbox => iso-2202-jp.eml} | 1 -
t/mda.t | 10 ++++-----
t/msg_iter.t | 18 ++++++----------
t/nntpd-tls.t | 8 +++----
t/psgi_v2.t | 28 ++++++++++++++++---------
t/search.t | 12 ++++-------
t/solver_git.t | 4 ++--
t/{utf8.mbox => utf8.eml} | 1 -
t/v2reindex.t | 3 +--
t/watch_maildir_v2.t | 2 +-
28 files changed, 105 insertions(+), 100 deletions(-)
rename t/{iso-2202-jp.mbox => iso-2202-jp.eml} (84%)
rename t/{utf8.mbox => utf8.eml} (90%)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-04-18 3:38 7% [PATCH 0/8] some small yak shaving things Eric Wong
2020-04-18 3:38 5% ` [PATCH 8/8] reduce scope of mbox From_ line removal Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).