user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 8/8] reduce scope of mbox From_ line removal
  2020-04-18  3:38  7% [PATCH 0/8] some small yak shaving things Eric Wong
@ 2020-04-18  3:38  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-04-18  3:38 UTC (permalink / raw)
  To: meta

It's unnecessary overhead for anything which does Email::MIME
parsing.  It was never done for v2 indexing, even though v1->v2
conversions did NOT remove those From_ lines.  There was never a
need to remote From_ lines the v1 SearchIdx paths, either.

Hitting a /$INBOX_URL/$MSGID/T/ endpoint with an 18 message
thread reveals a ~0.5% speed improvement.  This will become
more apparent when we have a faster MIME parser.
---
 lib/PublicInbox/Inbox.pm     |  8 ++------
 lib/PublicInbox/Mbox.pm      |  7 +++++--
 lib/PublicInbox/NNTP.pm      |  2 ++
 lib/PublicInbox/SearchIdx.pm |  2 --
 t/psgi_v2.t                  | 28 ++++++++++++++++++----------
 5 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4bd82989..186eb420 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -311,9 +311,7 @@ sub nntp_usable {
 # for v1 users w/o SQLite only
 sub msg_by_path ($$;$) {
 	my ($self, $path, $ref) = @_;
-	my $str = git($self)->cat_file('HEAD:'.$path, $ref);
-	$$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
-	$str;
+	git($self)->cat_file('HEAD:'.$path, $ref);
 }
 
 sub msg_by_smsg ($$;$) {
@@ -324,9 +322,7 @@ sub msg_by_smsg ($$;$) {
 	return unless defined $smsg;
 	defined(my $blob = $smsg->{blob}) or return;
 
-	my $str = git($self)->cat_file($blob, $ref);
-	$$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
-	$str;
+	git($self)->cat_file($blob, $ref);
 }
 
 sub smsg_mime {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 16de1a72..9995140c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -106,8 +106,11 @@ sub msg_hdr ($$;$) {
 		'List-Post', "<mailto:$ibx->{-primary_address}>",
 	);
 	my $crlf = $header_obj->crlf;
-	my $buf = 'From mboxrd@z Thu Jan  1 00:00:00 1970' . $crlf .
-			$header_obj->as_string;
+	my $buf = $header_obj->as_string;
+	# fixup old bug from import (pre-a0c07cba0e5d8b6a)
+	$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+	$buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf;
+
 	for (my $i = 0; $i < @append; $i += 2) {
 		my $k = $append[$i];
 		my $v = $append[$i + 1];
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index d1f75f6f..c79f198b 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -506,6 +506,8 @@ sub set_art {
 sub msg_hdr_write ($$$) {
 	my ($self, $hdr, $body_follows) = @_;
 	$hdr = $hdr->as_string;
+	# fixup old bug from import (pre-a0c07cba0e5d8b6a)
+	$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
 	utf8::encode($hdr);
 	$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index d1290dc2..579b85e3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -553,8 +553,6 @@ sub do_cat_mail {
 	my ($git, $blob, $sizeref) = @_;
 	my $str = $git->cat_file($blob, $sizeref) or
 		die "BUG: $blob not found in $git->{git_dir}";
-	# fixup bugs from import:
-	$$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
 	PublicInbox::MIME->new($str);
 }
 
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index c4f80869..57017de1 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -26,16 +26,16 @@ my $new_mid;
 my $im = PublicInbox::V2Writable->new($ibx, 1);
 $im->{parallel} = 0;
 
-my $mime = PublicInbox::MIME->create(
-	header => [
-		From => 'a@example.com',
-		To => 'test@example.com',
-		Subject => 'this is a subject',
-		'Message-ID' => '<a-mid@b>',
-		Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
-	],
-	body => "hello world\n",
-);
+my $mime = PublicInbox::MIME->new(<<'EOF');
+From oldbug-pre-a0c07cba0e5d8b6a Fri Oct  2 00:00:00 1993
+From: a@example.com
+To: test@example.com
+Subject: this is a subject
+Message-ID: <a-mid@b>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+hello world
+EOF
 ok($im->add($mime), 'added one message');
 $mime->body_set("hello world!\n");
 
@@ -48,6 +48,10 @@ my $mids = mids($mime->header_obj);
 $new_mid = $mids->[1];
 $im->done;
 
+my $msg = $ibx->msg_by_mid('a-mid@b');
+like($$msg, qr/\AFrom oldbug/s,
+	'"From_" line stored to test old bug workaround');
+
 my $cfgpfx = "publicinbox.v2test";
 my $cfg = <<EOF;
 $cfgpfx.address=$ibx->{-primary_address}
@@ -63,6 +67,7 @@ test_psgi(sub { $www->call(@_) }, sub {
 		'got v2 description missing message');
 	$res = $cb->(GET('/v2test/a-mid@b/raw'));
 	$raw = $res->content;
+	unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted');
 	like($raw, qr/^hello world$/m, 'got first message');
 	like($raw, qr/^hello world!$/m, 'got second message');
 	@from_ = ($raw =~ m/^From /mg);
@@ -123,6 +128,7 @@ test_psgi(sub { $www->call(@_) }, sub {
 		my $out;
 		my $in = $res->content;
 		my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+		unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
 		like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
 		like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
 		like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
@@ -133,6 +139,7 @@ test_psgi(sub { $www->call(@_) }, sub {
 		$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
 		$in = $res->content;
 		$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+		unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
 		like($out, qr/^hello world$/m, 'got first in mbox POST');
 		like($out, qr/^hello world!$/m, 'got second in mbox POST');
 		like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
@@ -143,6 +150,7 @@ test_psgi(sub { $www->call(@_) }, sub {
 		$res = $cb->(GET('/v2test/all.mbox.gz'));
 		$in = $res->content;
 		$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+		unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
 		like($out, qr/^hello world$/m, 'got first in all.mbox');
 		like($out, qr/^hello world!$/m, 'got second in all.mbox');
 		like($out, qr/^hello ghosts$/m, 'got third in all.mbox');

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/8] some small yak shaving things
@ 2020-04-18  3:38  7% Eric Wong
  2020-04-18  3:38  5% ` [PATCH 8/8] reduce scope of mbox From_ line removal Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-04-18  3:38 UTC (permalink / raw)
  To: meta

Eric Wong (8):
  inboxwritable: mime_from_path: reuse in more places
  searchidx: die on cat-file failures
  inbox: do not memoize description or cloneurl if missing
  inbox: replace `eval {}' with `do {}' where appropriate
  favor `do {}' over `eval {}' for localized slurp
  wwwatomstream: move {emit_header} field to $self
  mbox: use per-message line-ending for From_ line
  reduce scope of mbox From_ line removal

 Documentation/mknews.perl               |  2 +-
 MANIFEST                                |  4 ++--
 lib/PublicInbox/Inbox.pm                | 27 ++++++++++++------------
 lib/PublicInbox/InboxWritable.pm        |  4 ++--
 lib/PublicInbox/Mbox.pm                 |  7 +++++--
 lib/PublicInbox/NNTP.pm                 |  2 ++
 lib/PublicInbox/SearchIdx.pm            | 14 +++++--------
 lib/PublicInbox/WatchMaildir.pm         |  6 +++---
 lib/PublicInbox/WwwAtomStream.pm        |  5 ++---
 script/public-inbox-edit                |  5 ++---
 script/public-inbox-learn               |  6 +++---
 script/public-inbox-mda                 |  2 +-
 script/public-inbox-purge               |  2 +-
 scripts/import_maildir                  |  4 ++--
 scripts/import_slrnspool                |  2 +-
 scripts/slrnspool2maildir               |  2 +-
 scripts/ssoma-replay                    |  5 +----
 t/inbox.t                               | 19 +++++++++++++++++
 t/{iso-2202-jp.mbox => iso-2202-jp.eml} |  1 -
 t/mda.t                                 | 10 ++++-----
 t/msg_iter.t                            | 18 ++++++----------
 t/nntpd-tls.t                           |  8 +++----
 t/psgi_v2.t                             | 28 ++++++++++++++++---------
 t/search.t                              | 12 ++++-------
 t/solver_git.t                          |  4 ++--
 t/{utf8.mbox => utf8.eml}               |  1 -
 t/v2reindex.t                           |  3 +--
 t/watch_maildir_v2.t                    |  2 +-
 28 files changed, 105 insertions(+), 100 deletions(-)
 rename t/{iso-2202-jp.mbox => iso-2202-jp.eml} (84%)
 rename t/{utf8.mbox => utf8.eml} (90%)

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-04-18  3:38  7% [PATCH 0/8] some small yak shaving things Eric Wong
2020-04-18  3:38  5% ` [PATCH 8/8] reduce scope of mbox From_ line removal Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).