about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2018-07-29 09:34:41 +0000
committer Eric Wong <e@80x24.org> 2018-07-29 09:43:06 +0000
commit 9015a8af2909b0071e54b332bfafc4e5b8d0f6c2 (patch)
tree 7db2a5e86ef3cbf9cd18f948be20df52543c837c
parent 31eda8c90633766692156c9c0af061dad6299c84 (diff)
download public-inbox-9015a8af2909b0071e54b332bfafc4e5b8d0f6c2.tar.gz
We must not clobber the original message string, as Email::MIME(*)
still needs it for iterating through parts in SearchIdx (but not
when handing it as a raw string to git-fast-import).

I've noticed message bodies (especially dfpre/dfpost) were not
getting indexed when going through -mda (no problems with
-watch).  This also did not affect v1 repos, since indexing is a
separate process for v1 and requires re-reading the data from
git.

(*) tested Email::MIME 1.937 on Debian stretch
-rw-r--r-- MANIFEST 1
-rwxr-xr-x script/public-inbox-mda 1
-rw-r--r-- t/data/0001.patch 46
-rw-r--r-- t/v2mda.t 10
-rw-r--r-- t/watch_maildir_v2.t 15
5 files changed, 72 insertions, 1 deletion
diff --git a/MANIFEST b/MANIFEST
index fd74a435..003c3c5b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -146,6 +146,7 @@ t/config.t
 t/config_limiter.t
 t/content_id.t
 t/convert-compact.t
+t/data/0001.patch
 t/emergency.t
 t/fail-bin/spamc
 t/feed.t
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 2a315378..2b7f298c 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -51,7 +51,6 @@ $emm = PublicInbox::Emergency->new($emergency);
 $emm->prepare(\$str);
 $ems = $ems->abort;
 my $mime = PublicInbox::MIME->new(\$str);
-$str = '';
 do_exit(0) unless $spam_ok;
 
 my $fcfg = $dst->{filter} || '';
diff --git a/t/data/0001.patch b/t/data/0001.patch
new file mode 100644
index 00000000..b7964a2b
--- /dev/null
+++ b/t/data/0001.patch
@@ -0,0 +1,46 @@
+From: Eric Wong <e@80x24.org>
+Date: Fri, 20 Jul 2018 07:21:41 +0000
+To: test@example.com
+Subject: [PATCH] search: use boolean prefix for filenames in diffs, too
+Message-ID: <20180720072141.GA15957@example>
+
+Filenames within a project tend to be reasonably stable within a
+project and I plan on having automated searches hit these.
+
+Also, using no term prefix at all (the default for searching)
+still allows probabilistic searches on everything that's in a
+"git diff", including the blob names which were just made
+boolean.
+
+Note, attachment filenames ("n:" prefix) will stil use
+probabilistic search, as they're hardly standardized.
+---
+ lib/PublicInbox/Search.pm | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
+index 090d998b6c2c..6e006fd73b1d 100644
+--- a/lib/PublicInbox/Search.pm
++++ b/lib/PublicInbox/Search.pm
+@@ -53,6 +53,9 @@ my %bool_pfx_external = (
+         dfpre => 'XDFPRE',
+         dfpost => 'XDFPOST',
+         dfblob => 'XDFPRE XDFPOST',
++        dfn => 'XDFN',
++        dfa => 'XDFA',
++        dfb => 'XDFB',
+ );
+
+ my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
+@@ -72,9 +75,6 @@ my %prob_prefix = (
+
+         q => 'XQUOT',
+         nq => $non_quoted_body,
+-        dfn => 'XDFN',
+-        dfa => 'XDFA',
+-        dfb => 'XDFB',
+         dfhh => 'XDFHH',
+         dfctx => 'XDFCTX',
+
+--
+^_^
diff --git a/t/v2mda.t b/t/v2mda.t
index 7df3a43a..61457208 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -65,4 +65,14 @@ my $msgs = $ibx->search->query('');
 my $saved = $ibx->smsg_mime($msgs->[0]);
 is($saved->{mime}->as_string, $mime->as_string, 'injected message');
 
+my $patch = 't/data/0001.patch';
+open my $fh, '<', $patch or die "failed to open $patch: $!\n";
+$rdr = { 0 => fileno($fh) };
+ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr),
+        'mda delivered a patch');
+my $post = $ibx->search->reopen->query('dfpost:6e006fd7');
+is(scalar(@$post), 1, 'got one result for dfpost');
+my $pre = $ibx->search->query('dfpre:090d998');
+is(scalar(@$pre), 1, 'got one result for dfpre');
+is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases');
 done_testing();
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index a76e413f..fc002dc1 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -120,6 +120,21 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
         is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
         my $mref = $ibx->msg_by_smsg($msgs->[0]);
         like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+        delete $config->{'publicinboxwatch.spamcheck'};
+}
+
+{
+        my $patch = 't/data/0001.patch';
+        open my $fh, '<', $patch or die "failed to open $patch: $!\n";
+        $msg = eval { local $/; <$fh> };
+        PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+        PublicInbox::WatchMaildir->new($config)->scan('full');
+        ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
+        is($nr, 1, 'diff postimage found');
+        my $post = $msgs->[0];
+        ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c');
+        is($nr, 1, 'diff preimage found');
+        is($post->{blob}, $msgs->[0]->{blob}, 'same message');
 }
 
 done_testing;