From 9015a8af2909b0071e54b332bfafc4e5b8d0f6c2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 29 Jul 2018 09:34:41 +0000 Subject: mda: v2: ensure message bodies are indexed We must not clobber the original message string, as Email::MIME(*) still needs it for iterating through parts in SearchIdx (but not when handing it as a raw string to git-fast-import). I've noticed message bodies (especially dfpre/dfpost) were not getting indexed when going through -mda (no problems with -watch). This also did not affect v1 repos, since indexing is a separate process for v1 and requires re-reading the data from git. (*) tested Email::MIME 1.937 on Debian stretch --- t/data/0001.patch | 46 ++++++++++++++++++++++++++++++++++++++++++++++ t/v2mda.t | 10 ++++++++++ t/watch_maildir_v2.t | 15 +++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 t/data/0001.patch (limited to 't') diff --git a/t/data/0001.patch b/t/data/0001.patch new file mode 100644 index 00000000..b7964a2b --- /dev/null +++ b/t/data/0001.patch @@ -0,0 +1,46 @@ +From: Eric Wong +Date: Fri, 20 Jul 2018 07:21:41 +0000 +To: test@example.com +Subject: [PATCH] search: use boolean prefix for filenames in diffs, too +Message-ID: <20180720072141.GA15957@example> + +Filenames within a project tend to be reasonably stable within a +project and I plan on having automated searches hit these. + +Also, using no term prefix at all (the default for searching) +still allows probabilistic searches on everything that's in a +"git diff", including the blob names which were just made +boolean. + +Note, attachment filenames ("n:" prefix) will stil use +probabilistic search, as they're hardly standardized. 
+--- + lib/PublicInbox/Search.pm | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm +index 090d998b6c2c..6e006fd73b1d 100644 +--- a/lib/PublicInbox/Search.pm ++++ b/lib/PublicInbox/Search.pm +@@ -53,6 +53,9 @@ my %bool_pfx_external = ( + dfpre => 'XDFPRE', + dfpost => 'XDFPOST', + dfblob => 'XDFPRE XDFPOST', ++ dfn => 'XDFN', ++ dfa => 'XDFA', ++ dfb => 'XDFB', + ); + + my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; +@@ -72,9 +75,6 @@ my %prob_prefix = ( + + q => 'XQUOT', + nq => $non_quoted_body, +- dfn => 'XDFN', +- dfa => 'XDFA', +- dfb => 'XDFB', + dfhh => 'XDFHH', + dfctx => 'XDFCTX', + +-- +^_^ diff --git a/t/v2mda.t b/t/v2mda.t index 7df3a43a..61457208 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -65,4 +65,14 @@ my $msgs = $ibx->search->query(''); my $saved = $ibx->smsg_mime($msgs->[0]); is($saved->{mime}->as_string, $mime->as_string, 'injected message'); +my $patch = 't/data/0001.patch'; +open my $fh, '<', $patch or die "failed to open $patch: $!\n"; +$rdr = { 0 => fileno($fh) }; +ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr), + 'mda delivered a patch'); +my $post = $ibx->search->reopen->query('dfpost:6e006fd7'); +is(scalar(@$post), 1, 'got one result for dfpost'); +my $pre = $ibx->search->query('dfpre:090d998'); +is(scalar(@$pre), 1, 'got one result for dfpre'); +is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); done_testing(); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index a76e413f..fc002dc1 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -120,6 +120,21 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); is($nr, 1, 'inbox has one mail after spamc OK-ed a message'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); + delete $config->{'publicinboxwatch.spamcheck'}; +} + +{ + my $patch = 't/data/0001.patch'; 
+ open my $fh, '<', $patch or die "failed to open $patch: $!\n"; + $msg = eval { local $/; <$fh> }; + PublicInbox::Emergency->new($maildir)->prepare(\$msg); + PublicInbox::WatchMaildir->new($config)->scan('full'); + ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7'); + is($nr, 1, 'diff postimage found'); + my $post = $msgs->[0]; + ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c'); + is($nr, 1, 'diff preimage found'); + is($post->{blob}, $msgs->[0]->{blob}, 'same message'); } done_testing; -- cgit v1.2.3-24-ge0c7