From 0df58f99a71268c98bb21cab0a98ddd25a5b83b2 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 9 Sep 2016 00:01:31 +0000
Subject: search: index attachment filenames

And while we're at it, ensure searching inside displayable
attachment bodies works.
---
 lib/PublicInbox/Search.pm    |  3 ++-
 lib/PublicInbox/SearchIdx.pm |  4 ++++
 t/search.t                   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index ceee39af..0c056772 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -69,6 +69,7 @@ my %prob_prefix = (
 	tcf => 'XTO XCC A',
 	b => 'XNQ XQUOT',
 	bs => 'XNQ XQUOT S',
+	n => 'XFN',
 
 	# n.b.: leaving out "a:" alias for "tcf:" even though
 	# mairix supports it. It is only mentioned in passing in mairix(1)
@@ -77,7 +78,7 @@ my %prob_prefix = (
 	nq => 'XNQ',
 
 	# default:
-	'' => 'XMID S A XNQ XQUOT',
+	'' => 'XMID S A XNQ XQUOT XFN',
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fb68f4b1..23aef9f3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -181,6 +181,10 @@ sub add_message {
 		msg_iter($mime, sub {
 			my ($part, $depth, @idx) = @{$_[0]};
 			my $ct = $part->content_type || 'text/plain';
+			my $fn = $part->filename;
+			if (defined $fn && $fn ne '') {
+				$tg->index_text($fn, 1, 'XFN');
+			}
 
 			return if $ct =~ m!\btext/x?html\b!i;
 
diff --git a/t/search.t b/t/search.t
index bddb545a..cce3b9e2 100644
--- a/t/search.t
+++ b/t/search.t
@@ -386,6 +386,50 @@ sub filter_mids {
 	}
 }
 
+{
+	my $part1 = Email::MIME->create(
+		attributes => {
+			content_type => 'text/plain',
+			disposition => 'attachment',
+			charset => 'US-ASCII',
+			encoding => 'quoted-printable',
+			filename => 'attached_fart.txt',
+		},
+		body_str => 'inside the attachment',
+	);
+	my $part2 = Email::MIME->create(
+		attributes => {
+			content_type => 'text/plain',
+			disposition => 'attachment',
+			charset => 'US-ASCII',
+			encoding => 'quoted-printable',
+			filename => 'part_deux.txt',
+		},
+		body_str => 'inside another',
+	);
+	my $amsg = Email::MIME->create(
+		header_str => [
+			Subject => 'see attachment',
+			'Message-ID' => '',
+			From => 'John Smith ',
+			To => 'list@example.com',
+		],
+		parts => [ $part1, $part2 ],
+	);
+	ok($rw->add_message($amsg), 'added attachment');
+	$rw_commit->();
+	$ro->reopen;
+	my $n = $ro->query('n:attached_fart.txt');
+	is(scalar @{$n->{msgs}}, 1, 'got result for n:');
+	my $res = $ro->query('part_deux.txt');
+	is(scalar @{$res->{msgs}}, 1, 'got result without n:');
+	is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+		'same result with and without');
+	my $txt = $ro->query('"inside another"');
+	is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+		'search inside text attachments works');
+}
+
 done_testing();
 
 1;
-- 
cgit v1.2.3-24-ge0c7