From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C0EC620719 for ; Fri, 9 Sep 2016 00:01:36 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 10/10] search: index attachment filenames Date: Fri, 9 Sep 2016 00:01:31 +0000 Message-Id: <20160909000131.18584-11-e@80x24.org> In-Reply-To: <20160909000131.18584-1-e@80x24.org> References: <20160909000131.18584-1-e@80x24.org> List-Id: And while we're at it, ensure searching inside displayable attachment bodies works. --- lib/PublicInbox/Search.pm | 3 ++- lib/PublicInbox/SearchIdx.pm | 4 ++++ t/search.t | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index ceee39a..0c05677 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -69,6 +69,7 @@ my %prob_prefix = ( tcf => 'XTO XCC A', b => 'XNQ XQUOT', bs => 'XNQ XQUOT S', + n => 'XFN', # n.b.: leaving out "a:" alias for "tcf:" even though # mairix supports it. It is only mentioned in passing in mairix(1) @@ -77,7 +78,7 @@ my %prob_prefix = ( nq => 'XNQ', # default: - '' => 'XMID S A XNQ XQUOT', + '' => 'XMID S A XNQ XQUOT XFN', ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index fb68f4b..23aef9f 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -181,6 +181,10 @@ sub add_message { msg_iter($mime, sub { my ($part, $depth, @idx) = @{$_[0]}; my $ct = $part->content_type || 'text/plain'; + my $fn = $part->filename; + if (defined $fn && $fn ne '') { + $tg->index_text($fn, 1, 'XFN'); + } return if $ct =~ m!\btext/x?html\b!i; diff --git a/t/search.t b/t/search.t index bddb545..cce3b9e 100644 --- a/t/search.t +++ b/t/search.t @@ -386,6 +386,50 @@ sub filter_mids { } } +{ + my $part1 = Email::MIME->create( + attributes => { + content_type => 'text/plain', + disposition => 'attachment', + charset => 'US-ASCII', + encoding => 'quoted-printable', + filename => 'attached_fart.txt', + }, + body_str => 'inside the attachment', + ); + my $part2 = Email::MIME->create( + attributes => { + content_type => 'text/plain', + disposition => 'attachment', + charset => 'US-ASCII', + encoding => 'quoted-printable', + filename => 'part_deux.txt', + }, + body_str => 'inside another', + ); + my $amsg = Email::MIME->create( + header_str => [ + Subject => 'see attachment', + 'Message-ID' => '', + From => 'John Smith ', + To => 'list@example.com', + ], + parts => [ $part1, $part2 ], + ); + ok($rw->add_message($amsg), 'added attachment'); + $rw_commit->(); + $ro->reopen; + my $n = $ro->query('n:attached_fart.txt'); + is(scalar @{$n->{msgs}}, 1, 'got result for n:'); + my $res = $ro->query('part_deux.txt'); + is(scalar @{$res->{msgs}}, 1, 'got result without n:'); + is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + 'same result with and without'); + my $txt = $ro->query('"inside another"'); + is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + 'search inside text attachments works'); +} + done_testing(); 1; -- EW