diff options
author | Eric Wong <e@80x24.org> | 2016-09-09 00:01:24 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-09-09 00:02:18 +0000 |
commit | a9c903a57ff9a18c56a53bcba4316eade423fef6 (patch) | |
tree | 2c3499494c6305dc97b74371e575e632222dd19b | |
parent | 766d9b1ef9e2e2c325c5dae9d17bfeb85c9d2f93 (diff) | |
download | public-inbox-a9c903a57ff9a18c56a53bcba4316eade423fef6.tar.gz |
"bs:" and "b:" are adapted from mairix(1). We will also support searching explicitly for quoted vs non-quoted text via "q:" and "nq:" prefixes, since sometimes readers will not care for quoted text. In the future, we will support parsing diffs (perhaps when repobrowse integration is complete). Note: this roughly doubles the size of the Xapian database due to the additional information, so this change may not be worth it.
-rw-r--r-- | lib/PublicInbox/Search.pm | 18 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 17 | ||||
-rw-r--r-- | t/search.t | 25 |
3 files changed, 51 insertions, 9 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 3b25b662..f74129d5 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -58,16 +58,22 @@ my %bool_pfx_external = ( ); my %prob_prefix = ( - s => 'S', # for mairix compatibility + # for mairix compatibility + s => 'S', m => 'Q', # 'mid' is exact, 'm' can do partial - f => 'A', # for mairix compatibility - t => 'XTO', # for mairix compatibility - tc => 'XTC', # for mairix compatibility - c => 'XCC', # for mairix compatibility - tcf => 'XTCF', # for mairix compatibility + f => 'A', + t => 'XTO', + tc => 'XTC', + c => 'XCC', + tcf => 'XTCF', + b => 'XBODY', + bs => 'XBS', + # n.b.: leaving out "a:" alias for "tcf:" even though # mairix supports it. It is only mentioned in passing in mairix(1) # and the extra two letters are not significantly longer. + q => 'XQUOT', + nq => 'XNQ', ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 37fefbea..cd27a294 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -173,7 +173,10 @@ sub add_message { my $tg = $self->term_generator; $tg->set_document($doc); - $tg->index_text($subj, 1, 'S') if $subj; + if ($subj) { + $tg->index_text($subj, 1, 'S'); + $tg->index_text($subj, 1, 'XBS'); + } $tg->increase_termpos; $tg->index_text($subj) if $subj; $tg->increase_termpos; @@ -199,13 +202,21 @@ sub add_message { } } if (@quot) { - $tg->index_text(join("\n", @quot), 0); + my $s = join("\n", @quot); @quot = (); + $tg->index_text($s, 1, 'XQUOT'); + $tg->index_text($s, 0, 'XBS'); + $tg->index_text($s, 0, 'XBODY'); + $tg->index_text($s, 0); $tg->increase_termpos; } if (@orig) { - $tg->index_text(join("\n", @orig)); + my $s = join("\n", @orig); @orig = (); + $tg->index_text($s, 1, 'XNQ'); + $tg->index_text($s, 1, 'XBS'); + $tg->index_text($s, 1, 'XBODY'); + $tg->index_text($s); $tg->increase_termpos; } }); @@ -361,6 +361,31 @@ 
sub filter_mids { } } +{ + $rw_commit->(); + $ro->reopen; + my $res = $ro->query('b:hello'); + is(scalar @{$res->{msgs}}, 0, 'no match on body search only'); + $res = $ro->query('bs:smith'); + is(scalar @{$res->{msgs}}, 0, + 'no match on body+subject search for From'); + + $res = $ro->query('q:theatre'); + is(scalar @{$res->{msgs}}, 1, 'only one quoted body'); + like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body'); + + $res = $ro->query('nq:theatre'); + is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body'); + like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body'); + + foreach my $pfx (qw(b: bs:)) { + $res = $ro->query($pfx . 'theatre'); + is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx"); + like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, + "non-quoter first for $pfx"); + } +} + done_testing(); 1; |