diff options
author | Eric Wong <e@yhbt.net> | 2020-05-07 03:00:09 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-05-09 00:54:34 +0000 |
commit | b714ab45d30d6f0298d73ef4281c1d0263a02493 (patch) | |
tree | f5d3c3b64d9e04811cb372785ca950bddf301307 | |
parent | c2bc9ebcb770a27823d8e989707f434826333b0e (diff) | |
download | public-inbox-b714ab45d30d6f0298d73ef4281c1d0263a02493.tar.gz |
We'll support both probabilistic matches via `l:' and boolean (exact) matches via `lid:', similar to how both `m:' and `mid:' are supported. Only the text inside angle brackets (`<' and `>') is supported, since I'm not sure if there's value in searching on the optional phrases (which would require decoding with ->header_str instead of ->header_raw).
-rw-r--r-- | lib/PublicInbox/Search.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 6 | ||||
-rw-r--r-- | t/search.t | 31 |
3 files changed, 46 insertions, 0 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 86a6ad67..b7db2b9f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -77,11 +77,17 @@ use constant { # 15 - see public-inbox-v2-format(5) # further bumps likely unnecessary, we'll suggest in-place # "--reindex" use for further fixes and tweaks + # + # public-inbox v1.5.0 adds (still SCHEMA_VERSION=15): + # * "lid:" and "l:" for List-Id searches SCHEMA_VERSION => 15, }; +# note: the non-X term prefix allocations are shared with +# Xapian omega, see xapian-applications/omega/docs/termprefixes.rst my %bool_pfx_external = ( mid => 'Q', # Message-ID (full/exact), this is mostly uniQue + lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', @@ -92,6 +98,7 @@ my %prob_prefix = ( # for mairix compatibility s => 'S', m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial + l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial f => 'A', t => 'XTO', tc => 'XTO XCC', @@ -134,6 +141,8 @@ EOF 'f:' => 'match within the From header', 'a:' => 'match within the To, Cc, and From headers', 'tc:' => 'match within the To and Cc headers', + 'lid:' => 'exact contents of the List-Id', + 'l:' => 'partial match contents of the List-Id header', 'bs:' => 'match within the Subject and body', 'dfn:' => 'match filename from diff', 'dfa:' => 'match diff removed (-) lines', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 25118f43..998341a7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -352,6 +352,12 @@ sub add_xapian ($$$$) { } } $doc->add_boolean_term('Q' . $_) foreach @$mids; + for my $l ($hdr->header_raw('List-Id')) { + $l =~ /<([^>]+)>/ or next; + my $lid = $1; + $doc->add_boolean_term('G' . 
$lid); + index_text($self, $lid, 1, 'XL'); # probabilistic + } $self->{xdb}->replace_document($smsg->{num}, $doc); } @@ -66,6 +66,7 @@ Subject: Hello world Message-ID: <root@s> From: John Smith <js@example.com> To: list@example.com +List-Id: I'm not mad <i.m.just.bored> \m/ EOF @@ -77,6 +78,7 @@ Message-ID: <last@s> From: John Smith <js@example.com> To: list@example.com Cc: foo@example.com +List-Id: there's nothing <left.for.me.to.do> goodbye forever :< EOF @@ -448,6 +450,35 @@ EOF is($ro->query("m:Pine m:LNX m:10010260936330", {mset=>1})->size, 1); }); +{ # List-Id searching + my $found = $ro->query('lid:i.m.just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'got expected mid on exact lid: search'); + + $found = $ro->query('lid:just.bored'); + is_deeply($found, [], 'got nothing on lid: search'); + + $found = $ro->query('lid:*.just.bored'); + is_deeply($found, [], 'got nothing on lid: search'); + + $found = $ro->query('l:i.m.just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'probabilistic search works on full List-Id contents'); + + $found = $ro->query('l:just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'probabilistic search works on partial List-Id contents'); + + $found = $ro->query('lid:mad'); + is_deeply($found, [], 'no match on phrase with lid:'); + + $found = $ro->query('lid:bored'); + is_deeply($found, [], 'no match on partial List-Id with lid:'); + + $found = $ro->query('l:nothing'); + is_deeply($found, [], 'matched on phrase with l:'); +} + done_testing(); 1; |