diff options
author | Eric Wong <e@yhbt.net> | 2020-05-07 03:00:09 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-05-09 00:54:34 +0000 |
commit | b714ab45d30d6f0298d73ef4281c1d0263a02493 (patch) | |
tree | f5d3c3b64d9e04811cb372785ca950bddf301307 /lib/PublicInbox | |
parent | c2bc9ebcb770a27823d8e989707f434826333b0e (diff) | |
download | public-inbox-b714ab45d30d6f0298d73ef4281c1d0263a02493.tar.gz |
We'll support both probabilistic matches via `l:' and boolean matches via `lid:' for exact matches, similar to how both `m:' and `mid:' are supported. Only text inside angle brackets (`<' and `>') is supported, since I'm not sure there's value in searching on the optional phrase (which would require decoding with ->header_str instead of ->header_raw).
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Search.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 6 |
2 files changed, 15 insertions, 0 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 86a6ad67..b7db2b9f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -77,11 +77,17 @@ use constant { # 15 - see public-inbox-v2-format(5) # further bumps likely unnecessary, we'll suggest in-place # "--reindex" use for further fixes and tweaks + # + # public-inbox v1.5.0 adds (still SCHEMA_VERSION=15): + # * "lid:" and "l:" for List-Id searches SCHEMA_VERSION => 15, }; +# note: the non-X term prefix allocations are shared with +# Xapian omega, see xapian-applications/omega/docs/termprefixes.rst my %bool_pfx_external = ( mid => 'Q', # Message-ID (full/exact), this is mostly uniQue + lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', @@ -92,6 +98,7 @@ my %prob_prefix = ( # for mairix compatibility s => 'S', m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial + l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial f => 'A', t => 'XTO', tc => 'XTO XCC', @@ -134,6 +141,8 @@ EOF 'f:' => 'match within the From header', 'a:' => 'match within the To, Cc, and From headers', 'tc:' => 'match within the To and Cc headers', + 'lid:' => 'exact contents of the List-Id', + 'l:' => 'partial match contents of the List-Id header', 'bs:' => 'match within the Subject and body', 'dfn:' => 'match filename from diff', 'dfa:' => 'match diff removed (-) lines', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 25118f43..998341a7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -352,6 +352,12 @@ sub add_xapian ($$$$) { } } $doc->add_boolean_term('Q' . $_) foreach @$mids; + for my $l ($hdr->header_raw('List-Id')) { + $l =~ /<([^>]+)>/ or next; + my $lid = $1; + $doc->add_boolean_term('G' . $lid); + index_text($self, $lid, 1, 'XL'); # probabilistic + } $self->{xdb}->replace_document($smsg->{num}, $doc); } |