about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-09-09 00:01:24 +0000
committer Eric Wong <e@80x24.org> 2016-09-09 00:02:18 +0000
commit a9c903a57ff9a18c56a53bcba4316eade423fef6 (patch)
tree 2c3499494c6305dc97b74371e575e632222dd19b
parent 766d9b1ef9e2e2c325c5dae9d17bfeb85c9d2f93 (diff)
download public-inbox-a9c903a57ff9a18c56a53bcba4316eade423fef6.tar.gz
The "bs:" (body + subject) and "b:" (body only) search prefixes are adapted from mairix(1).

We will also support searching explicitly for quoted vs
non-quoted text via "q:" and "nq:" prefixes since sometimes
readers will not care for quoted text.

In the future, we will support parsing diffs (perhaps when
repobrowse integration is complete).

Note: this roughly doubles the size of the Xapian database due
to the additional information, so this change may not be worth
it.
-rw-r--r-- lib/PublicInbox/Search.pm 18
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 17
-rw-r--r-- t/search.t 25
3 files changed, 51 insertions, 9 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 3b25b662..f74129d5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -58,16 +58,22 @@ my %bool_pfx_external = (
 );
 
 my %prob_prefix = (
-        s => 'S', # for mairix compatibility
+        # for mairix compatibility
+        s => 'S',
         m => 'Q', # 'mid' is exact, 'm' can do partial
-        f => 'A', # for mairix compatibility
-        t => 'XTO', # for mairix compatibility
-        tc => 'XTC', # for mairix compatibility
-        c => 'XCC', # for mairix compatibility
-        tcf => 'XTCF', # for mairix compatibility
+        f => 'A',
+        t => 'XTO',
+        tc => 'XTC',
+        c => 'XCC',
+        tcf => 'XTCF',
+        b => 'XBODY',
+        bs => 'XBS',
+
         # n.b.: leaving out "a:" alias for "tcf:" even though
         # mairix supports it.  It is only mentioned in passing in mairix(1)
         # and the extra two letters are not significantly longer.
+        q => 'XQUOT',
+        nq => 'XNQ',
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 37fefbea..cd27a294 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -173,7 +173,10 @@ sub add_message {
                 my $tg = $self->term_generator;
 
                 $tg->set_document($doc);
-                $tg->index_text($subj, 1, 'S') if $subj;
+                if ($subj) {
+                        $tg->index_text($subj, 1, 'S');
+                        $tg->index_text($subj, 1, 'XBS');
+                }
                 $tg->increase_termpos;
                 $tg->index_text($subj) if $subj;
                 $tg->increase_termpos;
@@ -199,13 +202,21 @@ sub add_message {
                                 }
                         }
                         if (@quot) {
-                                $tg->index_text(join("\n", @quot), 0);
+                                my $s = join("\n", @quot);
                                 @quot = ();
+                                $tg->index_text($s, 1, 'XQUOT');
+                                $tg->index_text($s, 0, 'XBS');
+                                $tg->index_text($s, 0, 'XBODY');
+                                $tg->index_text($s, 0);
                                 $tg->increase_termpos;
                         }
                         if (@orig) {
-                                $tg->index_text(join("\n", @orig));
+                                my $s = join("\n", @orig);
                                 @orig = ();
+                                $tg->index_text($s, 1, 'XNQ');
+                                $tg->index_text($s, 1, 'XBS');
+                                $tg->index_text($s, 1, 'XBODY');
+                                $tg->index_text($s);
                                 $tg->increase_termpos;
                         }
                 });
diff --git a/t/search.t b/t/search.t
index 7abaf832..bddb545a 100644
--- a/t/search.t
+++ b/t/search.t
@@ -361,6 +361,31 @@ sub filter_mids {
         }
 }
 
+{
+        $rw_commit->();
+        $ro->reopen;
+        my $res = $ro->query('b:hello');
+        is(scalar @{$res->{msgs}}, 0, 'no match on body search only');
+        $res = $ro->query('bs:smith');
+        is(scalar @{$res->{msgs}}, 0,
+                'no match on body+subject search for From');
+
+        $res = $ro->query('q:theatre');
+        is(scalar @{$res->{msgs}}, 1, 'only one quoted body');
+        like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body');
+
+        $res = $ro->query('nq:theatre');
+        is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body');
+        like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body');
+
+        foreach my $pfx (qw(b: bs:)) {
+                $res = $ro->query($pfx . 'theatre');
+                is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx");
+                like($res->{msgs}->[0]->from, qr/\ANon-Quoter/,
+                        "non-quoter first for $pfx");
+        }
+}
+
 done_testing();
 
 1;