about summary refs log tree commit homepage
path: root/lib/PublicInbox/Search.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2015-09-30 21:00:25 +0000
committer: Eric Wong <e@80x24.org> 2015-09-30 21:09:23 +0000
commit: 1d236e649df10515bf042fa2283eef509648d9c9 (patch)
tree: 504b59e2c719f948b3f3224935ae212941d79a7c /lib/PublicInbox/Search.pm
parent: 3393117e5ff8faef209bbf4988a59743f00b2a80 (diff)
download: public-inbox-1d236e649df10515bf042fa2283eef509648d9c9.tar.gz
The document data of a search message already contains a good chunk
of the information needed to respond to OVER/XOVER commands quickly.
Expand on that and use the document data to implement OVER/XOVER
quickly.

This adds a dependency on Xapian being available for nntpd usage,
but is probably alright since nntpd is esoteric enough that anybody
willing to run nntpd will also want search functionality offered
by Xapian.

This also speeds up XHDR/HDR with the To: and Cc: headers and
:bytes/:lines article metadata used by some clients for header
displays and marking messages as read/unread.
Diffstat (limited to 'lib/PublicInbox/Search.pm')
-rw-r--r-- lib/PublicInbox/Search.pm 37
1 files changed, 34 insertions, 3 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 695d56b3..1d13f4b8 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -4,7 +4,13 @@
 package PublicInbox::Search;
 use strict;
 use warnings;
-use constant TS => 0;
+
+# values for searching
+use constant TS => 0; # timestamp
+use constant NUM => 1; # NNTP article number
+use constant BYTES => 2; # :bytes as defined in RFC 3977
+use constant LINES => 3; # :lines as defined in RFC 3977
+
 use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
 use Email::MIME;
@@ -26,8 +32,9 @@ use constant {
         # 6 - preserve References: order in document data
         # 7 - remove references and inreplyto terms
         # 8 - remove redundant/unneeded document data
-        # 9 - disable Message-ID compression
-        SCHEMA_VERSION => 9,
+        # 9 - disable Message-ID compression (SHA-1)
+        # 10 - optimize doc for NNTP overviews
+        SCHEMA_VERSION => 10,
 
         # n.b. FLAG_PURE_NOT is expensive not suitable for a public website
         # as it could become a denial-of-service vector
@@ -168,6 +175,30 @@ sub date_range_processor {
         $_[0]->{drp} ||= Search::Xapian::DateValueRangeProcessor->new(TS);
 }
 
+sub num_range_processor { # range processor bound to the NUM value slot
+        $_[0]->{nrp} ||= Search::Xapian::NumberValueRangeProcessor->new(NUM); # created on first call, then reused from $self->{nrp}
+}
+
+# only used for NNTP server: fetch messages whose NUM value (article number) falls in $beg..$end
+sub query_xover {
+        my ($self, $beg, $end, $offset) = @_; # $offset is handed to get_mset as the first result to return
+        my $enquire = $self->enquire;
+        my $qp = Search::Xapian::QueryParser->new; # fresh parser; only the NUM range processor is registered on it
+        $qp->set_database($self->{xdb});
+        $qp->add_valuerangeprocessor($self->num_range_processor);
+        my $query = $qp->parse_query("$beg..$end", QP_FLAGS); # the "a..b" string is meant to be picked up by the range processor added above
+        $query = Search::Xapian::Query->new(OP_AND, $mail_query, $query); # $mail_query is defined outside this sub; both sub-queries must match
+        $enquire->set_query($query);
+        $enquire->set_sort_by_value(NUM, 0); # order by article number; NOTE(review): confirm 0 means ascending in the Xapian version used
+        my $limit = 200; # hard-coded maximum number of results per call
+        my $mset = $enquire->get_mset($offset, $limit);
+        my @msgs = map {
+                PublicInbox::SearchMsg->load_doc($_->get_document);
+        } $mset->items; # load_doc result for every item in the mset
+
+        { total => $mset->get_matches_estimated, msgs => \@msgs } # hashref return; total is Xapian's estimate for the whole query, not scalar(@msgs)
+}
+
 sub lookup_message {
         my ($self, $mid) = @_;
         $mid = mid_clean($mid);