about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2015-09-30 21:00:25 +0000
committer Eric Wong <e@80x24.org> 2015-09-30 21:09:23 +0000
commit 1d236e649df10515bf042fa2283eef509648d9c9 (patch)
tree 504b59e2c719f948b3f3224935ae212941d79a7c /lib/PublicInbox/SearchIdx.pm
parent 3393117e5ff8faef209bbf4988a59743f00b2a80 (diff)
download public-inbox-1d236e649df10515bf042fa2283eef509648d9c9.tar.gz
The document data of a search message already contains a good chunk
of the information needed to respond to OVER/XOVER commands quickly.
Expand on that and use the document data to implement OVER/XOVER
quickly.

This adds a dependency on Xapian being available for nntpd usage,
but is probably alright since nntpd is esoteric enough that anybody
willing to run nntpd will also want search functionality offered
by Xapian.

This also speeds up XHDR/HDR with the To: and Cc: headers and
:bytes/:lines article metadata used by some clients for header
displays and marking messages as read/unread.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 59
1 files changed, 43 insertions, 16 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 87243268..4b43369f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -36,8 +36,14 @@ sub new {
         $self;
 }
 
+sub add_val {
+        my ($doc, $col, $num) = @_;
+        $num = Search::Xapian::sortable_serialise($num);
+        $doc->add_value($col, $num);
+}
+
 sub add_message {
-        my ($self, $mime) = @_; # mime = Email::MIME object
+        my ($self, $mime, $bytes, $num) = @_; # mime = Email::MIME object
         my $db = $self->{xdb};
 
         my $doc_id;
@@ -80,8 +86,16 @@ sub add_message {
                         $doc->add_term(xpfx('path') . mid_compress($path));
                 }
 
-                my $ts = Search::Xapian::sortable_serialise($smsg->ts);
-                $doc->add_value(PublicInbox::Search::TS, $ts);
+                add_val($doc, &PublicInbox::Search::TS, $smsg->ts);
+
+                defined($num) and
+                        add_val($doc, &PublicInbox::Search::NUM, $num);
+
+                defined($bytes) and
+                        add_val($doc, &PublicInbox::Search::BYTES, $bytes);
+
+                add_val($doc, &PublicInbox::Search::LINES,
+                                $mime->body_raw =~ tr!\n!\n!);
 
                 my $tg = $self->term_generator;
 
@@ -91,7 +105,7 @@ sub add_message {
                 $tg->index_text($subj) if $subj;
                 $tg->increase_termpos;
 
-                $tg->index_text($smsg->from->format);
+                $tg->index_text($smsg->from);
                 $tg->increase_termpos;
 
                 $mime->walk_parts(sub {
@@ -224,7 +238,7 @@ sub link_message_to_parents {
                 }
         }
         if (@refs) {
-                $smsg->{references_sorted} = '<'.join('><', @refs).'>';
+                $smsg->{references} = '<'.join('> <', @refs).'>';
 
                 # first ref *should* be the thread root,
                 # but we can never trust clients to do the right thing
@@ -245,8 +259,8 @@ sub link_message_to_parents {
 }
 
 sub index_blob {
-        my ($self, $git, $mime) = @_;
-        $self->add_message($mime);
+        my ($self, $git, $mime, $bytes, $num) = @_;
+        $self->add_message($mime, $bytes, $num);
 }
 
 sub unindex_blob {
@@ -265,10 +279,22 @@ sub unindex_mm {
         $self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
 }
 
-sub index_both {
+sub index_mm2 {
+        my ($self, $git, $mime, $bytes) = @_;
+        my $num = $self->{mm}->num_for(mid_clean($mime->header('Message-ID')));
+        index_blob($self, $git, $mime, $bytes, $num);
+}
+
+sub unindex_mm2 {
         my ($self, $git, $mime) = @_;
-        index_blob($self, $git, $mime);
-        index_mm($self, $git, $mime);
+        $self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
+        unindex_blob($self, $git, $mime);
+}
+
+sub index_both {
+        my ($self, $git, $mime, $bytes) = @_;
+        my $num = index_mm($self, $git, $mime);
+        index_blob($self, $git, $mime, $bytes, $num);
 }
 
 sub unindex_both {
@@ -278,9 +304,9 @@ sub unindex_both {
 }
 
 sub do_cat_mail {
-        my ($git, $blob) = @_;
+        my ($git, $blob, $sizeref) = @_;
         my $mime = eval {
-                my $str = $git->cat_file($blob);
+                my $str = $git->cat_file($blob, $sizeref);
                 Email::MIME->new($str);
         };
         $@ ? undef : $mime;
@@ -304,12 +330,13 @@ sub rlog {
                     qw/--reverse --no-notes --no-color --raw -r --no-abbrev/,
                     $range);
         my $latest;
+        my $bytes;
         my $pid = open(my $log, '-|', @cmd) or
                 die('open` '.join(' ', @cmd) . " pipe failed: $!\n");
         while (my $line = <$log>) {
                 if ($line =~ /$addmsg/o) {
-                        my $mime = do_cat_mail($git, $1) or next;
-                        $add_cb->($self, $git, $mime);
+                        my $mime = do_cat_mail($git, $1, \$bytes) or next;
+                        $add_cb->($self, $git, $mime, $bytes);
                 } elsif ($line =~ /$delmsg/o) {
                         my $mime = do_cat_mail($git, $1) or next;
                         $del_cb->($self, $git, $mime);
@@ -354,11 +381,11 @@ sub _index_sync {
                         $mm->{dbh}->commit;
                         $mm->last_commit($lm) if defined $lm;
 
-                        goto xapian_only;
+                        $lx = $self->rlog($range, *index_mm2, *unindex_mm2);
+                        $db->set_metadata('last_commit', $lx) if defined $lx;
                 }
         } else {
                 # user didn't install DBD::SQLite and DBI
-xapian_only:
                 $lx = $self->rlog($range, *index_blob, *unindex_blob);
                 $db->set_metadata('last_commit', $lx) if defined $lx;
         }