about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-12-20 03:03:57 +0000
committer Eric Wong <e@80x24.org> 2016-12-20 08:24:25 +0000
commit 478d03688600a4c7b50e205d15d76113e019f3cd (patch)
tree 298f14286d684a51747c29232e9ddbc20715bab9 /lib
parent 1a75ba282c16f8c15b7891090d0997628d7021dc (diff)
download public-inbox-478d03688600a4c7b50e205d15d76113e019f3cd.tar.gz
Instead, only preload the ->mid field for threading,
as we only need ->thread and ->path once in Search->get_thread
(but we will need the ->mid field repeatedly).

This more than doubles View->load_results performance,
according to thread-all on an inbox with over 300K messages.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Search.pm 6
-rw-r--r-- lib/PublicInbox/SearchMsg.pm 39
-rw-r--r-- lib/PublicInbox/View.pm 2
3 files changed, 13 insertions, 34 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 24cb2667..d4f6f77a 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
 
 sub xpfx { $all_pfx{$_[0]} }
 
-our %PFX2TERM_RMAP;
-my %meta_pfx = (mid => 1, thread => 1, path => 1);
-while (my ($k, $v) = each %all_pfx) {
-        $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
-}
-
 my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
 
 sub xdir {
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index d62f02c8..96406c6f 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -10,7 +10,6 @@ use Search::Xapian;
 use Date::Parse qw/str2time/;
 use PublicInbox::MID qw/mid_clean/;
 use PublicInbox::Address;
-our $PFX2TERM_RE = undef;
 
 sub new {
         my ($class, $mime) = @_;
@@ -121,29 +120,17 @@ sub references {
         defined $x ? $x : '';
 }
 
-sub ensure_metadata {
-        my ($self) = @_;
+sub _get_term_val ($$$) {
+        my ($self, $pfx, $re) = @_;
         my $doc = $self->{doc};
         my $end = $doc->termlist_end;
-
-        unless (defined $PFX2TERM_RE) {
-                my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP);
-                $PFX2TERM_RE = qr/\A($or)/;
-        }
-
-        while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) {
-                # ideally we'd move this out of the loop:
-                my $i = $doc->termlist_begin;
-
-                $i->skip_to($pfx);
-                if ($i != $end) {
-                        my $val = $i->get_termname;
-
-                        if ($val =~ s/$PFX2TERM_RE//o) {
-                                $self->{$field} = $val;
-                        }
-                }
+        my $i = $doc->termlist_begin;
+        $i->skip_to($pfx);
+        if ($i != $end) {
+                my $val = $i->get_termname;
+                $val =~ s/$re// and return $val;
         }
+        undef;
 }
 
 sub mid ($;$) {
@@ -154,8 +141,8 @@ sub mid ($;$) {
         } elsif (my $rv = $self->{mid}) {
                 $rv;
         } else {
-                $self->ensure_metadata; # needed for ghosts
-                $self->{mid} ||= $self->_extract_mid;
+                $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
+                                $self->_extract_mid;
         }
 }
 
@@ -194,16 +181,14 @@ sub thread_id {
         my ($self) = @_;
         my $tid = $self->{thread};
         return $tid if defined $tid;
-        $self->ensure_metadata;
-        $self->{thread};
+        $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup
 }
 
 sub path {
         my ($self) = @_;
         my $path = $self->{path};
         return $path if defined $path;
-        $self->ensure_metadata;
-        $self->{path};
+        $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path
 }
 
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index fa47a16a..a50cb642 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -737,7 +737,7 @@ sub indent_for {
 sub load_results {
         my ($srch, $sres) = @_;
         my $msgs = delete $sres->{msgs};
-        $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] });
+        $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] });
 }
 
 sub msg_timestamp {