From 478d03688600a4c7b50e205d15d76113e019f3cd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 20 Dec 2016 03:03:57 +0000 Subject: searchmsg: remove ensure_metadata Instead, only preload the ->mid field for threading, as we only need ->thread and ->path once in Search->get_thread (but we will need the ->mid field repeatedly). This more than doubles View->load_results performance according to thread-all on an inbox with over 300K messages. --- lib/PublicInbox/Search.pm | 6 ------ lib/PublicInbox/SearchMsg.pm | 39 ++++++++++++--------------------------- lib/PublicInbox/View.pm | 2 +- 3 files changed, 13 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 24cb2667..d4f6f77a 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); sub xpfx { $all_pfx{$_[0]} } -our %PFX2TERM_RMAP; -my %meta_pfx = (mid => 1, thread => 1, path => 1); -while (my ($k, $v) = each %all_pfx) { - $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k}; -} - my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail'); sub xdir { diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index d62f02c8..96406c6f 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -10,7 +10,6 @@ use Search::Xapian; use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean/; use PublicInbox::Address; -our $PFX2TERM_RE = undef; sub new { my ($class, $mime) = @_; @@ -121,29 +120,17 @@ sub references { defined $x ? 
$x : ''; } -sub ensure_metadata { - my ($self) = @_; +sub _get_term_val ($$$) { + my ($self, $pfx, $re) = @_; my $doc = $self->{doc}; my $end = $doc->termlist_end; - - unless (defined $PFX2TERM_RE) { - my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP); - $PFX2TERM_RE = qr/\A($or)/; - } - - while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) { - # ideally we'd move this out of the loop: - my $i = $doc->termlist_begin; - - $i->skip_to($pfx); - if ($i != $end) { - my $val = $i->get_termname; - - if ($val =~ s/$PFX2TERM_RE//o) { - $self->{$field} = $val; - } - } + my $i = $doc->termlist_begin; + $i->skip_to($pfx); + if ($i != $end) { + my $val = $i->get_termname; + $val =~ s/$re// and return $val; } + undef; } sub mid ($;$) { @@ -154,8 +141,8 @@ sub mid ($;$) { } elsif (my $rv = $self->{mid}) { $rv; } else { - $self->ensure_metadata; # needed for ghosts - $self->{mid} ||= $self->_extract_mid; + $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) || + $self->_extract_mid; } } @@ -194,16 +181,14 @@ sub thread_id { my ($self) = @_; my $tid = $self->{thread}; return $tid if defined $tid; - $self->ensure_metadata; - $self->{thread}; + $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup } sub path { my ($self) = @_; my $path = $self->{path}; return $path if defined $path; - $self->ensure_metadata; - $self->{path}; + $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path } 1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index fa47a16a..a50cb642 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -737,7 +737,7 @@ sub indent_for { sub load_results { my ($srch, $sres) = @_; my $msgs = delete $sres->{msgs}; - $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] }); + $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] }); } sub msg_timestamp { -- cgit v1.2.3-24-ge0c7