From 11820f6911d21ee1326d52d99a28063acf872911 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 28 Aug 2015 00:00:47 +0000
Subject: search: do not iterate through entire termlist

A document may have many terms, so this hurts performance if we
blindly iterate. Unfortunately, we can't rely on the order of
the termlist just yet, either, so we must repeatedly restart the
search for now until we're ready to bump schema versions.
---
 lib/PublicInbox/SearchMsg.pm | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'lib/PublicInbox/SearchMsg.pm')

diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a9f3180b..4ad8a0c9 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -110,7 +110,6 @@ sub references_sorted {
 sub ensure_metadata {
 	my ($self) = @_;
 	my $doc = $self->{doc};
-	my $i = $doc->termlist_begin;
 	my $end = $doc->termlist_end;
 
 	unless (defined $PFX2TERM_RE) {
@@ -118,12 +117,17 @@ sub ensure_metadata {
 		$PFX2TERM_RE = qr/\A($or)/;
 	}
 
-	for (; $i != $end; $i->inc) {
-		my $val = $i->get_termname;
+	while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) {
+		# ideally we'd move this out of the loop:
+		my $i = $doc->termlist_begin;
 
-		if ($val =~ s/$PFX2TERM_RE//o) {
-			my $field = $PublicInbox::Search::PFX2TERM_RMAP{$1};
-			$self->{$field} = $val;
+		$i->skip_to($pfx);
+		if ($i != $end) {
+			my $val = $i->get_termname;
+
+			if ($val =~ s/$PFX2TERM_RE//o) {
+				$self->{$field} = $val;
+			}
 		}
 	}
 }
-- 
cgit v1.2.3-24-ge0c7