From 52f9edfb756676b471deac69e5d55df1933aa528 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 10 Feb 2017 01:51:05 +0000 Subject: search: remove unnecessary abstractions and functionality This simplifies the code a bit and reduces the translation overhead for looking directly at data from tools shipped with Xapian. While we're at it, fix thread-all.t :) --- lib/PublicInbox/RepoGitSearch.pm | 2 -- lib/PublicInbox/RepoGitSearchIdx.pm | 1 - lib/PublicInbox/Search.pm | 31 +++++++++---------------------- lib/PublicInbox/SearchIdx.pm | 20 +++++++++----------- lib/PublicInbox/SearchMsg.pm | 2 +- t/search.t | 9 +-------- t/thread-all.t | 5 +++-- 7 files changed, 23 insertions(+), 47 deletions(-) diff --git a/lib/PublicInbox/RepoGitSearch.pm b/lib/PublicInbox/RepoGitSearch.pm index d3760449..36e3fab3 100644 --- a/lib/PublicInbox/RepoGitSearch.pm +++ b/lib/PublicInbox/RepoGitSearch.pm @@ -60,8 +60,6 @@ EOF ); chomp @HELP; -my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); - sub new { my ($class, $git_dir, $repo_dir) = @_; $repo_dir ||= "$git_dir/public-inbox"; diff --git a/lib/PublicInbox/RepoGitSearchIdx.pm b/lib/PublicInbox/RepoGitSearchIdx.pm index cfd91701..67d7ec3f 100644 --- a/lib/PublicInbox/RepoGitSearchIdx.pm +++ b/lib/PublicInbox/RepoGitSearchIdx.pm @@ -13,7 +13,6 @@ use base qw(PublicInbox::RepoGitSearch); # base is read-only use POSIX qw(strftime); use PublicInbox::Git; use PublicInbox::GitIdx; -*xpfx = *PublicInbox::RepoGitSearch::xpfx; use constant { Z40 => ('0' x 40), STATE_GPGSIG => -0x80000000, diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 8c72fa17..02d1827e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -55,8 +55,6 @@ my %bool_pfx_internal = ( ); my %bool_pfx_external = ( - # do we still need these? probably not.. - path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) ); @@ -106,11 +104,7 @@ chomp @HELP; # da (diff a/ removed lines) # db (diff b/ added lines) -my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); - -sub xpfx { $all_pfx{$_[0]} } - -my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail'); +my $mail_query = Search::Xapian::Query->new('T' . 'mail'); sub xdir { my (undef, $git_dir) = @_; @@ -145,11 +139,11 @@ sub get_thread { my $smsg = eval { $self->lookup_message($mid) }; return { total => 0, msgs => [] } unless $smsg; - my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id); + my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id); my $path = $smsg->path; if (defined $path && $path ne '') { my $path = id_compress($smsg->path); - my $qsub = Search::Xapian::Query->new(xpfx('path').$path); + my $qsub = Search::Xapian::Query->new('XPATH' . $path); $qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub); } $opts ||= {}; @@ -278,7 +272,7 @@ sub lookup_message { my ($self, $mid) = @_; $mid = mid_clean($mid); - my $doc_id = $self->find_unique_doc_id('mid', $mid); + my $doc_id = $self->find_unique_doc_id('Q' . $mid); my $smsg; if (defined $doc_id) { # raises on error: @@ -298,9 +292,9 @@ sub lookup_mail { # no ghosts! } sub find_unique_doc_id { - my ($self, $term, $value) = @_; + my ($self, $termval) = @_; - my ($begin, $end) = $self->find_doc_ids($term, $value); + my ($begin, $end) = $self->find_doc_ids($termval); return undef if $begin->equal($end); # not found @@ -308,23 +302,16 @@ sub find_unique_doc_id { # sanity check $begin->inc; - $begin->equal($end) or die "Term '$term:$value' is not unique\n"; + $begin->equal($end) or die "Term '$termval' is not unique\n"; $rv; } # returns begin and end PostingIterator sub find_doc_ids { - my ($self, $term, $value) = @_; - - $self->find_doc_ids_for_term(xpfx($term) . $value); -} - -# returns begin and end PostingIterator -sub find_doc_ids_for_term { - my ($self, $term) = @_; + my ($self, $termval) = @_; my $db = $self->{xdb}; - ($db->postlist_begin($term), $db->postlist_end($term)); + ($db->postlist_begin($termval), $db->postlist_end($termval)); } # normalize subjects so they are suitable as pathnames for URLs diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index c0ea3c1e..2548ddf2 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -20,7 +20,6 @@ use PublicInbox::GitIdx; use Carp qw(croak); use POSIX qw(strftime); require PublicInbox::Git; -*xpfx = *PublicInbox::Search::xpfx; use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian @@ -153,12 +152,12 @@ sub add_message { } $smsg = PublicInbox::SearchMsg->new($mime); my $doc = $smsg->{doc}; - $doc->add_term(xpfx('mid') . $mid); + $doc->add_term('Q' . $mid); my $subj = $smsg->subject; if ($subj ne '') { my $path = $self->subject_path($subj); - $doc->add_term(xpfx('path') . id_compress($path)); + $doc->add_term('XPATH' . id_compress($path)); } add_values($smsg, $bytes, $num); @@ -343,7 +342,7 @@ sub link_message { } else { $tid = $self->next_thread_id; } - $doc->add_term(xpfx('thread') . $tid); + $doc->add_term('G' . $tid); } sub index_blob { @@ -553,9 +552,9 @@ sub create_ghost { my $tid = $self->next_thread_id; my $doc = Search::Xapian::Document->new; - $doc->add_term(xpfx('mid') . $mid); - $doc->add_term(xpfx('thread') . $tid); - $doc->add_term(xpfx('type') . 'ghost'); + $doc->add_term('Q' . $mid); + $doc->add_term('G' . $tid); + $doc->add_term('T' . 'ghost'); my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); $self->{xdb}->add_document($doc); @@ -566,15 +565,14 @@ sub create_ghost { sub merge_threads { my ($self, $winner_tid, $loser_tid) = @_; return if $winner_tid == $loser_tid; - my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid); - my $thread_pfx = xpfx('thread'); + my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid); my $db = $self->{xdb}; for (; $head != $tail; $head->inc) { my $docid = $head->get_docid; my $doc = $db->get_document($docid); - $doc->remove_term($thread_pfx . $loser_tid); - $doc->add_term($thread_pfx . $winner_tid); + $doc->remove_term('G' . $loser_tid); + $doc->add_term('G' . $winner_tid); $db->replace_document($docid, $doc); } } diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index b8eee665..a19d45db 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -14,7 +14,7 @@ use PublicInbox::Address; sub new { my ($class, $mime) = @_; my $doc = Search::Xapian::Document->new; - $doc->add_term(PublicInbox::Search::xpfx('type') . 'mail'); + $doc->add_term('T' . 'mail'); bless { type => 'mail', doc => $doc, mime => $mime }, $class; } diff --git a/t/search.t b/t/search.t index 2e29b827..000a0385 100644 --- a/t/search.t +++ b/t/search.t @@ -70,15 +70,8 @@ sub filter_mids { is($found->mid, 'root@s', 'mid set correctly'); ok(int($found->thread_id) > 0, 'thread_id is an integer'); + my ($res, @res); my @exp = sort qw(root@s last@s); - my $res = $ro->query("path:hello_world"); - my @res = filter_mids($res); - is_deeply(\@res, \@exp, 'got expected results for path: match'); - - foreach my $p (qw(hello hello_ hello_world2 hello_world_)) { - $res = $ro->query("path:$p"); - is($res->{total}, 0, "path variant `$p' does not match"); - } $res = $ro->query('s:(Hello world)'); @res = filter_mids($res); diff --git a/t/thread-all.t b/t/thread-all.t index 8ccf4f8c..b1f9b47c 100644 --- a/t/thread-all.t +++ b/t/thread-all.t @@ -1,7 +1,7 @@ # Copyright (C) 2016 all contributors # License: AGPL-3.0+ # -# real-world testing of search threading +# real-world testing of search threading performance use strict; use warnings; use Test::More; @@ -16,7 +16,6 @@ plan skip_all => "$pi_dir not initialized for $0" if $@; require PublicInbox::View; require PublicInbox::SearchThread; -my $pfx = PublicInbox::Search::xpfx('thread'); my $opts = { limit => 1000000, asc => 1 }; my $t0 = clock_gettime(CLOCK_MONOTONIC); my $elapsed; @@ -35,4 +34,6 @@ PublicInbox::View::thread_results($msgs); $elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0; diag "thread_results $elapsed"; +ok(1, 'test completed without crashing :)'); + done_testing(); -- cgit v1.2.3-24-ge0c7