From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 069D41FA13 for ; Wed, 2 Sep 2020 11:04:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/11] search: replace ->query with ->mset Date: Wed, 2 Sep 2020 11:04:16 +0000 Message-Id: <20200902110421.30905-7-e@80x24.org> In-Reply-To: <20200902110421.30905-1-e@80x24.org> References: <20200902110421.30905-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Nearly all of the search uses in the production code rely on a Xapian mset iterator being returned (instead of an array of $smsg objects). So default to returning the mset and move the burden of smsg array conversion into the test cases. --- lib/PublicInbox/ExtMsg.pm | 4 +- lib/PublicInbox/IMAP.pm | 2 +- lib/PublicInbox/Mbox.pm | 6 +- lib/PublicInbox/Search.pm | 12 ++-- lib/PublicInbox/SearchView.pm | 3 +- lib/PublicInbox/SolverGit.pm | 5 +- t/altid.t | 8 +-- t/altid_v2.t | 7 ++- t/index-git-times.t | 14 +++-- t/indexlevels-mirror.t | 8 +-- t/mda_filter_rubylang.t | 6 +- t/replace.t | 8 +-- t/search.t | 112 +++++++++++++++++----------------- t/v1reindex.t | 4 +- t/v2mda.t | 10 +-- t/v2mirror.t | 22 +++---- t/v2reindex.t | 9 +-- t/v2writable.t | 10 ++- t/watch_filter_rubylang.t | 12 ++-- t/watch_maildir_v2.t | 17 +++--- t/xcpdb-reshard.t | 3 +- 21 files changed, 143 insertions(+), 139 deletions(-) diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 65892161..5dffc65c 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -65,10 +65,10 @@ sub search_partial ($$) { # has too many results. $@ can be # Search::Xapian::QueryParserError or even: # "something terrible happened at ../Search/Xapian/Enquire.pm" - my $mset = eval { $srch->query($m, $opt) } or next; + my $mset = eval { $srch->mset($m, $opt) } or next; my @mids = map { $_->{mid} - } @{$ibx->over->get_all(@{$srch->mset_to_artnums($mset)})}; + } @{$srch->mset_to_smsg($ibx, $mset)}; return \@mids if scalar(@mids); } } diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index abdb8fec..d540fd0b 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -1187,7 +1187,7 @@ sub refill_xap ($$$$) { my ($beg, $end) = @$range_info; my $srch = $self->{ibx}->search; my $opt = { mset => 2, limit => 1000 }; - my $mset = $srch->query("$q uid:$beg..$end", $opt); + my $mset = $srch->mset("$q uid:$beg..$end", $opt); @$uids = @{$srch->mset_to_artnums($mset)}; if (@$uids) { $range_info->[0] = $uids->[-1] + 1; # update $beg diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 0223bead..47025891 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -213,7 +213,7 @@ sub results_cb { } # refill result set my $srch = $ctx->{-inbox}->search(undef, $ctx) or return; - my $mset = $srch->query($ctx->{query}, $ctx->{qopts}); + my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset); @@ -235,7 +235,7 @@ sub results_thread_cb { # refill result set my $srch = $ctx->{-inbox}->search(undef, $ctx) or return; - my $mset = $srch->query($ctx->{query}, $ctx->{qopts}); + my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset); @@ -254,7 +254,7 @@ sub mbox_all { my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid $qopts->{thread} = 1 if $q->{t}; - my $mset = $srch->query($q_string, $qopts); + my $mset = $srch->mset($q_string, $qopts); $qopts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 546884a9..cfa942b2 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -279,7 +279,7 @@ sub reopen { } # read-only -sub query { +sub mset { my ($self, $query_string, $opts) = @_; $opts ||= {}; my $qp = $self->{qp} //= qparse_new($self); @@ -346,17 +346,17 @@ sub _enquire_once { # retry_reopen callback if ($opts->{thread} && has_threadid($self)) { $enquire->set_collapse_key(THREADID); } + $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50); +} - my $offset = $opts->{offset} || 0; - my $limit = $opts->{limit} || 50; - my $mset = $enquire->get_mset($offset, $limit); - return $mset if $opts->{mset}; +sub mset_to_smsg { + my ($self, $ibx, $mset) = @_; my $nshard = $self->{nshard} // 1; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { $order{$a->{num}} <=> $order{$b->{num}} - } @{$self->{over_ro}->get_all(keys %order)}; + } @{$ibx->over->get_all(keys %order)}; wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 892e8fda..c482f1c9 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -47,7 +47,6 @@ sub sres_top_html { my $opts = { limit => $q->{l}, offset => $o, - mset => 1, relevance => $q->{r}, thread => $q->{t}, asc => $asc, @@ -55,7 +54,7 @@ sub sres_top_html { my ($mset, $total, $err, $html); retry: eval { - $mset = $srch->query($query, $opts); + $mset = $srch->mset($query, $opts); $total = $mset->get_matches_estimated; }; $err = $@; diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index d0cd59db..dd95f400 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -228,10 +228,9 @@ sub find_extract_diffs ($$$) { } } - my $msgs = $srch->query($q, { relevance => 1 }); - + my $mset = $srch->mset($q, { relevance => 1 }); my $diffs = []; - foreach my $smsg (@$msgs) { + for my $smsg (@{$srch->mset_to_smsg($ibx, $mset)}) { my $eml = $ibx->smsg_eml($smsg) or next; $eml->each_part(\&extract_diff, [$self, $diffs, $pre, $post, $ibx, $smsg], 1); diff --git a/t/altid.t b/t/altid.t index f3c01520..816f5f5b 100644 --- a/t/altid.t +++ b/t/altid.t @@ -45,13 +45,13 @@ EOF } { - my $ro = PublicInbox::Search->new($ibx); - my $msgs = $ro->query("gmane:1234"); + my $mset = $ibx->search->mset("gmane:1234"); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); - $msgs = $ro->query("gmane:666"); - is_deeply([], $msgs, 'body did NOT match'); + $mset = $ibx->search->mset('gmane:666'); + is($mset->size, 0, 'body did NOT match'); }; { diff --git a/t/altid_v2.t b/t/altid_v2.t index 01ed9ed4..f04b547b 100644 --- a/t/altid_v2.t +++ b/t/altid_v2.t @@ -41,11 +41,12 @@ hello world gmane:666 EOF $v2w->done; -my $msgs = $ibx->search->reopen->query("gmane:1234"); +my $mset = $ibx->search->reopen->mset('gmane:1234'); +my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); -$msgs = $ibx->search->query("gmane:666"); -is_deeply([], $msgs, 'body did NOT match'); +$mset = $ibx->search->mset('gmane:666'); +is($mset->size, 0, 'body did NOT match'); done_testing(); diff --git a/t/index-git-times.t b/t/index-git-times.t index 73c99e61..f9869cfa 100644 --- a/t/index-git-times.t +++ b/t/index-git-times.t @@ -63,10 +63,12 @@ my $smsg; $smsg = $ibx->over->get_art(1); is($smsg->{ds}, 749520000, 'datestamp from git author time'); is($smsg->{ts}, 1285977600, 'timestamp from git committer time'); - my $res = $ibx->search->query("m:$smsg->{mid}"); - is(scalar @$res, 1, 'got one result for m:'); + my $mset = $ibx->search->mset("m:$smsg->{mid}"); + is($mset->size, 1, 'got one result for m:'); + my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); - $res = $ibx->search->query('d:19931002..19931002'); + $mset = $ibx->search->mset('d:19931002..19931002'); + $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); } @@ -87,9 +89,11 @@ SKIP: { 'v2 datestamp from git author time'); is($v2smsg->{ts}, $smsg->{ts}, 'v2 timestamp from git committer time'); - my $res = $ibx->search->query("m:$smsg->{mid}"); + my $mset = $ibx->search->mset("m:$smsg->{mid}"); + my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); - $res = $ibx->search->query('d:19931002..19931002'); + $mset = $ibx->search->mset('d:19931002..19931002'); + $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); }; diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 27533546..291e0d2f 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -121,8 +121,8 @@ my $import_index_incremental = sub { is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); - $msgs = $ro_mirror->search->query('m:m@2'); - is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level"); + my $mset = $ro_mirror->search->mset('m:m@2'); + is($mset->size, 1, "v$v found m\@2 via Xapian on $level"); } # sync the mirror @@ -138,8 +138,8 @@ my $import_index_incremental = sub { 'no Xapian shard directories for v2 basic'); } if ($level ne 'basic') { - $msgs = $ro_mirror->search->reopen->query('m:m@2'); - is(scalar(@$msgs), 0, + my $mset = $ro_mirror->search->reopen->mset('m:m@2'); + is($mset->size, 0, "v$v m\@2 gone from Xapian in mirror on $level"); } diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t index 5b6bf28b..754d52f7 100644 --- a/t/mda_filter_rubylang.t +++ b/t/mda_filter_rubylang.t @@ -48,10 +48,10 @@ EOF my $ibx = $config->lookup_name($v); # make sure all serials are searchable: - my ($tot, $msgs); for my $i (1..2) { - ($tot, $msgs) = $ibx->search->query("alerts:$i"); - is($tot, 1, "got one result for alerts:$i"); + my $mset = $ibx->search->mset("alerts:$i"); + is($mset->size, 1, "got one result for alerts:$i"); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } is_deeply([], \@warn, 'no warnings'); diff --git a/t/replace.t b/t/replace.t index 490e3b7b..95241adf 100644 --- a/t/replace.t +++ b/t/replace.t @@ -106,8 +106,8 @@ EOF if (my $srch = $ibx->search) { for my $q ('f:streisand', 's:confidential', 'malibu') { - my $msgs = $srch->query($q); - is_deeply($msgs, [], "no match for $q"); + my $mset = $srch->mset($q); + is($mset->size, 0, "no match for $q"); } my @ok = ('f:redactor', 's:redacted', 'nothing to see'); if ($opt->{pre}) { @@ -119,8 +119,8 @@ EOF 's:message3', 's:message4'; } for my $q (@ok) { - my $msgs = $srch->query($q); - ok($msgs->[0], "got match for $q"); + my $mset = $srch->mset($q); + ok($mset->size, "got match for $q"); } } diff --git a/t/search.t b/t/search.t index 3124baeb..8df8a202 100644 --- a/t/search.t +++ b/t/search.t @@ -25,12 +25,12 @@ $ibx->with_umask(sub { $rw->idx_release; }); $rw = undef; -my $ro = $ibx->search; my $rw_commit = sub { $rw->commit_txn_lazy if $rw; $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->{qp_flags} = 0; # quiet a warning $rw->begin_txn_lazy; + $ibx->search->reopen; }; sub oct_is ($$$) { @@ -103,29 +103,34 @@ sub filter_mids { sort(map { $_->{mid} } @$msgs); } +my $query = sub { + my ($query_string, $opt) = @_; + my $mset = $ibx->search->mset($query_string, $opt); + $ibx->search->mset_to_smsg($ibx, $mset); +}; + { $rw_commit->(); - $ro->reopen; - my $found = $ro->query('m:root@s'); + my $found = $query->('m:root@s'); is(scalar(@$found), 1, "message found"); is($found->[0]->{mid}, 'root@s', 'mid set correctly') if @$found; my ($res, @res); my @exp = sort qw(root@s last@s); - $res = $ro->query('s:(Hello world)'); + $res = $query->('s:(Hello world)'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:() match'); - $res = $ro->query('s:"Hello world"'); + $res = $query->('s:"Hello world"'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:"" match'); - $res = $ro->query('s:"Hello world"', {limit => 1}); + $res = $query->('s:"Hello world"', {limit => 1}); is(scalar @$res, 1, "limit works"); my $first = $res->[0]; - $res = $ro->query('s:"Hello world"', {offset => 1}); + $res = $query->('s:"Hello world"', {offset => 1}); is(scalar @$res, 1, "offset works"); my $second = $res->[0]; @@ -173,31 +178,29 @@ EOF # search thread on ghost { $rw_commit->(); - $ro->reopen; # subject - my $res = $ro->query('ghost'); + my $res = $query->('ghost'); my @exp = sort qw(ghost-message@s ghost-reply@s); my @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for Subject match'); # body - $res = $ro->query('goodbye'); + $res = $query->('goodbye'); is(scalar(@$res), 1, "goodbye message found"); is($res->[0]->{mid}, 'last@s', 'got goodbye message body') if @$res; # datestamp - $res = $ro->query('dt:20101002000001..20101002000001'); + $res = $query->('dt:20101002000001..20101002000001'); @res = filter_mids($res); is_deeply(\@res, ['ghost-message@s'], 'exact Date: match works'); - $res = $ro->query('dt:20101002000002..20101002000002'); + $res = $query->('dt:20101002000002..20101002000002'); is_deeply($res, [], 'exact Date: match down to the second'); } # long message-id $ibx->with_umask(sub { $rw_commit->(); - $ro->reopen; my $long_mid = 'last' . ('x' x 60). '@s'; my $long = PublicInbox::Eml->new(<(); - $ro->reopen; my $res; my @res; @@ -232,7 +234,6 @@ EOF ok($rw->add_message($long_reply) > $long_id, "inserted long reply"); $rw_commit->(); - $ro->reopen; my $t = $ibx->over->get_thread('root@s'); is(scalar(@$t), 4, "got all 4 messages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @@ -264,13 +265,13 @@ theatre fade EOF $rw_commit->(); - my $res = $ro->reopen->query("theatre"); + my $res = $query->("theatre"); is(scalar(@$res), 2, "got both matches"); if (@$res == 2) { is($res->[0]->{mid}, 'nquote@a', 'non-quoted scores higher'); is($res->[1]->{mid}, 'quote@a', 'quoted result still returned'); } - $res = $ro->query("illusions"); + $res = $query->("illusions"); is(scalar(@$res), 1, "got a match for quoted text"); is($res->[0]->{mid}, 'quote@a', "quoted result returned if nothing else") if scalar(@$res); @@ -292,7 +293,7 @@ LOOP! EOF ok($doc_id > 0, "doc_id defined with circular reference"); $rw_commit->(); - my $smsg = $ro->reopen->query('m:circle@a', {limit=>1})->[0]; + my $smsg = $query->('m:circle@a', {limit=>1})->[0]; is(defined($smsg), 1, 'found m:circl@a'); if (defined $smsg) { is($smsg->{references}, '', "no references created"); @@ -301,11 +302,11 @@ EOF }); { - my $msgs = $ro->query('d:19931002..20101002'); + my $msgs = $query->('d:19931002..20101002'); ok(scalar(@$msgs) > 0, 'got results within range'); - $msgs = $ro->query('d:20101003..'); + $msgs = $query->('d:20101003..'); is(scalar(@$msgs), 0, 'nothing after 20101003'); - $msgs = $ro->query('d:..19931001'); + $msgs = $query->('d:..19931001'); is(scalar(@$msgs), 0, 'nothing before 19931001'); } @@ -314,8 +315,7 @@ $ibx->with_umask(sub { my $doc_id = $rw->add_message($mime); ok($doc_id > 0, 'message indexed doc_id with UTF-8'); $rw_commit->(); - my $msg = $ro->reopen-> - query('m:testmessage@example.com', {limit => 1})->[0]; + my $msg = $query->('m:testmessage@example.com', {limit => 1})->[0]; is(defined($msg), 1, 'found testmessage@example.com'); if (defined $msg) { is($mime->header('Subject'), $msg->{subject}, @@ -325,7 +325,7 @@ $ibx->with_umask(sub { # names and addresses { - my $mset = $ro->query('t:list@example.com', {mset => 1}); + my $mset = $ibx->search->mset('t:list@example.com'); is($mset->size, 9, 'searched To: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); @@ -343,7 +343,7 @@ $ibx->with_umask(sub { is($uid, $m->get_docid, 'UID column matches docid'); } - $mset = $ro->query('tc:list@example.com', {mset => 1}); + $mset = $ibx->search->mset('tc:list@example.com'); is($mset->size, 9, 'searched To+Cc: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); @@ -352,7 +352,7 @@ $ibx->with_umask(sub { } foreach my $pfx ('tcf:', 'c:') { - my $mset = $ro->query($pfx . 'foo@example.com', { mset => 1 }); + my $mset = $ibx->search->mset($pfx . 'foo@example.com'); is($mset->items, 1, "searched $pfx successfully for Cc:"); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); @@ -362,7 +362,7 @@ $ibx->with_umask(sub { } foreach my $pfx ('', 'tcf:', 'f:') { - my $res = $ro->query($pfx . 'Laggy'); + my $res = $query->($pfx . 'Laggy'); is(scalar(@$res), 1, "searched $pfx successfully for From:"); foreach my $smsg (@$res) { @@ -374,25 +374,24 @@ $ibx->with_umask(sub { { $rw_commit->(); - $ro->reopen; - my $res = $ro->query('b:hello'); + my $res = $query->('b:hello'); is(scalar(@$res), 0, 'no match on body search only'); - $res = $ro->query('bs:smith'); + $res = $query->('bs:smith'); is(scalar(@$res), 0, 'no match on body+subject search for From'); - $res = $ro->query('q:theatre'); + $res = $query->('q:theatre'); is(scalar(@$res), 1, 'only one quoted body'); like($res->[0]->{from_name}, qr/\AQuoter/, 'got quoted body') if (scalar(@$res)); - $res = $ro->query('nq:theatre'); + $res = $query->('nq:theatre'); is(scalar @$res, 1, 'only one non-quoted body'); like($res->[0]->{from_name}, qr/\ANon-Quoter/, 'got non-quoted body') if (scalar(@$res)); foreach my $pfx (qw(b: bs:)) { - $res = $ro->query($pfx . 'theatre'); + $res = $query->($pfx . 'theatre'); is(scalar @$res, 2, "searched both bodies for $pfx"); like($res->[0]->{from_name}, qr/\ANon-Quoter/, "non-quoter first for $pfx") if scalar(@$res); @@ -405,14 +404,13 @@ $ibx->with_umask(sub { my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; ok($rw->add_message($amsg, $smsg), 'added attachment'); $rw_commit->(); - $ro->reopen; - my $n = $ro->query('n:attached_fart.txt'); + my $n = $query->('n:attached_fart.txt'); is(scalar @$n, 1, 'got result for n:'); - my $res = $ro->query('part_deux.txt'); + my $res = $query->('part_deux.txt'); is(scalar @$res, 1, 'got result without n:'); is($n->[0]->{mid}, $res->[0]->{mid}, 'same result with and without') if scalar(@$res); - my $txt = $ro->query('"inside another"'); + my $txt = $query->('"inside another"'); is(scalar @$txt, 1, 'found inside another'); is($txt->[0]->{mid}, $res->[0]->{mid}, 'search inside text attachments works') if scalar(@$txt); @@ -459,8 +457,7 @@ $ibx->with_umask(sub { my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; - is($ro->reopen->query("m:$digits", { mset => 1})->size, 0, - 'no results yet'); + is($ibx->search->mset("m:$digits")->size, 0, 'no results yet'); my $pine = PublicInbox::Eml->new(< @@ -470,44 +467,45 @@ To: list\@example.com EOF my $x = $rw->add_message($pine); $rw->commit_txn_lazy; - is($ro->reopen->query("m:$digits", { mset => 1})->size, 1, + $ibx->search->reopen; + is($ibx->search->mset("m:$digits")->size, 1, 'searching only digit yielded result'); my $wild = $digits; for my $i (1..6) { chop($wild); - is($ro->query("m:$wild*", { mset => 1})->size, 1, + is($ibx->search->mset("m:$wild*")->size, 1, "searching chopped($i) digit yielded result $wild "); } - is($ro->query("m:Pine m:LNX m:10010260936330", {mset=>1})->size, 1); + is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1); }); { # List-Id searching - my $found = $ro->query('lid:i.m.just.bored'); + my $found = $query->('lid:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'got expected mid on exact lid: search'); - $found = $ro->query('lid:just.bored'); + $found = $query->('lid:just.bored'); is_deeply($found, [], 'got nothing on lid: search'); - $found = $ro->query('lid:*.just.bored'); + $found = $query->('lid:*.just.bored'); is_deeply($found, [], 'got nothing on lid: search'); - $found = $ro->query('l:i.m.just.bored'); + $found = $query->('l:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on full List-Id contents'); - $found = $ro->query('l:just.bored'); + $found = $query->('l:just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on partial List-Id contents'); - $found = $ro->query('lid:mad'); + $found = $query->('lid:mad'); is_deeply($found, [], 'no match on phrase with lid:'); - $found = $ro->query('lid:bored'); + $found = $query->('lid:bored'); is_deeply($found, [], 'no match on partial List-Id with lid:'); - $found = $ro->query('l:nothing'); + $found = $query->('l:nothing'); is_deeply($found, [], 'matched on phrase with l:'); } @@ -516,22 +514,22 @@ $ibx->with_umask(sub { my $doc_id = $rw->add_message(eml_load('t/data/message_embed.eml')); ok($doc_id > 0, 'messages within messages'); $rw->commit_txn_lazy; - $ro->reopen; - my $n_test_eml = $ro->query('n:test.eml'); + $ibx->search->reopen; + my $n_test_eml = $query->('n:test.eml'); is(scalar(@$n_test_eml), 1, 'got a result'); - my $n_embed2x_eml = $ro->query('n:embed2x.eml'); + my $n_embed2x_eml = $query->('n:embed2x.eml'); is_deeply($n_test_eml, $n_embed2x_eml, '.eml filenames searchable'); for my $m (qw(20200418222508.GA13918@dcvr 20200418222020.GA2745@dcvr 20200418214114.7575-1-e@yhbt.net)) { - is($ro->query("m:$m")->[0]->{mid}, + is($query->("m:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'probabilistic m:'.$m); - is($ro->query("mid:$m")->[0]->{mid}, + is($query->("mid:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'boolean mid:'.$m); } - is($ro->query('dfpost:4dc62c50')->[0]->{mid}, + is($query->('dfpost:4dc62c50')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'diff search reaches inside message/rfc822'); - is($ro->query('s:"mail header experiments"')->[0]->{mid}, + is($query->('s:"mail header experiments"')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'Subject search reaches inside message/rfc822'); }); diff --git a/t/v1reindex.t b/t/v1reindex.t index a5c85ffb..e66d89e5 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -178,7 +178,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); - my $mset = $ibx->search->query('hello world', {mset=>1}); + my $mset = $ibx->search->mset('hello world'); isnt($mset->size, 0, 'got Xapian search results'); my ($min, $max) = $ibx->mm->minmax; @@ -224,7 +224,7 @@ ok(!-d $xap, 'Xapian directories removed again'); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from indexing'); is_deeply(\@warn, [], 'no warnings'); - my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); + my $mset = $ibx->search->reopen->mset('hello world'); isnt($mset->size, 0, 'search OK after basic -> medium'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); diff --git a/t/v2mda.t b/t/v2mda.t index 2262c3ad..abbdc8e4 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -85,10 +85,12 @@ is($eml->as_string, $mime->as_string, 'injected message'); open my $fh, '<', $patch or die "failed to open $patch: $!\n"; $rdr->{0} = \(do { local $/; <$fh> }); ok(run_script(['-mda'], undef, $rdr), 'mda delivered a patch'); - my $post = $ibx->search->reopen->query('dfpost:6e006fd7'); - is(scalar(@$post), 1, 'got one result for dfpost'); - my $pre = $ibx->search->query('dfpre:090d998'); - is(scalar(@$pre), 1, 'got one result for dfpre'); + my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); + is($post->size, 1, 'got one result for dfpost'); + my $pre = $ibx->search->mset('dfpre:090d998'); + is($pre->size, 1, 'got one result for dfpre'); + $pre = $ibx->search->mset_to_smsg($ibx, $pre); + $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); } diff --git a/t/v2mirror.t b/t/v2mirror.t index bca43fd5..81b9544d 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -112,11 +112,11 @@ my $fetch_each_epoch = sub { $fetch_each_epoch->(); -my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); +my $mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 0, 'new message not found in mirror, yet'); ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); -$mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); +$mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 1, 'found message in mirror'); # purge: @@ -137,7 +137,7 @@ $v2w->done; my $msgs = $mibx->over->get_thread('10@example.com'); my $to_purge = $msgs->[0]->{blob}; like($to_purge, qr/\A[a-f0-9]{40,}\z/, 'read blob to be purged'); -$mset = $ibx->search->reopen->query('m:10@example.com', {mset => 1}); +$mset = $ibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message gone from origin'); $fetch_each_epoch->(); @@ -153,11 +153,11 @@ $fetch_each_epoch->(); unlike($err, qr/fatal/, 'no scary fatal error shown'); } -$mset = $mibx->search->reopen->query('m:10@example.com', {mset => 1}); +$mset = $mibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message not found in mirror'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'minmax still synced'); for my $i ((1..9),(11..15)) { - $mset = $mibx->search->query("m:$i\@example.com", {mset => 1}); + $mset = $mibx->search->mset("m:$i\@example.com"); is(scalar($mset->items), 1, "$i\@example.com remains visible"); } is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); @@ -171,7 +171,7 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); # deletes happen in a different fetch window { - $mset = $mibx->search->reopen->query('m:1@example.com', {mset => 1}); + $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 1, '1@example.com visible in mirror'); $mime->header_set('Message-ID', '<1@example.com>'); $mime->header_set('Subject', 'subject = 1'); @@ -186,12 +186,12 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), 'index ran'); is($err, '', 'no errors reported by index'); - $mset = $mibx->search->reopen->query('m:1@example.com', {mset => 1}); + $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 0, '1@example.com no longer visible in mirror'); } if ('sequential-shard') { - $mset = $mibx->search->query('m:15@example.com', {mset => 1}); + $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'large message not indexed'); remove_tree(glob("$tmpdir/m/xap*"), glob("$tmpdir/m/msgmap.*")); my $cmd = [ qw(-index -j9 --sequential-shard), "$tmpdir/m" ]; @@ -199,7 +199,7 @@ if ('sequential-shard') { my @shards = glob("$tmpdir/m/xap*/?"); is(scalar(@shards), 8, 'got expected shard count'); PublicInbox::InboxWritable::cleanup($mibx); - $mset = $mibx->search->query('m:15@example.com', {mset => 1}); + $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'search works after --sequential-shard'); } @@ -216,7 +216,7 @@ if ('max size') { my $opt = { 2 => \(my $err) }; ok(run_script($cmd, undef, $opt), 'indexed with --max-size'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); - $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1}); + $mset = $mibx->search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not indexed'); { @@ -230,7 +230,7 @@ EOF $cmd = [ qw(-index -j0 --reindex), "$tmpdir/m" ]; ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); - $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1}); + $mset = $mibx->search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not re-indexed'); } diff --git a/t/v2reindex.t b/t/v2reindex.t index a2fc2075..ae1570ed 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -153,7 +153,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); - my $mset = $ibx->search->query($phrase, {mset=>1}); + my $mset = $ibx->search->mset($phrase); isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } @@ -184,12 +184,12 @@ ok(!-d $xap, 'Xapian directories removed again'); # not sure why, but Xapian seems to fallback to terms and # phrase searches still work delete $ibx->{search}; - my $mset = $ibx->search->query($phrase, {mset=>1}); + my $mset = $ibx->search->mset($phrase); is($mset->size, 0, 'phrase search does not work on medium'); } my $words = $phrase; $words =~ tr/"'//d; - my $mset = $ibx->search->query($words, {mset=>1}); + my $mset = $ibx->search->mset($words); isnt($mset->size, 0, "normal search works on indexlevel=medium"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } @@ -531,7 +531,8 @@ EOF my %uniq; for my $s (qw(uno dos tres)) { - my $msgs = $ibx->search->query("s:$s"); + my $mset = $ibx->search->mset("s:$s"); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), 1, "only one result for `$s'"); $uniq{$msgs->[0]->{num}}++; } diff --git a/t/v2writable.t b/t/v2writable.t index 217eaf97..1de8c032 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -124,15 +124,14 @@ if ('ensure git configs are correct') { SELECT COUNT(*) FROM over WHERE num > 0 is($ibx->mm->num_highwater, $total, 'got expected highwater value'); - my $srch = $ibx->search; - my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 }); + my $mset1 = $ibx->search->reopen->mset('m:abcde@1'); is($mset1->size, 1, 'message found by first MID'); - my $mset2 = $srch->reopen->query('m:abcde@2', { mset => 1 }); + my $mset2 = $ibx->search->mset('m:abcde@2'); is($mset2->size, 1, 'message found by second MID'); is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid, 'same document') if ($mset1->size); - my $alt = $srch->reopen->query('m:alt-id-for-nntp', { mset => 1 }); + my $alt = $ibx->search->mset('m:alt-id-for-nntp'); is($alt->size, 1, 'message found by alt MID (NNTP)'); is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid, 'same document') if ($mset1->size); @@ -231,8 +230,7 @@ EOF my $num = $smsg->{num}; like($num, qr/\A\d+\z/, 'numeric number in return message'); is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num'); - my $srch = $ibx->search->reopen; - my $mset = $srch->query('m:'.$mid, { mset => 1}); + my $mset = $ibx->search->reopen->mset('m:'.$mid); is($mset->size, 0, 'no longer found in Xapian'); my @log1 = (@log, qw(-1 --pretty=raw --raw -r --no-renames)); is($ibx->over->get_art($num), undef, diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t index 4b72dbae..6513f30b 100644 --- a/t/watch_filter_rubylang.t +++ b/t/watch_filter_rubylang.t @@ -82,14 +82,13 @@ EOF } # make sure all serials are searchable: - my ($tot, $msgs); for my $i (1..15) { - ($tot, $msgs) = $ibx->search->query("alerts:$i"); - is($tot, 1, "got one result for alerts:$i"); + my $mset = $ibx->search->mset("alerts:$i"); + is($mset->size, 1, "got one result for alerts:$i"); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } - ($tot, undef) = $ibx->search->query('b:spam'); - is($tot, 1, 'got spam message'); + is($ibx->search->mset('b:spam')->size, 1, 'got spam message'); my $nr = unlink <$maildir/new/*>; is(16, $nr); @@ -104,8 +103,7 @@ EOF $config = PublicInbox::Config->new(\$orig); $ibx = $config->lookup_name($v); - ($tot, undef) = $ibx->search->reopen->query('b:spam'); - is($tot, 0, 'spam removed'); + is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed'); is_deeply([], \@warn, 'no warnings'); } diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index c2c096ae..12546418 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -130,12 +130,14 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($config)->scan('full'); - my $msgs = $ibx->search->reopen->query('dfpost:6e006fd7'); - is(scalar(@$msgs), 1, 'diff postimage found'); - my $post = $msgs->[0]; - $msgs = $ibx->search->query('dfpre:090d998b6c2c'); - is(scalar(@$msgs), 1, 'diff preimage found'); - is($post->{blob}, $msgs->[0]->{blob}, 'same message'); + my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); + is($post->size, 1, 'diff postimage found'); + my $pre = $ibx->search->mset('dfpre:090d998b6c2c'); + is($pre->size, 1, 'diff preimage found'); + $pre = $ibx->search->mset_to_smsg($ibx, $pre); + $post = $ibx->search->mset_to_smsg($ibx, $post); + is(scalar(@$pre), 1, 'diff preimage found'); + is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message'); } # multiple inboxes in the same maildir @@ -161,7 +163,8 @@ both EOF PublicInbox::Emergency->new($maildir)->prepare(\$both); PublicInbox::Watch->new($config)->scan('full'); - my $msgs = $ibx->search->reopen->query('m:both@b.com'); + my $mset = $ibx->search->reopen->mset('m:both@b.com'); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); my $v1 = $config->lookup_name('v1'); my $msg = $v1->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v1'); diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t index 1835fa62..c1af5d9a 100644 --- a/t/xcpdb-reshard.t +++ b/t/xcpdb-reshard.t @@ -49,7 +49,8 @@ for my $R (qw(2 4 1 3 3)) { ok(run_script($cmd), "xcpdb -R$R"); my @new_shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@new_shards), $R, 'resharded to two shards'); - my $msgs = $ibx->search->query('s:this'); + my $mset = $ibx->search->mset('s:this'); + my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), $ndoc, 'got expected docs after resharding'); my %by_mid = map {; "$_->{mid}" => $_ } @$msgs; ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc);