user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 06/11] search: replace ->query with ->mset
Date: Wed,  2 Sep 2020 11:04:16 +0000	[thread overview]
Message-ID: <20200902110421.30905-7-e@80x24.org> (raw)
In-Reply-To: <20200902110421.30905-1-e@80x24.org>

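Nearly all callers of ->query in the production code rely on
a Xapian mset (match set) being returned (instead of an array
of $smsg objects).  So rename ->query to ->mset, make it return
the mset unconditionally (the "mset => 1" option is no longer
needed), and move smsg array conversion into a new
->mset_to_smsg helper.  The burden of calling that helper falls
mostly on the test cases; the few production callers which
still want $smsg objects (ExtMsg, SolverGit) call it explicitly.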
---
 lib/PublicInbox/ExtMsg.pm     |   4 +-
 lib/PublicInbox/IMAP.pm       |   2 +-
 lib/PublicInbox/Mbox.pm       |   6 +-
 lib/PublicInbox/Search.pm     |  12 ++--
 lib/PublicInbox/SearchView.pm |   3 +-
 lib/PublicInbox/SolverGit.pm  |   5 +-
 t/altid.t                     |   8 +--
 t/altid_v2.t                  |   7 ++-
 t/index-git-times.t           |  14 +++--
 t/indexlevels-mirror.t        |   8 +--
 t/mda_filter_rubylang.t       |   6 +-
 t/replace.t                   |   8 +--
 t/search.t                    | 112 +++++++++++++++++-----------------
 t/v1reindex.t                 |   4 +-
 t/v2mda.t                     |  10 +--
 t/v2mirror.t                  |  22 +++----
 t/v2reindex.t                 |   9 +--
 t/v2writable.t                |  10 ++-
 t/watch_filter_rubylang.t     |  12 ++--
 t/watch_maildir_v2.t          |  17 +++---
 t/xcpdb-reshard.t             |   3 +-
 21 files changed, 143 insertions(+), 139 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 65892161..5dffc65c 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -65,10 +65,10 @@ sub search_partial ($$) {
 		# has too many results.  $@ can be
 		# Search::Xapian::QueryParserError or even:
 		# "something terrible happened at ../Search/Xapian/Enquire.pm"
-		my $mset = eval { $srch->query($m, $opt) } or next;
+		my $mset = eval { $srch->mset($m, $opt) } or next;
 		my @mids = map {
 			$_->{mid}
-		} @{$ibx->over->get_all(@{$srch->mset_to_artnums($mset)})};
+		} @{$srch->mset_to_smsg($ibx, $mset)};
 		return \@mids if scalar(@mids);
 	}
 }
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index abdb8fec..d540fd0b 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1187,7 +1187,7 @@ sub refill_xap ($$$$) {
 	my ($beg, $end) = @$range_info;
 	my $srch = $self->{ibx}->search;
 	my $opt = { mset => 2, limit => 1000 };
-	my $mset = $srch->query("$q uid:$beg..$end", $opt);
+	my $mset = $srch->mset("$q uid:$beg..$end", $opt);
 	@$uids = @{$srch->mset_to_artnums($mset)};
 	if (@$uids) {
 		$range_info->[0] = $uids->[-1] + 1; # update $beg
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 0223bead..47025891 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -213,7 +213,7 @@ sub results_cb {
 		}
 		# refill result set
 		my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
-		my $mset = $srch->query($ctx->{query}, $ctx->{qopts});
+		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset);
@@ -235,7 +235,7 @@ sub results_thread_cb {
 
 		# refill result set
 		my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
-		my $mset = $srch->query($ctx->{query}, $ctx->{qopts});
+		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset);
@@ -254,7 +254,7 @@ sub mbox_all {
 
 	my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
 	$qopts->{thread} = 1 if $q->{t};
-	my $mset = $srch->query($q_string, $qopts);
+	my $mset = $srch->mset($q_string, $qopts);
 	$qopts->{offset} = $mset->size or
 			return [404, [qw(Content-Type text/plain)],
 				["No results found\n"]];
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 546884a9..cfa942b2 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -279,7 +279,7 @@ sub reopen {
 }
 
 # read-only
-sub query {
+sub mset {
 	my ($self, $query_string, $opts) = @_;
 	$opts ||= {};
 	my $qp = $self->{qp} //= qparse_new($self);
@@ -346,17 +346,17 @@ sub _enquire_once { # retry_reopen callback
 	if ($opts->{thread} && has_threadid($self)) {
 		$enquire->set_collapse_key(THREADID);
 	}
+	$enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50);
+}
 
-	my $offset = $opts->{offset} || 0;
-	my $limit = $opts->{limit} || 50;
-	my $mset = $enquire->get_mset($offset, $limit);
-	return $mset if $opts->{mset};
+sub mset_to_smsg {
+	my ($self, $ibx, $mset) = @_;
 	my $nshard = $self->{nshard} // 1;
 	my $i = 0;
 	my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
 	my @msgs = sort {
 		$order{$a->{num}} <=> $order{$b->{num}}
-	} @{$self->{over_ro}->get_all(keys %order)};
+	} @{$ibx->over->get_all(keys %order)};
 	wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
 }
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 892e8fda..c482f1c9 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -47,7 +47,6 @@ sub sres_top_html {
 	my $opts = {
 		limit => $q->{l},
 		offset => $o,
-		mset => 1,
 		relevance => $q->{r},
 		thread => $q->{t},
 		asc => $asc,
@@ -55,7 +54,7 @@ sub sres_top_html {
 	my ($mset, $total, $err, $html);
 retry:
 	eval {
-		$mset = $srch->query($query, $opts);
+		$mset = $srch->mset($query, $opts);
 		$total = $mset->get_matches_estimated;
 	};
 	$err = $@;
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index d0cd59db..dd95f400 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -228,10 +228,9 @@ sub find_extract_diffs ($$$) {
 		}
 	}
 
-	my $msgs = $srch->query($q, { relevance => 1 });
-
+	my $mset = $srch->mset($q, { relevance => 1 });
 	my $diffs = [];
-	foreach my $smsg (@$msgs) {
+	for my $smsg (@{$srch->mset_to_smsg($ibx, $mset)}) {
 		my $eml = $ibx->smsg_eml($smsg) or next;
 		$eml->each_part(\&extract_diff,
 				[$self, $diffs, $pre, $post, $ibx, $smsg], 1);
diff --git a/t/altid.t b/t/altid.t
index f3c01520..816f5f5b 100644
--- a/t/altid.t
+++ b/t/altid.t
@@ -45,13 +45,13 @@ EOF
 }
 
 {
-	my $ro = PublicInbox::Search->new($ibx);
-	my $msgs = $ro->query("gmane:1234");
+	my $mset = $ibx->search->mset("gmane:1234");
+	my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 	$msgs = [ map { $_->{mid} } @$msgs ];
 	is_deeply($msgs, ['a@example.com'], 'got one match');
 
-	$msgs = $ro->query("gmane:666");
-	is_deeply([], $msgs, 'body did NOT match');
+	$mset = $ibx->search->mset('gmane:666');
+	is($mset->size, 0, 'body did NOT match');
 };
 
 {
diff --git a/t/altid_v2.t b/t/altid_v2.t
index 01ed9ed4..f04b547b 100644
--- a/t/altid_v2.t
+++ b/t/altid_v2.t
@@ -41,11 +41,12 @@ hello world gmane:666
 EOF
 $v2w->done;
 
-my $msgs = $ibx->search->reopen->query("gmane:1234");
+my $mset = $ibx->search->reopen->mset('gmane:1234');
+my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 $msgs = [ map { $_->{mid} } @$msgs ];
 is_deeply($msgs, ['a@example.com'], 'got one match');
-$msgs = $ibx->search->query("gmane:666");
-is_deeply([], $msgs, 'body did NOT match');
+$mset = $ibx->search->mset('gmane:666');
+is($mset->size, 0, 'body did NOT match');
 
 done_testing();
 
diff --git a/t/index-git-times.t b/t/index-git-times.t
index 73c99e61..f9869cfa 100644
--- a/t/index-git-times.t
+++ b/t/index-git-times.t
@@ -63,10 +63,12 @@ my $smsg;
 	$smsg = $ibx->over->get_art(1);
 	is($smsg->{ds}, 749520000, 'datestamp from git author time');
 	is($smsg->{ts}, 1285977600, 'timestamp from git committer time');
-	my $res = $ibx->search->query("m:$smsg->{mid}");
-	is(scalar @$res, 1, 'got one result for m:');
+	my $mset = $ibx->search->mset("m:$smsg->{mid}");
+	is($mset->size, 1, 'got one result for m:');
+	my $res = $ibx->search->mset_to_smsg($ibx, $mset);
 	is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp');
-	$res = $ibx->search->query('d:19931002..19931002');
+	$mset = $ibx->search->mset('d:19931002..19931002');
+	$res = $ibx->search->mset_to_smsg($ibx, $mset);
 	is(scalar @$res, 1, 'got one result for d:');
 	is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp');
 }
@@ -87,9 +89,11 @@ SKIP: {
 			'v2 datestamp from git author time');
 		is($v2smsg->{ts}, $smsg->{ts},
 			'v2 timestamp from git committer time');
-		my $res = $ibx->search->query("m:$smsg->{mid}");
+		my $mset = $ibx->search->mset("m:$smsg->{mid}");
+		my $res = $ibx->search->mset_to_smsg($ibx, $mset);
 		is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp');
-		$res = $ibx->search->query('d:19931002..19931002');
+		$mset = $ibx->search->mset('d:19931002..19931002');
+		$res = $ibx->search->mset_to_smsg($ibx, $mset);
 		is(scalar @$res, 1, 'got one result for d:');
 		is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp');
 	};
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 27533546..291e0d2f 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -121,8 +121,8 @@ my $import_index_incremental = sub {
 		is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level,
 		   'indexlevel detectable by Admin after xcpdb v' .$v.$level);
 		delete $ro_mirror->{$_} for (qw(over search));
-		$msgs = $ro_mirror->search->query('m:m@2');
-		is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level");
+		my $mset = $ro_mirror->search->mset('m:m@2');
+		is($mset->size, 1, "v$v found m\@2 via Xapian on $level");
 	}
 
 	# sync the mirror
@@ -138,8 +138,8 @@ my $import_index_incremental = sub {
 			 'no Xapian shard directories for v2 basic');
 	}
 	if ($level ne 'basic') {
-		$msgs = $ro_mirror->search->reopen->query('m:m@2');
-		is(scalar(@$msgs), 0,
+		my $mset = $ro_mirror->search->reopen->mset('m:m@2');
+		is($mset->size, 0,
 			"v$v m\@2 gone from Xapian in mirror on $level");
 	}
 
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
index 5b6bf28b..754d52f7 100644
--- a/t/mda_filter_rubylang.t
+++ b/t/mda_filter_rubylang.t
@@ -48,10 +48,10 @@ EOF
 	my $ibx = $config->lookup_name($v);
 
 	# make sure all serials are searchable:
-	my ($tot, $msgs);
 	for my $i (1..2) {
-		($tot, $msgs) = $ibx->search->query("alerts:$i");
-		is($tot, 1, "got one result for alerts:$i");
+		my $mset = $ibx->search->mset("alerts:$i");
+		is($mset->size, 1, "got one result for alerts:$i");
+		my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 		is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
 	}
 	is_deeply([], \@warn, 'no warnings');
diff --git a/t/replace.t b/t/replace.t
index 490e3b7b..95241adf 100644
--- a/t/replace.t
+++ b/t/replace.t
@@ -106,8 +106,8 @@ EOF
 
 	if (my $srch = $ibx->search) {
 		for my $q ('f:streisand', 's:confidential', 'malibu') {
-			my $msgs = $srch->query($q);
-			is_deeply($msgs, [], "no match for $q");
+			my $mset = $srch->mset($q);
+			is($mset->size, 0, "no match for $q");
 		}
 		my @ok = ('f:redactor', 's:redacted', 'nothing to see');
 		if ($opt->{pre}) {
@@ -119,8 +119,8 @@ EOF
 				's:message3', 's:message4';
 		}
 		for my $q (@ok) {
-			my $msgs = $srch->query($q);
-			ok($msgs->[0], "got match for $q");
+			my $mset = $srch->mset($q);
+			ok($mset->size, "got match for $q");
 		}
 	}
 
diff --git a/t/search.t b/t/search.t
index 3124baeb..8df8a202 100644
--- a/t/search.t
+++ b/t/search.t
@@ -25,12 +25,12 @@ $ibx->with_umask(sub {
 	$rw->idx_release;
 });
 $rw = undef;
-my $ro = $ibx->search;
 my $rw_commit = sub {
 	$rw->commit_txn_lazy if $rw;
 	$rw = PublicInbox::SearchIdx->new($ibx, 1);
 	$rw->{qp_flags} = 0; # quiet a warning
 	$rw->begin_txn_lazy;
+	$ibx->search->reopen;
 };
 
 sub oct_is ($$$) {
@@ -103,29 +103,34 @@ sub filter_mids {
 	sort(map { $_->{mid} } @$msgs);
 }
 
+my $query = sub {
+	my ($query_string, $opt) = @_;
+	my $mset = $ibx->search->mset($query_string, $opt);
+	$ibx->search->mset_to_smsg($ibx, $mset);
+};
+
 {
 	$rw_commit->();
-	$ro->reopen;
-	my $found = $ro->query('m:root@s');
+	my $found = $query->('m:root@s');
 	is(scalar(@$found), 1, "message found");
 	is($found->[0]->{mid}, 'root@s', 'mid set correctly') if @$found;
 
 	my ($res, @res);
 	my @exp = sort qw(root@s last@s);
 
-	$res = $ro->query('s:(Hello world)');
+	$res = $query->('s:(Hello world)');
 	@res = filter_mids($res);
 	is_deeply(\@res, \@exp, 'got expected results for s:() match');
 
-	$res = $ro->query('s:"Hello world"');
+	$res = $query->('s:"Hello world"');
 	@res = filter_mids($res);
 	is_deeply(\@res, \@exp, 'got expected results for s:"" match');
 
-	$res = $ro->query('s:"Hello world"', {limit => 1});
+	$res = $query->('s:"Hello world"', {limit => 1});
 	is(scalar @$res, 1, "limit works");
 	my $first = $res->[0];
 
-	$res = $ro->query('s:"Hello world"', {offset => 1});
+	$res = $query->('s:"Hello world"', {offset => 1});
 	is(scalar @$res, 1, "offset works");
 	my $second = $res->[0];
 
@@ -173,31 +178,29 @@ EOF
 # search thread on ghost
 {
 	$rw_commit->();
-	$ro->reopen;
 
 	# subject
-	my $res = $ro->query('ghost');
+	my $res = $query->('ghost');
 	my @exp = sort qw(ghost-message@s ghost-reply@s);
 	my @res = filter_mids($res);
 	is_deeply(\@res, \@exp, 'got expected results for Subject match');
 
 	# body
-	$res = $ro->query('goodbye');
+	$res = $query->('goodbye');
 	is(scalar(@$res), 1, "goodbye message found");
 	is($res->[0]->{mid}, 'last@s', 'got goodbye message body') if @$res;
 
 	# datestamp
-	$res = $ro->query('dt:20101002000001..20101002000001');
+	$res = $query->('dt:20101002000001..20101002000001');
 	@res = filter_mids($res);
 	is_deeply(\@res, ['ghost-message@s'], 'exact Date: match works');
-	$res = $ro->query('dt:20101002000002..20101002000002');
+	$res = $query->('dt:20101002000002..20101002000002');
 	is_deeply($res, [], 'exact Date: match down to the second');
 }
 
 # long message-id
 $ibx->with_umask(sub {
 	$rw_commit->();
-	$ro->reopen;
 	my $long_mid = 'last' . ('x' x 60). '@s';
 	my $long = PublicInbox::Eml->new(<<EOF);
 Date: Sat, 02 Oct 2010 00:00:00 +0000
@@ -214,7 +217,6 @@ EOF
 	is($long_id, int($long_id), "long_id is an integer: $long_id");
 
 	$rw_commit->();
-	$ro->reopen;
 	my $res;
 	my @res;
 
@@ -232,7 +234,6 @@ EOF
 	ok($rw->add_message($long_reply) > $long_id, "inserted long reply");
 
 	$rw_commit->();
-	$ro->reopen;
 	my $t = $ibx->over->get_thread('root@s');
 	is(scalar(@$t), 4, "got all 4 messages in thread");
 	my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid);
@@ -264,13 +265,13 @@ theatre
 fade
 EOF
 	$rw_commit->();
-	my $res = $ro->reopen->query("theatre");
+	my $res = $query->("theatre");
 	is(scalar(@$res), 2, "got both matches");
 	if (@$res == 2) {
 		is($res->[0]->{mid}, 'nquote@a', 'non-quoted scores higher');
 		is($res->[1]->{mid}, 'quote@a', 'quoted result still returned');
 	}
-	$res = $ro->query("illusions");
+	$res = $query->("illusions");
 	is(scalar(@$res), 1, "got a match for quoted text");
 	is($res->[0]->{mid}, 'quote@a',
 		"quoted result returned if nothing else") if scalar(@$res);
@@ -292,7 +293,7 @@ LOOP!
 EOF
 	ok($doc_id > 0, "doc_id defined with circular reference");
 	$rw_commit->();
-	my $smsg = $ro->reopen->query('m:circle@a', {limit=>1})->[0];
+	my $smsg = $query->('m:circle@a', {limit=>1})->[0];
 	is(defined($smsg), 1, 'found m:circl@a');
 	if (defined $smsg) {
 		is($smsg->{references}, '', "no references created");
@@ -301,11 +302,11 @@ EOF
 });
 
 {
-	my $msgs = $ro->query('d:19931002..20101002');
+	my $msgs = $query->('d:19931002..20101002');
 	ok(scalar(@$msgs) > 0, 'got results within range');
-	$msgs = $ro->query('d:20101003..');
+	$msgs = $query->('d:20101003..');
 	is(scalar(@$msgs), 0, 'nothing after 20101003');
-	$msgs = $ro->query('d:..19931001');
+	$msgs = $query->('d:..19931001');
 	is(scalar(@$msgs), 0, 'nothing before 19931001');
 }
 
@@ -314,8 +315,7 @@ $ibx->with_umask(sub {
 	my $doc_id = $rw->add_message($mime);
 	ok($doc_id > 0, 'message indexed doc_id with UTF-8');
 	$rw_commit->();
-	my $msg = $ro->reopen->
-		query('m:testmessage@example.com', {limit => 1})->[0];
+	my $msg = $query->('m:testmessage@example.com', {limit => 1})->[0];
 	is(defined($msg), 1, 'found testmessage@example.com');
 	if (defined $msg) {
 		is($mime->header('Subject'), $msg->{subject},
@@ -325,7 +325,7 @@ $ibx->with_umask(sub {
 
 # names and addresses
 {
-	my $mset = $ro->query('t:list@example.com', {mset => 1});
+	my $mset = $ibx->search->mset('t:list@example.com');
 	is($mset->size, 9, 'searched To: successfully');
 	foreach my $m ($mset->items) {
 		my $smsg = $ibx->over->get_art($m->get_docid);
@@ -343,7 +343,7 @@ $ibx->with_umask(sub {
 		is($uid, $m->get_docid, 'UID column matches docid');
 	}
 
-	$mset = $ro->query('tc:list@example.com', {mset => 1});
+	$mset = $ibx->search->mset('tc:list@example.com');
 	is($mset->size, 9, 'searched To+Cc: successfully');
 	foreach my $m ($mset->items) {
 		my $smsg = $ibx->over->get_art($m->get_docid);
@@ -352,7 +352,7 @@ $ibx->with_umask(sub {
 	}
 
 	foreach my $pfx ('tcf:', 'c:') {
-		my $mset = $ro->query($pfx . 'foo@example.com', { mset => 1 });
+		my $mset = $ibx->search->mset($pfx . 'foo@example.com');
 		is($mset->items, 1, "searched $pfx successfully for Cc:");
 		foreach my $m ($mset->items) {
 			my $smsg = $ibx->over->get_art($m->get_docid);
@@ -362,7 +362,7 @@ $ibx->with_umask(sub {
 	}
 
 	foreach my $pfx ('', 'tcf:', 'f:') {
-		my $res = $ro->query($pfx . 'Laggy');
+		my $res = $query->($pfx . 'Laggy');
 		is(scalar(@$res), 1,
 			"searched $pfx successfully for From:");
 		foreach my $smsg (@$res) {
@@ -374,25 +374,24 @@ $ibx->with_umask(sub {
 
 {
 	$rw_commit->();
-	$ro->reopen;
-	my $res = $ro->query('b:hello');
+	my $res = $query->('b:hello');
 	is(scalar(@$res), 0, 'no match on body search only');
-	$res = $ro->query('bs:smith');
+	$res = $query->('bs:smith');
 	is(scalar(@$res), 0,
 		'no match on body+subject search for From');
 
-	$res = $ro->query('q:theatre');
+	$res = $query->('q:theatre');
 	is(scalar(@$res), 1, 'only one quoted body');
 	like($res->[0]->{from_name}, qr/\AQuoter/,
 		'got quoted body') if (scalar(@$res));
 
-	$res = $ro->query('nq:theatre');
+	$res = $query->('nq:theatre');
 	is(scalar @$res, 1, 'only one non-quoted body');
 	like($res->[0]->{from_name}, qr/\ANon-Quoter/,
 		'got non-quoted body') if (scalar(@$res));
 
 	foreach my $pfx (qw(b: bs:)) {
-		$res = $ro->query($pfx . 'theatre');
+		$res = $query->($pfx . 'theatre');
 		is(scalar @$res, 2, "searched both bodies for $pfx");
 		like($res->[0]->{from_name}, qr/\ANon-Quoter/,
 			"non-quoter first for $pfx") if scalar(@$res);
@@ -405,14 +404,13 @@ $ibx->with_umask(sub {
 	my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
 	ok($rw->add_message($amsg, $smsg), 'added attachment');
 	$rw_commit->();
-	$ro->reopen;
-	my $n = $ro->query('n:attached_fart.txt');
+	my $n = $query->('n:attached_fart.txt');
 	is(scalar @$n, 1, 'got result for n:');
-	my $res = $ro->query('part_deux.txt');
+	my $res = $query->('part_deux.txt');
 	is(scalar @$res, 1, 'got result without n:');
 	is($n->[0]->{mid}, $res->[0]->{mid},
 		'same result with and without') if scalar(@$res);
-	my $txt = $ro->query('"inside another"');
+	my $txt = $query->('"inside another"');
 	is(scalar @$txt, 1, 'found inside another');
 	is($txt->[0]->{mid}, $res->[0]->{mid},
 		'search inside text attachments works') if scalar(@$txt);
@@ -459,8 +457,7 @@ $ibx->with_umask(sub {
 	my $digits = '10010260936330';
 	my $ua = 'Pine.LNX.4.10';
 	my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
-	is($ro->reopen->query("m:$digits", { mset => 1})->size, 0,
-		'no results yet');
+	is($ibx->search->mset("m:$digits")->size, 0, 'no results yet');
 	my $pine = PublicInbox::Eml->new(<<EOF);
 Subject: blah
 Message-ID: <$mid>
@@ -470,44 +467,45 @@ To: list\@example.com
 EOF
 	my $x = $rw->add_message($pine);
 	$rw->commit_txn_lazy;
-	is($ro->reopen->query("m:$digits", { mset => 1})->size, 1,
+	$ibx->search->reopen;
+	is($ibx->search->mset("m:$digits")->size, 1,
 		'searching only digit yielded result');
 
 	my $wild = $digits;
 	for my $i (1..6) {
 		chop($wild);
-		is($ro->query("m:$wild*", { mset => 1})->size, 1,
+		is($ibx->search->mset("m:$wild*")->size, 1,
 			"searching chopped($i) digit yielded result $wild ");
 	}
-	is($ro->query("m:Pine m:LNX m:10010260936330", {mset=>1})->size, 1);
+	is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1);
 });
 
 { # List-Id searching
-	my $found = $ro->query('lid:i.m.just.bored');
+	my $found = $query->('lid:i.m.just.bored');
 	is_deeply([ filter_mids($found) ], [ 'root@s' ],
 		'got expected mid on exact lid: search');
 
-	$found = $ro->query('lid:just.bored');
+	$found = $query->('lid:just.bored');
 	is_deeply($found, [], 'got nothing on lid: search');
 
-	$found = $ro->query('lid:*.just.bored');
+	$found = $query->('lid:*.just.bored');
 	is_deeply($found, [], 'got nothing on lid: search');
 
-	$found = $ro->query('l:i.m.just.bored');
+	$found = $query->('l:i.m.just.bored');
 	is_deeply([ filter_mids($found) ], [ 'root@s' ],
 		'probabilistic search works on full List-Id contents');
 
-	$found = $ro->query('l:just.bored');
+	$found = $query->('l:just.bored');
 	is_deeply([ filter_mids($found) ], [ 'root@s' ],
 		'probabilistic search works on partial List-Id contents');
 
-	$found = $ro->query('lid:mad');
+	$found = $query->('lid:mad');
 	is_deeply($found, [], 'no match on phrase with lid:');
 
-	$found = $ro->query('lid:bored');
+	$found = $query->('lid:bored');
 	is_deeply($found, [], 'no match on partial List-Id with lid:');
 
-	$found = $ro->query('l:nothing');
+	$found = $query->('l:nothing');
 	is_deeply($found, [], 'matched on phrase with l:');
 }
 
@@ -516,22 +514,22 @@ $ibx->with_umask(sub {
 	my $doc_id = $rw->add_message(eml_load('t/data/message_embed.eml'));
 	ok($doc_id > 0, 'messages within messages');
 	$rw->commit_txn_lazy;
-	$ro->reopen;
-	my $n_test_eml = $ro->query('n:test.eml');
+	$ibx->search->reopen;
+	my $n_test_eml = $query->('n:test.eml');
 	is(scalar(@$n_test_eml), 1, 'got a result');
-	my $n_embed2x_eml = $ro->query('n:embed2x.eml');
+	my $n_embed2x_eml = $query->('n:embed2x.eml');
 	is_deeply($n_test_eml, $n_embed2x_eml, '.eml filenames searchable');
 	for my $m (qw(20200418222508.GA13918@dcvr 20200418222020.GA2745@dcvr
 			20200418214114.7575-1-e@yhbt.net)) {
-		is($ro->query("m:$m")->[0]->{mid},
+		is($query->("m:$m")->[0]->{mid},
 			'20200418222508.GA13918@dcvr', 'probabilistic m:'.$m);
-		is($ro->query("mid:$m")->[0]->{mid},
+		is($query->("mid:$m")->[0]->{mid},
 			'20200418222508.GA13918@dcvr', 'boolean mid:'.$m);
 	}
-	is($ro->query('dfpost:4dc62c50')->[0]->{mid},
+	is($query->('dfpost:4dc62c50')->[0]->{mid},
 		'20200418222508.GA13918@dcvr',
 		'diff search reaches inside message/rfc822');
-	is($ro->query('s:"mail header experiments"')->[0]->{mid},
+	is($query->('s:"mail header experiments"')->[0]->{mid},
 		'20200418222508.GA13918@dcvr',
 		'Subject search reaches inside message/rfc822');
 });
diff --git a/t/v1reindex.t b/t/v1reindex.t
index a5c85ffb..e66d89e5 100644
--- a/t/v1reindex.t
+++ b/t/v1reindex.t
@@ -178,7 +178,7 @@ ok(!-d $xap, 'Xapian directories removed again');
 	delete $ibx->{mm};
 	is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
 	is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
-	my $mset = $ibx->search->query('hello world', {mset=>1});
+	my $mset = $ibx->search->mset('hello world');
 	isnt($mset->size, 0, 'got Xapian search results');
 
 	my ($min, $max) = $ibx->mm->minmax;
@@ -224,7 +224,7 @@ ok(!-d $xap, 'Xapian directories removed again');
 	eval { $rw->index_sync({reindex => 1}) };
 	is($@, '', 'no error from indexing');
 	is_deeply(\@warn, [], 'no warnings');
-	my $mset = $ibx->search->reopen->query('hello world', {mset=>1});
+	my $mset = $ibx->search->reopen->mset('hello world');
 	isnt($mset->size, 0, 'search OK after basic -> medium');
 
 	is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
diff --git a/t/v2mda.t b/t/v2mda.t
index 2262c3ad..abbdc8e4 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -85,10 +85,12 @@ is($eml->as_string, $mime->as_string, 'injected message');
 	open my $fh, '<', $patch or die "failed to open $patch: $!\n";
 	$rdr->{0} = \(do { local $/; <$fh> });
 	ok(run_script(['-mda'], undef, $rdr), 'mda delivered a patch');
-	my $post = $ibx->search->reopen->query('dfpost:6e006fd7');
-	is(scalar(@$post), 1, 'got one result for dfpost');
-	my $pre = $ibx->search->query('dfpre:090d998');
-	is(scalar(@$pre), 1, 'got one result for dfpre');
+	my $post = $ibx->search->reopen->mset('dfpost:6e006fd7');
+	is($post->size, 1, 'got one result for dfpost');
+	my $pre = $ibx->search->mset('dfpre:090d998');
+	is($pre->size, 1, 'got one result for dfpre');
+	$pre = $ibx->search->mset_to_smsg($ibx, $pre);
+	$post = $ibx->search->mset_to_smsg($ibx, $post);
 	is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases');
 }
 
diff --git a/t/v2mirror.t b/t/v2mirror.t
index bca43fd5..81b9544d 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -112,11 +112,11 @@ my $fetch_each_epoch = sub {
 
 $fetch_each_epoch->();
 
-my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
+my $mset = $mibx->search->reopen->mset('m:15@example.com');
 is(scalar($mset->items), 0, 'new message not found in mirror, yet');
 ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated');
 is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax');
-$mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
+$mset = $mibx->search->reopen->mset('m:15@example.com');
 is(scalar($mset->items), 1, 'found message in mirror');
 
 # purge:
@@ -137,7 +137,7 @@ $v2w->done;
 my $msgs = $mibx->over->get_thread('10@example.com');
 my $to_purge = $msgs->[0]->{blob};
 like($to_purge, qr/\A[a-f0-9]{40,}\z/, 'read blob to be purged');
-$mset = $ibx->search->reopen->query('m:10@example.com', {mset => 1});
+$mset = $ibx->search->reopen->mset('m:10@example.com');
 is(scalar($mset->items), 0, 'purged message gone from origin');
 
 $fetch_each_epoch->();
@@ -153,11 +153,11 @@ $fetch_each_epoch->();
 	unlike($err, qr/fatal/, 'no scary fatal error shown');
 }
 
-$mset = $mibx->search->reopen->query('m:10@example.com', {mset => 1});
+$mset = $mibx->search->reopen->mset('m:10@example.com');
 is(scalar($mset->items), 0, 'purged message not found in mirror');
 is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'minmax still synced');
 for my $i ((1..9),(11..15)) {
-	$mset = $mibx->search->query("m:$i\@example.com", {mset => 1});
+	$mset = $mibx->search->mset("m:$i\@example.com");
 	is(scalar($mset->items), 1, "$i\@example.com remains visible");
 }
 is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror');
@@ -171,7 +171,7 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror');
 
 # deletes happen in a different fetch window
 {
-	$mset = $mibx->search->reopen->query('m:1@example.com', {mset => 1});
+	$mset = $mibx->search->reopen->mset('m:1@example.com');
 	is(scalar($mset->items), 1, '1@example.com visible in mirror');
 	$mime->header_set('Message-ID', '<1@example.com>');
 	$mime->header_set('Subject', 'subject = 1');
@@ -186,12 +186,12 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror');
 	my $opt = { 1 => \$out, 2 => \$err };
 	ok(run_script($cmd, undef, $opt), 'index ran');
 	is($err, '', 'no errors reported by index');
-	$mset = $mibx->search->reopen->query('m:1@example.com', {mset => 1});
+	$mset = $mibx->search->reopen->mset('m:1@example.com');
 	is(scalar($mset->items), 0, '1@example.com no longer visible in mirror');
 }
 
 if ('sequential-shard') {
-	$mset = $mibx->search->query('m:15@example.com', {mset => 1});
+	$mset = $mibx->search->mset('m:15@example.com');
 	is(scalar($mset->items), 1, 'large message not indexed');
 	remove_tree(glob("$tmpdir/m/xap*"), glob("$tmpdir/m/msgmap.*"));
 	my $cmd = [ qw(-index -j9 --sequential-shard), "$tmpdir/m" ];
@@ -199,7 +199,7 @@ if ('sequential-shard') {
 	my @shards = glob("$tmpdir/m/xap*/?");
 	is(scalar(@shards), 8, 'got expected shard count');
 	PublicInbox::InboxWritable::cleanup($mibx);
-	$mset = $mibx->search->query('m:15@example.com', {mset => 1});
+	$mset = $mibx->search->mset('m:15@example.com');
 	is(scalar($mset->items), 1, 'search works after --sequential-shard');
 }
 
@@ -216,7 +216,7 @@ if ('max size') {
 	my $opt = { 2 => \(my $err) };
 	ok(run_script($cmd, undef, $opt), 'indexed with --max-size');
 	like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
-	$mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
+	$mset = $mibx->search->reopen->mset('m:2big@a');
 	is(scalar($mset->items), 0, 'large message not indexed');
 
 	{
@@ -230,7 +230,7 @@ EOF
 	$cmd = [ qw(-index -j0 --reindex), "$tmpdir/m" ];
 	ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file');
 	like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
-	$mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
+	$mset = $mibx->search->reopen->mset('m:2big@a');
 	is(scalar($mset->items), 0, 'large message not re-indexed');
 }
 
diff --git a/t/v2reindex.t b/t/v2reindex.t
index a2fc2075..ae1570ed 100644
--- a/t/v2reindex.t
+++ b/t/v2reindex.t
@@ -153,7 +153,7 @@ ok(!-d $xap, 'Xapian directories removed again');
 	delete $ibx->{mm};
 	is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
 	is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
-	my $mset = $ibx->search->query($phrase, {mset=>1});
+	my $mset = $ibx->search->mset($phrase);
 	isnt($mset->size, 0, "phrase search succeeds on indexlevel=full");
 	for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
 
@@ -184,12 +184,12 @@ ok(!-d $xap, 'Xapian directories removed again');
 		# not sure why, but Xapian seems to fallback to terms and
 		# phrase searches still work
 		delete $ibx->{search};
-		my $mset = $ibx->search->query($phrase, {mset=>1});
+		my $mset = $ibx->search->mset($phrase);
 		is($mset->size, 0, 'phrase search does not work on medium');
 	}
 	my $words = $phrase;
 	$words =~ tr/"'//d;
-	my $mset = $ibx->search->query($words, {mset=>1});
+	my $mset = $ibx->search->mset($words);
 	isnt($mset->size, 0, "normal search works on indexlevel=medium");
 	for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
 
@@ -531,7 +531,8 @@ EOF
 
 	my %uniq;
 	for my $s (qw(uno dos tres)) {
-		my $msgs = $ibx->search->query("s:$s");
+		my $mset = $ibx->search->mset("s:$s");
+		my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 		is(scalar(@$msgs), 1, "only one result for `$s'");
 		$uniq{$msgs->[0]->{num}}++;
 	}
diff --git a/t/v2writable.t b/t/v2writable.t
index 217eaf97..1de8c032 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -124,15 +124,14 @@ if ('ensure git configs are correct') {
 SELECT COUNT(*) FROM over WHERE num > 0
 
 	is($ibx->mm->num_highwater, $total, 'got expected highwater value');
-	my $srch = $ibx->search;
-	my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 });
+	my $mset1 = $ibx->search->reopen->mset('m:abcde@1');
 	is($mset1->size, 1, 'message found by first MID');
-	my $mset2 = $srch->reopen->query('m:abcde@2', { mset => 1 });
+	my $mset2 = $ibx->search->mset('m:abcde@2');
 	is($mset2->size, 1, 'message found by second MID');
 	is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid,
 		'same document') if ($mset1->size);
 
-	my $alt = $srch->reopen->query('m:alt-id-for-nntp', { mset => 1 });
+	my $alt = $ibx->search->mset('m:alt-id-for-nntp');
 	is($alt->size, 1, 'message found by alt MID (NNTP)');
 	is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid,
 		'same document') if ($mset1->size);
@@ -231,8 +230,7 @@ EOF
 	my $num = $smsg->{num};
 	like($num, qr/\A\d+\z/, 'numeric number in return message');
 	is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num');
-	my $srch = $ibx->search->reopen;
-	my $mset = $srch->query('m:'.$mid, { mset => 1});
+	my $mset = $ibx->search->reopen->mset('m:'.$mid);
 	is($mset->size, 0, 'no longer found in Xapian');
 	my @log1 = (@log, qw(-1 --pretty=raw --raw -r --no-renames));
 	is($ibx->over->get_art($num), undef,
diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t
index 4b72dbae..6513f30b 100644
--- a/t/watch_filter_rubylang.t
+++ b/t/watch_filter_rubylang.t
@@ -82,14 +82,13 @@ EOF
 	}
 
 	# make sure all serials are searchable:
-	my ($tot, $msgs);
 	for my $i (1..15) {
-		($tot, $msgs) = $ibx->search->query("alerts:$i");
-		is($tot, 1, "got one result for alerts:$i");
+		my $mset = $ibx->search->mset("alerts:$i");
+		is($mset->size, 1, "got one result for alerts:$i");
+		my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 		is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
 	}
-	($tot, undef) = $ibx->search->query('b:spam');
-	is($tot, 1, 'got spam message');
+	is($ibx->search->mset('b:spam')->size, 1, 'got spam message');
 
 	my $nr = unlink <$maildir/new/*>;
 	is(16, $nr);
@@ -104,8 +103,7 @@ EOF
 
 	$config = PublicInbox::Config->new(\$orig);
 	$ibx = $config->lookup_name($v);
-	($tot, undef) = $ibx->search->reopen->query('b:spam');
-	is($tot, 0, 'spam removed');
+	is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed');
 
 	is_deeply([], \@warn, 'no warnings');
 }
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index c2c096ae..12546418 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -130,12 +130,14 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	$msg = do { local $/; <$fh> };
 	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
 	PublicInbox::Watch->new($config)->scan('full');
-	my $msgs = $ibx->search->reopen->query('dfpost:6e006fd7');
-	is(scalar(@$msgs), 1, 'diff postimage found');
-	my $post = $msgs->[0];
-	$msgs = $ibx->search->query('dfpre:090d998b6c2c');
-	is(scalar(@$msgs), 1, 'diff preimage found');
-	is($post->{blob}, $msgs->[0]->{blob}, 'same message');
+	my $post = $ibx->search->reopen->mset('dfpost:6e006fd7');
+	is($post->size, 1, 'diff postimage found');
+	my $pre = $ibx->search->mset('dfpre:090d998b6c2c');
+	is($pre->size, 1, 'diff preimage found');
+	$pre = $ibx->search->mset_to_smsg($ibx, $pre);
+	$post = $ibx->search->mset_to_smsg($ibx, $post);
+	is(scalar(@$pre), 1, 'diff preimage found');
+	is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message');
 }
 
 # multiple inboxes in the same maildir
@@ -161,7 +163,8 @@ both
 EOF
 	PublicInbox::Emergency->new($maildir)->prepare(\$both);
 	PublicInbox::Watch->new($config)->scan('full');
-	my $msgs = $ibx->search->reopen->query('m:both@b.com');
+	my $mset = $ibx->search->reopen->mset('m:both@b.com');
+	my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 	my $v1 = $config->lookup_name('v1');
 	my $msg = $v1->git->cat_file($msgs->[0]->{blob});
 	is($both, $$msg, 'got original message back from v1');
diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t
index 1835fa62..c1af5d9a 100644
--- a/t/xcpdb-reshard.t
+++ b/t/xcpdb-reshard.t
@@ -49,7 +49,8 @@ for my $R (qw(2 4 1 3 3)) {
 	ok(run_script($cmd), "xcpdb -R$R");
 	my @new_shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*"));
 	is(scalar(@new_shards), $R, 'resharded to two shards');
-	my $msgs = $ibx->search->query('s:this');
+	my $mset = $ibx->search->mset('s:this');
+	my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
 	is(scalar(@$msgs), $ndoc, 'got expected docs after resharding');
 	my %by_mid = map {; "$_->{mid}" => $_ } @$msgs;
 	ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc);

  parent reply	other threads:[~2020-09-02 11:04 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-02 11:04 [PATCH 00/11] cleanups, mostly indexing related Eric Wong
2020-09-02 11:04 ` [PATCH 01/11] msgmap: note how we use ->created_at Eric Wong
2020-09-02 11:04 ` [PATCH 02/11] disambiguate OverIdx and Over by field name Eric Wong
2020-09-02 11:04 ` [PATCH 03/11] use more idiomatic internal API for ->over access Eric Wong
2020-09-02 11:04 ` [PATCH 04/11] search: remove special case for blank query Eric Wong
2020-09-02 11:04 ` [PATCH 05/11] tests: add "use strict" and declare v5.10.1 compatibility Eric Wong
2020-09-02 11:04 ` Eric Wong [this message]
2020-09-02 11:04 ` [PATCH 07/11] search: remove {over_ro} field Eric Wong
2020-09-02 11:04 ` [PATCH 08/11] imap: drop old, pre-Parse::RecDescent search parser Eric Wong
2020-09-02 11:04 ` [PATCH 09/11] wwwaltid: drop unused sqlite3_missing function Eric Wong
2020-09-02 11:04 ` [PATCH 10/11] overidx: document column uses Eric Wong
2020-09-02 11:04 ` [PATCH 11/11] v2writable: reuse read-only shard counting code Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200902110421.30905-7-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).