user/dev discussion of public-inbox itself
* [PATCH] lei up: faster non-thread, single-source incremental query
From: Eric Wong @ 2023-10-02 14:58 UTC (permalink / raw)
  To: meta

When using isearch (that is, a v1/v2 inbox relying on extindex
for search), there's no guarantee that IMAP UIDs are in the
correct order with regard to Xapian docids.

Thus we must iterate through every UID (num) to check whether
it's suitable to display in a saved search.  The old grep filter
(used before commit a6fe84489127) was not effective since it
didn't account for the correspondence between the artnums and
$mset->items.

Fortunately, as of a6fe84489127 this bug merely manifests as
reduced performance.  Prior to that commit, it could cause
incorrect keywords and labels to be applied.

Unfortunately, this behavior is hard to test, so no test case
is included.
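
As an illustration of the correspondence problem (made-up
artnums and stand-in item names, not part of this patch),
compare pre-filtering the artnums against skipping inside the
loop:

use v5.12;
my $min = 3;                          # previous maxuid
my @ids = (3, 7, 9);                  # artnums from mset_to_artnums
my @items = qw(item_a item_b item_c); # stand-ins for $mset->items

# old: grep drops 3 from @ids, but @items is left untouched,
# so the shared index pairs the wrong item with each artnum
my $i = 0;
for my $n (grep { $_ > $min } @ids) {
	my $mitem = $items[$i++];
	say "old: $n => $mitem"; # 7 => item_a, 9 => item_b (wrong)
}

# new: keep @ids intact, advance the index before any skip
$i = 0;
for my $n (@ids) {
	my $mitem = $items[$i++];
	next if $n <= $min;
	say "new: $n => $mitem"; # 7 => item_b, 9 => item_c
}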

Followup-to: a6fe84489127 (lei up: fix missing -t/--threads matches w/ saved search)
---
 lib/PublicInbox/LeiXSearch.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 5f105567..4e0849e8 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -211,9 +211,10 @@ sub query_one_mset { # for --threads and l2m w/o sort
 			}
 		} else {
 			$first_ids = $ids;
-			my @items = $mset->items;
+			my @items = $mset->items; # parallel with @$ids
 			for my $n (@$ids) {
 				my $mitem = $items[$i++];
+				next if $n <= $min;
 				my $smsg = $over->get_art($n) or next;
 				next if $smsg->{bytes} == 0;
 				mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw;


* [PATCH] lei up: fix missing -t/--threads matches w/ saved search
From: Eric Wong @ 2023-10-01 22:29 UTC (permalink / raw)
  To: meta

We must not filter out seen docids from the mset itself; the
seen-docid cutoff must only be applied to the result of
over->expand_thread.
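
To illustrate with made-up docids and thread membership (not
part of this patch): filtering the mset up front drops a seen
thread root entirely, so its new replies are never found, while
bounding only the expanded members keeps them:

use v5.12;
my $min = 5;                  # maxuid recorded by the previous `lei up'
my @mset_ids = (2, 8);        # query matches; docid 2 was already seen
my %tid = (2 => [1, 2, 6], 8 => [8]); # thread members; 6 is a new reply

# old: grep the mset ids first; docid 2 is dropped, its thread is
# never expanded, and the new reply (6) is silently missed
my @old = map { @{$tid{$_}} } grep { $_ > $min } @mset_ids;
say "old: @old"; # 8

# new: keep every mset id, bound only the expanded thread members
# (analogous to passing {min} to over->expand_thread)
my @new = grep { $_ > $min } map { @{$tid{$_}} } @mset_ids;
say "new: @new"; # 6 8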
---
 lib/PublicInbox/LeiXSearch.pm | 34 +++++++++++++---------------------
 lib/PublicInbox/Over.pm       |  7 +++++--
 t/lei-q-save.t                | 19 +++++++++++++++++++
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 7f4911b3..5f105567 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -176,14 +176,10 @@ sub query_one_mset { # for --threads and l2m w/o sort
 	my $threads = $lei->{opt}->{threads} // 0;
 	my $fl = $threads > 1 ? 1 : undef;
 	my $lss = $lei->{lss};
-	my $maxk = "external.$dir.maxuid";
-	my $stop_at = $lss ? $lss->{-cfg}->{$maxk} : undef;
-	if (defined $stop_at) {
-		ref($stop_at) and
-			return warn("$maxk=$stop_at has multiple values\n");
-		($stop_at =~ /[^0-9]/) and
-			return warn("$maxk=$stop_at not numeric\n");
-	}
+	my $maxk = "external.$dir.maxuid"; # max of previous, so our min
+	my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0;
+	ref($min) and return warn("$maxk=$min has multiple values\n");
+	($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n");
 	my $first_ids;
 	do {
 		$mset = eval { $srch->mset($mo->{qstr}, $mo) };
@@ -192,29 +188,26 @@ sub query_one_mset { # for --threads and l2m w/o sort
 				$mset->get_matches_estimated);
 		wait_startq($lei); # wait for keyword updates
 		my $ids = $srch->mset_to_artnums($mset, $mo);
-		@$ids = grep { $_ > $stop_at } @$ids if defined($stop_at);
 		my $i = 0;
 		if ($threads) {
 			# copy $ids if $lss since over->expand_thread
 			# shifts @{$ctx->{ids}}
 			$first_ids = [ @$ids ] if $lss;
-			my $ctx = { ids => $ids };
-			my %n2item = map { ($ids->[$i++], $_) } $mset->items;
-			while ($over->expand_thread($ctx)) {
-				for my $n (@{$ctx->{xids}}) {
+			my $ctx = { ids => $ids, min => $min };
+			my %n2item = map { $ids->[$i++] => $_ } $mset->items;
+			while ($over->expand_thread($ctx)) { # fills {xids}
+				for my $n (@{delete $ctx->{xids}}) {
 					my $smsg = $over->get_art($n) or next;
-					my $mitem = delete $n2item{$n};
+					my $mi = delete $n2item{$n};
 					next if $smsg->{bytes} == 0;
-					if ($mitem && $can_kw) {
-						mitem_kw($srch, $smsg, $mitem,
-							$fl);
-					} elsif ($mitem && $fl) {
+					if ($mi && $can_kw) {
+						mitem_kw($srch, $smsg, $mi, $fl)
+					} elsif ($mi && $fl) {
 						# call ->xsmsg_vmd, later
 						$smsg->{lei_q_tt_flagged} = 1;
 					}
-					$each_smsg->($smsg, $mitem);
+					$each_smsg->($smsg, $mi);
 				}
-				@{$ctx->{xids}} = ();
 			}
 		} else {
 			$first_ids = $ids;
@@ -230,7 +223,6 @@ sub query_one_mset { # for --threads and l2m w/o sort
 	} while (_mset_more($mset, $mo));
 	_check_mset_limit($lei, $dir, $mset);
 	if ($lss && scalar(@$first_ids)) {
-		undef $stop_at;
 		my $max = $first_ids->[0];
 		$lss->cfg_set($maxk, $max);
 		undef $lss;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 82034b30..e3a8adb1 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -12,6 +12,7 @@ use DBD::SQLite;
 use PublicInbox::Smsg;
 use Compress::Zlib qw(uncompress);
 use constant DEFAULT_LIMIT => 1000;
+use List::Util (); # for max
 
 sub dbh_new {
 	my ($self, $rw) = @_;
@@ -198,10 +199,12 @@ ORDER BY $sort_col DESC
 }
 
 # strict `tid' matches, only, for thread-expanded mbox.gz search results
-# and future CLI interface
+# and lei
 # returns true if we have IDs, undef if not
 sub expand_thread {
 	my ($self, $ctx) = @_;
+	# previous maxuid for LeiSavedSearch is our min:
+	my $lss_min = $ctx->{min} // 0;
 	my $dbh = dbh($self);
 	do {
 		defined(my $num = $ctx->{ids}->[0]) or return;
@@ -214,7 +217,7 @@ SELECT num FROM over WHERE tid = ? AND num > ?
 ORDER BY num ASC LIMIT 1000
 
 			my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
-							$ctx->{prev} // 0);
+				List::Util::max($ctx->{prev} // 0, $lss_min));
 			if (scalar(@$xids)) {
 				$ctx->{prev} = $xids->[-1];
 				$ctx->{xids} = $xids;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 1d9d5a51..53311696 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -15,6 +15,7 @@ $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)}));
 my $cat_env = { VISUAL => 'cat', EDITOR => 'cat' };
 my $pre_existing = <<'EOF';
 From x Mon Sep 17 00:00:00 2001
+From: <x@example.com>
 Message-ID: <import-before@example.com>
 Subject: pre-existing
 Date: Sat, 02 Oct 2010 00:00:00 +0000
@@ -286,5 +287,23 @@ test_lei(sub {
 	is(eml_load($new[0])->header('Subject'), 'do not ever call, again',
 		'up retrieved correct message');
 
+	# --thread expansion
+	$d = "$home/thread-expand";
+	lei_ok(qw(q --no-external m:import-before@example.com -t -o), $d);
+	@orig = glob("$d/{new,cur}/*");
+	is(scalar(@orig), 1, 'one result so far');
+	lei_ok [ qw(import -Feml) ], undef, { 0 => \<<'EOM' };
+Date: Sun, 02 Oct 2023 00:00:00 +0000
+From: <x@example.com>
+In-Reply-To: <import-before@example.com>
+Message-ID: <reply1@example.com>
+Subject: reply1
+EOM
+
+	lei_ok qw(up), $d;
+	@new = glob("$d/{new,cur}/*");
+	is(scalar(@new), 2, 'got new message');
+	is_xdeeply([grep { $_ eq $orig[0] } @new], \@orig,
+		'original message preserved on up w/ threads');
 });
 done_testing;

