about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-10-01 22:29:07 +0000
committer Eric Wong <e@80x24.org> 2023-10-01 22:41:50 +0000
commit a6fe8448912719fcf453d112ae715865fe7dca12 (patch)
tree 6cdbddba42b83855d6acfbb8b1bed846270241e1
parent 0f8926b742f8d9943ac718a0733725c1e89120fa (diff)
download public-inbox-a6fe8448912719fcf453d112ae715865fe7dca12.tar.gz
We must not filter already-seen docids out of the mset itself;
filtering must only be applied to the result of over->expand_thread.
-rw-r--r-- lib/PublicInbox/LeiXSearch.pm 34
-rw-r--r-- lib/PublicInbox/Over.pm 7
-rw-r--r-- t/lei-q-save.t 19
3 files changed, 37 insertions, 23 deletions
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 7f4911b3..5f105567 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -176,14 +176,10 @@ sub query_one_mset { # for --threads and l2m w/o sort
         my $threads = $lei->{opt}->{threads} // 0;
         my $fl = $threads > 1 ? 1 : undef;
         my $lss = $lei->{lss};
-        my $maxk = "external.$dir.maxuid";
-        my $stop_at = $lss ? $lss->{-cfg}->{$maxk} : undef;
-        if (defined $stop_at) {
-                ref($stop_at) and
-                        return warn("$maxk=$stop_at has multiple values\n");
-                ($stop_at =~ /[^0-9]/) and
-                        return warn("$maxk=$stop_at not numeric\n");
-        }
+        my $maxk = "external.$dir.maxuid"; # max of previous, so our min
+        my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0;
+        ref($min) and return warn("$maxk=$min has multiple values\n");
+        ($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n");
         my $first_ids;
         do {
                 $mset = eval { $srch->mset($mo->{qstr}, $mo) };
@@ -192,29 +188,26 @@ sub query_one_mset { # for --threads and l2m w/o sort
                                 $mset->get_matches_estimated);
                 wait_startq($lei); # wait for keyword updates
                 my $ids = $srch->mset_to_artnums($mset, $mo);
-                @$ids = grep { $_ > $stop_at } @$ids if defined($stop_at);
                 my $i = 0;
                 if ($threads) {
                         # copy $ids if $lss since over->expand_thread
                         # shifts @{$ctx->{ids}}
                         $first_ids = [ @$ids ] if $lss;
-                        my $ctx = { ids => $ids };
-                        my %n2item = map { ($ids->[$i++], $_) } $mset->items;
-                        while ($over->expand_thread($ctx)) {
-                                for my $n (@{$ctx->{xids}}) {
+                        my $ctx = { ids => $ids, min => $min };
+                        my %n2item = map { $ids->[$i++] => $_ } $mset->items;
+                        while ($over->expand_thread($ctx)) { # fills {xids}
+                                for my $n (@{delete $ctx->{xids}}) {
                                         my $smsg = $over->get_art($n) or next;
-                                        my $mitem = delete $n2item{$n};
+                                        my $mi = delete $n2item{$n};
                                         next if $smsg->{bytes} == 0;
-                                        if ($mitem && $can_kw) {
-                                                mitem_kw($srch, $smsg, $mitem,
-                                                        $fl);
-                                        } elsif ($mitem && $fl) {
+                                        if ($mi && $can_kw) {
+                                                mitem_kw($srch, $smsg, $mi, $fl)
+                                        } elsif ($mi && $fl) {
                                                 # call ->xsmsg_vmd, later
                                                 $smsg->{lei_q_tt_flagged} = 1;
                                         }
-                                        $each_smsg->($smsg, $mitem);
+                                        $each_smsg->($smsg, $mi);
                                 }
-                                @{$ctx->{xids}} = ();
                         }
                 } else {
                         $first_ids = $ids;
@@ -230,7 +223,6 @@ sub query_one_mset { # for --threads and l2m w/o sort
         } while (_mset_more($mset, $mo));
         _check_mset_limit($lei, $dir, $mset);
         if ($lss && scalar(@$first_ids)) {
-                undef $stop_at;
                 my $max = $first_ids->[0];
                 $lss->cfg_set($maxk, $max);
                 undef $lss;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 82034b30..e3a8adb1 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -12,6 +12,7 @@ use DBD::SQLite;
 use PublicInbox::Smsg;
 use Compress::Zlib qw(uncompress);
 use constant DEFAULT_LIMIT => 1000;
+use List::Util (); # for max
 
 sub dbh_new {
         my ($self, $rw) = @_;
@@ -198,10 +199,12 @@ ORDER BY $sort_col DESC
 }
 
 # strict `tid' matches, only, for thread-expanded mbox.gz search results
-# and future CLI interface
+# and lei
 # returns true if we have IDs, undef if not
 sub expand_thread {
         my ($self, $ctx) = @_;
+        # previous maxuid for LeiSavedSearch is our min:
+        my $lss_min = $ctx->{min} // 0;
         my $dbh = dbh($self);
         do {
                 defined(my $num = $ctx->{ids}->[0]) or return;
@@ -214,7 +217,7 @@ SELECT num FROM over WHERE tid = ? AND num > ?
 ORDER BY num ASC LIMIT 1000
 
                         my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
-                                                        $ctx->{prev} // 0);
+                                List::Util::max($ctx->{prev} // 0, $lss_min));
                         if (scalar(@$xids)) {
                                 $ctx->{prev} = $xids->[-1];
                                 $ctx->{xids} = $xids;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 1d9d5a51..53311696 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -15,6 +15,7 @@ $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)}));
 my $cat_env = { VISUAL => 'cat', EDITOR => 'cat' };
 my $pre_existing = <<'EOF';
 From x Mon Sep 17 00:00:00 2001
+From: <x@example.com>
 Message-ID: <import-before@example.com>
 Subject: pre-existing
 Date: Sat, 02 Oct 2010 00:00:00 +0000
@@ -286,5 +287,23 @@ test_lei(sub {
         is(eml_load($new[0])->header('Subject'), 'do not ever call, again',
                 'up retrieved correct message');
 
+        # --thread expansion
+        $d = "$home/thread-expand";
+        lei_ok(qw(q --no-external m:import-before@example.com -t -o), $d);
+        @orig = glob("$d/{new,cur}/*");
+        is(scalar(@orig), 1, 'one result so far');
+        lei_ok [ qw(import -Feml) ], undef, { 0 => \<<'EOM' };
+Date: Sun, 02 Oct 2023 00:00:00 +0000
+From: <x@example.com>
+In-Reply-To: <import-before@example.com>
+Message-ID: <reply1@example.com>
+Subject: reply1
+EOM
+
+        lei_ok qw(up), $d;
+        @new = glob("$d/{new,cur}/*");
+        is(scalar(@new), 2, 'got new message');
+        is_xdeeply([grep { $_ eq $orig[0] } @new], \@orig,
+                'original message preserved on up w/ threads');
 });
 done_testing;