about summary refs log tree commit homepage
path: root/lib/PublicInbox/View.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2018-08-05 06:04:40 +0000
committerEric Wong <e@80x24.org>2018-08-05 06:12:33 +0000
commit0c22c3e3a99527ebf04a647ca287ea2fc9bdeddd (patch)
tree381fa5ea5c80452950c774fb278dc9f95cc0dfdc /lib/PublicInbox/View.pm
parent861bec7bec5908871e5b0ede244cb1e990a47403 (diff)
downloadpublic-inbox-0c22c3e3a99527ebf04a647ca287ea2fc9bdeddd.tar.gz
The "loose" (Subject:-based) thread matching yields too many
hits for some common subjects (e.g. "[GIT] Networking" on LKML)
and causes thread skeletons to not show the current messages.
Favor strict matches in the query and only add loose matches
if there's space.

While working on this, I noticed the backwards --reindex walk
breaks `tid' on v1 repositories, at least.  That bug was hidden
by the Subject: match logic and not discovered until now.  It
will be fixed separately.

Reported-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Diffstat (limited to 'lib/PublicInbox/View.pm')
-rw-r--r--lib/PublicInbox/View.pm63
1 files changed, 58 insertions, 5 deletions
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 58851edc..eb002aeb 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -365,7 +365,7 @@ sub walk_thread {
         while (@q) {
                 my ($level, $node, $i) = splice(@q, 0, 3);
                 defined $node or next;
-                $cb->($ctx, $level, $node, $i);
+                $cb->($ctx, $level, $node, $i) or return;
                 ++$level;
                 $i = 0;
                 unshift @q, map { ($level, $_, $i++) } @{$node->{children}};
@@ -818,10 +818,56 @@ sub indent_for {
         $level ? INDENT x ($level - 1) : '';
 }
 
+sub find_mid_root {
+        my ($ctx, $level, $node, $idx) = @_;
+        ++$ctx->{root_idx} if $level == 0;
+        if ($node->{id} eq $ctx->{mid}) {
+                $ctx->{found_mid_at} = $ctx->{root_idx};
+                return 0;
+        }
+        1;
+}
+
+sub strict_loose_note ($) {
+        my ($nr) = @_;
+        my $msg =
+"  -- strict thread matches above, loose matches on Subject: below --\n";
+
+        if ($nr > PublicInbox::Over::DEFAULT_LIMIT()) {
+                $msg .=
+"  -- use mbox.gz link to download all $nr messages --\n";
+        }
+        $msg;
+}
+
 sub thread_results {
         my ($ctx, $msgs) = @_;
         require PublicInbox::SearchThread;
-        PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
+        my $ibx = $ctx->{-inbox};
+        my $rootset = PublicInbox::SearchThread::thread($msgs, *sort_ds, $ibx);
+
+        # FIXME: `tid' is broken on --reindex, so that needs to be fixed
+        # and preserved in the future.  This bug is hidden by `sid' matches
+        # in get_thread, so we never noticed it until now.  And even when
+        # reindexing is fixed, we'll keep this code until a SCHEMA_VERSION
+        # bump since reindexing is expensive and users may not do it
+
+        # loose threading could've returned too many results,
+        # put the root the message we care about at the top:
+        my $mid = $ctx->{mid};
+        if (defined($mid) && scalar(@$rootset) > 1) {
+                $ctx->{root_idx} = -1;
+                my $nr = scalar @$msgs;
+                walk_thread($rootset, $ctx, *find_mid_root);
+                my $idx = $ctx->{found_mid_at};
+                if (defined($idx) && $idx != 0) {
+                        my $tip = splice(@$rootset, $idx, 1);
+                        @$rootset = reverse @$rootset;
+                        unshift @$rootset, $tip;
+                        $ctx->{sl_note} = strict_loose_note($nr);
+                }
+        }
+        $rootset
 }
 
 sub missing_thread {
@@ -864,6 +910,10 @@ sub skel_dump {
         my $cur = $ctx->{cur};
         my $mid = $smsg->{mid};
 
+        if ($level == 0 && $ctx->{skel_dump_roots}++) {
+                $$dst .= delete $ctx->{sl_note} || '';
+        }
+
         my $f = ascii_html($smsg->from_name);
         my $obfs_ibx = $ctx->{-obfs_ibx};
         obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx;
@@ -882,7 +932,7 @@ sub skel_dump {
                         delete $ctx->{cur};
                         $$dst .= "<b>$d<a\nid=r\nhref=\"#t\">".
                                  "$attr [this message]</a></b>\n";
-                        return;
+                        return 1;
                 } else {
                         $ctx->{prev_msg} = $mid;
                 }
@@ -922,6 +972,7 @@ sub skel_dump {
                 $m = $ctx->{-upfx}.mid_escape($mid).'/';
         }
         $$dst .=  $d . "<a\nhref=\"$m\"$id>" . $end;
+        1;
 }
 
 sub _skel_ghost {
@@ -947,6 +998,7 @@ sub _skel_ghost {
         }
         my $dst = $ctx->{dst};
         $$dst .= $d;
+        1;
 }
 
 sub sort_ds {
@@ -973,7 +1025,7 @@ sub acc_topic {
                         $topic = [ $ds, 1, { $subj => $mid }, $subj ];
                         $ctx->{-cur_topic} = $topic;
                         push @{$ctx->{order}}, $topic;
-                        return;
+                        return 1;
                 }
 
                 $topic = $ctx->{-cur_topic}; # should never be undef
@@ -987,11 +1039,12 @@ sub acc_topic {
                 }
                 $seen->{$subj} = $mid; # latest for subject
         } else { # ghost message
-                return if $level != 0; # ignore child ghosts
+                return 1 if $level != 0; # ignore child ghosts
                 $topic = [ -666, 0, {} ];
                 $ctx->{-cur_topic} = $topic;
                 push @{$ctx->{order}}, $topic;
         }
+        1;
 }
 
 sub dump_topics {