user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] search: try to fill in ghosts when generating thread skeleton
@ 2017-10-02 22:24 Eric Wong
  2017-10-03 19:43 ` [PATCH v2] " Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2017-10-02 22:24 UTC (permalink / raw)
  To: meta

Since we attempt to fill in threads by Subject, our thread
skeletons can cross actual thread IDs, leading to the
possibility of false ghosts showing up in the skeleton.
Try to fill in the ghosts as well as possible by performing
a message lookup.
---
 lib/PublicInbox/SearchThread.pm | 18 ++++++++++--------
 lib/PublicInbox/SearchView.pm   |  6 ++++--
 lib/PublicInbox/View.pm         |  8 ++++----
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 2966907..2e66099 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -22,14 +22,15 @@ use strict;
 use warnings;
 
 sub thread {
-	my ($messages, $ordersub) = @_;
+	my ($messages, $ordersub, $srch) = @_;
 	my $id_table = {};
 	_add_message($id_table, $_) foreach @$messages;
 	my $rootset = [ grep {
-		!delete($_->{parent}) && $_->visible } values %$id_table ];
+			!delete($_->{parent}) && $_->visible($srch)
+		} values %$id_table ];
 	$id_table = undef;
 	$rootset = $ordersub->($rootset);
-	$_->order_children($ordersub) for @$rootset;
+	$_->order_children($ordersub, $srch) for @$rootset;
 	$rootset;
 }
 
@@ -129,20 +130,21 @@ sub has_descendent {
 # Do not show/keep ghosts iff they have no children.  Sometimes
 # a ghost Message-ID is the result of a long header line
 # being folded/mangled by a MUA, and not a missing message.
-sub visible ($) {
-	my ($self) = @_;
-	$self->{smsg} || scalar values %{$self->{children}};
+sub visible ($$) {
+	my ($self, $srch) = @_;
+	($self->{smsg} ||= eval { $srch->lookup_message($self->{id})}) ||
+	 (scalar values %{$self->{children}});
 }
 
 sub order_children {
-	my ($cur, $ordersub) = @_;
+	my ($cur, $ordersub, $srch) = @_;
 
 	my %seen = ($cur => 1); # self-referential loop prevention
 	my @q = ($cur);
 	while (defined($cur = shift @q)) {
 		my $c = $cur->{children}; # The hashref here...
 
-		$c = [ grep { !$seen{$_}++ && visible($_) } values %$c ];
+		$c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ];
 		$c = $ordersub->($c) if scalar @$c > 1;
 		$cur->{children} = $c; # ...becomes an arrayref
 		push @q, @$c;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index a597403..c42cf2d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -207,7 +207,8 @@ sub sort_relevance {
 sub mset_thread {
 	my ($ctx, $mset, $q) = @_;
 	my %pct;
-	my $msgs = $ctx->{srch}->retry_reopen(sub { [ map {
+	my $srch = $ctx->{srch};
+	my $msgs = $srch->retry_reopen(sub { [ map {
 		my $i = $_;
 		my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
 		$pct{$smsg->mid} = $i->get_percent;
@@ -215,7 +216,8 @@ sub mset_thread {
 	} ($mset->items) ]});
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
-		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts);
+		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts,
+		$srch);
 	my $skel = search_nav_bot($mset, $q). "<pre>";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b39c820..bc45bfa 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -370,7 +370,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs);
+	my $rootset = thread_results($msgs, $srch);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -607,7 +607,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres), $ctx, *skel_dump);
+	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -736,7 +736,7 @@ sub msg_timestamp {
 }
 
 sub thread_results {
-	my ($msgs) = @_;
+	my ($msgs, $srch) = @_;
 	require PublicInbox::SearchThread;
 	PublicInbox::SearchThread::thread($msgs, *sort_ts);
 }
@@ -1000,7 +1000,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres), $ctx, *acc_topic);
+		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v2] search: try to fill in ghosts when generating thread skeleton
  2017-10-02 22:24 [PATCH] search: try to fill in ghosts when generating thread skeleton Eric Wong
@ 2017-10-03 19:43 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2017-10-03 19:43 UTC (permalink / raw)
  To: meta

Since we attempt to fill in threads by Subject, our thread
skeletons can cross actual thread IDs, leading to the
possibility of false ghosts showing up in the skeleton.
Try to fill in the ghosts as well as possible by performing
a message lookup.
---
 lib/PublicInbox/Search.pm       |  2 +-
 lib/PublicInbox/SearchMsg.pm    | 16 ++++++++++++++++
 lib/PublicInbox/SearchThread.pm | 18 ++++++++++--------
 lib/PublicInbox/SearchView.pm   |  6 ++++--
 lib/PublicInbox/View.pm         | 10 +++++-----
 5 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c7c5455..25ab8d5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -300,7 +300,7 @@ sub lookup_mail { # no ghosts!
 	my ($self, $mid) = @_;
 	retry_reopen($self, sub {
 		my $smsg = lookup_message($self, $mid) or return;
-		PublicInbox::SearchMsg->load_doc($smsg->{doc});
+		$smsg->load_expand;
 	});
 }
 
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a19d45d..84e2ad5 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -29,6 +29,22 @@ sub get_val ($$) {
 	Search::Xapian::sortable_unserialise($doc->get_value($col));
 }
 
+sub load_expand {
+	my ($self) = @_;
+	my $doc = $self->{doc};
+	my $data = $doc->get_data or return;
+	$self->{ts} = get_val($doc, &PublicInbox::Search::TS);
+	utf8::decode($data);
+	my ($subj, $from, $refs, $to, $cc, $blob) = split(/\n/, $data);
+	$self->{subject} = $subj;
+	$self->{from} = $from;
+	$self->{references} = $refs;
+	$self->{to} = $to;
+	$self->{cc} = $cc;
+	$self->{blob} = $blob;
+	$self;
+}
+
 sub load_doc {
 	my ($class, $doc) = @_;
 	my $data = $doc->get_data or return;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 2966907..6fbce15 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -22,14 +22,15 @@ use strict;
 use warnings;
 
 sub thread {
-	my ($messages, $ordersub) = @_;
+	my ($messages, $ordersub, $srch) = @_;
 	my $id_table = {};
 	_add_message($id_table, $_) foreach @$messages;
 	my $rootset = [ grep {
-		!delete($_->{parent}) && $_->visible } values %$id_table ];
+			!delete($_->{parent}) && $_->visible($srch)
+		} values %$id_table ];
 	$id_table = undef;
 	$rootset = $ordersub->($rootset);
-	$_->order_children($ordersub) for @$rootset;
+	$_->order_children($ordersub, $srch) for @$rootset;
 	$rootset;
 }
 
@@ -129,20 +130,21 @@ sub has_descendent {
 # Do not show/keep ghosts iff they have no children.  Sometimes
 # a ghost Message-ID is the result of a long header line
 # being folded/mangled by a MUA, and not a missing message.
-sub visible ($) {
-	my ($self) = @_;
-	$self->{smsg} || scalar values %{$self->{children}};
+sub visible ($$) {
+	my ($self, $srch) = @_;
+	($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) ||
+	 (scalar values %{$self->{children}});
 }
 
 sub order_children {
-	my ($cur, $ordersub) = @_;
+	my ($cur, $ordersub, $srch) = @_;
 
 	my %seen = ($cur => 1); # self-referential loop prevention
 	my @q = ($cur);
 	while (defined($cur = shift @q)) {
 		my $c = $cur->{children}; # The hashref here...
 
-		$c = [ grep { !$seen{$_}++ && visible($_) } values %$c ];
+		$c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ];
 		$c = $ordersub->($c) if scalar @$c > 1;
 		$cur->{children} = $c; # ...becomes an arrayref
 		push @q, @$c;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index a597403..c42cf2d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -207,7 +207,8 @@ sub sort_relevance {
 sub mset_thread {
 	my ($ctx, $mset, $q) = @_;
 	my %pct;
-	my $msgs = $ctx->{srch}->retry_reopen(sub { [ map {
+	my $srch = $ctx->{srch};
+	my $msgs = $srch->retry_reopen(sub { [ map {
 		my $i = $_;
 		my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
 		$pct{$smsg->mid} = $i->get_percent;
@@ -215,7 +216,8 @@ sub mset_thread {
 	} ($mset->items) ]});
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
-		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts);
+		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts,
+		$srch);
 	my $skel = search_nav_bot($mset, $q). "<pre>";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b39c820..ac7657a 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -370,7 +370,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs);
+	my $rootset = thread_results($msgs, $srch);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -607,7 +607,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres), $ctx, *skel_dump);
+	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -736,9 +736,9 @@ sub msg_timestamp {
 }
 
 sub thread_results {
-	my ($msgs) = @_;
+	my ($msgs, $srch) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ts);
+	PublicInbox::SearchThread::thread($msgs, *sort_ts, $srch);
 }
 
 sub missing_thread {
@@ -1000,7 +1000,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres), $ctx, *acc_topic);
+		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
-- 
EW

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-10-03 19:43 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-02 22:24 [PATCH] search: try to fill in ghosts when generating thread skeleton Eric Wong
2017-10-03 19:43 ` [PATCH v2] " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).