From 11707dae97d1f4638157cfee298464b2f2deeed4 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Thu, 29 Mar 2018 09:57:52 +0000 Subject: search: get rid of most lookup_* subroutines Too many similar functions doing the same basic thing was redundant and misleading, especially since Message-ID is no longer treated as a truly unique identifier. For displaying threads in the HTML, this makes it clear that we favor the primary Message-ID mapped to an NNTP article number if a message cannot be found. --- lib/PublicInbox/Inbox.pm | 22 ++++++++-------- lib/PublicInbox/Search.pm | 56 ++++------------------------------------- lib/PublicInbox/SearchThread.pm | 14 +++++------ lib/PublicInbox/SearchView.pm | 2 +- lib/PublicInbox/View.pm | 12 ++++----- 5 files changed, 30 insertions(+), 76 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4c7305f9..01aa500c 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -293,20 +293,20 @@ sub path_check { git($self)->check('HEAD:'.$path); } +sub smsg_by_mid ($$) { + my ($self, $mid) = @_; + my $srch = search($self) or return; + # favor the Message-ID we used for the NNTP article number: + my $mm = mm($self) or return; + my $num = $mm->num_for($mid); + $srch->lookup_article($num); +} + sub msg_by_mid ($$;$) { my ($self, $mid, $ref) = @_; my $srch = search($self) or - return msg_by_path($self, mid2path($mid), $ref); - my $smsg; - # favor the Message-ID we used for the NNTP article number: - if (my $mm = mm($self)) { - my $num = $mm->num_for($mid); - $smsg = $srch->lookup_article($num); - } else { - $smsg = $srch->retry_reopen(sub { - $srch->lookup_skeleton($mid) and $smsg->load_expand; - }); - } + return msg_by_path($self, mid2path($mid), $ref); + my $smsg = smsg_by_mid($self, $mid); $smsg ? msg_by_smsg($self, $smsg, $ref) : undef; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 584a508e..7d42aaad 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -18,7 +18,7 @@ use constant YYYYMMDD => 5; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use PublicInbox::MIME; -use PublicInbox::MID qw/mid_clean id_compress/; +use PublicInbox::MID qw/id_compress/; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -193,9 +193,8 @@ sub query { sub get_thread { my ($self, $mid, $opts) = @_; - my $smsg = retry_reopen($self, sub { lookup_skeleton($self, $mid) }); - - return { total => 0, msgs => [] } unless $smsg; + my $smsg = first_smsg_by_mid($self, $mid) or + return { total => 0, msgs => [] }; my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id); my $path = $smsg->path; if (defined $path && $path ne '') { @@ -346,48 +345,13 @@ sub query_ts { _do_enquire($self, $query, $opts); } -sub lookup_skeleton { +sub first_smsg_by_mid { my ($self, $mid) = @_; - my $skel = $self->{skel} or return lookup_message($self, $mid); - $mid = mid_clean($mid); - my $term = 'Q' . $mid; my $smsg; - my $beg = $skel->postlist_begin($term); - if ($beg != $skel->postlist_end($term)) { - my $doc_id = $beg->get_docid; - if (defined $doc_id) { - # raises on error: - my $doc = $skel->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - } + each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef }); $smsg; } -sub lookup_message { - my ($self, $mid) = @_; - $mid = mid_clean($mid); - - my $doc_id = $self->find_first_doc_id('Q' . $mid); - my $smsg; - if (defined $doc_id) { - # raises on error: - my $doc = $self->{xdb}->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - $smsg; -} - -sub lookup_mail { # no ghosts! - my ($self, $mid) = @_; - retry_reopen($self, sub { - my $smsg = lookup_skeleton($self, $mid) or return; - $smsg->load_expand; - }); -} - sub lookup_article { my ($self, $num) = @_; my $term = 'XNUM'.$num; @@ -447,16 +411,6 @@ sub find_doc_ids { ($db->postlist_begin($termval), $db->postlist_end($termval)); } -sub find_first_doc_id { - my ($self, $termval) = @_; - - my ($begin, $end) = $self->find_doc_ids($termval); - - return undef if $begin->equal($end); # not found - - $begin->get_docid; -} - # normalize subjects so they are suitable as pathnames for URLs # XXX: consider for removal sub subject_path { diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 6fbce15c..1d250b46 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -22,15 +22,15 @@ use strict; use warnings; sub thread { - my ($messages, $ordersub, $srch) = @_; + my ($messages, $ordersub, $ibx) = @_; my $id_table = {}; _add_message($id_table, $_) foreach @$messages; my $rootset = [ grep { - !delete($_->{parent}) && $_->visible($srch) + !delete($_->{parent}) && $_->visible($ibx) } values %$id_table ]; $id_table = undef; $rootset = $ordersub->($rootset); - $_->order_children($ordersub, $srch) for @$rootset; + $_->order_children($ordersub, $ibx) for @$rootset; $rootset; } @@ -131,20 +131,20 @@ sub has_descendent { # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { - my ($self, $srch) = @_; - ($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) || + my ($self, $ibx) = @_; + ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) || (scalar values %{$self->{children}}); } sub order_children { - my ($cur, $ordersub, $srch) = @_; + my ($cur, $ordersub, $ibx) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); while (defined($cur = shift @q)) { my $c = $cur->{children}; # The hashref here... - $c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ]; + $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ]; $c = $ordersub->($c) if scalar @$c > 1; $cur->{children} = $c; # ...becomes an arrayref push @q, @$c; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 1a8fe7f7..c7897958 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -228,7 +228,7 @@ sub mset_thread { my $r = $q->{r}; my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds, - $srch); + $ctx); my $skel = search_nav_bot($mset, $q). "
";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aad860e9..f5b278c2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -430,7 +430,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs, $srch);
+	my $rootset = thread_results($ctx, $msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -686,7 +686,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
+	walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -809,9 +809,9 @@ sub load_results {
 }
 
 sub thread_results {
-	my ($msgs, $srch) = @_;
+	my ($ctx, $msgs) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch);
+	PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
 }
 
 sub missing_thread {
@@ -952,7 +952,7 @@ sub acc_topic {
 	my ($ctx, $level, $node) = @_;
 	my $srch = $ctx->{srch};
 	my $mid = $node->{id};
-	my $x = $node->{smsg} || $srch->lookup_mail($mid);
+	my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
 	my ($subj, $ds);
 	my $topic;
 	if ($x) {
@@ -1078,7 +1078,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
+		walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
-- 
cgit v1.2.3-24-ge0c7