From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=unavailable autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2861E1FAEC; Thu, 29 Mar 2018 10:28:31 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Cc: "Eric Wong (Contractor, The Linux Foundation)" Subject: [PATCH 09/14] search: get rid of most lookup_* subroutines Date: Thu, 29 Mar 2018 10:28:14 +0000 Message-Id: <20180329102819.15234-10-e@80x24.org> In-Reply-To: <20180329102819.15234-1-e@80x24.org> References: <20180329102819.15234-1-e@80x24.org> List-Id: Too many similar functions doing the same basic thing was redundant and misleading, especially since Message-ID is no longer treated as a truly unique identifier. For displaying threads in the HTML, this makes it clear that we favor the primary Message-ID mapped to an NNTP article number if a message cannot be found. --- lib/PublicInbox/Inbox.pm | 22 ++++++------- lib/PublicInbox/Search.pm | 56 +++------------------------------ lib/PublicInbox/SearchThread.pm | 14 ++++----- lib/PublicInbox/SearchView.pm | 2 +- lib/PublicInbox/View.pm | 12 +++---- t/search-thr-index.t | 3 +- t/search.t | 6 ++-- 7 files changed, 35 insertions(+), 80 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4c7305f..01aa500 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -293,20 +293,20 @@ sub path_check { git($self)->check('HEAD:'.$path); } +sub smsg_by_mid ($$) { + my ($self, $mid) = @_; + my $srch = search($self) or return; + # favor the Message-ID we used for the NNTP article number: + my $mm = mm($self) or return; + my $num = $mm->num_for($mid); + $srch->lookup_article($num); +} + sub msg_by_mid ($$;$) { my ($self, $mid, $ref) = @_; my $srch = search($self) or - return msg_by_path($self, mid2path($mid), $ref); - my $smsg; - # favor the Message-ID we used for the NNTP article number: - if (my $mm = mm($self)) { - my $num = $mm->num_for($mid); - $smsg = $srch->lookup_article($num); - } else { - $smsg = $srch->retry_reopen(sub { - $srch->lookup_skeleton($mid) and $smsg->load_expand; - }); - } + return msg_by_path($self, mid2path($mid), $ref); + my $smsg = smsg_by_mid($self, $mid); $smsg ? msg_by_smsg($self, $smsg, $ref) : undef; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 584a508..7d42aaa 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -18,7 +18,7 @@ use constant YYYYMMDD => 5; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use PublicInbox::MIME; -use PublicInbox::MID qw/mid_clean id_compress/; +use PublicInbox::MID qw/id_compress/; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -193,9 +193,8 @@ sub query { sub get_thread { my ($self, $mid, $opts) = @_; - my $smsg = retry_reopen($self, sub { lookup_skeleton($self, $mid) }); - - return { total => 0, msgs => [] } unless $smsg; + my $smsg = first_smsg_by_mid($self, $mid) or + return { total => 0, msgs => [] }; my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id); my $path = $smsg->path; if (defined $path && $path ne '') { @@ -346,48 +345,13 @@ sub query_ts { _do_enquire($self, $query, $opts); } -sub lookup_skeleton { +sub first_smsg_by_mid { my ($self, $mid) = @_; - my $skel = $self->{skel} or return lookup_message($self, $mid); - $mid = mid_clean($mid); - my $term = 'Q' . $mid; my $smsg; - my $beg = $skel->postlist_begin($term); - if ($beg != $skel->postlist_end($term)) { - my $doc_id = $beg->get_docid; - if (defined $doc_id) { - # raises on error: - my $doc = $skel->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - } + each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef }); $smsg; } -sub lookup_message { - my ($self, $mid) = @_; - $mid = mid_clean($mid); - - my $doc_id = $self->find_first_doc_id('Q' . $mid); - my $smsg; - if (defined $doc_id) { - # raises on error: - my $doc = $self->{xdb}->get_document($doc_id); - $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - } - $smsg; -} - -sub lookup_mail { # no ghosts! - my ($self, $mid) = @_; - retry_reopen($self, sub { - my $smsg = lookup_skeleton($self, $mid) or return; - $smsg->load_expand; - }); -} - sub lookup_article { my ($self, $num) = @_; my $term = 'XNUM'.$num; @@ -447,16 +411,6 @@ sub find_doc_ids { ($db->postlist_begin($termval), $db->postlist_end($termval)); } -sub find_first_doc_id { - my ($self, $termval) = @_; - - my ($begin, $end) = $self->find_doc_ids($termval); - - return undef if $begin->equal($end); # not found - - $begin->get_docid; -} - # normalize subjects so they are suitable as pathnames for URLs # XXX: consider for removal sub subject_path { diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 6fbce15..1d250b4 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -22,15 +22,15 @@ use strict; use warnings; sub thread { - my ($messages, $ordersub, $srch) = @_; + my ($messages, $ordersub, $ibx) = @_; my $id_table = {}; _add_message($id_table, $_) foreach @$messages; my $rootset = [ grep { - !delete($_->{parent}) && $_->visible($srch) + !delete($_->{parent}) && $_->visible($ibx) } values %$id_table ]; $id_table = undef; $rootset = $ordersub->($rootset); - $_->order_children($ordersub, $srch) for @$rootset; + $_->order_children($ordersub, $ibx) for @$rootset; $rootset; } @@ -131,20 +131,20 @@ sub has_descendent { # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { - my ($self, $srch) = @_; - ($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) || + my ($self, $ibx) = @_; + ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) || (scalar values %{$self->{children}}); } sub order_children { - my ($cur, $ordersub, $srch) = @_; + my ($cur, $ordersub, $ibx) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); while (defined($cur = shift @q)) { my $c = $cur->{children}; # The hashref here... - $c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ]; + $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ]; $c = $ordersub->($c) if scalar @$c > 1; $cur->{children} = $c; # ...becomes an arrayref push @q, @$c; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 1a8fe7f..c789795 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -228,7 +228,7 @@ sub mset_thread { my $r = $q->{r}; my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds, - $srch); + $ctx); my $skel = search_nav_bot($mset, $q). "
";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aad860e..f5b278c 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -430,7 +430,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs, $srch);
+	my $rootset = thread_results($ctx, $msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -686,7 +686,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
+	walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -809,9 +809,9 @@ sub load_results {
 }
 
 sub thread_results {
-	my ($msgs, $srch) = @_;
+	my ($ctx, $msgs) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch);
+	PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
 }
 
 sub missing_thread {
@@ -952,7 +952,7 @@ sub acc_topic {
 	my ($ctx, $level, $node) = @_;
 	my $srch = $ctx->{srch};
 	my $mid = $node->{id};
-	my $x = $node->{smsg} || $srch->lookup_mail($mid);
+	my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
 	my ($subj, $ds);
 	my $topic;
 	if ($x) {
@@ -1078,7 +1078,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
+		walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index 6c6e4c5..9549976 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -4,6 +4,7 @@ use strict;
 use warnings;
 use Test::More;
 use File::Temp qw/tempdir/;
+use PublicInbox::MID qw(mids);
 use Email::MIME;
 eval { require PublicInbox::SearchIdx; };
 plan skip_all => "Xapian missing for search" if $@;
@@ -41,7 +42,7 @@ foreach (reverse split(/\n\n/, $data)) {
 	$mime->header_set('From' => 'bw@g');
 	$mime->header_set('To' => 'git@vger.kernel.org');
 	my $bytes = bytes::length($mime->as_string);
-	my $mid = $mime->header('Message-Id');
+	my $mid = mids($mime->header_obj)->[0];
 	my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid);
 	push @mids, $mid;
 	ok($doc_id, 'message added: '. $mid);
diff --git a/t/search.t b/t/search.t
index 6b1aa2a..ccf0f74 100644
--- a/t/search.t
+++ b/t/search.t
@@ -89,7 +89,7 @@ sub filter_mids {
 {
 	$rw_commit->();
 	$ro->reopen;
-	my $found = $ro->lookup_message('');
+	my $found = $ro->first_smsg_by_mid('root@s');
 	ok($found, "message found");
 	is($root_id, $found->{doc_id}, 'doc_id set correctly');
 	is($found->mid, 'root@s', 'mid set correctly');
@@ -264,7 +264,7 @@ sub filter_mids {
 		],
 		body => "LOOP!\n"));
 	ok($doc_id > 0, "doc_id defined with circular reference");
-	my $smsg = $rw->lookup_message('circle@a');
+	my $smsg = $rw->first_smsg_by_mid('circle@a');
 	is($smsg->references, '', "no references created");
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 	is($s, $msg->subject, 'long subject not rewritten');
@@ -281,7 +281,7 @@ sub filter_mids {
 	my $mime = Email::MIME->new($str);
 	my $doc_id = $rw->add_message($mime);
 	ok($doc_id > 0, 'message indexed doc_id with UTF-8');
-	my $smsg = $rw->lookup_message('testmessage@example.com');
+	my $smsg = $rw->first_smsg_by_mid('testmessage@example.com');
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 
 	is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
-- 
EW