user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Cc: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
Subject: [PATCH 09/14] search: get rid of most lookup_* subroutines
Date: Thu, 29 Mar 2018 10:28:14 +0000	[thread overview]
Message-ID: <20180329102819.15234-10-e@80x24.org> (raw)
In-Reply-To: <20180329102819.15234-1-e@80x24.org>

Too many similar functions doing the same basic thing was
redundant and misleading, especially since Message-ID is
no longer treated as a truly unique identifier.

For displaying threads in the HTML, this makes it clear
that we favor the primary Message-ID mapped to an NNTP
article number if a message cannot be found.
---
 lib/PublicInbox/Inbox.pm        | 22 ++++++-------
 lib/PublicInbox/Search.pm       | 56 +++------------------------------
 lib/PublicInbox/SearchThread.pm | 14 ++++-----
 lib/PublicInbox/SearchView.pm   |  2 +-
 lib/PublicInbox/View.pm         | 12 +++----
 t/search-thr-index.t            |  3 +-
 t/search.t                      |  6 ++--
 7 files changed, 35 insertions(+), 80 deletions(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4c7305f..01aa500 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -293,20 +293,20 @@ sub path_check {
 	git($self)->check('HEAD:'.$path);
 }
 
+sub smsg_by_mid ($$) {
+	my ($self, $mid) = @_;
+	my $srch = search($self) or return;
+	# favor the Message-ID we used for the NNTP article number:
+	my $mm = mm($self) or return;
+	my $num = $mm->num_for($mid);
+	$srch->lookup_article($num);
+}
+
 sub msg_by_mid ($$;$) {
 	my ($self, $mid, $ref) = @_;
 	my $srch = search($self) or
-			return msg_by_path($self, mid2path($mid), $ref);
-	my $smsg;
-	# favor the Message-ID we used for the NNTP article number:
-	if (my $mm = mm($self)) {
-		my $num = $mm->num_for($mid);
-		$smsg = $srch->lookup_article($num);
-	} else {
-		$smsg = $srch->retry_reopen(sub {
-			$srch->lookup_skeleton($mid) and $smsg->load_expand;
-		});
-	}
+		return msg_by_path($self, mid2path($mid), $ref);
+	my $smsg = smsg_by_mid($self, $mid);
 	$smsg ? msg_by_smsg($self, $smsg, $ref) : undef;
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 584a508..7d42aaa 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -18,7 +18,7 @@ use constant YYYYMMDD => 5; # for searching in the WWW UI
 use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
 use PublicInbox::MIME;
-use PublicInbox::MID qw/mid_clean id_compress/;
+use PublicInbox::MID qw/id_compress/;
 
 # This is English-only, everything else is non-standard and may be confused as
 # a prefix common in patch emails
@@ -193,9 +193,8 @@ sub query {
 
 sub get_thread {
 	my ($self, $mid, $opts) = @_;
-	my $smsg = retry_reopen($self, sub { lookup_skeleton($self, $mid) });
-
-	return { total => 0, msgs => [] } unless $smsg;
+	my $smsg = first_smsg_by_mid($self, $mid) or
+			return { total => 0, msgs => [] };
 	my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
 	my $path = $smsg->path;
 	if (defined $path && $path ne '') {
@@ -346,48 +345,13 @@ sub query_ts {
 	_do_enquire($self, $query, $opts);
 }
 
-sub lookup_skeleton {
+sub first_smsg_by_mid {
 	my ($self, $mid) = @_;
-	my $skel = $self->{skel} or return lookup_message($self, $mid);
-	$mid = mid_clean($mid);
-	my $term = 'Q' . $mid;
 	my $smsg;
-	my $beg = $skel->postlist_begin($term);
-	if ($beg != $skel->postlist_end($term)) {
-		my $doc_id = $beg->get_docid;
-		if (defined $doc_id) {
-			# raises on error:
-			my $doc = $skel->get_document($doc_id);
-			$smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
-			$smsg->{doc_id} = $doc_id;
-		}
-	}
+	each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef });
 	$smsg;
 }
 
-sub lookup_message {
-	my ($self, $mid) = @_;
-	$mid = mid_clean($mid);
-
-	my $doc_id = $self->find_first_doc_id('Q' . $mid);
-	my $smsg;
-	if (defined $doc_id) {
-		# raises on error:
-		my $doc = $self->{xdb}->get_document($doc_id);
-		$smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
-		$smsg->{doc_id} = $doc_id;
-	}
-	$smsg;
-}
-
-sub lookup_mail { # no ghosts!
-	my ($self, $mid) = @_;
-	retry_reopen($self, sub {
-		my $smsg = lookup_skeleton($self, $mid) or return;
-		$smsg->load_expand;
-	});
-}
-
 sub lookup_article {
 	my ($self, $num) = @_;
 	my $term = 'XNUM'.$num;
@@ -447,16 +411,6 @@ sub find_doc_ids {
 	($db->postlist_begin($termval), $db->postlist_end($termval));
 }
 
-sub find_first_doc_id {
-	my ($self, $termval) = @_;
-
-	my ($begin, $end) = $self->find_doc_ids($termval);
-
-	return undef if $begin->equal($end); # not found
-
-	$begin->get_docid;
-}
-
 # normalize subjects so they are suitable as pathnames for URLs
 # XXX: consider for removal
 sub subject_path {
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 6fbce15..1d250b4 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -22,15 +22,15 @@ use strict;
 use warnings;
 
 sub thread {
-	my ($messages, $ordersub, $srch) = @_;
+	my ($messages, $ordersub, $ibx) = @_;
 	my $id_table = {};
 	_add_message($id_table, $_) foreach @$messages;
 	my $rootset = [ grep {
-			!delete($_->{parent}) && $_->visible($srch)
+			!delete($_->{parent}) && $_->visible($ibx)
 		} values %$id_table ];
 	$id_table = undef;
 	$rootset = $ordersub->($rootset);
-	$_->order_children($ordersub, $srch) for @$rootset;
+	$_->order_children($ordersub, $ibx) for @$rootset;
 	$rootset;
 }
 
@@ -131,20 +131,20 @@ sub has_descendent {
 # a ghost Message-ID is the result of a long header line
 # being folded/mangled by a MUA, and not a missing message.
 sub visible ($$) {
-	my ($self, $srch) = @_;
-	($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) ||
+	my ($self, $ibx) = @_;
+	($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) ||
 	 (scalar values %{$self->{children}});
 }
 
 sub order_children {
-	my ($cur, $ordersub, $srch) = @_;
+	my ($cur, $ordersub, $ibx) = @_;
 
 	my %seen = ($cur => 1); # self-referential loop prevention
 	my @q = ($cur);
 	while (defined($cur = shift @q)) {
 		my $c = $cur->{children}; # The hashref here...
 
-		$c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ];
+		$c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ];
 		$c = $ordersub->($c) if scalar @$c > 1;
 		$cur->{children} = $c; # ...becomes an arrayref
 		push @q, @$c;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 1a8fe7f..c789795 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -228,7 +228,7 @@ sub mset_thread {
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
 		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds,
-		$srch);
+		$ctx);
 	my $skel = search_nav_bot($mset, $q). "<pre>";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aad860e..f5b278c 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -430,7 +430,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs, $srch);
+	my $rootset = thread_results($ctx, $msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -686,7 +686,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
+	walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -809,9 +809,9 @@ sub load_results {
 }
 
 sub thread_results {
-	my ($msgs, $srch) = @_;
+	my ($ctx, $msgs) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch);
+	PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
 }
 
 sub missing_thread {
@@ -952,7 +952,7 @@ sub acc_topic {
 	my ($ctx, $level, $node) = @_;
 	my $srch = $ctx->{srch};
 	my $mid = $node->{id};
-	my $x = $node->{smsg} || $srch->lookup_mail($mid);
+	my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
 	my ($subj, $ds);
 	my $topic;
 	if ($x) {
@@ -1078,7 +1078,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
+		walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index 6c6e4c5..9549976 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -4,6 +4,7 @@ use strict;
 use warnings;
 use Test::More;
 use File::Temp qw/tempdir/;
+use PublicInbox::MID qw(mids);
 use Email::MIME;
 eval { require PublicInbox::SearchIdx; };
 plan skip_all => "Xapian missing for search" if $@;
@@ -41,7 +42,7 @@ foreach (reverse split(/\n\n/, $data)) {
 	$mime->header_set('From' => 'bw@g');
 	$mime->header_set('To' => 'git@vger.kernel.org');
 	my $bytes = bytes::length($mime->as_string);
-	my $mid = $mime->header('Message-Id');
+	my $mid = mids($mime->header_obj)->[0];
 	my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid);
 	push @mids, $mid;
 	ok($doc_id, 'message added: '. $mid);
diff --git a/t/search.t b/t/search.t
index 6b1aa2a..ccf0f74 100644
--- a/t/search.t
+++ b/t/search.t
@@ -89,7 +89,7 @@ sub filter_mids {
 {
 	$rw_commit->();
 	$ro->reopen;
-	my $found = $ro->lookup_message('<root@s>');
+	my $found = $ro->first_smsg_by_mid('root@s');
 	ok($found, "message found");
 	is($root_id, $found->{doc_id}, 'doc_id set correctly');
 	is($found->mid, 'root@s', 'mid set correctly');
@@ -264,7 +264,7 @@ sub filter_mids {
 		],
 		body => "LOOP!\n"));
 	ok($doc_id > 0, "doc_id defined with circular reference");
-	my $smsg = $rw->lookup_message('circle@a');
+	my $smsg = $rw->first_smsg_by_mid('circle@a');
 	is($smsg->references, '', "no references created");
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 	is($s, $msg->subject, 'long subject not rewritten');
@@ -281,7 +281,7 @@ sub filter_mids {
 	my $mime = Email::MIME->new($str);
 	my $doc_id = $rw->add_message($mime);
 	ok($doc_id > 0, 'message indexed doc_id with UTF-8');
-	my $smsg = $rw->lookup_message('testmessage@example.com');
+	my $smsg = $rw->first_smsg_by_mid('testmessage@example.com');
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 
 	is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
-- 
EW


  parent reply	other threads:[~2018-03-29 10:28 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29 10:28 [PATCH 00/14] purging support, v1 conversions, cleanups + more Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 01/14] www: remove unnecessary ghost checks Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 02/14] v2writable: append, instead of prepending generated Message-ID Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 03/14] lookup by Message-ID favors the "primary" one Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 04/14] www: fix attachment downloads for conflicted Message-IDs Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 05/14] searchmsg: document why we store To: and Cc: for NNTP Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 06/14] public-inbox-convert: tool for converting old to new inboxes Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 07/14] v2writable: support purging messages from git entirely Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 08/14] search: cleanup uniqueness checking Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-03-29 10:28 ` [PATCH 10/14] search: move find_doc_ids to searchidx Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 11/14] v2writable: cleanup: get rid of unused fields Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 12/14] mbox: avoid extracting Message-ID for linkification Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 13/14] www: cleanup expensive fallback for legacy URLs Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 14/14] view: get rid of some unnecessary imports Eric Wong (Contractor, The Linux Foundation)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180329102819.15234-10-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).