From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Cc: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
Subject: [PATCH 09/14] search: get rid of most lookup_* subroutines
Date: Thu, 29 Mar 2018 10:28:14 +0000 [thread overview]
Message-ID: <20180329102819.15234-10-e@80x24.org> (raw)
In-Reply-To: <20180329102819.15234-1-e@80x24.org>
Too many similar functions doing the same basic thing was
redundant and misleading, especially since Message-ID is
no longer treated as a truly unique identifier.
For displaying threads in the HTML, this makes it clear
that we favor the primary Message-ID mapped to an NNTP
article number if a message cannot be found.
---
lib/PublicInbox/Inbox.pm | 22 ++++++-------
lib/PublicInbox/Search.pm | 56 +++------------------------------
lib/PublicInbox/SearchThread.pm | 14 ++++-----
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/View.pm | 12 +++----
t/search-thr-index.t | 3 +-
t/search.t | 6 ++--
7 files changed, 35 insertions(+), 80 deletions(-)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4c7305f..01aa500 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -293,20 +293,20 @@ sub path_check {
git($self)->check('HEAD:'.$path);
}
+sub smsg_by_mid ($$) {
+ my ($self, $mid) = @_;
+ my $srch = search($self) or return;
+ # favor the Message-ID we used for the NNTP article number:
+ my $mm = mm($self) or return;
+ my $num = $mm->num_for($mid);
+ $srch->lookup_article($num);
+}
+
sub msg_by_mid ($$;$) {
my ($self, $mid, $ref) = @_;
my $srch = search($self) or
- return msg_by_path($self, mid2path($mid), $ref);
- my $smsg;
- # favor the Message-ID we used for the NNTP article number:
- if (my $mm = mm($self)) {
- my $num = $mm->num_for($mid);
- $smsg = $srch->lookup_article($num);
- } else {
- $smsg = $srch->retry_reopen(sub {
- $srch->lookup_skeleton($mid) and $smsg->load_expand;
- });
- }
+ return msg_by_path($self, mid2path($mid), $ref);
+ my $smsg = smsg_by_mid($self, $mid);
$smsg ? msg_by_smsg($self, $smsg, $ref) : undef;
}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 584a508..7d42aaa 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -18,7 +18,7 @@ use constant YYYYMMDD => 5; # for searching in the WWW UI
use Search::Xapian qw/:standard/;
use PublicInbox::SearchMsg;
use PublicInbox::MIME;
-use PublicInbox::MID qw/mid_clean id_compress/;
+use PublicInbox::MID qw/id_compress/;
# This is English-only, everything else is non-standard and may be confused as
# a prefix common in patch emails
@@ -193,9 +193,8 @@ sub query {
sub get_thread {
my ($self, $mid, $opts) = @_;
- my $smsg = retry_reopen($self, sub { lookup_skeleton($self, $mid) });
-
- return { total => 0, msgs => [] } unless $smsg;
+ my $smsg = first_smsg_by_mid($self, $mid) or
+ return { total => 0, msgs => [] };
my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
my $path = $smsg->path;
if (defined $path && $path ne '') {
@@ -346,48 +345,13 @@ sub query_ts {
_do_enquire($self, $query, $opts);
}
-sub lookup_skeleton {
+sub first_smsg_by_mid {
my ($self, $mid) = @_;
- my $skel = $self->{skel} or return lookup_message($self, $mid);
- $mid = mid_clean($mid);
- my $term = 'Q' . $mid;
my $smsg;
- my $beg = $skel->postlist_begin($term);
- if ($beg != $skel->postlist_end($term)) {
- my $doc_id = $beg->get_docid;
- if (defined $doc_id) {
- # raises on error:
- my $doc = $skel->get_document($doc_id);
- $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
- $smsg->{doc_id} = $doc_id;
- }
- }
+ each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef });
$smsg;
}
-sub lookup_message {
- my ($self, $mid) = @_;
- $mid = mid_clean($mid);
-
- my $doc_id = $self->find_first_doc_id('Q' . $mid);
- my $smsg;
- if (defined $doc_id) {
- # raises on error:
- my $doc = $self->{xdb}->get_document($doc_id);
- $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
- $smsg->{doc_id} = $doc_id;
- }
- $smsg;
-}
-
-sub lookup_mail { # no ghosts!
- my ($self, $mid) = @_;
- retry_reopen($self, sub {
- my $smsg = lookup_skeleton($self, $mid) or return;
- $smsg->load_expand;
- });
-}
-
sub lookup_article {
my ($self, $num) = @_;
my $term = 'XNUM'.$num;
@@ -447,16 +411,6 @@ sub find_doc_ids {
($db->postlist_begin($termval), $db->postlist_end($termval));
}
-sub find_first_doc_id {
- my ($self, $termval) = @_;
-
- my ($begin, $end) = $self->find_doc_ids($termval);
-
- return undef if $begin->equal($end); # not found
-
- $begin->get_docid;
-}
-
# normalize subjects so they are suitable as pathnames for URLs
# XXX: consider for removal
sub subject_path {
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 6fbce15..1d250b4 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -22,15 +22,15 @@ use strict;
use warnings;
sub thread {
- my ($messages, $ordersub, $srch) = @_;
+ my ($messages, $ordersub, $ibx) = @_;
my $id_table = {};
_add_message($id_table, $_) foreach @$messages;
my $rootset = [ grep {
- !delete($_->{parent}) && $_->visible($srch)
+ !delete($_->{parent}) && $_->visible($ibx)
} values %$id_table ];
$id_table = undef;
$rootset = $ordersub->($rootset);
- $_->order_children($ordersub, $srch) for @$rootset;
+ $_->order_children($ordersub, $ibx) for @$rootset;
$rootset;
}
@@ -131,20 +131,20 @@ sub has_descendent {
# a ghost Message-ID is the result of a long header line
# being folded/mangled by a MUA, and not a missing message.
sub visible ($$) {
- my ($self, $srch) = @_;
- ($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) ||
+ my ($self, $ibx) = @_;
+ ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) ||
(scalar values %{$self->{children}});
}
sub order_children {
- my ($cur, $ordersub, $srch) = @_;
+ my ($cur, $ordersub, $ibx) = @_;
my %seen = ($cur => 1); # self-referential loop prevention
my @q = ($cur);
while (defined($cur = shift @q)) {
my $c = $cur->{children}; # The hashref here...
- $c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ];
+ $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ];
$c = $ordersub->($c) if scalar @$c > 1;
$cur->{children} = $c; # ...becomes an arrayref
push @q, @$c;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 1a8fe7f..c789795 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -228,7 +228,7 @@ sub mset_thread {
my $r = $q->{r};
my $rootset = PublicInbox::SearchThread::thread($msgs,
$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds,
- $srch);
+ $ctx);
my $skel = search_nav_bot($mset, $q). "<pre>";
my $inbox = $ctx->{-inbox};
$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aad860e..f5b278c 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -430,7 +430,7 @@ sub thread_html {
$ctx->{mapping} = {};
$ctx->{s_nr} = "$nr+ messages in thread";
- my $rootset = thread_results($msgs, $srch);
+ my $rootset = thread_results($ctx, $msgs);
# reduce hash lookups in pre_thread->skel_dump
my $inbox = $ctx->{-inbox};
@@ -686,7 +686,7 @@ sub thread_skel {
# reduce hash lookups in skel_dump
my $ibx = $ctx->{-inbox};
$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
- walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
+ walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump);
$ctx->{parent_msg} = $parent;
}
@@ -809,9 +809,9 @@ sub load_results {
}
sub thread_results {
- my ($msgs, $srch) = @_;
+ my ($ctx, $msgs) = @_;
require PublicInbox::SearchThread;
- PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch);
+ PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
}
sub missing_thread {
@@ -952,7 +952,7 @@ sub acc_topic {
my ($ctx, $level, $node) = @_;
my $srch = $ctx->{srch};
my $mid = $node->{id};
- my $x = $node->{smsg} || $srch->lookup_mail($mid);
+ my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
my ($subj, $ds);
my $topic;
if ($x) {
@@ -1078,7 +1078,7 @@ sub index_topics {
my $nr = scalar @{$sres->{msgs}};
if ($nr) {
$sres = load_results($srch, $sres);
- walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
+ walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic);
}
$ctx->{-next_o} = $off+ $nr;
$ctx->{-cur_o} = $off;
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index 6c6e4c5..9549976 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -4,6 +4,7 @@ use strict;
use warnings;
use Test::More;
use File::Temp qw/tempdir/;
+use PublicInbox::MID qw(mids);
use Email::MIME;
eval { require PublicInbox::SearchIdx; };
plan skip_all => "Xapian missing for search" if $@;
@@ -41,7 +42,7 @@ foreach (reverse split(/\n\n/, $data)) {
$mime->header_set('From' => 'bw@g');
$mime->header_set('To' => 'git@vger.kernel.org');
my $bytes = bytes::length($mime->as_string);
- my $mid = $mime->header('Message-Id');
+ my $mid = mids($mime->header_obj)->[0];
my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid);
push @mids, $mid;
ok($doc_id, 'message added: '. $mid);
diff --git a/t/search.t b/t/search.t
index 6b1aa2a..ccf0f74 100644
--- a/t/search.t
+++ b/t/search.t
@@ -89,7 +89,7 @@ sub filter_mids {
{
$rw_commit->();
$ro->reopen;
- my $found = $ro->lookup_message('<root@s>');
+ my $found = $ro->first_smsg_by_mid('root@s');
ok($found, "message found");
is($root_id, $found->{doc_id}, 'doc_id set correctly');
is($found->mid, 'root@s', 'mid set correctly');
@@ -264,7 +264,7 @@ sub filter_mids {
],
body => "LOOP!\n"));
ok($doc_id > 0, "doc_id defined with circular reference");
- my $smsg = $rw->lookup_message('circle@a');
+ my $smsg = $rw->first_smsg_by_mid('circle@a');
is($smsg->references, '', "no references created");
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
is($s, $msg->subject, 'long subject not rewritten');
@@ -281,7 +281,7 @@ sub filter_mids {
my $mime = Email::MIME->new($str);
my $doc_id = $rw->add_message($mime);
ok($doc_id > 0, 'message indexed doc_id with UTF-8');
- my $smsg = $rw->lookup_message('testmessage@example.com');
+ my $smsg = $rw->first_smsg_by_mid('testmessage@example.com');
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
--
EW
next prev parent reply other threads:[~2018-03-29 10:28 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-29 10:28 [PATCH 00/14] purging support, v1 conversions, cleanups + more Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 01/14] www: remove unnecessary ghost checks Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 02/14] v2writable: append, instead of prepending generated Message-ID Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 03/14] lookup by Message-ID favors the "primary" one Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 04/14] www: fix attachment downloads for conflicted Message-IDs Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 05/14] searchmsg: document why we store To: and Cc: for NNTP Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 06/14] public-inbox-convert: tool for converting old to new inboxes Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 07/14] v2writable: support purging messages from git entirely Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 08/14] search: cleanup uniqueness checking Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-03-29 10:28 ` [PATCH 10/14] search: move find_doc_ids to searchidx Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 11/14] v2writable: cleanup: get rid of unused fields Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 12/14] mbox: avoid extracting Message-ID for linkification Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 13/14] www: cleanup expensive fallback for legacy URLs Eric Wong (Contractor, The Linux Foundation)
2018-03-29 10:28 ` [PATCH 14/14] view: get rid of some unnecessary imports Eric Wong (Contractor, The Linux Foundation)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180329102819.15234-10-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).