From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 11/11] view: depend on SearchMsg for Message-ID
Date: Tue, 27 Mar 2018 11:11:32 +0000 [thread overview]
Message-ID: <20180327111132.20681-12-e@80x24.org> (raw)
In-Reply-To: <20180327111132.20681-1-e@80x24.org>
Since we need to handle messages with multiple and duplicate
Message-ID headers, our thread skeleton display must account
for that.
Since we have a "preferred" Message-ID in case of conflicts,
use it as the UUID in an Atom feed so readers do not get
confused by conflicts.
---
lib/PublicInbox/Feed.pm | 36 ++++++++--------
lib/PublicInbox/Inbox.pm | 9 ++++
lib/PublicInbox/SearchMsg.pm | 6 +--
lib/PublicInbox/SearchView.pm | 14 +++---
lib/PublicInbox/View.pm | 93 ++++++++++++++++++----------------------
lib/PublicInbox/WWW.pm | 1 +
lib/PublicInbox/WwwAtomStream.pm | 9 ++--
t/psgi_v2.t | 26 +++++++++++
8 files changed, 109 insertions(+), 85 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 74d0bbd..f2285a6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,18 +8,18 @@ use warnings;
use PublicInbox::MIME;
use PublicInbox::View;
use PublicInbox::WwwAtomStream;
+use PublicInbox::SearchMsg; # this loads w/o Search::Xapian
# main function
sub generate {
my ($ctx) = @_;
- my $oids = recent_blobs($ctx);
- return _no_thread() unless @$oids;
+ my $msgs = recent_msgs($ctx);
+ return _no_thread() unless @$msgs;
- my $git = $ctx->{-inbox}->git;
+ my $ibx = $ctx->{-inbox};
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
- while (my $oid = shift @$oids) {
- my $msg = $git->cat_file($oid) or next;
- return PublicInbox::MIME->new($msg);
+ while (my $smsg = shift @$msgs) {
+ $ibx->smsg_mime($smsg) and return $smsg;
}
});
}
@@ -36,9 +36,8 @@ sub generate_thread_atom {
$ctx->{-html_url} = $html_url;
my $msgs = $res->{msgs};
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
- while (my $msg = shift @$msgs) {
- $msg = $ibx->msg_by_smsg($msg) and
- return PublicInbox::MIME->new($msg);
+ while (my $smsg = shift @$msgs) {
+ $ibx->smsg_mime($smsg) and return $smsg;
}
});
}
@@ -62,20 +61,19 @@ sub generate_html_index {
sub new_html {
my ($ctx) = @_;
- my $oids = recent_blobs($ctx);
- if (!@$oids) {
+ my $msgs = recent_msgs($ctx);
+ if (!@$msgs) {
return [404, ['Content-Type', 'text/plain'],
["No messages, yet\n"] ];
}
$ctx->{-html_tip} = '<pre>';
$ctx->{-upfx} = '';
$ctx->{-hr} = 1;
- my $git = $ctx->{-inbox}->git;
+ my $ibx = $ctx->{-inbox};
PublicInbox::WwwStream->response($ctx, 200, sub {
- while (my $oid = shift @$oids) {
- my $msg = $git->cat_file($oid) or next;
- my $m = PublicInbox::MIME->new($msg);
- my $more = scalar @$oids;
+ while (my $smsg = shift @$msgs) {
+ my $m = $ibx->smsg_mime($smsg) or next;
+ my $more = scalar @$msgs;
return PublicInbox::View::index_entry($m, $ctx, $more);
}
new_html_footer($ctx);
@@ -103,7 +101,7 @@ sub new_html_footer {
"<hr><pre>page: $next$latest</pre>";
}
-sub recent_blobs {
+sub recent_msgs {
my ($ctx) = @_;
my $ibx = $ctx->{-inbox};
my $max = $ibx->{feedmax};
@@ -119,7 +117,7 @@ sub recent_blobs {
my $res = $srch->query('', { limit => $max, offset => $o });
my $next = $o + $max;
$ctx->{next_page} = "o=$next" if $res->{total} >= $next;
- return [ map { $_->{blob} } @{$res->{msgs}} ];
+ return $res->{msgs};
}
my $hex = '[a-f0-9]';
@@ -171,7 +169,7 @@ sub recent_blobs {
}
$ctx->{next_page} = "r=$last_commit" if $last_commit;
- \@oids;
+ [ map { bless {blob => $_ }, 'PublicInbox::SearchMsg' } @oids ];
}
1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 666c81d..b1ea8dc 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -8,6 +8,7 @@ use warnings;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
use Devel::Peek qw(SvREFCNT);
+use PublicInbox::MIME;
my $cleanup_timer;
eval {
@@ -246,6 +247,14 @@ sub msg_by_smsg ($$;$) {
$str;
}
+sub smsg_mime {
+ my ($self, $smsg, $ref) = @_;
+ if (my $s = msg_by_smsg($self, $smsg, $ref)) {
+ $smsg->{mime} = PublicInbox::MIME->new($s);
+ return $smsg;
+ }
+}
+
sub path_check {
my ($self, $path) = @_;
git($self)->check('HEAD:'.$path);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b944868..e314fed 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -6,7 +6,6 @@
package PublicInbox::SearchMsg;
use strict;
use warnings;
-use Search::Xapian;
use PublicInbox::MID qw/mid_clean mid_mime/;
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
@@ -165,9 +164,10 @@ sub mid ($;$) {
$self->{mid} = $mid;
} elsif (my $rv = $self->{mid}) {
$rv;
+ } elsif ($self->{doc}) {
+ $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/);
} else {
- $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
- $self->_extract_mid;
+ $self->_extract_mid; # v1 w/o Xapian
}
}
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6e537b4..1a8fe7f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -248,15 +248,14 @@ sub mset_thread {
*PublicInbox::View::pre_thread);
@$msgs = reverse @$msgs if $r;
- my $mime;
sub {
return unless $msgs;
- while ($mime = pop @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
+ my $smsg;
+ while (my $m = pop @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
- if ($mime) {
- $mime = PublicInbox::MIME->new($mime);
- return PublicInbox::View::index_entry($mime, $ctx,
+ if ($smsg) {
+ return PublicInbox::View::index_entry($smsg, $ctx,
scalar @$msgs);
}
$msgs = undef;
@@ -290,8 +289,7 @@ sub adump {
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
while (my $x = shift @items) {
$x = load_doc_retry($srch, $x);
- $x = $ibx->msg_by_smsg($x) and
- return PublicInbox::MIME->new($x);
+ $x = $ibx->smsg_mime($x) and return $x;
}
return undef;
});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 34ab3e5..5fb2b31 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -9,7 +9,8 @@ use warnings;
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids
+ references/;
use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream;
@@ -69,30 +70,31 @@ sub msg_page {
$more = [ $head, $tail, $db ];
}
});
+ return unless $first;
} else {
$first = $ibx->msg_by_mid($mid) or return;
}
- $first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef;
+ msg_html($ctx, PublicInbox::MIME->new($first), $more);
}
sub msg_html_more {
my ($ctx, $more, $nr) = @_;
my $str = eval {
- my $mref;
+ my $smsg;
my ($head, $tail, $db) = @$more;
- for (; !defined($mref) && $head != $tail; $head++) {
- my $smsg = PublicInbox::SearchMsg->get($head, $db,
- $ctx->{mid});
- next if $smsg->type ne 'mail';
- $mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+ my $mid = $ctx->{mid};
+ for (; !defined($smsg) && $head != $tail; $head++) {
+ my $m = PublicInbox::SearchMsg->get($head, $db, $mid);
+ next if $m->type ne 'mail';
+ $smsg = $ctx->{-inbox}->smsg_mime($m);
}
if ($head == $tail) { # done
@$more = ();
} else {
$more->[0] = $head;
}
- if ($mref) {
- my $mime = PublicInbox::MIME->new($mref);
+ if ($smsg) {
+ my $mime = $smsg->{mime};
_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
multipart_text_as_html($mime, '',
$ctx->{-obfs_ibx}) .
@@ -167,14 +169,8 @@ EOF
sub in_reply_to {
my ($hdr) = @_;
- my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID');
- my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g);
- push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g));
- while (defined(my $irt = pop @refs)) {
- next if $mid{"<$irt>"};
- return $irt;
- }
- undef;
+ my $refs = references($hdr);
+ $refs->[-1];
}
sub _hdr_names_html ($$) {
@@ -191,12 +187,10 @@ sub nr_to_s ($$$) {
# this is already inside a <pre>
sub index_entry {
- my ($mime, $ctx, $more) = @_;
+ my ($smsg, $ctx, $more) = @_;
my $srch = $ctx->{srch};
- my $hdr = $mime->header_obj;
- my $subj = $hdr->header('Subject');
-
- my $mid_raw = mid_clean(mid_mime($mime));
+ my $subj = $smsg->subject;
+ my $mid_raw = $smsg->mid;
my $id = id_compress($mid_raw, 1);
my $id_m = 'm'.$id;
@@ -211,6 +205,8 @@ sub index_entry {
$rv .= $subj . "\n";
$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
my @tocc;
+ my $mime = $smsg->{mime};
+ my $hdr = $mime->header_obj;
foreach my $f (qw(To Cc)) {
my $dst = _hdr_names_html($hdr, $f);
if ($dst ne '') {
@@ -220,7 +216,7 @@ sub index_entry {
}
my $from = _hdr_names_html($hdr, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
- $rv .= "From: $from @ "._msg_date($hdr)." UTC";
+ $rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_escape($mid_raw) . '/';
$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
@@ -363,30 +359,30 @@ sub pre_thread {
}
sub thread_index_entry {
- my ($ctx, $level, $mime) = @_;
+ my ($ctx, $level, $smsg) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>' . index_entry($mime, $ctx, 0) . '</pre>' . $end;
+ $beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
}
sub stream_thread ($$) {
my ($rootset, $ctx) = @_;
my $inbox = $ctx->{-inbox};
- my $mime;
my @q = map { (0, $_) } @$rootset;
my $level;
+ my $smsg;
while (@q) {
$level = shift @q;
my $node = shift @q or next;
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
- $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
+ $smsg = $inbox->smsg_mime($node->{smsg}) and last;
}
- return missing_thread($ctx) unless $mime;
+ return missing_thread($ctx) unless $smsg;
$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
- $mime = PublicInbox::MIME->new($mime);
- $ctx->{-title_html} = ascii_html($mime->header('Subject'));
- $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
+ $ctx->{-title_html} = ascii_html($smsg->subject);
+ $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
+ $smsg = undef;
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $ctx;
while (@q) {
@@ -394,10 +390,8 @@ sub stream_thread ($$) {
my $node = shift @q or next;
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
- my $mid = $node->{id};
- if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
- $mime = PublicInbox::MIME->new($mime);
- return thread_index_entry($ctx, $level, $mime);
+ if ($smsg = $inbox->smsg_mime($node->{smsg})) {
+ return thread_index_entry($ctx, $level, $smsg);
} else {
return ghost_index_entry($ctx, $level, $node);
}
@@ -445,24 +439,21 @@ sub thread_html {
return stream_thread($rootset, $ctx) unless $ctx->{flat};
# flat display: lazy load the full message from smsg
- my $mime;
- while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
+ my $smsg;
+ while (my $m = shift @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
- return missing_thread($ctx) unless $mime;
- $mime = PublicInbox::MIME->new($mime);
- $ctx->{-title_html} = ascii_html($mime->header('Subject'));
- $ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
- $mime = undef;
+ return missing_thread($ctx) unless $smsg;
+ $ctx->{-title_html} = ascii_html($smsg->subject);
+ $ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+ $smsg = undef;
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
- while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
- }
- if ($mime) {
- $mime = PublicInbox::MIME->new($mime);
- return index_entry($mime, $ctx, scalar @$msgs);
+ $smsg = undef;
+ while (my $m = shift @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
+ return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
$msgs = undef;
$skel;
});
@@ -656,7 +647,7 @@ sub _msg_html_prepare {
sub thread_skel {
my ($dst, $ctx, $hdr, $tpfx) = @_;
my $srch = $ctx->{srch};
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
+ my $mid = mids($hdr)->[0];
my $sres = $srch->get_thread($mid);
my $nr = $sres->{total};
my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f5ed271..a2c2a4a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,6 +225,7 @@ sub get_mid_txt {
sub get_mid_html {
my ($ctx) = @_;
require PublicInbox::View;
+ searcher($ctx);
PublicInbox::View::msg_page($ctx) || r404($ctx);
}
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index bb574a7..38eba2a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -33,8 +33,8 @@ sub response {
sub getline {
my ($self) = @_;
if (my $middle = $self->{cb}) {
- my $mime = $middle->();
- return feed_entry($self, $mime) if $mime;
+ my $smsg = $middle->();
+ return feed_entry($self, $smsg) if $smsg;
}
delete $self->{cb} ? '</feed>' : undef;
}
@@ -92,10 +92,11 @@ sub mid2uuid ($) {
# returns undef or string
sub feed_entry {
- my ($self, $mime) = @_;
+ my ($self, $smsg) = @_;
my $ctx = $self->{ctx};
+ my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
+ my $mid = $smsg->mid;
my $irt = PublicInbox::View::in_reply_to($hdr);
my $uuid = mid2uuid($mid);
my $base = $ctx->{feed_base_url};
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index eaa3218..2a798d6 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -139,6 +139,32 @@ test_psgi(sub { $www->call(@_) }, sub {
foreach my $mid ('a-mid@b', $new_mid, $third) {
like($raw, qr/<\Q$mid\E>/s, "Message-ID $mid shown");
}
+ like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+ my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
+ $mime->header_set('Message-Id', @$exp);
+ $mime->header_set('Subject', '4th dupe');
+ local $SIG{__WARN__} = sub {};
+ ok($im->add($mime), 'added one message');
+ $im->done;
+ my @h = $mime->header('Message-ID');
+ is_deeply($exp, \@h, 'reused existing Message-ID');
+
+ $config->each_inbox(sub { $_[0]->search->reopen });
+
+ $res = $cb->(GET('/v2test/new.atom'));
+ my @ids = ($res->content =~ m!<id>urn:uuid:([^<]+)</id>!sg);
+ my %ids;
+ $ids{$_}++ for @ids;
+ is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
+
+ $res = $cb->(GET('/v2test/reuse@mid/T/'));
+ $raw = $res->content;
+ like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
+
+ $res = $cb->(GET('/v2test/reuse@mid/t/'));
+ $raw = $res->content;
+ like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
});
done_testing();
--
EW
prev parent reply other threads:[~2018-03-27 11:11 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 01/11] import: consolidate mid prepend logic, here Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 02/11] www: $MESSAGE_ID/raw endpoint supports "duplicates" Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 03/11] search: reopen DB if each_smsg_by_mid fails Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 04/11] t/psgi_v2: minimal test for Atom feed and t.mbox.gz Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 05/11] feed: fix new.html for v2 Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 06/11] view: permalink (per-message) view shows multiple messages Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 07/11] searchidx: warn about vivifying multiple ghosts Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 08/11] v2writable: warn on unseen deleted files Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 09/11] www: get rid of unnecessary 'inbox' name reference Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 10/11] searchview: remove unnecessary imports from MID module Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation) [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180327111132.20681-12-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).