From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 93AA71FAEF for ; Tue, 27 Mar 2018 11:11:35 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 11/11] view: depend on SearchMsg for Message-ID Date: Tue, 27 Mar 2018 11:11:32 +0000 Message-Id: <20180327111132.20681-12-e@80x24.org> In-Reply-To: <20180327111132.20681-1-e@80x24.org> References: <20180327111132.20681-1-e@80x24.org> List-Id: Since we need to handle messages with multiple and duplicate Message-ID headers, our thread skeleton display must account for that. Since we have a "preferred" Message-ID in case of conflicts, use it as the UUID in an Atom feed so readers do not get confused by conflicts. --- lib/PublicInbox/Feed.pm | 36 ++++++++-------- lib/PublicInbox/Inbox.pm | 9 ++++ lib/PublicInbox/SearchMsg.pm | 6 +-- lib/PublicInbox/SearchView.pm | 14 +++--- lib/PublicInbox/View.pm | 93 ++++++++++++++++++---------------------- lib/PublicInbox/WWW.pm | 1 + lib/PublicInbox/WwwAtomStream.pm | 9 ++-- t/psgi_v2.t | 26 +++++++++++ 8 files changed, 109 insertions(+), 85 deletions(-) diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 74d0bbd..f2285a6 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -8,18 +8,18 @@ use warnings; use PublicInbox::MIME; use PublicInbox::View; use PublicInbox::WwwAtomStream; +use PublicInbox::SearchMsg; # this loads w/o Search::Xapian # main function sub generate { my ($ctx) = @_; - my $oids = recent_blobs($ctx); - return _no_thread() unless @$oids; + my $msgs = recent_msgs($ctx); + return _no_thread() unless @$msgs; - my $git = $ctx->{-inbox}->git; + my $ibx = $ctx->{-inbox}; PublicInbox::WwwAtomStream->response($ctx, 200, sub { - while (my $oid = shift @$oids) { - my $msg = $git->cat_file($oid) or next; - return PublicInbox::MIME->new($msg); + while (my $smsg = shift @$msgs) { + $ibx->smsg_mime($smsg) and return $smsg; } }); } @@ -36,9 +36,8 @@ sub generate_thread_atom { $ctx->{-html_url} = $html_url; my $msgs = $res->{msgs}; PublicInbox::WwwAtomStream->response($ctx, 200, sub { - while (my $msg = shift @$msgs) { - $msg = $ibx->msg_by_smsg($msg) and - return PublicInbox::MIME->new($msg); + while (my $smsg = shift @$msgs) { + $ibx->smsg_mime($smsg) and return $smsg; } }); } @@ -62,20 +61,19 @@ sub generate_html_index { sub new_html { my ($ctx) = @_; - my $oids = recent_blobs($ctx); - if (!@$oids) { + my $msgs = recent_msgs($ctx); + if (!@$msgs) { return [404, ['Content-Type', 'text/plain'], ["No messages, yet\n"] ]; } $ctx->{-html_tip} = '
';
 	$ctx->{-upfx} = '';
 	$ctx->{-hr} = 1;
-	my $git = $ctx->{-inbox}->git;
+	my $ibx = $ctx->{-inbox};
 	PublicInbox::WwwStream->response($ctx, 200, sub {
-		while (my $oid = shift @$oids) {
-			my $msg = $git->cat_file($oid) or next;
-			my $m = PublicInbox::MIME->new($msg);
-			my $more = scalar @$oids;
+		while (my $smsg = shift @$msgs) {
+			my $m = $ibx->smsg_mime($smsg) or next;
+			my $more = scalar @$msgs;
 			return PublicInbox::View::index_entry($m, $ctx, $more);
 		}
 		new_html_footer($ctx);
@@ -103,7 +101,7 @@ sub new_html_footer {
 	"
page: $next$latest
"; } -sub recent_blobs { +sub recent_msgs { my ($ctx) = @_; my $ibx = $ctx->{-inbox}; my $max = $ibx->{feedmax}; @@ -119,7 +117,7 @@ sub recent_blobs { my $res = $srch->query('', { limit => $max, offset => $o }); my $next = $o + $max; $ctx->{next_page} = "o=$next" if $res->{total} >= $next; - return [ map { $_->{blob} } @{$res->{msgs}} ]; + return $res->{msgs}; } my $hex = '[a-f0-9]'; @@ -171,7 +169,7 @@ sub recent_blobs { } $ctx->{next_page} = "r=$last_commit" if $last_commit; - \@oids; + [ map { bless {blob => $_ }, 'PublicInbox::SearchMsg' } @oids ]; } 1; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 666c81d..b1ea8dc 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -8,6 +8,7 @@ use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); use Devel::Peek qw(SvREFCNT); +use PublicInbox::MIME; my $cleanup_timer; eval { @@ -246,6 +247,14 @@ sub msg_by_smsg ($$;$) { $str; } +sub smsg_mime { + my ($self, $smsg, $ref) = @_; + if (my $s = msg_by_smsg($self, $smsg, $ref)) { + $smsg->{mime} = PublicInbox::MIME->new($s); + return $smsg; + } +} + sub path_check { my ($self, $path) = @_; git($self)->check('HEAD:'.$path); diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index b944868..e314fed 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -6,7 +6,6 @@ package PublicInbox::SearchMsg; use strict; use warnings; -use Search::Xapian; use PublicInbox::MID qw/mid_clean mid_mime/; use PublicInbox::Address; use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); @@ -165,9 +164,10 @@ sub mid ($;$) { $self->{mid} = $mid; } elsif (my $rv = $self->{mid}) { $rv; + } elsif ($self->{doc}) { + $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/); } else { - $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) || - $self->_extract_mid; + $self->_extract_mid; # v1 w/o Xapian } } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 6e537b4..1a8fe7f 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -248,15 +248,14 @@ sub mset_thread { *PublicInbox::View::pre_thread); @$msgs = reverse @$msgs if $r; - my $mime; sub { return unless $msgs; - while ($mime = pop @$msgs) { - $mime = $inbox->msg_by_smsg($mime) and last; + my $smsg; + while (my $m = pop @$msgs) { + $smsg = $inbox->smsg_mime($m) and last; } - if ($mime) { - $mime = PublicInbox::MIME->new($mime); - return PublicInbox::View::index_entry($mime, $ctx, + if ($smsg) { + return PublicInbox::View::index_entry($smsg, $ctx, scalar @$msgs); } $msgs = undef; @@ -290,8 +289,7 @@ sub adump { PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $x = shift @items) { $x = load_doc_retry($srch, $x); - $x = $ibx->msg_by_smsg($x) and - return PublicInbox::MIME->new($x); + $x = $ibx->smsg_mime($x) and return $x; } return undef; }); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 34ab3e5..5fb2b31 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -9,7 +9,8 @@ use warnings; use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; -use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/; +use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids + references/; use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream; @@ -69,30 +70,31 @@ sub msg_page { $more = [ $head, $tail, $db ]; } }); + return unless $first; } else { $first = $ibx->msg_by_mid($mid) or return; } - $first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef; + msg_html($ctx, PublicInbox::MIME->new($first), $more); } sub msg_html_more { my ($ctx, $more, $nr) = @_; my $str = eval { - my $mref; + my $smsg; my ($head, $tail, $db) = @$more; - for (; !defined($mref) && $head != $tail; $head++) { - my $smsg = PublicInbox::SearchMsg->get($head, $db, - $ctx->{mid}); - next if $smsg->type ne 'mail'; - $mref = $ctx->{-inbox}->msg_by_smsg($smsg); + my $mid = $ctx->{mid}; + for (; !defined($smsg) && $head != $tail; $head++) { + my $m = PublicInbox::SearchMsg->get($head, $db, $mid); + next if $m->type ne 'mail'; + $smsg = $ctx->{-inbox}->smsg_mime($m); } if ($head == $tail) { # done @$more = (); } else { $more->[0] = $head; } - if ($mref) { - my $mime = PublicInbox::MIME->new($mref); + if ($smsg) { + my $mime = $smsg->{mime}; _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) . multipart_text_as_html($mime, '', $ctx->{-obfs_ibx}) . @@ -167,14 +169,8 @@ EOF sub in_reply_to { my ($hdr) = @_; - my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID'); - my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g); - push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g)); - while (defined(my $irt = pop @refs)) { - next if $mid{"<$irt>"}; - return $irt; - } - undef; + my $refs = references($hdr); + $refs->[-1]; } sub _hdr_names_html ($$) { @@ -191,12 +187,10 @@ sub nr_to_s ($$$) { # this is already inside a
 sub index_entry {
-	my ($mime, $ctx, $more) = @_;
+	my ($smsg, $ctx, $more) = @_;
 	my $srch = $ctx->{srch};
-	my $hdr = $mime->header_obj;
-	my $subj = $hdr->header('Subject');
-
-	my $mid_raw = mid_clean(mid_mime($mime));
+	my $subj = $smsg->subject;
+	my $mid_raw = $smsg->mid;
 	my $id = id_compress($mid_raw, 1);
 	my $id_m = 'm'.$id;
 
@@ -211,6 +205,8 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
+	my $mime = $smsg->{mime};
+	my $hdr = $mime->header_obj;
 	foreach my $f (qw(To Cc)) {
 		my $dst = _hdr_names_html($hdr, $f);
 		if ($dst ne '') {
@@ -220,7 +216,7 @@ sub index_entry {
 	}
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
-	$rv .= "From: $from @ "._msg_date($hdr)." UTC";
+	$rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
 	$rv .= qq{ (permalink / };
@@ -363,30 +359,30 @@ sub pre_thread  {
 }
 
 sub thread_index_entry {
-	my ($ctx, $level, $mime) = @_;
+	my ($ctx, $level, $smsg) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '
' . index_entry($mime, $ctx, 0) . '
' . $end; + $beg . '
' . index_entry($smsg, $ctx, 0) . '
' . $end; } sub stream_thread ($$) { my ($rootset, $ctx) = @_; my $inbox = $ctx->{-inbox}; - my $mime; my @q = map { (0, $_) } @$rootset; my $level; + my $smsg; while (@q) { $level = shift @q; my $node = shift @q or next; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; - $mime = $inbox->msg_by_smsg($node->{smsg}) and last; + $smsg = $inbox->smsg_mime($node->{smsg}) and last; } - return missing_thread($ctx) unless $mime; + return missing_thread($ctx) unless $smsg; $ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef; - $mime = PublicInbox::MIME->new($mime); - $ctx->{-title_html} = ascii_html($mime->header('Subject')); - $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime); + $ctx->{-title_html} = ascii_html($smsg->subject); + $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg); + $smsg = undef; PublicInbox::WwwStream->response($ctx, 200, sub { return unless $ctx; while (@q) { @@ -394,10 +390,8 @@ sub stream_thread ($$) { my $node = shift @q or next; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; - my $mid = $node->{id}; - if ($mime = $inbox->msg_by_smsg($node->{smsg})) { - $mime = PublicInbox::MIME->new($mime); - return thread_index_entry($ctx, $level, $mime); + if ($smsg = $inbox->smsg_mime($node->{smsg})) { + return thread_index_entry($ctx, $level, $smsg); } else { return ghost_index_entry($ctx, $level, $node); } @@ -445,24 +439,21 @@ sub thread_html { return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg - my $mime; - while ($mime = shift @$msgs) { - $mime = $inbox->msg_by_smsg($mime) and last; + my $smsg; + while (my $m = shift @$msgs) { + $smsg = $inbox->smsg_mime($m) and last; } - return missing_thread($ctx) unless $mime; - $mime = PublicInbox::MIME->new($mime); - $ctx->{-title_html} = ascii_html($mime->header('Subject')); - $ctx->{-html_tip} = '
'.index_entry($mime, $ctx, scalar @$msgs);
-	$mime = undef;
+	return missing_thread($ctx) unless $smsg;
+	$ctx->{-title_html} = ascii_html($smsg->subject);
+	$ctx->{-html_tip} = '
'.index_entry($smsg, $ctx, scalar @$msgs);
+	$smsg = undef;
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $msgs;
-		while ($mime = shift @$msgs) {
-			$mime = $inbox->msg_by_smsg($mime) and last;
-		}
-		if ($mime) {
-			$mime = PublicInbox::MIME->new($mime);
-			return index_entry($mime, $ctx, scalar @$msgs);
+		$smsg = undef;
+		while (my $m = shift @$msgs) {
+			$smsg = $inbox->smsg_mime($m) and last;
 		}
+		return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
 		$msgs = undef;
 		$skel;
 	});
@@ -656,7 +647,7 @@ sub _msg_html_prepare {
 sub thread_skel {
 	my ($dst, $ctx, $hdr, $tpfx) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = mids($hdr)->[0];
 	my $sres = $srch->get_thread($mid);
 	my $nr = $sres->{total};
 	my $expand = qq(expand[flat) .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f5ed271..a2c2a4a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,6 +225,7 @@ sub get_mid_txt {
 sub get_mid_html {
 	my ($ctx) = @_;
 	require PublicInbox::View;
+	searcher($ctx);
 	PublicInbox::View::msg_page($ctx) || r404($ctx);
 }
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index bb574a7..38eba2a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -33,8 +33,8 @@ sub response {
 sub getline {
 	my ($self) = @_;
 	if (my $middle = $self->{cb}) {
-		my $mime = $middle->();
-		return feed_entry($self, $mime) if $mime;
+		my $smsg = $middle->();
+		return feed_entry($self, $smsg) if $smsg;
 	}
 	delete $self->{cb} ? '' : undef;
 }
@@ -92,10 +92,11 @@ sub mid2uuid ($) {
 
 # returns undef or string
 sub feed_entry {
-	my ($self, $mime) = @_;
+	my ($self, $smsg) = @_;
 	my $ctx = $self->{ctx};
+	my $mime = $smsg->{mime};
 	my $hdr = $mime->header_obj;
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = $smsg->mid;
 	my $irt = PublicInbox::View::in_reply_to($hdr);
 	my $uuid = mid2uuid($mid);
 	my $base = $ctx->{feed_base_url};
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index eaa3218..2a798d6 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -139,6 +139,32 @@ test_psgi(sub { $www->call(@_) }, sub {
 	foreach my $mid ('a-mid@b', $new_mid, $third) {
 		like($raw, qr/<\Q$mid\E>/s, "Message-ID $mid shown");
 	}
+	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+	my $exp = [ qw( ) ];
+	$mime->header_set('Message-Id', @$exp);
+	$mime->header_set('Subject', '4th dupe');
+	local $SIG{__WARN__} = sub {};
+	ok($im->add($mime), 'added one message');
+	$im->done;
+	my @h = $mime->header('Message-ID');
+	is_deeply($exp, \@h, 'reused existing Message-ID');
+
+	$config->each_inbox(sub { $_[0]->search->reopen });
+
+	$res = $cb->(GET('/v2test/new.atom'));
+	my @ids = ($res->content =~ m!urn:uuid:([^<]+)!sg);
+	my %ids;
+	$ids{$_}++ for @ids;
+	is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
+
+	$res = $cb->(GET('/v2test/reuse@mid/T/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
+
+	$res = $cb->(GET('/v2test/reuse@mid/t/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
 });
 
 done_testing();
-- 
EW