user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 11/11] view: depend on SearchMsg for Message-ID
Date: Tue, 27 Mar 2018 11:11:32 +0000	[thread overview]
Message-ID: <20180327111132.20681-12-e@80x24.org> (raw)
In-Reply-To: <20180327111132.20681-1-e@80x24.org>

Since we need to handle messages with multiple and duplicate
Message-ID headers, our thread skeleton display must account
for that.

Since we have a "preferred" Message-ID in case of conflicts,
use it as the UUID in an Atom feed so readers do not get
confused by conflicts.
---
 lib/PublicInbox/Feed.pm          | 36 ++++++++--------
 lib/PublicInbox/Inbox.pm         |  9 ++++
 lib/PublicInbox/SearchMsg.pm     |  6 +--
 lib/PublicInbox/SearchView.pm    | 14 +++---
 lib/PublicInbox/View.pm          | 93 ++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm           |  1 +
 lib/PublicInbox/WwwAtomStream.pm |  9 ++--
 t/psgi_v2.t                      | 26 +++++++++++
 8 files changed, 109 insertions(+), 85 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 74d0bbd..f2285a6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,18 +8,18 @@ use warnings;
 use PublicInbox::MIME;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
+use PublicInbox::SearchMsg; # this loads w/o Search::Xapian
 
 # main function
 sub generate {
 	my ($ctx) = @_;
-	my $oids = recent_blobs($ctx);
-	return _no_thread() unless @$oids;
+	my $msgs = recent_msgs($ctx);
+	return _no_thread() unless @$msgs;
 
-	my $git = $ctx->{-inbox}->git;
+	my $ibx = $ctx->{-inbox};
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
-		while (my $oid = shift @$oids) {
-			my $msg = $git->cat_file($oid) or next;
-			return PublicInbox::MIME->new($msg);
+		while (my $smsg = shift @$msgs) {
+			$ibx->smsg_mime($smsg) and return $smsg;
 		}
 	});
 }
@@ -36,9 +36,8 @@ sub generate_thread_atom {
 	$ctx->{-html_url} = $html_url;
 	my $msgs = $res->{msgs};
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
-		while (my $msg = shift @$msgs) {
-			$msg = $ibx->msg_by_smsg($msg) and
-				return PublicInbox::MIME->new($msg);
+		while (my $smsg = shift @$msgs) {
+			$ibx->smsg_mime($smsg) and return $smsg;
 		}
 	});
 }
@@ -62,20 +61,19 @@ sub generate_html_index {
 
 sub new_html {
 	my ($ctx) = @_;
-	my $oids = recent_blobs($ctx);
-	if (!@$oids) {
+	my $msgs = recent_msgs($ctx);
+	if (!@$msgs) {
 		return [404, ['Content-Type', 'text/plain'],
 			["No messages, yet\n"] ];
 	}
 	$ctx->{-html_tip} = '<pre>';
 	$ctx->{-upfx} = '';
 	$ctx->{-hr} = 1;
-	my $git = $ctx->{-inbox}->git;
+	my $ibx = $ctx->{-inbox};
 	PublicInbox::WwwStream->response($ctx, 200, sub {
-		while (my $oid = shift @$oids) {
-			my $msg = $git->cat_file($oid) or next;
-			my $m = PublicInbox::MIME->new($msg);
-			my $more = scalar @$oids;
+		while (my $smsg = shift @$msgs) {
+			my $m = $ibx->smsg_mime($smsg) or next;
+			my $more = scalar @$msgs;
 			return PublicInbox::View::index_entry($m, $ctx, $more);
 		}
 		new_html_footer($ctx);
@@ -103,7 +101,7 @@ sub new_html_footer {
 	"<hr><pre>page: $next$latest</pre>";
 }
 
-sub recent_blobs {
+sub recent_msgs {
 	my ($ctx) = @_;
 	my $ibx = $ctx->{-inbox};
 	my $max = $ibx->{feedmax};
@@ -119,7 +117,7 @@ sub recent_blobs {
 		my $res = $srch->query('', { limit => $max, offset => $o });
 		my $next = $o + $max;
 		$ctx->{next_page} = "o=$next" if $res->{total} >= $next;
-		return [ map { $_->{blob} } @{$res->{msgs}} ];
+		return $res->{msgs};
 	}
 
 	my $hex = '[a-f0-9]';
@@ -171,7 +169,7 @@ sub recent_blobs {
 	}
 
 	$ctx->{next_page} = "r=$last_commit" if $last_commit;
-	\@oids;
+	[ map { bless {blob => $_ }, 'PublicInbox::SearchMsg' } @oids ];
 }
 
 1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 666c81d..b1ea8dc 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -8,6 +8,7 @@ use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
 use Devel::Peek qw(SvREFCNT);
+use PublicInbox::MIME;
 
 my $cleanup_timer;
 eval {
@@ -246,6 +247,14 @@ sub msg_by_smsg ($$;$) {
 	$str;
 }
 
+sub smsg_mime {
+	my ($self, $smsg, $ref) = @_;
+	if (my $s = msg_by_smsg($self, $smsg, $ref)) {
+		$smsg->{mime} = PublicInbox::MIME->new($s);
+		return $smsg;
+	}
+}
+
 sub path_check {
 	my ($self, $path) = @_;
 	git($self)->check('HEAD:'.$path);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b944868..e314fed 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -6,7 +6,6 @@
 package PublicInbox::SearchMsg;
 use strict;
 use warnings;
-use Search::Xapian;
 use PublicInbox::MID qw/mid_clean mid_mime/;
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
@@ -165,9 +164,10 @@ sub mid ($;$) {
 		$self->{mid} = $mid;
 	} elsif (my $rv = $self->{mid}) {
 		$rv;
+	} elsif ($self->{doc}) {
+		$self->{mid} = _get_term_val($self, 'Q', qr/\AQ/);
 	} else {
-		$self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
-				$self->_extract_mid;
+		$self->_extract_mid; # v1 w/o Xapian
 	}
 }
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6e537b4..1a8fe7f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -248,15 +248,14 @@ sub mset_thread {
 		*PublicInbox::View::pre_thread);
 
 	@$msgs = reverse @$msgs if $r;
-	my $mime;
 	sub {
 		return unless $msgs;
-		while ($mime = pop @$msgs) {
-			$mime = $inbox->msg_by_smsg($mime) and last;
+		my $smsg;
+		while (my $m = pop @$msgs) {
+			$smsg = $inbox->smsg_mime($m) and last;
 		}
-		if ($mime) {
-			$mime = PublicInbox::MIME->new($mime);
-			return PublicInbox::View::index_entry($mime, $ctx,
+		if ($smsg) {
+			return PublicInbox::View::index_entry($smsg, $ctx,
 				scalar @$msgs);
 		}
 		$msgs = undef;
@@ -290,8 +289,7 @@ sub adump {
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
 		while (my $x = shift @items) {
 			$x = load_doc_retry($srch, $x);
-			$x = $ibx->msg_by_smsg($x) and
-					return PublicInbox::MIME->new($x);
+			$x = $ibx->smsg_mime($x) and return $x;
 		}
 		return undef;
 	});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 34ab3e5..5fb2b31 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -9,7 +9,8 @@ use warnings;
 use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
 use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids
+			references/;
 use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
@@ -69,30 +70,31 @@ sub msg_page {
 				$more = [ $head, $tail, $db ];
 			}
 		});
+		return unless $first;
 	} else {
 		$first = $ibx->msg_by_mid($mid) or return;
 	}
-	$first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef;
+	msg_html($ctx, PublicInbox::MIME->new($first), $more);
 }
 
 sub msg_html_more {
 	my ($ctx, $more, $nr) = @_;
 	my $str = eval {
-		my $mref;
+		my $smsg;
 		my ($head, $tail, $db) = @$more;
-		for (; !defined($mref) && $head != $tail; $head++) {
-			my $smsg = PublicInbox::SearchMsg->get($head, $db,
-								$ctx->{mid});
-			next if $smsg->type ne 'mail';
-			$mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+		my $mid = $ctx->{mid};
+		for (; !defined($smsg) && $head != $tail; $head++) {
+			my $m = PublicInbox::SearchMsg->get($head, $db, $mid);
+			next if $m->type ne 'mail';
+			$smsg = $ctx->{-inbox}->smsg_mime($m);
 		}
 		if ($head == $tail) { # done
 			@$more = ();
 		} else {
 			$more->[0] = $head;
 		}
-		if ($mref) {
-			my $mime = PublicInbox::MIME->new($mref);
+		if ($smsg) {
+			my $mime = $smsg->{mime};
 			_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
 				multipart_text_as_html($mime, '',
 							$ctx->{-obfs_ibx}) .
@@ -167,14 +169,8 @@ EOF
 
 sub in_reply_to {
 	my ($hdr) = @_;
-	my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID');
-	my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g);
-	push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g));
-	while (defined(my $irt = pop @refs)) {
-		next if $mid{"<$irt>"};
-		return $irt;
-	}
-	undef;
+	my $refs = references($hdr);
+	$refs->[-1];
 }
 
 sub _hdr_names_html ($$) {
@@ -191,12 +187,10 @@ sub nr_to_s ($$$) {
 
 # this is already inside a <pre>
 sub index_entry {
-	my ($mime, $ctx, $more) = @_;
+	my ($smsg, $ctx, $more) = @_;
 	my $srch = $ctx->{srch};
-	my $hdr = $mime->header_obj;
-	my $subj = $hdr->header('Subject');
-
-	my $mid_raw = mid_clean(mid_mime($mime));
+	my $subj = $smsg->subject;
+	my $mid_raw = $smsg->mid;
 	my $id = id_compress($mid_raw, 1);
 	my $id_m = 'm'.$id;
 
@@ -211,6 +205,8 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
+	my $mime = $smsg->{mime};
+	my $hdr = $mime->header_obj;
 	foreach my $f (qw(To Cc)) {
 		my $dst = _hdr_names_html($hdr, $f);
 		if ($dst ne '') {
@@ -220,7 +216,7 @@ sub index_entry {
 	}
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
-	$rv .= "From: $from @ "._msg_date($hdr)." UTC";
+	$rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
 	$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
@@ -363,30 +359,30 @@ sub pre_thread  {
 }
 
 sub thread_index_entry {
-	my ($ctx, $level, $mime) = @_;
+	my ($ctx, $level, $smsg) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '<pre>' . index_entry($mime, $ctx, 0) . '</pre>' . $end;
+	$beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
 }
 
 sub stream_thread ($$) {
 	my ($rootset, $ctx) = @_;
 	my $inbox = $ctx->{-inbox};
-	my $mime;
 	my @q = map { (0, $_) } @$rootset;
 	my $level;
+	my $smsg;
 	while (@q) {
 		$level = shift @q;
 		my $node = shift @q or next;
 		my $cl = $level + 1;
 		unshift @q, map { ($cl, $_) } @{$node->{children}};
-		$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
+		$smsg = $inbox->smsg_mime($node->{smsg}) and last;
 	}
-	return missing_thread($ctx) unless $mime;
+	return missing_thread($ctx) unless $smsg;
 
 	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
-	$mime = PublicInbox::MIME->new($mime);
-	$ctx->{-title_html} = ascii_html($mime->header('Subject'));
-	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
+	$ctx->{-title_html} = ascii_html($smsg->subject);
+	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
+	$smsg = undef;
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $ctx;
 		while (@q) {
@@ -394,10 +390,8 @@ sub stream_thread ($$) {
 			my $node = shift @q or next;
 			my $cl = $level + 1;
 			unshift @q, map { ($cl, $_) } @{$node->{children}};
-			my $mid = $node->{id};
-			if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
-				$mime = PublicInbox::MIME->new($mime);
-				return thread_index_entry($ctx, $level, $mime);
+			if ($smsg = $inbox->smsg_mime($node->{smsg})) {
+				return thread_index_entry($ctx, $level, $smsg);
 			} else {
 				return ghost_index_entry($ctx, $level, $node);
 			}
@@ -445,24 +439,21 @@ sub thread_html {
 	return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
 	# flat display: lazy load the full message from smsg
-	my $mime;
-	while ($mime = shift @$msgs) {
-		$mime = $inbox->msg_by_smsg($mime) and last;
+	my $smsg;
+	while (my $m = shift @$msgs) {
+		$smsg = $inbox->smsg_mime($m) and last;
 	}
-	return missing_thread($ctx) unless $mime;
-	$mime = PublicInbox::MIME->new($mime);
-	$ctx->{-title_html} = ascii_html($mime->header('Subject'));
-	$ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
-	$mime = undef;
+	return missing_thread($ctx) unless $smsg;
+	$ctx->{-title_html} = ascii_html($smsg->subject);
+	$ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+	$smsg = undef;
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $msgs;
-		while ($mime = shift @$msgs) {
-			$mime = $inbox->msg_by_smsg($mime) and last;
-		}
-		if ($mime) {
-			$mime = PublicInbox::MIME->new($mime);
-			return index_entry($mime, $ctx, scalar @$msgs);
+		$smsg = undef;
+		while (my $m = shift @$msgs) {
+			$smsg = $inbox->smsg_mime($m) and last;
 		}
+		return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
 		$msgs = undef;
 		$skel;
 	});
@@ -656,7 +647,7 @@ sub _msg_html_prepare {
 sub thread_skel {
 	my ($dst, $ctx, $hdr, $tpfx) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = mids($hdr)->[0];
 	my $sres = $srch->get_thread($mid);
 	my $nr = $sres->{total};
 	my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f5ed271..a2c2a4a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,6 +225,7 @@ sub get_mid_txt {
 sub get_mid_html {
 	my ($ctx) = @_;
 	require PublicInbox::View;
+	searcher($ctx);
 	PublicInbox::View::msg_page($ctx) || r404($ctx);
 }
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index bb574a7..38eba2a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -33,8 +33,8 @@ sub response {
 sub getline {
 	my ($self) = @_;
 	if (my $middle = $self->{cb}) {
-		my $mime = $middle->();
-		return feed_entry($self, $mime) if $mime;
+		my $smsg = $middle->();
+		return feed_entry($self, $smsg) if $smsg;
 	}
 	delete $self->{cb} ? '</feed>' : undef;
 }
@@ -92,10 +92,11 @@ sub mid2uuid ($) {
 
 # returns undef or string
 sub feed_entry {
-	my ($self, $mime) = @_;
+	my ($self, $smsg) = @_;
 	my $ctx = $self->{ctx};
+	my $mime = $smsg->{mime};
 	my $hdr = $mime->header_obj;
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = $smsg->mid;
 	my $irt = PublicInbox::View::in_reply_to($hdr);
 	my $uuid = mid2uuid($mid);
 	my $base = $ctx->{feed_base_url};
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index eaa3218..2a798d6 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -139,6 +139,32 @@ test_psgi(sub { $www->call(@_) }, sub {
 	foreach my $mid ('a-mid@b', $new_mid, $third) {
 		like($raw, qr/&lt;\Q$mid\E&gt;/s, "Message-ID $mid shown");
 	}
+	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+	my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
+	$mime->header_set('Message-Id', @$exp);
+	$mime->header_set('Subject', '4th dupe');
+	local $SIG{__WARN__} = sub {};
+	ok($im->add($mime), 'added one message');
+	$im->done;
+	my @h = $mime->header('Message-ID');
+	is_deeply($exp, \@h, 'reused existing Message-ID');
+
+	$config->each_inbox(sub { $_[0]->search->reopen });
+
+	$res = $cb->(GET('/v2test/new.atom'));
+	my @ids = ($res->content =~ m!<id>urn:uuid:([^<]+)</id>!sg);
+	my %ids;
+	$ids{$_}++ for @ids;
+	is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
+
+	$res = $cb->(GET('/v2test/reuse@mid/T/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
+
+	$res = $cb->(GET('/v2test/reuse@mid/t/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
 });
 
 done_testing();
-- 
EW


      parent reply	other threads:[~2018-03-27 11:11 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 01/11] import: consolidate mid prepend logic, here Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 02/11] www: $MESSAGE_ID/raw endpoint supports "duplicates" Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 03/11] search: reopen DB if each_smsg_by_mid fails Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 04/11] t/psgi_v2: minimal test for Atom feed and t.mbox.gz Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 05/11] feed: fix new.html for v2 Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 06/11] view: permalink (per-message) view shows multiple messages Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 07/11] searchidx: warn about vivifying multiple ghosts Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 08/11] v2writable: warn on unseen deleted files Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 09/11] www: get rid of unnecessary 'inbox' name reference Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 10/11] searchview: remove unnecessary imports from MID module Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation) [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180327111132.20681-12-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).