From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 704FD1F47C for ; Wed, 11 Jan 2023 10:55:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1673434551; bh=bKrynGbs71E3RPL/BxsOzVTzAKJ9/T5K8Fo2q5IeQh0=; h=From:To:Subject:Date:From; b=PQsr6ak5Z7TStYcNlENmx+R0boQfY7u7ojNN/+HvzLmUIMGoHtQvUwfu5F3rNL00q nmxRYCRPSab3clJCg4Jk2XO6iTU4nPw73KL2jPTkJ2UnWbiLEtsJ86o1wfCTVWAp1n sJGH/nJSWSysXowrVG31l4Y+Lb/z3DIs8M6Yq0ks= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: /$INBOX/$MSGID/d/ to diff reused Message-IDs Date: Wed, 11 Jan 2023 10:55:39 +0000 Message-Id: <20230111105539.302803-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: To ensure users aren't abusing the ability to reuse Message-IDs, provide a convenient front-end to `lei mail-diff' from WWW. Most of the time it's just list-appended signatures, so I expect this to be useful for /all/ users. 
--- lib/PublicInbox/Hval.pm | 2 +- lib/PublicInbox/MailDiff.pm | 88 +++++++++++++++++++++++++++++++++++++ lib/PublicInbox/View.pm | 29 +++++++++++- lib/PublicInbox/WWW.pm | 6 ++- t/psgi_v2.t | 3 ++ 5 files changed, 125 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 00b3c8b4..0677865e 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -118,7 +118,7 @@ $ESCAPES{'/'} = ':'; # common sub to_attr ($) { my ($str) = @_; - # git would never do this to us: + # git would never do this to us, mail diff uses // to prevent anchors: return if index($str, '//') >= 0; my $first = ''; diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm index 06eb3a0d..0ed06f9a 100644 --- a/lib/PublicInbox/MailDiff.pm +++ b/lib/PublicInbox/MailDiff.pm @@ -7,6 +7,8 @@ use PublicInbox::ContentHash qw(content_digest); use PublicInbox::ContentDigestDbg; use Data::Dumper (); use PublicInbox::MsgIter qw(msg_part_text); +use PublicInbox::ViewDiff qw(flush_diff); +use PublicInbox::GitAsyncCat; sub write_part { # Eml->each_part callback my ($ary, $self) = @_; @@ -31,6 +33,9 @@ sub dump_eml ($$$) { mkdir $dir or die "mkdir($dir): $!"; $eml->each_part(\&write_part, $self); + return if $self->{ctx}; # don't need content_digest noise in WWW UI + + # XXX is this even useful? perhaps hide it behind a CLI switch open my $fh, '>', "$dir/content_digest" or die "open: $!"; my $dig = PublicInbox::ContentDigestDbg->new($fh); local $Data::Dumper::Useqq = 1; @@ -47,4 +52,87 @@ sub prep_a ($$) { dump_eml($self, "$self->{tmp}/a", $eml); } +sub next_smsg ($) { + my ($self) = @_; + my $ctx = $self->{ctx}; + my $over = $ctx->{ibx}->over; + $self->{smsg} = $over ? 
$over->next_by_mid(@{$self->{next_arg}}) + : $ctx->gone('over'); + if (!$self->{smsg}) { + $ctx->write($ctx->_html_end); + return $ctx->close; + } + my $async = $self->{ctx}->{env}->{'pi-httpd.async'}; + $async->(undef, undef, $self) if $async # PublicInbox::HTTPD::Async->new +} + +sub emit_msg_diff { + my ($bref, $self) = @_; # bref is `git diff' output + # will be escaped to `•' in HTML + $self->{ctx}->{ibx}->{obfuscate} and + obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}"); + $$bref =~ s/\r+\n/\n/sg; + print { $self->{ctx}->{zfh} } '
' if $self->{nr} > 1;
+	flush_diff($self->{ctx}, $bref);
+	next_smsg($self);
+}
+
+sub do_diff { # diff one more message ($eml) against the dump in "a"
+	my ($self, $eml) = @_;
+	my $n = 'N'.(++$self->{nr}); # N1, N2, ...: one dump dir per message
+	my $dir = "$self->{tmp}/$n";
+	$self->dump_eml($dir, $eml);
+	my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+	my $opt = { -C => "$self->{tmp}", quiet => 1 }; # presumably cwd for git
+	my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
+	$qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
+}
+
+sub diff_msg_i { # handle one message; $eml is false if it failed to load
+	my ($self, $eml) = @_;
+	if ($eml) {
+		if ($self->{tmp}) { # 2nd..last message
+			do_diff($self, $eml); # emit_msg_diff calls next_smsg later
+		} else { # first message:
+			prep_a($self, $eml); # dumps $eml under $self->{tmp}/a
+			next_smsg($self);
+		}
+	} else { # nothing to diff, warn and move on to the next message
+		warn "W: $self->{smsg}->{blob} missing\n";
+		next_smsg($self);
+	}
+}
+
+sub diff_msg_i_async { # ibx_async_cat callback, see event_step
+	my ($bref, $oid, $type, $size, $self) = @_; # only $bref + $self used
+	diff_msg_i($self, $bref ? PublicInbox::Eml->new($bref) : undef);
+}
+
+sub event_step { # load the blob of $self->{smsg}, pass it to diff_msg_i
+	my ($self) = @_;
+	eval {
+		my $ctx = $self->{ctx};
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			ibx_async_cat($ctx->{ibx}, $self->{smsg}->{blob},
+					\&diff_msg_i_async, $self);
+		} else { # 'pi-httpd.async' not in env: load synchronously
+			diff_msg_i($self, $ctx->{ibx}->smsg_eml($self->{smsg}));
+		}
+	};
+	if ($@) { # anything died above: log it and give up on this response
+		warn "E: $@";
+		delete $self->{smsg}; # ends the `while' loop in begin_mail_diff
+		$self->{ctx}->close;
+	}
+}
+
+sub begin_mail_diff { # entry point, called by PublicInbox::View::diff_msg
+	my ($self) = @_;
+	if (my $async = $self->{ctx}->{env}->{'pi-httpd.async'}) {
+		$async->(undef, undef, $self); # PublicInbox::HTTPD::Async->new
+	} else { # next_smsg leaves {smsg} false once no messages remain
+		event_step($self) while $self->{smsg};
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 071a2093..b8d6d85e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -623,7 +623,8 @@ sub _msg_page_prepare {
 			return;
 		}
 		$ctx->{-html_tip} =
-"
WARNING: multiple messages have this Message-ID\n
";
+qq[
WARNING: multiple messages have this Message-ID (diff)
];
 	} else {
 		$ctx->{first_hdr} = $eml->header_obj;
 		$ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
@@ -1225,4 +1226,30 @@ sub ghost_index_entry {
 		. '
' . $end; } +# /$INBOX/$MSGID/d/ endpoint +sub diff_msg { + my ($ctx) = @_; + require PublicInbox::MailDiff; + my $ibx = $ctx->{ibx}; + my $over = $ibx->over or return no_over_html($ctx); + my ($id, $prev); + my $md = bless { ctx => $ctx }, 'PublicInbox::MailDiff'; + my $next_arg = $md->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; + my $smsg = $md->{smsg} = $over->next_by_mid(@$next_arg) or + return; # undef == 404 + $ctx->{-t_max} = $smsg->{ts}; + $ctx->{-upfx} = '../../'; + $ctx->{-apfx} = '//'; # fail on to_attr() + $ctx->{-linkify} = PublicInbox::Linkify->new; + my $mid = ascii_html($smsg->{mid}); + $ctx->{-title_html} = "diff for duplicates of <$mid>"; + PublicInbox::WwwStream::html_init($ctx); + print { $ctx->{zfh} } '
diff for duplicates of <',
+				$mid, ">\n\n";
+	sub {
+		$ctx->attach($_[0]->([200, delete $ctx->{-res_hdr}]));
+		$md->begin_mail_diff;
+	};
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f861b192..9ffcb879 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -25,7 +25,7 @@ use PublicInbox::Eml;
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
 our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,}!;
 
@@ -452,6 +452,10 @@ sub msg_page {
 
 	# legacy, but no redirect for compatibility:
 	'f/' eq $e and return get_mid_html($ctx);
+	if ($e eq 'd/') {
+		require PublicInbox::View;
+		return PublicInbox::View::diff_msg($ctx);
+	}
 	r404($ctx);
 }
 
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 6b1b3a39..f709c3c7 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -220,6 +220,9 @@ my $client1 = sub {
 		like($raw, qr!>\Q$mid\E!s, "Message-ID $mid shown");
 	}
 	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+	$res = $cb->(GET('/v2test/a-mid@b/d/'));
+	is($res->code, 200, '/d/ (diff) endpoint works');
 };
 
 test_psgi(sub { $www->call(@_) }, $client1);