about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-01-11 10:55:39 +0000
committer Eric Wong <e@80x24.org> 2023-01-11 18:53:05 +0000
commit 20ab293a131f0ec228932bf3448d1b09e280672b (patch)
tree d9ff2c63a209dcd52044461c2d8cee6e50d6f775 /lib
parent 563206c94db52f7e0e6d899fef2681c9e63f8fee (diff)
download public-inbox-20ab293a131f0ec228932bf3448d1b09e280672b.tar.gz
To ensure users aren't abusing the ability to reuse Message-IDs,
provide a convenient front-end to `lei mail-diff' from WWW.
Most of the time it's just list-appended signatures, so I expect
this to be useful for /all/ users.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Hval.pm 2
-rw-r--r-- lib/PublicInbox/MailDiff.pm 88
-rw-r--r-- lib/PublicInbox/View.pm 29
-rw-r--r-- lib/PublicInbox/WWW.pm 6
4 files changed, 122 insertions, 3 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 00b3c8b4..0677865e 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -118,7 +118,7 @@ $ESCAPES{'/'} = ':'; # common
 sub to_attr ($) {
         my ($str) = @_;
 
-        # git would never do this to us:
+        # git would never do this to us, mail diff uses // to prevent anchors:
         return if index($str, '//') >= 0;
 
         my $first = '';
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 06eb3a0d..0ed06f9a 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -7,6 +7,8 @@ use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::ContentDigestDbg;
 use Data::Dumper ();
 use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::GitAsyncCat;
 
 sub write_part { # Eml->each_part callback
         my ($ary, $self) = @_;
@@ -31,6 +33,9 @@ sub dump_eml ($$$) {
         mkdir $dir or die "mkdir($dir): $!";
         $eml->each_part(\&write_part, $self);
 
+        return if $self->{ctx}; # don't need content_digest noise in WWW UI
+
+        # XXX is this even useful?  perhaps hide it behind a CLI switch
         open my $fh, '>', "$dir/content_digest" or die "open: $!";
         my $dig = PublicInbox::ContentDigestDbg->new($fh);
         local $Data::Dumper::Useqq = 1;
@@ -47,4 +52,87 @@ sub prep_a ($$) {
         dump_eml($self, "$self->{tmp}/a", $eml);
 }
 
+sub next_smsg ($) { # advance $self->{smsg} to the next message, or finish the response
+        my ($self) = @_;
+        my $ctx = $self->{ctx};
+        my $over = $ctx->{ibx}->over;
+        $self->{smsg} = $over ? $over->next_by_mid(@{$self->{next_arg}})
+                        : $ctx->gone('over'); # no over handle: defer to $ctx->gone
+        if (!$self->{smsg}) { # no further message: emit the HTML footer and close
+                $ctx->write($ctx->_html_end);
+                return $ctx->close;
+        }
+        my $async = $self->{ctx}->{env}->{'pi-httpd.async'}; # without it, begin_mail_diff's loop steps instead
+        $async->(undef, undef, $self) if $async # PublicInbox::HTTPD::Async->new
+}
+
+sub emit_msg_diff { # psgi_qx callback installed by do_diff
+        my ($bref, $self) = @_; # bref is `git diff' output
+        my $ctx = $self->{ctx};
+        if ($ctx->{ibx}->{obfuscate}) {
+                # Loaded on demand and called fully-qualified so the call
+                # resolves regardless of what this package imports.
+                # The bullet will be escaped to `&#8226;' in HTML
+                require PublicInbox::Hval;
+                PublicInbox::Hval::obfuscate_addrs($ctx->{ibx}, $$bref, "\x{2022}");
+        }
+        $$bref =~ s/\r+\n/\n/sg; # normalize CRLF line endings to LF
+        # separate the 2nd and later diffs from the one before it
+        print { $ctx->{zfh} } '</pre><hr><pre>' if $self->{nr} > 1;
+        flush_diff($ctx, $bref);
+        next_smsg($self); # continue with the next message, or finish the page
+}
+
+sub do_diff { # dump $eml into its own directory and spawn `git diff' against "a"
+        my ($self, $eml) = @_;
+        my $n = 'N'.(++$self->{nr}); # N1, N2, ...: one directory per compared message
+        my $dir = "$self->{tmp}/$n";
+        $self->dump_eml($dir, $eml);
+        my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+        my $opt = { -C => "$self->{tmp}", quiet => 1 }; # presumably runs git inside the tmpdir -- confirm in Qspawn
+        my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
+        $qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self); # emit_msg_diff gets the output
+}
+
+sub diff_msg_i { # handle one message; $eml is false when it could not be loaded
+        my ($self, $eml) = @_;
+        if ($eml) {
+                if ($self->{tmp}) { # 2nd..last message
+                        do_diff($self, $eml);
+                } else { # first message:
+                        prep_a($self, $eml); # dumped as the "a" side that do_diff compares against
+                        next_smsg($self);
+                }
+        } else { # warn and move on to the next message
+                warn "W: $self->{smsg}->{blob} missing\n";
+                next_smsg($self);
+        }
+}
+
+sub diff_msg_i_async { # ibx_async_cat callback (see event_step)
+        my ($bref, $oid, $type, $size, $self) = @_;
+        diff_msg_i($self, $bref ? PublicInbox::Eml->new($bref) : undef); # undef takes the "missing" path
+}
+
+sub event_step { # load the message for $self->{smsg} and pass it to diff_msg_i
+        my ($self) = @_;
+        eval {
+                my $ctx = $self->{ctx};
+                if ($ctx->{env}->{'pi-httpd.async'}) { # fetch the blob through ibx_async_cat
+                        ibx_async_cat($ctx->{ibx}, $self->{smsg}->{blob},
+                                        \&diff_msg_i_async, $self);
+                } else { # load the message directly
+                        diff_msg_i($self, $ctx->{ibx}->smsg_eml($self->{smsg}));
+                }
+        };
+        if ($@) { # on any exception: log it, stop iterating and close the response
+                warn "E: $@";
+                delete $self->{smsg}; # ends the `while $self->{smsg}' loop in begin_mail_diff
+                $self->{ctx}->close;
+        }
+}
+
+sub begin_mail_diff { # entry point, called from PublicInbox::View::diff_msg
+        my ($self) = @_;
+        if (my $async = $self->{ctx}->{env}->{'pi-httpd.async'}) {
+                $async->(undef, undef, $self); # PublicInbox::HTTPD::Async->new
+        } else { # no async support: keep stepping until $self->{smsg} is cleared
+                event_step($self) while $self->{smsg};
+        }
+}
+
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 071a2093..b8d6d85e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -623,7 +623,8 @@ sub _msg_page_prepare {
                         return;
                 }
                 $ctx->{-html_tip} =
-"<pre>WARNING: multiple messages have this Message-ID\n</pre><pre>";
+qq[<pre>WARNING: multiple messages have this Message-ID (<a
+href="d/">diff</a>)</pre><pre>];
         } else {
                 $ctx->{first_hdr} = $eml->header_obj;
                 $ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
@@ -1225,4 +1226,30 @@ sub ghost_index_entry {
                 . '</pre>' . $end;
 }
 
+# /$INBOX/$MSGID/d/ endpoint
+sub diff_msg { # returns a PSGI streaming callback, or undef when no message matches
+        my ($ctx) = @_;
+        require PublicInbox::MailDiff;
+        my $ibx = $ctx->{ibx};
+        my $over = $ibx->over or return no_over_html($ctx);
+        my ($id, $prev); # presumably iteration state updated by next_by_mid -- confirm
+        my $md = bless { ctx => $ctx }, 'PublicInbox::MailDiff';
+        my $next_arg = $md->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; # reused by MailDiff::next_smsg
+        my $smsg = $md->{smsg} = $over->next_by_mid(@$next_arg) or
+                return; # undef == 404
+        $ctx->{-t_max} = $smsg->{ts};
+        $ctx->{-upfx} = '../../';
+        $ctx->{-apfx} = '//'; # fail on to_attr()
+        $ctx->{-linkify} = PublicInbox::Linkify->new;
+        my $mid = ascii_html($smsg->{mid});
+        $ctx->{-title_html} = "diff for duplicates of &lt;$mid&gt;";
+        PublicInbox::WwwStream::html_init($ctx);
+        print { $ctx->{zfh} } '<pre>diff for duplicates of &lt;<a href="../">',
+                                $mid, "</a>&gt;\n\n";
+        sub { # $_[0] is the responder: start a 200 response, then start diffing
+                $ctx->attach($_[0]->([200, delete $ctx->{-res_hdr}]));
+                $md->begin_mail_diff;
+        };
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f861b192..9ffcb879 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -25,7 +25,7 @@ use PublicInbox::Eml;
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
 our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,}!;
 
@@ -452,6 +452,10 @@ sub msg_page {
 
         # legacy, but no redirect for compatibility:
         'f/' eq $e and return get_mid_html($ctx);
+        if ($e eq 'd/') {
+                require PublicInbox::View;
+                return PublicInbox::View::diff_msg($ctx);
+        }
         r404($ctx);
 }