about summary refs log tree commit homepage
path: root/xt/cmp-msgstr.t
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-05-07 21:05:54 +0000
committerEric Wong <e@yhbt.net>2020-05-09 08:59:10 +0000
commitfd966061df6a61104935984bed7c28a461a124ff (patch)
treeca41fe84768d2401d34631fc7da7b59c219525f6 /xt/cmp-msgstr.t
parentd35a49e114579a4df7bcb23d013ef0101cbb3ec2 (diff)
downloadpublic-inbox-fd966061df6a61104935984bed7c28a461a124ff.tar.gz
While our codebase can still work with either MIME
implementation, add comparison tests to ensure we
handle corner cases in existing archives.
Diffstat (limited to 'xt/cmp-msgstr.t')
-rw-r--r--xt/cmp-msgstr.t108
1 files changed, 108 insertions, 0 deletions
diff --git a/xt/cmp-msgstr.t b/xt/cmp-msgstr.t
new file mode 100644
index 00000000..6bae0f66
--- /dev/null
+++ b/xt/cmp-msgstr.t
@@ -0,0 +1,108 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use Benchmark qw(:all);
+use PublicInbox::Inbox;
+use PublicInbox::View;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use Digest::MD5;
+use PublicInbox::MsgIter;
+require_mods(qw(Data::Dumper Email::MIME));
+Data::Dumper->import('Dumper');
+require PublicInbox::MIME;
+require_git(2.19);
+my ($tmpdir, $for_destroy) = tmpdir();
+my $inboxdir = $ENV{GIANT_INBOX_DIR};
+plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
+my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
+my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'cmp' });
+my $git = $ibx->git;
+my $fh = $git->popen(@cat);
+vec(my $vec = '', fileno($fh), 1) = 1;
+select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
+my $n = 0;
+my $m = 0;
+my $dig_cls = 'Digest::MD5';
+sub h ($) {
+        s/\s+\z//s; # E::M leaves trailing white space
+        s/\s+/ /sg;
+        "$_[0]: $_";
+}
+
+my $cmp = sub {
+        my ($p, $cmp_arg) = @_;
+        my $part = shift @$p;
+        push @$cmp_arg, '---'.join(', ', @$p).'---';
+        my $ct = $part->content_type // 'text/plain';
+        $ct =~ s/[ \t]+.*\z//s;
+        my ($s, $err);
+        eval {
+                push @$cmp_arg, map { h 'f' } $part->header('From');
+                push @$cmp_arg, map { h 't' } $part->header('To');
+                push @$cmp_arg, map { h 'cc' } $part->header('Cc');
+                push @$cmp_arg, map { h 'mid' } $part->header('Message-ID');
+                push @$cmp_arg, map { h 'refs' } $part->header('References');
+                push @$cmp_arg, map { h 'irt' } $part->header('In-Reply-To');
+                push @$cmp_arg, map { h 's' } $part->header('Subject');
+                push @$cmp_arg, map { h 'cd' }
+                                        $part->header('Content-Description');
+                ($s, $err) = msg_part_text($part, $ct);
+                if (defined $s) {
+                        $s =~ s/\s+\z//s;
+                        push @$cmp_arg, "S: ".$s;
+                } else {
+                        $part = $part->body;
+                        push @$cmp_arg, "T: $ct";
+                        if ($part =~ /[^\p{XPosixPrint}\s]/s) { # binary
+                                my $dig = $dig_cls->new;
+                                $dig->add($part);
+                                push @$cmp_arg, "M: ".$dig->hexdigest;
+                                push @$cmp_arg, "B: ".bytes::length($part);
+                        } else {
+                                $part =~ s/\s+\z//s;
+                                push @$cmp_arg, "X: ".$part;
+                        }
+                }
+        };
+        if ($@) {
+                $err //= '';
+                push @$cmp_arg, "E: $@ ($err)";
+        }
+};
+
+my $ndiff = 0;
+my $git_cb = sub {
+        my ($bref, $oid) = @_;
+        local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
+        ++$m;
+        PublicInbox::MIME->new($$bref)->each_part($cmp, my $m_ctx = [], 1);
+        PublicInbox::Eml->new($$bref)->each_part($cmp, my $e_ctx = [], 1);
+        if (join("\0", @$e_ctx) ne join("\0", @$m_ctx)) {
+                ++$ndiff;
+                open my $fh, '>', "$tmpdir/mime" or die $!;
+                print $fh Dumper($m_ctx) or die $!;
+                close $fh or die $!;
+                open $fh, '>', "$tmpdir/eml" or die $!;
+                print $fh Dumper($e_ctx) or die $!;
+                close $fh or die $!;
+                diag "$inboxdir $oid differ";
+                # using `git diff', diff(1) may not be installed
+                diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]);
+        }
+};
+$git->cat_async_begin;
+my $t = timeit(1, sub {
+        while (<$fh>) {
+                my ($oid, $type) = split / /;
+                next if $type ne 'blob';
+                ++$n;
+                $git->cat_async($oid, $git_cb);
+        }
+        $git->cat_async_wait;
+});
+is($m, $n, "$inboxdir rendered all $m <=> $n messages");
+is($ndiff, 0, "$inboxdir $ndiff differences");
+done_testing();