#!perl -w
# Copyright (C) 2020 all contributors
# License: AGPL-3.0+
#
# xt/cmp-msgview.t - eml comparison tests
#
# Provenance: commit fd966061df6a61104935984bed7c28a461a124ff
# by Eric Wong, Thu, 7 May 2020 21:05:54 +0000, "xt: eml comparison tests":
#
#   While our codebase can still work with either MIME implementation,
#   add comparison tests to ensure we handle corner cases in existing
#   archives.
#
# For every blob in the inbox named by $ENV{GIANT_INBOX_DIR}, the message
# is parsed with both PublicInbox::MIME and PublicInbox::Eml, then:
#   * the HTML produced by PublicInbox::View::multipart_text_as_html is
#     compared (differences are counted and shown, but tolerated)
#   * a digest over every part body plus per-part metadata is compared
#     (differences are reported as test failures)
use strict;
use Test::More;
use Benchmark qw(:all);
use PublicInbox::Inbox;
use PublicInbox::View;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use Digest::MD5;
require_git(2.19);
require_mods qw(Data::Dumper Email::MIME Plack::Util);
Data::Dumper->import('Dumper');
require PublicInbox::MIME;
my ($tmpdir, $for_destroy) = tmpdir();
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;

# enumerate every object in the repository; only blobs are examined below
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' });
my $git = $ibx->git;
my $fh = $git->popen(@cat);

# wait up to 60s for the first --batch-check output before giving up
vec(my $vec = '', fileno($fh), 1) = 1;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";

# rendering contexts: identical except for the buffer each one writes to
my $mime_ctx = {
	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
	-inbox => $ibx,
	www => Plack::Util::inline_object(style => sub {''}),
	obuf => \(my $mime_buf = ''),
	mhref => '../',
};
my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') };

my $n = 0; # blobs queued via ->cat_async
my $m = 0; # blobs actually handed to $git_cb
my $ndiff_html = 0; # messages whose rendered HTML differed
my $dig_cls = 'Digest::MD5';

# ->each_part callback: ensure ->body (not ->body_raw) matches.
# $p is an array ref whose first element is the part object; the
# remaining elements are recorded verbatim so they get compared too
# (presumably depth/index information -- confirm against ->each_part).
# $cmp_arg is the accumulator: slot 0 holds one running digest over all
# part bodies, the following slots hold one string per part.
my $digest_attach = sub {
	my ($p, $cmp_arg) = @_;
	my $part = shift @$p;
	my $dig = $cmp_arg->[0] //= $dig_cls->new;
	# The comment above and the "attachment downloads" note at the call
	# site both ask for ->body; digesting ->body_raw contradicted them.
	$dig->add($part->body);
	push @$cmp_arg, join(', ', @$p);
};

# ->cat_async callback, invoked with a ref to the blob contents and its OID
my $git_cb = sub {
	my ($bref, $oid) = @_;
	# make any warning identify the message which triggered it
	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
	++$m;
	my $mime = PublicInbox::MIME->new($$bref);
	PublicInbox::View::multipart_text_as_html($mime, $mime_ctx);
	my $eml = PublicInbox::Eml->new($$bref);
	PublicInbox::View::multipart_text_as_html($eml, $eml_ctx);
	if ($eml_buf ne $mime_buf) {
		++$ndiff_html;
		# dump both renderings so they can be diffed on disk
		for my $pair ([ mime => \$mime_buf ], [ eml => \$eml_buf ]) {
			my ($name, $bufref) = @$pair;
			open my $out, '>', "$tmpdir/$name" or die $!;
			print $out $$bufref or die $!;
			close $out or die $!;
		}
		# using `git diff', diff(1) may not be installed
		diag "$inboxdir $oid differs";
		diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]);
	}
	# reset the shared output buffers for the next message
	$eml_buf = $mime_buf = '';

	# don't tolerate differences in attachment downloads
	# (both objects are re-parsed from the original blob first)
	$mime = PublicInbox::MIME->new($$bref);
	$mime->each_part($digest_attach, my $mime_cmp = [], 1);
	$eml = PublicInbox::Eml->new($$bref);
	$eml->each_part($digest_attach, my $eml_cmp = [], 1);
	# replace the digest objects with their hex form for comparison
	$mime_cmp->[0] = $mime_cmp->[0]->hexdigest;
	$eml_cmp->[0] = $eml_cmp->[0]->hexdigest;
	# don't have millions of "ok" lines
	if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) {
		diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]);
		is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match");
	}
};

$git->cat_async_begin;
my $t = timeit(1, sub {
	# each --batch-check line is split on single spaces;
	# the first two fields are the object ID and the object type
	while (my $line = <$fh>) {
		my ($oid, $type) = split / /, $line;
		next if $type ne 'blob';
		++$n;
		$git->cat_async($oid, $git_cb);
	}
	$git->cat_async_wait;
});
is($m, $n, 'rendered all messages');
diag "compared $n messages: ".timestr($t);

# we'll tolerate minor differences in HTML rendering
diag "$ndiff_html HTML differences";

done_testing();