#!perl -w
# Copyright (C) 2020 all contributors
# License: AGPL-3.0+
#
# Feeds every blob of the inbox named by $ENV{GIANT_INBOX_DIR} through both
# PublicInbox::MIME and PublicInbox::Eml and compares the results:
#  - HTML produced by PublicInbox::View::multipart_text_as_html;
#    differences are only counted and shown via diag
#  - an MD5 digest over the bodies of all parts visited by ->each_part,
#    plus the remaining per-part arguments; differences fail the test
use strict;
use Test::More;
use Benchmark qw(:all);
use PublicInbox::Inbox;
use PublicInbox::View;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use Digest::MD5;
require_git(2.19);
require_mods qw(Data::Dumper Email::MIME Plack::Util);
Data::Dumper->import('Dumper');
require PublicInbox::MIME;
my ($tmpdir, $for_destroy) = tmpdir();
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;

# enumerate every object in the repository; the main loop below reads
# the object ID and type from each output line
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' });
my $git = $ibx->git;
my $fh = $git->popen(@cat);

# give the object listing up to 60 seconds to become readable
vec(my $vec = '', fileno($fh), 1) = 1;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";

# rendering contexts: identical apart from the output buffer each writes to
my $mime_ctx = {
	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
	-inbox => $ibx,
	www => Plack::Util::inline_object(style => sub {''}),
	obuf => \(my $mime_buf = ''),
	mhref => '../',
};
my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') };
my $n = 0; # blobs queued for comparison
my $m = 0; # blobs actually handed to $git_cb
my $ndiff_html = 0; # blobs whose rendered HTML differed
my $dig_cls = 'Digest::MD5';

# ->each_part callback.
# $p is the per-part array ref, its first element being the part object;
# $cmp_arg is the accumulator passed to ->each_part: slot 0 holds one
# running digest for the whole message, the remaining elements of $p are
# appended as a string for every part.
my $digest_attach = sub { # ensure ->body (not ->body_raw) matches
	my ($p, $cmp_arg) = @_;
	my $part = shift @$p;
	my $dig = $cmp_arg->[0] //= $dig_cls->new;
	# digest ->body rather than ->body_raw, as the comment above
	# requires, so differences in transfer decoding are caught
	$dig->add($part->body);
	push @$cmp_arg, join(', ', @$p);
};

# cat_async callback: $bref is a reference to the raw blob, $oid its object ID
my $git_cb = sub {
	my ($bref, $oid) = @_;
	# tag any warning with the blob which triggered it
	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
	++$m;
	my $mime = PublicInbox::MIME->new($$bref);
	PublicInbox::View::multipart_text_as_html($mime, $mime_ctx);
	my $eml = PublicInbox::Eml->new($$bref);
	PublicInbox::View::multipart_text_as_html($eml, $eml_ctx);
	if ($eml_buf ne $mime_buf) {
		++$ndiff_html;
		# $out (not $fh) to avoid shadowing the batch-check pipe
		open my $out, '>', "$tmpdir/mime" or die $!;
		print $out $mime_buf or die $!;
		close $out or die $!;
		open $out, '>', "$tmpdir/eml" or die $!;
		print $out $eml_buf or die $!;
		close $out or die $!;
		# using `git diff', diff(1) may not be installed
		# --no-index: compare plain files even if $tmpdir happens
		# to live inside a git work tree
		diag "$inboxdir $oid differs";
		diag xqx([qw(git diff --no-index),
			"$tmpdir/mime", "$tmpdir/eml"]);
	}
	# both buffers are shared across callbacks, reset for the next blob
	$eml_buf = $mime_buf = '';

	# don't tolerate differences in attachment downloads
	$mime = PublicInbox::MIME->new($$bref);
	$mime->each_part($digest_attach, my $mime_cmp = [], 1);
	$eml = PublicInbox::Eml->new($$bref);
	$eml->each_part($digest_attach, my $eml_cmp = [], 1);
	# replace the digest objects with comparable hex strings
	$mime_cmp->[0] = $mime_cmp->[0]->hexdigest;
	$eml_cmp->[0] = $eml_cmp->[0]->hexdigest;

	# don't have millions of "ok" lines
	if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) {
		diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]);
		is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match");
	}
};

my $t = timeit(1, sub {
	# a named lexical instead of $_, which cat_async could clobber
	while (my $line = <$fh>) {
		my ($oid, $type) = split(/ /, $line);
		next if $type ne 'blob';
		++$n;
		$git->cat_async($oid, $git_cb);
	}
	$git->cat_async_wait;
});
diag "compared $n blobs: ".timestr($t);
is($m, $n, 'rendered all messages');

# we'll tolerate minor differences in HTML rendering
diag "$ndiff_html HTML differences";
done_testing();