#!perl -w
# Copyright (C) 2020 all contributors
# License: AGPL-3.0+
#
# Walks every blob in the inbox named by GIANT_INBOX_DIR, parses each
# one with both PublicInbox::MIME and PublicInbox::Eml, and fails if
# the two parsers disagree about the headers or body of any part.
use strict;
use Test::More;
use Benchmark qw(:all);
use PublicInbox::Inbox;
use PublicInbox::View;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use Digest::MD5;
use PublicInbox::MsgIter;
use bytes (); # only for bytes::length; does not enable byte semantics
require_mods(qw(Data::Dumper Email::MIME));
Data::Dumper->import('Dumper');
require PublicInbox::MIME;
require_git(2.19); # presumably needed for the cat-file switches below

# $for_destroy is never read here; it is only held for the lifetime of
# the script (presumably it removes $tmpdir when it goes out of scope)
my ($tmpdir, $for_destroy) = tmpdir();
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;

my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'cmp' });
my $git = $ibx->git;
my $fh = $git->popen(@cat);

# give git up to 60 seconds to make its first output readable
vec(my $vec = '', fileno($fh), 1) = 1;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";

my $n = 0; # blobs queued for comparison
my $m = 0; # blobs actually handed to $git_cb
my $dig_cls = 'Digest::MD5';

# Short tag => header name, in the order they are recorded by $cmp.
my @hdr_tags = (
	[ f => 'From' ],
	[ t => 'To' ],
	[ cc => 'Cc' ],
	[ mid => 'Message-ID' ],
	[ refs => 'References' ],
	[ irt => 'In-Reply-To' ],
	[ s => 'Subject' ],
	[ cd => 'Content-Description' ],
);

# Normalizes the header value held in $_ (set by the caller's map {})
# and returns it as "TAG: value", where TAG is the sole argument.
# The ($) prototype lets callers write `h $tag' inside a map block.
sub h ($) {
	s/\s+\z//s; # E::M leaves trailing white space
	s/\s+/ /sg; # collapse every whitespace run into one space
	return "$_[0]: $_";
}

# each_part callback: appends a normalized description of one MIME
# part to @$cmp_arg so the output of both parsers can be compared.
#   $p       - array ref from each_part; its first element is the part
#   $cmp_arg - array ref accumulating the comparison strings
my $cmp = sub {
	my ($p, $cmp_arg) = @_;
	my $part = shift @$p;

	# whatever each_part passed after the part itself identifies it
	push @$cmp_arg, '---'.join(', ', @$p).'---';

	my $ct = $part->content_type // 'text/plain';
	$ct =~ s/[ \t]+.*\z//s; # drop everything from the first blank on
	my ($s, $err);
	eval {
		for my $pair (@hdr_tags) {
			my ($tag, $name) = @$pair;
			push @$cmp_arg, map { h $tag } $part->header($name);
		}
		($s, $err) = msg_part_text($part, $ct);
		if (defined $s) {
			$s =~ s/\s+\z//s;
			push @$cmp_arg, "S: ".$s;
		} else {
			# no decoded text available, compare the raw body
			$part = $part->body;
			push @$cmp_arg, "T: $ct";
			if ($part =~ /[^\p{XPosixPrint}\s]/s) { # binary
				# record digest + size instead of the payload
				my $dig = $dig_cls->new;
				$dig->add($part);
				push @$cmp_arg, "M: ".$dig->hexdigest;
				push @$cmp_arg, "B: ".bytes::length($part);
			} else {
				$part =~ s/\s+\z//s;
				push @$cmp_arg, "X: ".$part;
			}
		}
	};
	if ($@) {
		# an exception is itself recorded and compared
		$err //= '';
		push @$cmp_arg, "E: $@ ($err)";
	}
};

# Writes a Data::Dumper rendering of $ctx to $path, dying on any error.
my $dump_ctx = sub {
	my ($path, $ctx) = @_;
	open my $out, '>', $path or die "open $path: $!";
	print {$out} Dumper($ctx) or die "print $path: $!";
	close $out or die "close $path: $!";
};

my $ndiff = 0; # number of blobs on which the parsers disagreed

# cat_async callback: parse one blob with both implementations and
# show a diff of the two descriptions if they are not identical.
#   $bref - scalar ref to the raw blob contents
#   $oid  - object ID of the blob, used only in diagnostics
my $git_cb = sub {
	my ($bref, $oid) = @_;
	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
	++$m;
	my $m_ctx = [];
	PublicInbox::MIME->new($$bref)->each_part($cmp, $m_ctx, 1);
	my $e_ctx = [];
	PublicInbox::Eml->new($$bref)->each_part($cmp, $e_ctx, 1);
	return if join("\0", @$e_ctx) eq join("\0", @$m_ctx);

	++$ndiff;
	$dump_ctx->("$tmpdir/mime", $m_ctx);
	$dump_ctx->("$tmpdir/eml", $e_ctx);
	diag "$inboxdir $oid differ";
	# using `git diff', diff(1) may not be installed; --no-index is
	# explicit so this works even when $tmpdir is inside a work tree
	diag xqx([qw(git diff --no-index), "$tmpdir/mime", "$tmpdir/eml"]);
};

my $t = timeit(1, sub {
	while (my $line = <$fh>) {
		# each line is split on single spaces; only the first two
		# fields (object ID and type) are used
		my ($oid, $type) = split / /, $line;
		next if $type ne 'blob';
		++$n;
		$git->cat_async($oid, $git_cb);
	}
	$git->cat_async_wait;
});
diag "$inboxdir: ".timestr($t);
is($m, $n, "$inboxdir rendered all $m <=> $n messages");
is($ndiff, 0, "$inboxdir $ndiff differences");
done_testing();