user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
b6c8ec659cebcee4ac7e6e62a86fcfe06b896151 blob 3249 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
 
#!perl -w
# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
use Benchmark qw(:all);
use PublicInbox::Inbox;
use PublicInbox::View;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use Digest::MD5;
use PublicInbox::MsgIter;
require_mods(qw(Data::Dumper Email::MIME));
Data::Dumper->import('Dumper');
require PublicInbox::MIME;
require_git(2.19);
# Scratch directory; the two Data::Dumper dumps of a differing message are
# written here.  $for_destroy is kept alive for the whole script
# (presumably a guard that cleans up $tmpdir on destruction -- confirm in
# PublicInbox::TestCommon).
my ($tmpdir, $for_destroy) = tmpdir();
# The inbox to scan comes from the environment; without it the whole test
# is skipped.
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
# Arguments for `git cat-file'; each output line is later split on a space
# into an object ID and an object type.
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'cmp' });
my $git = $ibx->git;
my $fh = $git->popen(@cat);
# Wait up to 60 seconds for the cat-file pipe to become readable before
# starting, so a stuck git process fails the test instead of hanging it.
vec(my $vec = '', fileno($fh), 1) = 1;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
my $n = 0; # blobs queued via cat_async
my $m = 0; # blobs actually handed to the comparison callback
my $dig_cls = 'Digest::MD5'; # digest class used to fingerprint binary bodies
# Normalize the header value currently held in $_ (edited in place) and
# return it prefixed with the short tag given as the only argument.
# Trailing white space is removed first, then every remaining run of
# white space is squeezed into a single space.
sub h ($) {
	my ($tag) = @_;
	$_ =~ s/\s+\z//s; # E::M leaves trailing white space
	$_ =~ s/\s+/ /sg;
	return join(': ', $tag, $_);
}

# ->each_part callback: flatten one MIME part into a list of comparable
# strings appended to @$cmp_arg.
#
# $p is the arrayref handed over by ->each_part; its first element is the
# part itself and the remaining elements are recorded verbatim in a
# '---...---' separator line.  For every part, a fixed set of headers is
# recorded (normalized by h()), followed by either the decoded text
# ("S:"), or the content type ("T:") plus the raw body: digest and length
# for binary data ("M:"/"B:"), the body itself otherwise ("X:").
# Any exception is recorded as an "E:" line instead of propagating.
my $cmp = sub {
	my ($p, $cmp_arg) = @_;
	my $part = shift @$p;
	push @$cmp_arg, '---'.join(', ', @$p).'---';
	my $ct = $part->content_type // 'text/plain';
	$ct =~ s/[ \t]+.*\z//s; # keep only what precedes the first blank
	my ($s, $err);
	# [ tag, header name ], in the order the headers are recorded
	my @hdr = (
		[ 'f', 'From' ],
		[ 't', 'To' ],
		[ 'cc', 'Cc' ],
		[ 'mid', 'Message-ID' ],
		[ 'refs', 'References' ],
		[ 'irt', 'In-Reply-To' ],
		[ 's', 'Subject' ],
		[ 'cd', 'Content-Description' ],
	);
	eval {
		for my $pair (@hdr) {
			my ($tag, $name) = @$pair;
			push @$cmp_arg, map { h($tag) } $part->header($name);
		}
		($s, $err) = msg_part_text($part, $ct);
		if (!defined $s) {
			# not representable as text: compare the raw body
			my $body = $part->body;
			push @$cmp_arg, "T: $ct";
			if ($body =~ /[^\p{XPosixPrint}\s]/s) { # binary
				my $dig = $dig_cls->new;
				$dig->add($body);
				push @$cmp_arg, 'M: '.$dig->hexdigest,
						'B: '.length($body);
			} else {
				$body =~ s/\s+\z//s;
				push @$cmp_arg, "X: $body";
			}
		} else {
			$s =~ s/\s+\z//s;
			push @$cmp_arg, "S: $s";
		}
	};
	if ($@) {
		$err //= '';
		push @$cmp_arg, "E: $@ ($err)";
	}
};

my $ndiff = 0; # number of blobs whose two parses disagreed

# cat_async callback: parse one blob with both PublicInbox::MIME and
# PublicInbox::Eml, run $cmp over every part of each, and compare the
# resulting string lists.  On a mismatch, both lists are dumped under
# $tmpdir and a `git diff' of the two dumps is emitted via diag.
# Warnings raised while processing are prefixed with the inbox and OID.
my $git_cb = sub {
	my ($bref, $oid) = @_;
	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
	++$m;
	my ($m_ctx, $e_ctx) = ([], []);
	PublicInbox::MIME->new($$bref)->each_part($cmp, $m_ctx, 1);
	PublicInbox::Eml->new($$bref)->each_part($cmp, $e_ctx, 1);
	my $m_str = join("\0", @$m_ctx);
	my $e_str = join("\0", @$e_ctx);
	if ($e_str ne $m_str) {
		++$ndiff;
		for my $dump ([ 'mime', $m_ctx ], [ 'eml', $e_ctx ]) {
			my ($base, $ctx) = @$dump;
			open my $out, '>', "$tmpdir/$base" or die $!;
			print $out Dumper($ctx) or die $!;
			close $out or die $!;
		}
		diag "$inboxdir $oid differ";
		# using `git diff', diff(1) may not be installed
		diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]);
	}
};
# Read the cat-file listing line by line, queue every blob for
# comparison via $git_cb, then wait for all queued requests to finish.
# The whole run is wrapped in a single timeit() iteration.
my $t = timeit(1, sub {
	while (defined(my $line = <$fh>)) {
		my ($oid, $type) = split(/ /, $line);
		next unless $type eq 'blob';
		$n++;
		$git->cat_async($oid, $git_cb);
	}
	$git->async_wait_all;
});

# every queued blob must have reached the callback, with no differences
is($m, $n, "$inboxdir rendered all $m <=> $n messages");
is($ndiff, 0, "$inboxdir $ndiff differences");
done_testing;
debug log:

solving b6c8ec659ceb ...
found b6c8ec659ceb in https://80x24.org/public-inbox.git

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for URLs of read-only IMAP folder(s) and NNTP newsgroup(s).