public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 5bd7aa174df0c786fcd16978a022e3d72e1728bb 3081 bytes (raw)
$ git show stable-1.6:xt/cmp-msgview.t	# shows this blob on the CLI

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
 
#!perl -w
# Copyright (C) 2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
use Benchmark qw(:all);
use PublicInbox::Inbox;
use PublicInbox::View;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use Digest::MD5;
require_git(2.19);
require_mods qw(Data::Dumper Email::MIME Plack::Util);
Data::Dumper->import('Dumper');
require PublicInbox::MIME;
# Scratch directory; used below to hold the "mime" and "eml" HTML dumps
# whenever the two renderings differ.
# NOTE(review): $for_destroy is presumably a guard object that removes
# $tmpdir when it goes out of scope -- confirm in PublicInbox::TestCommon
my ($tmpdir, $for_destroy) = tmpdir();
# The inbox to walk comes from the environment; the whole test is skipped
# when GIANT_INBOX_DIR is unset or empty.
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
# git arguments used to enumerate every object in the repository; the
# main loop reads the resulting lines and keeps only those of type "blob"
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' });
my $git = $ibx->git;
my $fh = $git->popen(@cat);
# Wait at most 60 seconds for the first output of the pipe to become
# readable: select() returns false on timeout, which aborts the test.
vec(my $vec = '', fileno($fh), 1) = 1;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
# Context handed to PublicInbox::View::multipart_text_as_html for the
# PublicInbox::MIME rendering; `obuf' is a reference to $mime_buf, which
# is later compared against $eml_buf.
my $mime_ctx = {
	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
	-inbox => $ibx,
	www => Plack::Util::inline_object(style => sub {''}),
	obuf => \(my $mime_buf = ''),
	mhref => '../',
};
# Shallow copy of the context above for the PublicInbox::Eml rendering,
# differing only in that `obuf' refers to its own buffer ($eml_buf).
my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') };
my $n = 0; # number of blobs queued for cat_async
my $m = 0; # number of times the cat_async callback actually ran
my $ndiff_html = 0; # number of messages whose two HTML renderings differ
# Digest class used to fingerprint the raw bodies of all parts of a message
my $dig_cls = 'Digest::MD5';

# ->each_part callback used to compare how two parsers split a message.
#
# Arguments:
#   $ent - array ref whose first element is the part object; the part is
#          shifted off, so only the trailing elements remain afterwards
#          (presumably depth/index information from each_part -- confirm)
#   $acc - accumulator array ref: slot 0 holds a single digest object
#          (created on first use) fed with every part's ->body_raw, and one
#          string per part is appended after it, made of the remaining
#          elements of $ent joined by ', '
#
# NOTE(review): the original header comment read "ensure ->body (not
# ->body_raw) matches", yet the code digests ->body_raw.  Behaviour is
# left untouched here; confirm which accessor is really intended.
my $digest_attach = sub {
	my ($ent, $acc) = @_;
	my $part = shift @$ent;
	$acc->[0] //= $dig_cls->new;
	$acc->[0]->add($part->body_raw);
	push @$acc, join(', ', @$ent);
};

# cat_async callback, invoked once per queued blob.
#
# Arguments:
#   $bref - scalar ref to the message data handed over by cat_async
#   $oid  - object ID of that blob (only used in diagnostics)
#
# The same data is parsed with both PublicInbox::MIME and PublicInbox::Eml:
#   1. both objects are rendered through multipart_text_as_html; a
#      difference is merely counted in $ndiff_html and shown via diag
#   2. both objects are walked with ->each_part + $digest_attach; a
#      difference there produces a failing is_deeply
# Warnings emitted while processing are redirected to diag, prefixed with
# the inbox directory and the object ID.
my $git_cb = sub {
	my ($bref, $oid) = @_;
	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
	++$m;
	my $mime = PublicInbox::MIME->new($$bref);
	PublicInbox::View::multipart_text_as_html($mime, $mime_ctx);
	my $eml = PublicInbox::Eml->new($$bref);
	PublicInbox::View::multipart_text_as_html($eml, $eml_ctx);
	if ($eml_buf ne $mime_buf) {
		++$ndiff_html;
		# dump both renderings so they can be diffed; the handle is
		# deliberately not named $fh to avoid shadowing the file-level
		# batch-check pipe
		my %rendered = (mime => \$mime_buf, eml => \$eml_buf);
		for my $name (qw(mime eml)) {
			my $path = "$tmpdir/$name";
			open(my $out, '>', $path) or die "open $path: $!";
			print {$out} ${$rendered{$name}} or
				die "print $path: $!";
			close($out) or die "close $path: $!";
		}
		# using `git diff', diff(1) may not be installed
		# --no-index is explicit so this keeps working even when the
		# temporary directory happens to be inside a git work tree
		diag "$inboxdir $oid differs";
		diag xqx([qw(git diff --no-index),
			"$tmpdir/mime", "$tmpdir/eml"]);
	}
	# reset the shared output buffers for the next message
	$eml_buf = $mime_buf = '';

	# don't tolerate differences in attachment downloads
	# (fresh objects are parsed again for this second pass)
	$mime = PublicInbox::MIME->new($$bref);
	$mime->each_part($digest_attach, my $mime_cmp = [], 1);
	$eml = PublicInbox::Eml->new($$bref);
	$eml->each_part($digest_attach, my $eml_cmp = [], 1);
	# replace the digest objects in slot 0 by their hex form so that
	# both lists can be compared as plain strings
	$mime_cmp->[0] = $mime_cmp->[0]->hexdigest;
	$eml_cmp->[0] = $eml_cmp->[0]->hexdigest;
	# don't have millions of "ok" lines
	if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) {
		diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]);
		is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match");
	}
};
# Main run: read the `cat-file --batch-check' listing line by line, queue
# every object of type "blob" for $git_cb and wait until all queued
# callbacks have run.  The whole pass is timed once with Benchmark.
my $t = timeit(1, sub {
	# a lexical line variable keeps the global $_ untouched
	while (my $line = <$fh>) {
		# each line starts with "<oid> <type> "; the rest is unused
		my ($oid, $type) = split(/ /, $line);
		next if $type ne 'blob';
		++$n;
		$git->cat_async($oid, $git_cb);
	}
	$git->cat_async_wait;
});
# report the measurement instead of silently discarding it
diag 'comparing MIME and Eml took '.timestr($t)." for $n blobs";
# every queued blob must have reached the callback
is($m, $n, 'rendered all messages');

# we'll tolerate minor differences in HTML rendering
diag "$ndiff_html HTML differences";

done_testing();

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git