public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 14248a2dd0c6a21ed34c03fee4ab9c2a7907c656 4494 bytes (raw)
$ git show HEAD:t/mbox_reader.t	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
 
#!perl -w
# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
use List::Util qw(shuffle);
use PublicInbox::Eml;
use Fcntl qw(SEEK_SET);
require_ok 'PublicInbox::MboxReader';
require_ok 'PublicInbox::LeiToMail';
# Fixture messages keyed by a short name.  The "big" variants pad the
# header and/or body with 1000 identical 76-byte lines; several bodies
# contain a line starting with "From " (or "from ").
my %raw = do {
	my $hdr_line = 'A: ' . ('a' x 72) . "\n";
	my $body_line = 'b: ' . ('b' x 72) . "\n";
	my $hdr_bulk = $hdr_line x 1000;
	my $body_bulk = $body_line x 1000;
	(
		hdr_only => "From: header-only\@example.com\n\n",
		small_from => "From: small-from\@example.com\n\nFrom hell\n",
		small => "From: small\@example.com\n\nfrom hell\n",
		big_hdr_only => "From: big-header\@example.com\n" .
			$hdr_bulk . "\n",
		big_body => "From: big-body\@example.com\n\n" .
			$body_bulk . "From hell\n",
		big_all => "From: big-all\@example.com\n" .
			$hdr_bulk . "\n" .
			$body_bulk . "From hell\n",
	);
};
{
	# Call mbox_keywords on one message while status headers are added
	# step by step; each row optionally sets a header before checking.
	my $kw_of = PublicInbox::MboxReader->can('mbox_keywords');
	my $msg = PublicInbox::Eml->new($raw{small});
	my @steps = (
		# [ header to set (undef: none), value, expected, description ]
		[ undef, undef, [], 'no keywords' ],
		[ 'Status', 'RO', ['seen'], 'seen extracted' ],
		[ 'X-Status', 'A', [qw(answered seen)],
			'seen+answered extracted' ],
	);
	foreach my $step (@steps) {
		my ($hdr, $val, $want, $desc) = @$step;
		$msg->header_set($hdr, $val) if defined $hdr;
		is_deeply($kw_of->($msg), $want, $desc);
	}
}

# With TEST_EXTRA set, also register every sample message found under t/
# as a fixture, keyed by its path.
if ($ENV{TEST_EXTRA}) {
	my @extra = (glob('t/*.eml'), glob('t/*/*.{patch,eml}'));
	foreach my $path (@extra) {
		my $eml = eml_load($path);
		$raw{$path} = $eml->as_string;
	}
}

my $reader = PublicInbox::MboxReader->new;

# Round-trip every fixture in %raw through a single mbox format.
#
# $fmt names both the writer (PublicInbox::LeiToMail "eml2$fmt") and the
# reader method invoked on $reader.  Fixtures are written in shuffled
# order to an anonymous temporary file, read back through the reader
# callback, and each message is compared with a fresh parse of the
# original.  Bails out of the whole test run on I/O failure.
my $check_fmt = sub {
	my $fmt = shift;
	my @order = shuffle(keys %raw);
	my $eml2mbox = PublicInbox::LeiToMail->can("eml2$fmt");
	# an undef filename with '+>' opens an anonymous temporary file
	open my $fh, '+>', undef or BAIL_OUT "open: $!";
	for my $k (@order) {
		my $eml = PublicInbox::Eml->new($raw{$k});
		my $buf = $eml2mbox->($eml);
		print $fh $$buf or BAIL_OUT "print $!";
	}
	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
	$reader->$fmt($fh, sub {
		my ($eml) = @_;
		# header_set without a value: presumably removes the header so
		# it cannot affect the as_string comparison -- TODO confirm
		$eml->header_set('Status');
		$eml->header_set('Lines');
		# messages are expected back in the order they were written
		my $cur = shift @order;
		my @cl = $eml->header_raw('Content-Length');
		if ($fmt =~ /\Amboxcl/) {
			is(scalar(@cl), 1, "Content-Length set $fmt $cur");
			my $raw = $eml->body_raw;
			my $adj = 0;
			if ($fmt eq 'mboxcl') {
				# for mboxcl the header value is expected to
				# exceed the body length read back by one per
				# body line starting with "From "
				# NOTE(review): presumably the escape character
				# added on write -- confirm against the writer
				my @from = ($raw =~ /^(From )/smg);
				$adj = scalar(@from);
			}
			is(length($raw), $cl[0] - $adj,
				"Content-Length is correct $fmt $cur");
			# clobber for ->as_string comparison below
			$eml->header_set('Content-Length');

			# special case for t/solve/bare.patch, not sure if we
			# should even handle it...
			if ($cl[0] eq '0' && ${$eml->{hdr}} eq '') {
				delete $eml->{bdy};
			}
		} else {
			is(scalar(@cl), 0, "Content-Length unset $fmt $cur");
		}
		my $orig = PublicInbox::Eml->new($raw{$cur});
		is($eml->as_string, $orig->as_string,
			"read back original $fmt $cur");
	});
	# Every message written must have reached the callback; without
	# this check a reader which stops early (or never invokes the
	# callback at all) would pass with fewer or zero assertions.
	is_deeply(\@order, [], "all messages read back $fmt");
};
# Run the round-trip check for every format twice: first with the
# fixtures as they are, then after rewriting each "\n" in them to "\r\n".
my @mbox = qw(mboxrd mboxo mboxcl mboxcl2);
for my $crlf_pass (0, 1) {
	if ($crlf_pass) {
		s/\n/\r\n/sg for (values %raw);
	}
	foreach my $name (@mbox) {
		$check_fmt->($name);
	}
}

{
	# Two messages where the second "From " line directly follows the
	# first body, with no blank line in between.
	my $no_blank_eom = <<'EOM';
From x@y Fri Oct  2 00:00:00 1993
a: b

body1
From x@y Fri Oct  2 00:00:00 1993
c: d

body2
EOM
	# disabled sanity check, left as found:
	# chop($no_blank_eom) eq "\n" or BAIL_OUT 'broken LF';
	my @want = (
		# [ expected {bdy} of the Nth message, description ]
		[ \"body1\n", 'LF preserved in 1st' ],
		[ \"body2\n", 'no LF added in 2nd' ],
	);
	foreach my $fmt (qw(mboxrd mboxo)) {
		my @seen;
		open(my $in, '<', \$no_blank_eom) or BAIL_OUT 'PerlIO::scalar';
		$reader->$fmt($in, sub { push @seen, $_[0] });
		for my $i (0 .. $#want) {
			my ($body, $desc) = @{$want[$i]};
			is_deeply($seen[$i]->{bdy}, $body, $desc);
		}
	}
}

SKIP: {
	use PublicInbox::Spawn qw(popen_rd);
	# Script for a child perl: it prints one message with a 70000-byte
	# body, then a second, short message, and finally exits with
	# status 1.  The checks below expect the reader to die and to
	# have delivered only the first message.
	my $child_src = <<'EOPERL';
say 'From x@y Fri Oct  2 00:00:00 1993';
print "a: b\n\n", "x" x 70000, "\n\n";
say 'From x@y Fri Oct  2 00:00:00 2010';
print "Final: bit\n\n", "Incomplete\n\n";
exit 1
EOPERL
	my $fh = popen_rd([ $^X, qw(-w -Mv5.12 -e), $child_src ]);
	my @seen;
	eval {
		$reader->mboxrd($fh, sub {
			my ($eml) = @_;
			push @seen, $eml->as_string;
		});
	};
	like($@, qr/error closing mbox/, 'detects error reading from pipe');
	is(scalar(@seen), 1, 'only saw one message');
	is(scalar(grep(/Final/, @seen)), 0, 'no incomplete bit');
}

{
	# Feed input that is not an mbox at all (a one-line HTML page) to
	# every reader.  The mboxcl* readers are expected to emit nothing;
	# the others are expected to emit a single message whose string
	# form is "\n" followed by the input.
	my $html = <<"EOM";
<html><head><title>hi,</title></head><body>how are you</body></html>
EOM
	foreach my $fmt (qw(mboxrd mboxcl mboxcl2 mboxo)) {
		my @warnings;
		my @emitted;
		local $SIG{__WARN__} = sub { push @warnings, @_ };
		open(my $in, '<', \$html) or xbail 'PerlIO::scalar';
		PublicInbox::MboxReader->$fmt($in, sub {
			my ($eml) = @_;
			push @emitted, $eml->as_string;
		});
		my ($want, $desc) = ($fmt =~ /\Amboxcl/)
			? ([], "messages in invalid $fmt")
			: ([ "\n$html" ], "body-only $fmt");
		is_deeply(\@emitted, $want, $desc);
		# warnings starting with "W: leftover" are tolerated
		my @unexpected = grep(!/^W: leftover/, @warnings);
		is_deeply(\@unexpected, [],
			"no extra warnings besides leftover ($fmt)");
	}
}

done_testing;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git