public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob ae3594da08324f67b7747ce9b0d8a6432cad02ac 4065 bytes (raw)
$ git show HEAD:t/msg_iter.t	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
 
# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict; use v5.10.1; use PublicInbox::TestCommon;
use PublicInbox::Hval qw(ascii_html);
use MIME::QuotedPrint 3.05 qw(encode_qp);
use_ok('PublicInbox::MsgIter');

{
	# Visit every part of the fixture with msg_iter and record, per part,
	# its body string (first whitespace run removed) followed by the level
	# and the trailing elements msg_iter hands to the callback.
	my @seen;
	my $eml = eml_load('t/msg_iter-order.eml');
	my $collect = sub {
		my ($part, $depth, @rest) = @{ $_[0] };
		(my $body = $part->body_str) =~ s/\s+//s;
		push(@seen, [ $body, $depth, @rest ]);
	};
	msg_iter($eml, $collect);
	my @want = ([ qw(a 1 1) ], [ qw(b 1 2) ]);
	is_deeply(\@seen, \@want, 'order is fine');
}

{
	# Same walk as a flat message, but on a fixture with nested parts:
	# the callback must report the two inner parts before the trailing
	# "sig" part.
	my (@got, $eml);
	$eml = eml_load('t/msg_iter-nested.eml');
	msg_iter($eml, sub {
		my ($part, $depth, @rest) = @{ $_[0] };
		my $text = $part->body_str;
		$text =~ s/\s+//s;
		push(@got, [ $text, $depth, @rest ]);
	});
	my @want = ([qw(a 2 1.1)], [qw(b 2 1.2)], [qw(sig 1 2)]);
	is_deeply(\@got, \@want, 'nested part shows up properly');
}

{
	# Convert each part of the t/iso-2202-jp.eml fixture with
	# msg_part_text (as text/plain), requiring that no part reports an
	# error, then check the concatenated text.
	my $text = '';
	my $eml = eml_load('t/iso-2202-jp.eml');
	my $append = sub {
		my ($part) = @{ $_[0] };
		my ($str, $err) = msg_part_text($part, 'text/plain');
		ok(!$err, 'no error');
		$text .= $str;
	};
	msg_iter($eml, $append);
	ok(length($text) > 0, 'got non-empty message');
	# a literal '$$$' must not appear anywhere in the result
	is(index($text, '$$$'), -1, 'no unescaped $$$');
}

{
	# Concatenate the msg_part_text output of every part of the
	# t/x-unknown-alpine.eml fixture, then check both the raw text and
	# its ascii_html rendering.
	my $text = '';
	my $eml = eml_load('t/x-unknown-alpine.eml');
	msg_iter($eml, sub {
		my ($part) = @{ $_[0] };
		# only the converted string is used; the error slot is ignored
		my ($str) = msg_part_text($part, 'text/plain');
		$text .= $str;
	});
	like($text, qr!^\thttps://!ms, 'tab expanded with X-UNKNOWN');
	like(ascii_html($text), qr/&#8226; bullet point/s,
		'got bullet point when X-UNKNOWN assumes UTF-8');
}

{ # API not finalized
	# Exercise PublicInbox::MsgIter::split_quotes on a short message and
	# on a much larger one built from the same lines.  Any warning
	# emitted while splitting is captured in @warn and fails the test.
	my @warn;
	local $SIG{__WARN__} = sub { push @warn, [ @_ ] };
	my $attr = "So and so wrote:\n";
	my $q = "> hello world\n" x 10;
	my $nq = "hello world\n" x 10;

	# short message: exactly attribution, quoted, non-quoted
	my @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
	is($sections[0], $attr, 'attribution matches');
	is($sections[1], $q, 'quoted section matches');
	is($sections[2], $nq, 'non-quoted section matches');
	is(scalar(@sections), 3, 'only three sections for short message');
	is_deeply(\@warn, [], 'no warnings');

	# giant message: the section count is not pinned, only that the
	# pieces join back to the input and each ends on a line boundary
	$q x= 3300;
	$nq x= 3300;
	@sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
	is_deeply(\@warn, [], 'no warnings on giant message');
	is(join('', @sections), $attr . $q . $nq, 'result matches expected');
	is(shift(@sections), $attr, 'attribution is first section');

	# re-assemble quoted ($check[0]) and non-quoted ($check[1]) text
	# from however many sections came back
	my @check = ('', '');
	while (defined(my $sec = shift @sections)) {
		next if $sec eq '';
		like($sec, qr/\n\z/s, 'section ends with newline');
		my $idx = ($sec =~ /\A>/) ? 0 : 1;
		$check[$idx] .= $sec;
	}
	is($check[0], $q, 'long quoted section matches');
	# distinct description so a failure here is not mistaken for the
	# quoted-section check above
	is($check[1], $nq, 'long non-quoted section matches');
}

{
	# A quoted-printable application/octet-stream attachment is passed to
	# msg_part_text under its own content type.  With UTF-8 text as the
	# payload the text must come back; with a payload containing a NUL
	# byte the result must be undef together with an error.
	my $expect = do {
		open my $fh, '<', 't/utf8.eml' or BAIL_OUT $!;
		local $/; # slurp the whole fixture
		<$fh>;
	};
	my $qp_text = encode_qp($expect, "\r\n");
	my $head = <<EOM;
Content-Type: multipart/mixed; boundary="DEADBEEF"
MIME-Version: 1.0

--DEADBEEF
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=utf-8

blah

--DEADBEEF
Content-Disposition: attachment;
	filename=foo.patch
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name="foo.patch"
Content-Transfer-Encoding: quoted-printable
EOM

	# Run msg_part_text over every part of $msg; returns two array refs:
	# the converted strings and the matching error values, in part order.
	my $convert_parts = sub {
		my ($msg) = @_;
		my (@text, @errors);
		$msg->each_part(sub {
			my ($part) = @{ $_[0] };
			my ($str, $err) = msg_part_text($part, $part->content_type);
			push(@text, $str);
			push(@errors, $err);
		});
		(\@text, \@errors);
	};

	my $eml = PublicInbox::Eml->new(<<EOM);
$head
$qp_text
--DEADBEEF--
EOM
	my ($got) = $convert_parts->($eml);
	$expect =~ s/\n/\r\n/sg;
	utf8::decode($expect); # aka "bytes2str"
	is_deeply($got, [ "blah\r\n", $expect ],
		'fallback to application/octet-stream as UTF-8 text');

	my $qp_nul = encode_qp("Binary\0crap", "\r\n");
	$eml = PublicInbox::Eml->new(<<EOM);
$head
$qp_nul
--DEADBEEF--
EOM
	($got, my $errors) = $convert_parts->($eml);
	is_deeply($got, [ "blah\r\n", undef ],
		'non-text ignored in octet-stream');
	ok($errors->[1], 'got error for second element');
}

done_testing();

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git