user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
6c52eec8c7a59e8b8ce9172d528610c4309f29b3 blob 4023 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
 
# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict; use v5.10.1; use PublicInbox::TestCommon;
use PublicInbox::Hval qw(ascii_html);
use MIME::QuotedPrint 3.05 qw(encode_qp);
use_ok('PublicInbox::MsgIter');

{
	# Walk t/msg_iter-order.eml and record (body, level, index...) for
	# every part; the expectation is two parts at level 1, numbered 1, 2.
	my $eml = eml_load('t/msg_iter-order.eml');
	my @seen;
	my $collect = sub {
		my ($part, $depth, @idx) = @{$_[0]};
		my $body = $part->body_str;
		$body =~ s/\s+//s; # strips the first whitespace run only
		push @seen, [ $body, $depth, @idx ];
	};
	msg_iter($eml, $collect);
	my @want = ([ 'a', '1', '1' ], [ 'b', '1', '2' ]);
	is_deeply(\@seen, \@want, 'order is fine');
}

{
	# Walk t/msg_iter-nested.eml; the expectation is two level-2 parts
	# (1.1 and 1.2) followed by a level-1 part numbered 2.
	my $eml = eml_load('t/msg_iter-nested.eml');
	my @seen;
	my $collect = sub {
		my ($part, $depth, @idx) = @{$_[0]};
		my $body = $part->body_str;
		$body =~ s/\s+//s; # strips the first whitespace run only
		push @seen, [ $body, $depth, @idx ];
	};
	msg_iter($eml, $collect);
	my @want = (
		[ 'a', '2', '1.1' ],
		[ 'b', '2', '1.2' ],
		[ 'sig', '1', '2' ],
	);
	is_deeply(\@seen, \@want, 'nested part shows up properly');
}

{
	# Decode every part of t/iso-2202-jp.eml as text/plain, asserting
	# that no part reports an error, and concatenate the results.
	my $eml = eml_load('t/iso-2202-jp.eml');
	my $text = '';
	my $append = sub {
		my ($part) = @{$_[0]};
		my ($str, $err) = msg_part_text($part, 'text/plain');
		ok(!$err, 'no error');
		$text .= $str;
	};
	msg_iter($eml, $append);
	ok(length($text) > 0, 'got non-empty message');
	is(index($text, '$$$'), -1, 'no unescaped $$$');
}

{
	# Decode every part of t/x-unknown-alpine.eml as text/plain and
	# check the concatenated text for a tab-led URL line and a bullet.
	my $eml = eml_load('t/x-unknown-alpine.eml');
	my $text = '';
	my $append = sub {
		my ($part) = @{$_[0]};
		# list context kept; only the decoded string is needed here
		my ($str) = msg_part_text($part, 'text/plain');
		$text .= $str;
	};
	msg_iter($eml, $append);
	like($text, qr!^\thttps://!ms, 'tab expanded with X-UNKNOWN');
	my $html = ascii_html($text);
	like($html, qr/&#8226; bullet point/s,
		'got bullet point when X-UNKNOWN assumes UTF-8');
}

{ # API not finalized
	# Exercise PublicInbox::MsgIter::split_quotes on a short message and
	# then on a much larger one.  Any warning raised while this scope is
	# active is captured in @warn so the test can assert there were none.
	my @warn;
	local $SIG{__WARN__} = sub { push @warn, [ @_ ] };
	my $attr = "So and so wrote:\n";
	my $q = "> hello world\n" x 10;
	my $nq = "hello world\n" x 10;
	my @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
	is($sections[0], $attr, 'attribution matches');
	is($sections[1], $q, 'quoted section matches');
	is($sections[2], $nq, 'non-quoted section matches');
	is(scalar(@sections), 3, 'only three sections for short message');
	is_deeply(\@warn, [], 'no warnings');

	# Repeat with both bodies grown 3300-fold.
	$q x= 3300;
	$nq x= 3300;
	@sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
	is_deeply(\@warn, [], 'no warnings on giant message');
	is(join('', @sections), $attr . $q . $nq, 'result matches expected');
	is(shift(@sections), $attr, 'attribution is first section');

	# The large input may come back in more than one piece per kind, so
	# reassemble: slot 0 gathers sections starting with '>', slot 1 the
	# rest.  Empty sections are skipped.
	my @check = ('', '');
	while (defined(my $sec = shift @sections)) {
		next if $sec eq '';
		like($sec, qr/\n\z/s, 'section ends with newline');
		my $idx = ($sec =~ /\A>/) ? 0 : 1;
		$check[$idx] .= $sec;
	}
	is($check[0], $q, 'long quoted section matches');
	# Distinct label: this assertion covers the non-quoted text.
	is($check[1], $nq, 'long non-quoted section matches');
}

{
	# Build two-part multipart/mixed messages whose second part is a
	# quoted-printable application/octet-stream attachment, then check
	# what msg_part_text yields for each part.
	open(my $in, '<', 't/utf8.eml') or BAIL_OUT($!);
	my $expect = do { local $/; <$in> };
	my $qp_patch = encode_qp($expect, "\r\n");
	my $common = <<EOM;
Content-Type: multipart/mixed; boundary="DEADBEEF"
MIME-Version: 1.0

--DEADBEEF
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=utf-8

blah

--DEADBEEF
Content-Disposition: attachment;
	filename=foo.patch
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name="foo.patch"
Content-Transfer-Encoding: quoted-printable
EOM

	# Runs msg_part_text on every part of $msg using the part's own
	# content type; returns (\@strings, \@errors) in visiting order.
	my $decode_all = sub {
		my ($msg) = @_;
		my (@str, @err);
		$msg->each_part(sub {
			my ($part) = @{$_[0]};
			my ($s, $e) = msg_part_text($part, $part->content_type);
			push @str, $s;
			push @err, $e;
		});
		(\@str, \@err);
	};

	# First message: the attachment is the QP-encoded t/utf8.eml.
	my $eml = PublicInbox::Eml->new(<<EOM);
$common
$qp_patch
--DEADBEEF--
EOM
	my ($parts) = $decode_all->($eml);
	$expect =~ s/\n/\r\n/sg; # compare against the CRLF form
	is_deeply($parts, [ "blah\r\n", $expect ],
		'fallback to application/octet-stream as UTF-8 text');

	# Second message: the attachment contains a NUL byte.
	my $qp_binary = encode_qp("Binary\0crap", "\r\n");
	$eml = PublicInbox::Eml->new(<<EOM);
$common
$qp_binary
--DEADBEEF--
EOM
	($parts, my $errs) = $decode_all->($eml);
	is_deeply($parts, [ "blah\r\n", undef ],
		'non-text ignored in octet-stream');
	ok($errs->[1], 'got error for second element');
}

done_testing();
debug log:

solving 6c52eec8 ...
found 6c52eec8 in https://public-inbox.org/meta/20210311014539.19756-1-e@80x24.org/
found e46d515c in public-inbox.git
preparing index
index prepared:
100644 e46d515c8ee4de99aafc8653ee542338f89648f4	t/msg_iter.t

applying [1/1] https://public-inbox.org/meta/20210311014539.19756-1-e@80x24.org/
diff --git a/t/msg_iter.t b/t/msg_iter.t
index e46d515c..6c52eec8 100644

Checking patch t/msg_iter.t...
Applied patch t/msg_iter.t cleanly.

index at:
100644 6c52eec8c7a59e8b8ce9172d528610c4309f29b3	t/msg_iter.t

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).