public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 7e2432e7ee8250a8b103ab452fc87761f83411e3 4114 bytes (raw)
$ git show HEAD:t/import.t	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
 
#!perl -w
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use v5.10.1;
use strict;
use PublicInbox::Eml;
use PublicInbox::Smsg;
use PublicInbox::Git;
use PublicInbox::Import;
use Fcntl qw(:DEFAULT SEEK_SET);
use PublicInbox::TestCommon;
use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2
my ($dir, $for_destroy) = tmpdir();

my $git = PublicInbox::Git->new($dir);
my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
$im->init_bare;
my $mime = PublicInbox::Eml->new(<<'EOF');
From: a@example.com
To: b@example.com
Subject: this is a subject
Message-ID: <a@example.com>
Date: Fri, 02 Oct 1993 00:00:00 +0000

hello world
EOF

my $v2 = require_git(2.6, 1);
my $smsg = $v2 ? bless({}, 'PublicInbox::Smsg') : undef;
like($im->add($mime, undef, $smsg), qr/\A:[0-9]+\z/, 'added one message');

SKIP: {
	skip 'git 2.6+ required', 3 if !$v2;
	like($smsg->{blob}, qr/\A[a-f0-9]{40,64}\z/, 'got last object_id');
	my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(hash-object --stdin));
	open my $in, '+<', undef or BAIL_OUT "open(+<): $!";
	print $in $mime->as_string or die "write failed: $!";
	$in->flush or die "flush failed: $!";
	seek($in, 0, SEEK_SET) or die "seek: $!";
	chomp(my $hashed_obj = xqx(\@cmd, undef, { 0 => $in }));
	is($?, 0, 'hash-object');
	is($hashed_obj, $smsg->{blob}, "blob object_id matches exp");
}

$im->done;
my @revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 1, 'one revision created');

my $odd = '"=?iso-8859-1?Q?J_K=FCpper?= <usenet"@example.de';
$mime->header_set('From', $odd);
$mime->header_set('Message-ID', '<b@example.com>');
$mime->header_set('Subject', 'msg2');
like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message');
$im->done;
@revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 2, '2 revisions exist');

is($im->add($mime), undef, 'message only inserted once');
$im->done;
@revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 2, '2 revisions exist');

foreach my $c ('c'..'z') {
	$mime->header_set('Message-ID', "<$c\@example.com>");
	$mime->header_set('Subject', "msg - $c");
	like($im->add($mime), qr/\A:\d+\z/, "added $c message");
}
$im->done;
@revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 26, '26 revisions exist after mass import');
my ($mark, $msg) = $im->remove($mime);
like($mark, qr/\A:\d+\z/, 'got mark');
like(ref($msg), qr/\bPublicInbox::(?:Eml|MIME)\b/, 'got old message deleted');

is(undef, $im->remove($mime), 'remove is idempotent');

# mismatch on identical Message-ID
$mime->header_set('Message-ID', '<a@example.com>');
($mark, $msg) = $im->remove($mime);
is($mark, 'MISMATCH', 'mark == MISMATCH on mismatch');
is($msg->header('Message-ID'), '<a@example.com>', 'Message-ID matches');
isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch');

$mime->header_set('Message-Id', '<failcheck@example.com>');
is($im->add($mime, sub { undef }), undef, 'check callback fails');
is($im->remove($mime), undef, 'message not added, so not removed');
is(undef, $im->checkpoint, 'checkpoint works before ->done');
$im->done;
is(undef, $im->checkpoint, 'checkpoint works after ->done');
$im->checkpoint;

my $nogit = PublicInbox::Git->new("$dir/non-existent/dir");
eval {
	my $nope = PublicInbox::Import->new($nogit, 'nope', 'no@example.com');
	$nope->add($mime);
};
ok($@, 'Import->add fails on non-existent dir');

my @cls = qw(PublicInbox::Eml);
SKIP: {
	require_mods('Email::MIME', 1);
	require PublicInbox::MIME;
	push @cls, 'PublicInbox::MIME';
};

$main::badchars = "\n\0\r";
my $from = '=?UTF-8?B?'. encode_base64("B\ra\nd\0\$main::badchars", ''). '?=';
for my $cls (@cls) {
	my $eml = $cls->new(<<EOF);
From: $from <spammer\@example.com>
Message-ID: <$cls\@example.com>

EOF
	ok($im->add($eml), "added $cls message with nasty char in From");
}
$im->done;
my $bref = $git->cat_file('HEAD');
like($$bref, qr/^author Ba d \$main::badchars <spammer\@example\.com> /sm,
	 'latest commit accepted by spammer');
$git->qx(qw(fsck --no-progress --strict));
is($?, 0, 'fsck reported no errors');
$main::badchars = undef;

done_testing();

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git