public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 13fc1f3bdb6a78bc0154255c8e36c4ec6ca25fcc 3410 bytes (raw)
$ git show HEAD:t/lei_dedupe.t	# shows this blob on the CLI

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
 
#!perl -w
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use PublicInbox::Smsg;
# NOTE(review): require_mods comes from PublicInbox::TestCommon; presumably
# it skips the test when DBD::SQLite is unavailable -- confirm.
require_mods(qw(DBD::SQLite));
use_ok 'PublicInbox::LeiDedupe';

# Fixtures shared by every strategy section below:
#   $eml       - reference message loaded from t/plack-qp.eml
#   $sameish   - same file as $eml, but with a different Message-ID
#   $different - a different file (t/msg_iter-order.eml) forced to carry
#                the same Message-ID as $eml
my $eml = eml_load('t/plack-qp.eml');
my $sameish = eml_load('t/plack-qp.eml');
$sameish->header_set('Message-ID', '<cuepee@example.com>');
my $mid = $eml->header_raw('Message-ID');
my $different = eml_load('t/msg_iter-order.eml');
$different->header_set('Message-ID', $mid);

# $smsg is a PublicInbox::Smsg populated from $eml, used by the
# is_smsg_dup checks.
my $smsg = bless { ds => time }, 'PublicInbox::Smsg';
$smsg->populate($eml);
# give these fields an empty-string default if populate() left them undefined
$smsg->{$_} //= '' for (qw(to cc references)) ;

# Assert that a deduper object is unchanged after a Storable
# freeze/thaw cycle.
#   $dedupe - the object to serialize; element [3] of it is interpolated
#             into the test name (presumably a strategy label -- confirm)
# NOTE(review): require_mods('Storable', 1) presumably skips this single
# assertion when Storable is not installed -- confirm against TestCommon.
my $check_storable = sub {
	my ($dedupe) = @_;
	SKIP: {
		require_mods('Storable', 1);
		my $frozen = Storable::freeze($dedupe);
		my $thawed = Storable::thaw($frozen);
		is_deeply($thawed, $dedupe,
			"$dedupe->[3] round-trips through storable");
	}
};

# Strategy 'none': every lookup below is expected to report "not a
# duplicate", even when the very same message is offered twice.
my $lei = { opt => { dedupe => 'none' } };
my $dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;
for my $name ('1st is_dup w/o dedupe', '2nd is_dup w/o dedupe') {
	ok(!$dd->is_dup($eml), $name);
}
ok(!$dd->is_dup($different), 'different is_dup w/o dedupe');
for my $name ('smsg dedupe none 1', 'smsg dedupe none 2') {
	ok(!$dd->is_smsg_dup($smsg), $name);
}

# Run the same expectations twice: once with the dedupe option left
# undefined (the default strategy) and once with an explicit 'content'.
for my $strategy (undef, 'content') {
	$lei->{opt}{dedupe} = $strategy;
	$dd = PublicInbox::LeiDedupe->new($lei);
	$check_storable->($dd);
	$dd->prepare_dedupe;
	my $desc = defined($strategy) ? $strategy : 'default';

	# the identical message is a dup only on the second offer
	ok(!$dd->is_dup($eml),
		"1st is_dup with $desc dedupe");
	ok($dd->is_dup($eml),
		"2nd seen with $desc dedupe");

	# same Message-ID but different content: not a dup
	ok(!$dd->is_dup($different),
		"different is_dup with $desc dedupe");

	# the smsg path follows the same first-pass / second-reject pattern
	ok(!$dd->is_smsg_dup($smsg),
		"is_smsg_dup pass w/ $desc dedupe");
	ok($dd->is_smsg_dup($smsg),
		"is_smsg_dup reject w/ $desc dedupe");

	# same content as $eml but another Message-ID: not a dup
	ok(!$dd->is_dup($sameish),
		"Message-ID accounted for w/ same content otherwise");
}
# An unknown strategy name must make the constructor die, and the error
# text must contain "unsupported" followed by the rejected name.
$lei->{opt}->{dedupe} = 'bogus';
eval { PublicInbox::LeiDedupe->new($lei) };
# $@ is checked right after the eval, before anything else can clobber it
like($@, qr/unsupported.*bogus/, 'died on bogus strategy');

# Strategy 'mid': the assertions below are order-dependent, each one
# relies on what the deduper has already seen.
$lei->{opt}->{dedupe} = 'mid';
$dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;
ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
ok($dd->is_dup($eml), '2nd seen with mid dedupe');
# $different has other content but was given $eml's Message-ID above,
# so under 'mid' it is expected to count as already seen
ok($dd->is_dup($different), 'different seen with mid dedupe');
# the smsg path is tracked separately here: first call passes even though
# $smsg was populated from $eml, second call is rejected
ok(!$dd->is_smsg_dup($smsg), 'smsg mid dedupe pass');
ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject');

# Strategy 'oid': the assertions below are order-dependent, each one
# relies on what the deduper has already seen.
$lei->{opt}->{dedupe} = 'oid';
$dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;

# --augment won't have OIDs:
# (is_dup is called without an smsg here, so no blob OID is supplied)
ok(!$dd->is_dup($eml), '1st is_dup with oid dedupe (augment)');
ok($dd->is_dup($eml), '2nd seen with oid dedupe (augment)');
ok(!$dd->is_dup($different), 'different is_dup with oid dedupe (augment)');
# adding a Status header must not make $different look like a new message
# NOTE(review): presumably Status is stripped before hashing -- confirm
$different->header_set('Status', 'RO');
ok($dd->is_dup($different), 'different seen with oid dedupe Status removed');

# From here on an OID is supplied via a plain hash carrying only {blob}.
$smsg = { blob => '01d' };
ok(!$dd->is_dup($eml, $smsg), '1st is_dup with oid dedupe');
ok($dd->is_dup($different, $smsg), 'different content ignored if oid matches');
$smsg->{blob} = uc($smsg->{blob});
ok($dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P');
# '01dbad' merely starts with the already-seen '01d'; it is a distinct OID
$smsg->{blob} = '01dbad';
ok(!$dd->is_dup($eml, $smsg), 'distinct oid is not a dup');

$smsg->{blob} = 'dead';
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');
ok($dd->is_smsg_dup($smsg), 'smsg dedupe reject');

done_testing;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git