diff options
Diffstat (limited to 't/lei_dedupe.t')
-rw-r--r-- | t/lei_dedupe.t | 93 |
1 files changed, 93 insertions, 0 deletions
#!perl -w
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Tests for PublicInbox::LeiDedupe: every supported --dedupe strategy
# ("none", the default, "content", "mid" and "oid") is checked against
# both parsed messages (is_dup) and Smsg objects (is_smsg_dup), and an
# unsupported strategy name must cause the constructor to die.
use strict;
use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use PublicInbox::Smsg;
require_mods(qw(DBD::SQLite));
use_ok 'PublicInbox::LeiDedupe';

# Fixtures:
#   $eml       - the reference message
#   $sameish   - the same file as $eml, but with another Message-ID
#   $different - another message file, forced to share $eml's Message-ID
my $eml = eml_load('t/plack-qp.eml');
my $sameish = eml_load('t/plack-qp.eml');
$sameish->header_set('Message-ID', '<cuepee@example.com>');
my $mid = $eml->header_raw('Message-ID');
my $different = eml_load('t/msg_iter-order.eml');
$different->header_set('Message-ID', $mid);

# Smsg built from $eml; fields left undefined by populate() are set to ''
# (presumably so the dedupe code never sees undef -- confirm in LeiDedupe)
my $smsg = bless { ds => time }, 'PublicInbox::Smsg';
$smsg->populate($eml);
$smsg->{$_} //= '' for (qw(to cc references));

# Verify that a dedupe object survives a Storable freeze/thaw round trip.
# Runs inside a SKIP block so a missing Storable skips instead of failing.
# $x->[3] is interpolated into the description as-is; NOTE(review): this
# relies on the object being an array ref whose 4th slot is printable.
my $check_storable = sub {
	my ($x) = @_;
	SKIP: {
		require_mods('Storable', 1);
		my $dup = Storable::thaw(Storable::freeze($x));
		is_deeply($dup, $x, "$x->[3] round-trips through storable");
	}
};

# --dedupe=none: nothing is ever reported as a duplicate
my $lei = { opt => { dedupe => 'none' } };
my $dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;
ok(!$dd->is_dup($eml), '1st is_dup w/o dedupe');
ok(!$dd->is_dup($eml), '2nd is_dup w/o dedupe');
ok(!$dd->is_dup($different), 'different is_dup w/o dedupe');
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 1');
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 2');

# The default (undef) strategy and an explicit 'content' strategy are
# expected to behave the same way: a repeated message is a duplicate,
# but a shared Message-ID alone or shared content alone is not.
for my $strat (undef, 'content') {
	$lei->{opt}->{dedupe} = $strat;
	$dd = PublicInbox::LeiDedupe->new($lei);
	$check_storable->($dd);
	$dd->prepare_dedupe;
	my $desc = $strat // 'default';
	ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe");
	ok($dd->is_dup($eml), "2nd seen with $desc dedupe");
	ok(!$dd->is_dup($different), "different is_dup with $desc dedupe");
	ok(!$dd->is_smsg_dup($smsg), "is_smsg_dup pass w/ $desc dedupe");
	ok($dd->is_smsg_dup($smsg), "is_smsg_dup reject w/ $desc dedupe");
	ok(!$dd->is_dup($sameish),
		"Message-ID accounted for w/ same content otherwise");
}

# Unknown strategy names must be rejected by the constructor
$lei->{opt}->{dedupe} = 'bogus';
eval { PublicInbox::LeiDedupe->new($lei) };
like($@, qr/unsupported.*bogus/, 'died on bogus strategy');

# --dedupe=mid: the Message-ID alone decides, so $different (which shares
# $eml's Message-ID) is reported as already seen
$lei->{opt}->{dedupe} = 'mid';
$dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;
ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
ok($dd->is_dup($eml), '2nd seen with mid dedupe');
ok($dd->is_dup($different), 'different seen with mid dedupe');
ok(!$dd->is_smsg_dup($smsg), 'smsg mid dedupe pass');
ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject');

# --dedupe=oid
$lei->{opt}->{dedupe} = 'oid';
$dd = PublicInbox::LeiDedupe->new($lei);
$check_storable->($dd);
$dd->prepare_dedupe;

# --augment won't have OIDs:
ok(!$dd->is_dup($eml), '1st is_dup with oid dedupe (augment)');
ok($dd->is_dup($eml), '2nd seen with oid dedupe (augment)');
ok(!$dd->is_dup($different), 'different is_dup with oid dedupe (augment)');
# adding a Status header must not make an already-seen message look new
$different->header_set('Status', 'RO');
ok($dd->is_dup($different), 'different seen with oid dedupe Status removed');

# When an smsg carrying a blob id is supplied, that id decides instead of
# the message passed alongside it
$smsg = { blob => '01d' };
ok(!$dd->is_dup($eml, $smsg), '1st is_dup with oid dedupe');
ok($dd->is_dup($different, $smsg), 'different content ignored if oid matches');
$smsg->{blob} = uc($smsg->{blob});
ok($dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P');
# a longer id that merely starts with the seen one is a distinct object
$smsg->{blob} = '01dbad';
ok(!$dd->is_dup($eml, $smsg), 'different oid is not a dup');

$smsg->{blob} = 'dead';
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');
ok($dd->is_smsg_dup($smsg), 'smsg dedupe reject');

done_testing;