From 7f17df5c6f1892ef53f149a0ab24a5d917cce7d9 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 31 Dec 2020 13:51:27 +0000
Subject: lei_to_mail: start --augment, dedupe, bz2 and xz

--augment will match the mairix(1) option of the same name
to augment existing search results. We'll need to implement
deduplication for a better user experience.

mutt ships with compressed mbox support for bz2 and xz, at
least, so we'll support those out-of-the-box.
---
 Review notes (not part of the commit message):
 - The zsfx2cmd lookup is loop-invariant, so it is resolved once
   before the per-suffix loop instead of on every iteration.
 - If the decompressor lookup fails, the remaining two tests of the
   SKIP block are skipped; previously the failed ok() was followed by
   @$dc_cmd on an undefined value, which aborts the whole script.
 - The duplicate message is written from a block-form for loop; a
   `my' declaration under a statement modifier is documented as
   undefined behaviour in perlsyn.
 - Old/new line counts of the hunk are unchanged (21 -> 40).

 t/lei_to_mail.t | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

(limited to 't')

diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index 231cf543..e4551e69 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -92,21 +92,40 @@ my $orig = do {
 	is($raw, do { local $/; <$fh> }, 'jobs > 1');
 	$raw;
 };
-SKIP: {
-	use PublicInbox::Spawn qw(which);
-	my $gzip = which('gzip') or skip 'gzip not found', 1;
-	my $wcb = PublicInbox::LeiToMail->write_cb("mboxcl2:$fn.gz", $lei);
-	$wcb->(\(my $dup = $buf), 'deadbeef', [ qw(seen) ]);
-	undef $wcb;
-	my $uncompressed = xqx([$gzip, '-dc', "$fn.gz"]);
-	is($uncompressed, $orig, 'gzip works');
+my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd');
+for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma?
+	SKIP: {
+		my $cmd = eval { $zsfx2cmd->($zsfx, 0, $lei) };
+		skip $@, 3 if $@; # 3 == the ok() + both is() below
+		my $dc_cmd = eval { $zsfx2cmd->($zsfx, 1, $lei) };
+		ok($dc_cmd, "decompressor for .$zsfx") or skip $@, 2;
+		my $f = "$fn.$zsfx";
+		my $dst = "mboxcl2:$f";
+		my $wcb = PublicInbox::LeiToMail->write_cb($dst, $lei);
+		$wcb->(\(my $dup = $buf), 'deadbeef', [ qw(seen) ]);
+		undef $wcb;
+		my $uncompressed = xqx([@$dc_cmd, $f]);
+		is($uncompressed, $orig, "$zsfx works unlocked");
 
-	local $lei->{opt} = { jobs => 2 };
-	unlink "$fn.gz" or die "unlink $!";
-	$wcb = PublicInbox::LeiToMail->write_cb("mboxcl2:$fn.gz", $lei);
-	$wcb->(\(my $dupe = $buf), 'deadbeef', [ qw(seen) ]);
-	undef $wcb;
-	is(xqx([$gzip, '-dc', "$fn.gz"]), $orig);
+		local $lei->{opt} = { jobs => 2 }; # for atomic writes
+		unlink $f or BAIL_OUT "unlink $!";
+		$wcb = PublicInbox::LeiToMail->write_cb($dst, $lei);
+		$wcb->(\($dup = $buf), 'deadbeef', [ qw(seen) ]);
+		undef $wcb;
+		is(xqx([@$dc_cmd, $f]), $orig, "$zsfx matches with lock");
+	}
+}
+
+unlink $fn or BAIL_OUT $!;
+if ('default deduplication uses content_hash') {
+	my $wcb = PublicInbox::LeiToMail->write_cb("mboxo:$fn", $lei);
+	for (1..2) { $wcb->(\(my $x = $buf), 'deadbeef', []) }
+	undef $wcb; # undef to commit changes
+	my $cmp = '';
+	open my $fh, '<', $fn or BAIL_OUT $!;
+	require PublicInbox::MboxReader;
+	PublicInbox::MboxReader->mboxo($fh, sub { $cmp .= shift->as_string });
+	is($cmp, $buf, 'only one message written');
 }
 
 done_testing;
-- 
cgit v1.2.3-24-ge0c7