about summary refs log tree commit homepage
path: root/t/mbox_reader.t
diff options
context:
space:
mode:
Diffstat (limited to 't/mbox_reader.t')
-rw-r--r--t/mbox_reader.t151
1 files changed, 151 insertions, 0 deletions
diff --git a/t/mbox_reader.t b/t/mbox_reader.t
new file mode 100644
index 00000000..14248a2d
--- /dev/null
+++ b/t/mbox_reader.t
@@ -0,0 +1,151 @@
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use List::Util qw(shuffle);
+use PublicInbox::Eml;
+use Fcntl qw(SEEK_SET);
+require_ok 'PublicInbox::MboxReader';
+require_ok 'PublicInbox::LeiToMail';
+my %raw = (
+        hdr_only => "From: header-only\@example.com\n\n",
+        small_from => "From: small-from\@example.com\n\nFrom hell\n",
+        small => "From: small\@example.com\n\nfrom hell\n",
+        big_hdr_only => "From: big-header\@example.com\n" .
+                (('A: '.('a' x 72)."\n") x 1000)."\n",
+        big_body => "From: big-body\@example.com\n\n".
+                (('b: '.('b' x 72)."\n") x 1000) .
+                "From hell\n",
+        big_all => "From: big-all\@example.com\n".
+                (("A: ".('a' x 72)."\n") x 1000). "\n" .
+                (("b: ".('b' x 72)."\n") x 1000) .
+                "From hell\n",
+);
+{
+        my $eml = PublicInbox::Eml->new($raw{small});
+        my $mbox_keywords = PublicInbox::MboxReader->can('mbox_keywords');
+        is_deeply($mbox_keywords->($eml), [], 'no keywords');
+        $eml->header_set('Status', 'RO');
+        is_deeply($mbox_keywords->($eml), ['seen'], 'seen extracted');
+        $eml->header_set('X-Status', 'A');
+        is_deeply($mbox_keywords->($eml), [qw(answered seen)],
+                'seen+answered extracted');
+}
+
+if ($ENV{TEST_EXTRA}) {
+        for my $fn (glob('t/*.eml'), glob('t/*/*.{patch,eml}')) {
+                $raw{$fn} = eml_load($fn)->as_string;
+        }
+}
+
+my $reader = PublicInbox::MboxReader->new;
+my $check_fmt = sub {
+        my $fmt = shift;
+        my @order = shuffle(keys %raw);
+        my $eml2mbox = PublicInbox::LeiToMail->can("eml2$fmt");
+        open my $fh, '+>', undef or BAIL_OUT "open: $!";
+        for my $k (@order) {
+                my $eml = PublicInbox::Eml->new($raw{$k});
+                my $buf = $eml2mbox->($eml);
+                print $fh $$buf or BAIL_OUT "print $!";
+        }
+        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+        $reader->$fmt($fh, sub {
+                my ($eml) = @_;
+                $eml->header_set('Status');
+                $eml->header_set('Lines');
+                my $cur = shift @order;
+                my @cl = $eml->header_raw('Content-Length');
+                if ($fmt =~ /\Amboxcl/) {
+                        is(scalar(@cl), 1, "Content-Length set $fmt $cur");
+                        my $raw = $eml->body_raw;
+                        my $adj = 0;
+                        if ($fmt eq 'mboxcl') {
+                                my @from = ($raw =~ /^(From )/smg);
+                                $adj = scalar(@from);
+                        }
+                        is(length($raw), $cl[0] - $adj,
+                                "Content-Length is correct $fmt $cur");
+                        # clobber for ->as_string comparison below
+                        $eml->header_set('Content-Length');
+
+                        # special case for t/solve/bare.patch, not sure if we
+                        # should even handle it...
+                        if ($cl[0] eq '0' && ${$eml->{hdr}} eq '') {
+                                delete $eml->{bdy};
+                        }
+                } else {
+                        is(scalar(@cl), 0, "Content-Length unset $fmt $cur");
+                }
+                my $orig = PublicInbox::Eml->new($raw{$cur});
+                is($eml->as_string, $orig->as_string,
+                        "read back original $fmt $cur");
+        });
+};
+my @mbox = qw(mboxrd mboxo mboxcl mboxcl2);
+for my $fmt (@mbox) { $check_fmt->($fmt) }
+s/\n/\r\n/sg for (values %raw);
+for my $fmt (@mbox) { $check_fmt->($fmt) }
+
+{
+        my $no_blank_eom = <<'EOM';
+From x@y Fri Oct  2 00:00:00 1993
+a: b
+
+body1
+From x@y Fri Oct  2 00:00:00 1993
+c: d
+
+body2
+EOM
+        # chop($no_blank_eom) eq "\n" or BAIL_OUT 'broken LF';
+        for my $variant (qw(mboxrd mboxo)) {
+                my @x;
+                open my $fh, '<', \$no_blank_eom or BAIL_OUT 'PerlIO::scalar';
+                $reader->$variant($fh, sub { push @x, shift });
+                is_deeply($x[0]->{bdy}, \"body1\n", 'LF preserved in 1st');
+                is_deeply($x[1]->{bdy}, \"body2\n", 'no LF added in 2nd');
+        }
+}
+
+SKIP: {
+        use PublicInbox::Spawn qw(popen_rd);
+        my $fh = popen_rd([ $^X, qw(-w -Mv5.12 -e), <<'' ]);
+say 'From x@y Fri Oct  2 00:00:00 1993';
+print "a: b\n\n", "x" x 70000, "\n\n";
+say 'From x@y Fri Oct  2 00:00:00 2010';
+print "Final: bit\n\n", "Incomplete\n\n";
+exit 1
+
+        my @x;
+        eval { $reader->mboxrd($fh, sub { push @x, shift->as_string }) };
+        like($@, qr/error closing mbox/, 'detects error reading from pipe');
+        is(scalar(@x), 1, 'only saw one message');
+        is(scalar(grep(/Final/, @x)), 0, 'no incomplete bit');
+}
+
+{
+        my $html = <<EOM;
+<html><head><title>hi,</title></head><body>how are you</body></html>
+EOM
+        for my $m (qw(mboxrd mboxcl mboxcl2 mboxo)) {
+                my (@w, @x);
+                local $SIG{__WARN__} = sub { push @w, @_ };
+                open my $fh, '<', \$html or xbail 'PerlIO::scalar';
+                PublicInbox::MboxReader->$m($fh, sub {
+                        push @x, $_[0]->as_string
+                });
+                if ($m =~ /\Amboxcl/) {
+                        is_deeply(\@x, [], "messages in invalid $m");
+                } else {
+                        is_deeply(\@x, [ "\n$html" ], "body-only $m");
+                }
+                is_deeply([grep(!/^W: leftover/, @w)], [],
+                        "no extra warnings besides leftover ($m)");
+        }
+}
+
+done_testing;