about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--MANIFEST2
-rw-r--r--lib/PublicInbox/InboxWritable.pm14
-rw-r--r--lib/PublicInbox/V2Writable.pm13
-rw-r--r--lib/PublicInbox/WatchMaildir.pm5
-rwxr-xr-xscript/public-inbox-mda1
-rw-r--r--t/mda_filter_rubylang.t69
-rw-r--r--t/watch_filter_rubylang.t109
7 files changed, 208 insertions, 5 deletions
diff --git a/MANIFEST b/MANIFEST
index d56cd85e..e4f3df86 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -175,6 +175,7 @@ t/init.t
 t/linkify.t
 t/main-bin/spamc
 t/mda.t
+t/mda_filter_rubylang.t
 t/mid.t
 t/mime.t
 t/msg_iter.t
@@ -212,5 +213,6 @@ t/v2mirror.t
 t/v2reindex.t
 t/v2writable.t
 t/view.t
+t/watch_filter_rubylang.t
 t/watch_maildir.t
 t/watch_maildir_v2.t
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 87c9ada9..2f1ca6f0 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -46,9 +46,16 @@ sub importer {
 }
 
 sub filter {
-        my ($self) = @_;
+        my ($self, $im) = @_;
         my $f = $self->{filter};
         if ($f && $f =~ /::/) {
+                # v2 keeps msgmap open, which causes conflicts for filters
+                # such as PublicInbox::Filter::RubyLang which overload msgmap
+                # for a predictable serial number.
+                if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) {
+                        $im->done;
+                }
+
                 my @args = (-inbox => $self);
                 # basic line splitting, only
                 # Perhaps we can have proper quote splitting one day...
@@ -100,7 +107,7 @@ sub maildir_path_load ($) {
 sub import_maildir {
         my ($self, $dir) = @_;
         my $im = $self->importer(1);
-        my $filter = $self->filter;
+
         foreach my $sub (qw(cur new tmp)) {
                 -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
         }
@@ -109,7 +116,8 @@ sub import_maildir {
                 while (defined(my $fn = readdir($dh))) {
                         next unless is_maildir_basename($fn);
                         my $mime = maildir_file_load("$dir/$fn") or next;
-                        if ($filter) {
+
+                        if (my $filter = $self->filter($im)) {
                                 my $ret = $filter->scrub($mime) or return;
                                 return if $ret == REJECT();
                                 $mime = $ret;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 07319646..93babed5 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -163,6 +163,19 @@ sub num_for {
                         return if $existing;
                 }
 
+                # AltId may pre-populate article numbers (e.g. X-Mail-Count
+                # or NNTP article number), use that article number if it's
+                # not in Over.
+                my $altid = $self->{-inbox}->{altid};
+                if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) {
+                        my $num = $self->{mm}->num_for($mid);
+
+                        if (defined $num && !$self->{over}->get_art($num)) {
+                                $$mid0 = $mid;
+                                return $num;
+                        }
+                }
+
                 # very unlikely:
                 warn "<$mid> reused for mismatched content\n";
 
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 84080ba9..2d4c6f43 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -121,7 +121,7 @@ sub _remove_spam {
                 eval {
                         my $im = _importer_for($self, $ibx);
                         $im->remove($mime, 'spam');
-                        if (my $scrub = $ibx->filter) {
+                        if (my $scrub = $ibx->filter($im)) {
                                 my $scrubbed = $scrub->scrub($mime, 1);
                                 $scrubbed or return;
                                 $scrubbed == REJECT() and return;
@@ -159,7 +159,8 @@ sub _try_path {
                         my $v = $mime->header_obj->header_raw($wm->[0]);
                         next unless ($v && $v =~ $wm->[1]);
                 }
-                if (my $scrub = $ibx->filter) {
+
+                if (my $scrub = $ibx->filter($im)) {
                         my $ret = $scrub->scrub($mime) or next;
                         $ret == REJECT() and next;
                         $mime = $ret;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 183b915d..7486059d 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -81,6 +81,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
         $! = 65; # EX_DATAERR 5.6.0 data format error
         die $filter->err, "\n";
 } # else { accept
+$filter = undef;
 
 PublicInbox::MDA->set_list_headers($mime, $dst);
 my $im = $dst->importer(0);
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
new file mode 100644
index 00000000..cb6da4bb
--- /dev/null
+++ b/t/mda_filter_rubylang.t
@@ -0,0 +1,69 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(DBD::SQLite Search::Xapian IPC::Run);
+foreach my $mod (@mods) {
+        eval "require $mod";
+        plan skip_all => "$mod missing for mda_filter_rubylang.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $pi_config = "$tmpdir/pi_config";
+local $ENV{PI_CONFIG} = $pi_config;
+my $mda = 'blib/script/public-inbox-mda';
+my @cfg = ('git', 'config', "--file=$pi_config");
+is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
+
+for my $v (qw(V1 V2)) {
+        my @warn;
+        $SIG{__WARN__} = sub { push @warn, @_ };
+        my $cfgpfx = "publicinbox.$v";
+        my $mainrepo = "$tmpdir/$v";
+        my $addr = "test-$v\@example.com";
+        my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+                "http://example.com/$v", $addr);
+        is(system(@cmd), 0, 'public-inbox init OK');
+        if ($v eq 'V1') {
+                is(system('blib/script/public-inbox-index', $mainrepo), 0);
+        }
+        is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
+        is(system(@cfg, "$cfgpfx.altid",
+                'serial:alerts:file=msgmap.sqlite3'), 0);
+
+        for my $i (1..2) {
+                local $ENV{ORIGINAL_RECIPIENT} = $addr;
+                my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+                ok(IPC::Run::run([$mda], \"$msg"), 'message delivered');
+        }
+        my $config = PublicInbox::Config->new;
+        my $ibx = $config->lookup_name($v);
+
+        # make sure all serials are searchable:
+        my ($tot, $msgs);
+        for my $i (1..2) {
+                ($tot, $msgs) = $ibx->search->query("alerts:$i");
+                is($tot, 1, "got one result for alerts:$i");
+                is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+        }
+        is_deeply([], \@warn, 'no warnings');
+
+        # TODO: public-inbox-learn doesn't know about filters
+        # (but -watch does)
+}
+
+done_testing();
diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t
new file mode 100644
index 00000000..32565556
--- /dev/null
+++ b/t/watch_filter_rubylang.t
@@ -0,0 +1,109 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(Filesys::Notify::Simple DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) {
+        eval "require $mod";
+        plan skip_all => "$mod missing for watch_filter_rubylang_v2.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+use_ok 'PublicInbox::WatchMaildir';
+use_ok 'PublicInbox::Emergency';
+my $tmpdir = tempdir('watch-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+local $ENV{PI_CONFIG} = "$tmpdir/pi_config";
+
+for my $v (qw(V1 V2)) {
+        my @warn;
+        $SIG{__WARN__} = sub { push @warn, @_ };
+        my $cfgpfx = "publicinbox.$v";
+        my $mainrepo = "$tmpdir/$v";
+        my $maildir = "$tmpdir/md-$v";
+        my $spamdir = "$tmpdir/spam-$v";
+        my $addr = "test-$v\@example.com";
+        my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+                "http://example.com/$v", $addr);
+        is(system(@cmd), 0, 'public-inbox init OK');
+        if ($v eq 'V1') {
+                is(system('blib/script/public-inbox-index', $mainrepo), 0);
+        }
+        PublicInbox::Emergency->new($spamdir);
+
+        for my $i (1..15) {
+                my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+                PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+        }
+
+        my $spam = <<EOF;
+From: spammer\@example.com
+To: $addr
+Subject: spam
+X-Mail-Count: 99
+Message-Id: <a.99\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+spam
+EOF
+        PublicInbox::Emergency->new($maildir)->prepare(\"$spam");
+
+        my %orig = (
+                "$cfgpfx.address" => $addr,
+                "$cfgpfx.mainrepo" => $mainrepo,
+                "$cfgpfx.watch" => "maildir:$maildir",
+                "$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang',
+                "$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3',
+                "publicinboxwatch.watchspam" => "maildir:$spamdir",
+        );
+        my $config = PublicInbox::Config->new({%orig});
+        my $ibx = $config->lookup_name($v);
+        ok($ibx, 'found inbox by name');
+
+        my $w = PublicInbox::WatchMaildir->new($config);
+        for my $i (1..2) {
+                $w->scan('full');
+        }
+
+        # make sure all serials are searchable:
+        my ($tot, $msgs);
+        for my $i (1..15) {
+                ($tot, $msgs) = $ibx->search->query("alerts:$i");
+                is($tot, 1, "got one result for alerts:$i");
+                is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+        }
+        ($tot, undef) = $ibx->search->query('b:spam');
+        is($tot, 1, 'got spam message');
+
+        my $nr = unlink <$maildir/new/*>;
+        is(16, $nr);
+        {
+                PublicInbox::Emergency->new($spamdir)->prepare(\$spam);
+                my @new = glob("$spamdir/new/*");
+                my @p = split(m!/+!, $new[0]);
+                ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S"));
+                is(unlink($new[0]), 1);
+        }
+        $w->scan('full');
+
+        $config = PublicInbox::Config->new({%orig});
+        $ibx = $config->lookup_name($v);
+        ($tot, undef) = $ibx->search->reopen->query('b:spam');
+        is($tot, 0, 'spam removed');
+
+        is_deeply([], \@warn, 'no warnings');
+}
+
+done_testing();