From 97c6b564fd79e47ae6fca8de273c2aeaf2f5bea5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 5 Jan 2019 10:41:15 +0000 Subject: filter/rubylang: fix SQLite DB lifetime problems Clearly the AltId stuff was never tested for v2. Ensure this tricky filter (which reuses Msgmap to avoid introducing new serial numbers) doesn't trigger deadlocks SQLite due to opening a DB for writing multiple times. I went through several iterations of this change before going with this one, which is the least intrusive I could fine. --- MANIFEST | 2 + lib/PublicInbox/InboxWritable.pm | 14 +++-- lib/PublicInbox/V2Writable.pm | 13 +++++ lib/PublicInbox/WatchMaildir.pm | 5 +- script/public-inbox-mda | 1 + t/mda_filter_rubylang.t | 69 +++++++++++++++++++++++++ t/watch_filter_rubylang.t | 109 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 t/mda_filter_rubylang.t create mode 100644 t/watch_filter_rubylang.t diff --git a/MANIFEST b/MANIFEST index d56cd85e..e4f3df86 100644 --- a/MANIFEST +++ b/MANIFEST @@ -175,6 +175,7 @@ t/init.t t/linkify.t t/main-bin/spamc t/mda.t +t/mda_filter_rubylang.t t/mid.t t/mime.t t/msg_iter.t @@ -212,5 +213,6 @@ t/v2mirror.t t/v2reindex.t t/v2writable.t t/view.t +t/watch_filter_rubylang.t t/watch_maildir.t t/watch_maildir_v2.t diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 87c9ada9..2f1ca6f0 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -46,9 +46,16 @@ sub importer { } sub filter { - my ($self) = @_; + my ($self, $im) = @_; my $f = $self->{filter}; if ($f && $f =~ /::/) { + # v2 keeps msgmap open, which causes conflicts for filters + # such as PublicInbox::Filter::RubyLang which overload msgmap + # for a predictable serial number. + if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) { + $im->done; + } + my @args = (-inbox => $self); # basic line splitting, only # Perhaps we can have proper quote splitting one day... @@ -100,7 +107,7 @@ sub maildir_path_load ($) { sub import_maildir { my ($self, $dir) = @_; my $im = $self->importer(1); - my $filter = $self->filter; + foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } @@ -109,7 +116,8 @@ sub import_maildir { while (defined(my $fn = readdir($dh))) { next unless is_maildir_basename($fn); my $mime = maildir_file_load("$dir/$fn") or next; - if ($filter) { + + if (my $filter = $self->filter($im)) { my $ret = $filter->scrub($mime) or return; return if $ret == REJECT(); $mime = $ret; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 07319646..93babed5 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -163,6 +163,19 @@ sub num_for { return if $existing; } + # AltId may pre-populate article numbers (e.g. X-Mail-Count + # or NNTP article number), use that article number if it's + # not in Over. + my $altid = $self->{-inbox}->{altid}; + if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) { + my $num = $self->{mm}->num_for($mid); + + if (defined $num && !$self->{over}->get_art($num)) { + $$mid0 = $mid; + return $num; + } + } + # very unlikely: warn "<$mid> reused for mismatched content\n"; diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 84080ba9..2d4c6f43 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -121,7 +121,7 @@ sub _remove_spam { eval { my $im = _importer_for($self, $ibx); $im->remove($mime, 'spam'); - if (my $scrub = $ibx->filter) { + if (my $scrub = $ibx->filter($im)) { my $scrubbed = $scrub->scrub($mime, 1); $scrubbed or return; $scrubbed == REJECT() and return; @@ -159,7 +159,8 @@ sub _try_path { my $v = $mime->header_obj->header_raw($wm->[0]); next unless ($v && $v =~ $wm->[1]); } - if (my $scrub = $ibx->filter) { + + if (my $scrub = $ibx->filter($im)) { my $ret = $scrub->scrub($mime) or next; $ret == REJECT() and next; $mime = $ret; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 183b915d..7486059d 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -81,6 +81,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message $! = 65; # EX_DATAERR 5.6.0 data format error die $filter->err, "\n"; } # else { accept +$filter = undef; PublicInbox::MDA->set_list_headers($mime, $dst); my $im = $dst->importer(0); diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t new file mode 100644 index 00000000..cb6da4bb --- /dev/null +++ b/t/mda_filter_rubylang.t @@ -0,0 +1,69 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use PublicInbox::MIME; +use PublicInbox::Config; +my @mods = qw(DBD::SQLite Search::Xapian IPC::Run); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for mda_filter_rubylang.t" if $@; +} + +use_ok 'PublicInbox::V2Writable'; +my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $pi_config = "$tmpdir/pi_config"; +local $ENV{PI_CONFIG} = $pi_config; +my $mda = 'blib/script/public-inbox-mda'; +my @cfg = ('git', 'config', "--file=$pi_config"); +is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); + +for my $v (qw(V1 V2)) { + my @warn; + $SIG{__WARN__} = sub { push @warn, @_ }; + my $cfgpfx = "publicinbox.$v"; + my $mainrepo = "$tmpdir/$v"; + my $addr = "test-$v\@example.com"; + my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo, + "http://example.com/$v", $addr); + is(system(@cmd), 0, 'public-inbox init OK'); + if ($v eq 'V1') { + is(system('blib/script/public-inbox-index', $mainrepo), 0); + } + is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); + is(system(@cfg, "$cfgpfx.altid", + 'serial:alerts:file=msgmap.sqlite3'), 0); + + for my $i (1..2) { + local $ENV{ORIGINAL_RECIPIENT} = $addr; + my $msg = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +something +EOF + ok(IPC::Run::run([$mda], \"$msg"), 'message delivered'); + } + my $config = PublicInbox::Config->new; + my $ibx = $config->lookup_name($v); + + # make sure all serials are searchable: + my ($tot, $msgs); + for my $i (1..2) { + ($tot, $msgs) = $ibx->search->query("alerts:$i"); + is($tot, 1, "got one result for alerts:$i"); + is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); + } + is_deeply([], \@warn, 'no warnings'); + + # TODO: public-inbox-learn doesn't know about filters + # (but -watch does) +} + +done_testing(); diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t new file mode 100644 index 00000000..32565556 --- /dev/null +++ b/t/watch_filter_rubylang.t @@ -0,0 +1,109 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use PublicInbox::MIME; +use PublicInbox::Config; +my @mods = qw(Filesys::Notify::Simple DBD::SQLite Search::Xapian); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for watch_filter_rubylang_v2.t" if $@; +} + +use_ok 'PublicInbox::V2Writable'; +use_ok 'PublicInbox::WatchMaildir'; +use_ok 'PublicInbox::Emergency'; +my $tmpdir = tempdir('watch-XXXXXX', TMPDIR => 1, CLEANUP => 1); +local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; + +for my $v (qw(V1 V2)) { + my @warn; + $SIG{__WARN__} = sub { push @warn, @_ }; + my $cfgpfx = "publicinbox.$v"; + my $mainrepo = "$tmpdir/$v"; + my $maildir = "$tmpdir/md-$v"; + my $spamdir = "$tmpdir/spam-$v"; + my $addr = "test-$v\@example.com"; + my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo, + "http://example.com/$v", $addr); + is(system(@cmd), 0, 'public-inbox init OK'); + if ($v eq 'V1') { + is(system('blib/script/public-inbox-index', $mainrepo), 0); + } + PublicInbox::Emergency->new($spamdir); + + for my $i (1..15) { + my $msg = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +something +EOF + PublicInbox::Emergency->new($maildir)->prepare(\$msg); + } + + my $spam = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +spam +EOF + PublicInbox::Emergency->new($maildir)->prepare(\"$spam"); + + my %orig = ( + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $mainrepo, + "$cfgpfx.watch" => "maildir:$maildir", + "$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang', + "$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3', + "publicinboxwatch.watchspam" => "maildir:$spamdir", + ); + my $config = PublicInbox::Config->new({%orig}); + my $ibx = $config->lookup_name($v); + ok($ibx, 'found inbox by name'); + + my $w = PublicInbox::WatchMaildir->new($config); + for my $i (1..2) { + $w->scan('full'); + } + + # make sure all serials are searchable: + my ($tot, $msgs); + for my $i (1..15) { + ($tot, $msgs) = $ibx->search->query("alerts:$i"); + is($tot, 1, "got one result for alerts:$i"); + is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); + } + ($tot, undef) = $ibx->search->query('b:spam'); + is($tot, 1, 'got spam message'); + + my $nr = unlink <$maildir/new/*>; + is(16, $nr); + { + PublicInbox::Emergency->new($spamdir)->prepare(\$spam); + my @new = glob("$spamdir/new/*"); + my @p = split(m!/+!, $new[0]); + ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); + is(unlink($new[0]), 1); + } + $w->scan('full'); + + $config = PublicInbox::Config->new({%orig}); + $ibx = $config->lookup_name($v); + ($tot, undef) = $ibx->search->reopen->query('b:spam'); + is($tot, 0, 'spam removed'); + + is_deeply([], \@warn, 'no warnings'); +} + +done_testing(); -- cgit v1.2.3-24-ge0c7