From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id AE2A21F6A9 for ; Sat, 5 Jan 2019 11:09:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] filter/rubylang: fix SQLite DB lifetime problems Date: Sat, 5 Jan 2019 11:09:38 +0000 Message-Id: <20190105110938.15235-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Clearly the AltId stuff was never tested for v2. Ensure this tricky filter (which reuses Msgmap to avoid introducing new serial numbers) doesn't trigger deadlocks SQLite due to opening a DB for writing multiple times. I went through several iterations of this change before going with this one, which is the least intrusive I could fine. --- MANIFEST | 2 + lib/PublicInbox/InboxWritable.pm | 14 +++- lib/PublicInbox/V2Writable.pm | 13 ++++ lib/PublicInbox/WatchMaildir.pm | 5 +- script/public-inbox-mda | 1 + t/mda_filter_rubylang.t | 69 +++++++++++++++++++ t/watch_filter_rubylang.t | 109 +++++++++++++++++++++++++++++++ 7 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 t/mda_filter_rubylang.t create mode 100644 t/watch_filter_rubylang.t diff --git a/MANIFEST b/MANIFEST index d56cd85..e4f3df8 100644 --- a/MANIFEST +++ b/MANIFEST @@ -175,6 +175,7 @@ t/init.t t/linkify.t t/main-bin/spamc t/mda.t +t/mda_filter_rubylang.t t/mid.t t/mime.t t/msg_iter.t @@ -212,5 +213,6 @@ t/v2mirror.t t/v2reindex.t t/v2writable.t t/view.t +t/watch_filter_rubylang.t t/watch_maildir.t t/watch_maildir_v2.t diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 87c9ada..2f1ca6f 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -46,9 +46,16 @@ sub importer { } sub filter { - my ($self) = @_; + my ($self, $im) = @_; my $f = $self->{filter}; if ($f && $f =~ /::/) { + # v2 keeps msgmap open, which causes conflicts for filters + # such as PublicInbox::Filter::RubyLang which overload msgmap + # for a predictable serial number. + if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) { + $im->done; + } + my @args = (-inbox => $self); # basic line splitting, only # Perhaps we can have proper quote splitting one day... @@ -100,7 +107,7 @@ ($) sub import_maildir { my ($self, $dir) = @_; my $im = $self->importer(1); - my $filter = $self->filter; + foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } @@ -109,7 +116,8 @@ sub import_maildir { while (defined(my $fn = readdir($dh))) { next unless is_maildir_basename($fn); my $mime = maildir_file_load("$dir/$fn") or next; - if ($filter) { + + if (my $filter = $self->filter($im)) { my $ret = $filter->scrub($mime) or return; return if $ret == REJECT(); $mime = $ret; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 0731964..93babed 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -163,6 +163,19 @@ sub num_for { return if $existing; } + # AltId may pre-populate article numbers (e.g. X-Mail-Count + # or NNTP article number), use that article number if it's + # not in Over. + my $altid = $self->{-inbox}->{altid}; + if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) { + my $num = $self->{mm}->num_for($mid); + + if (defined $num && !$self->{over}->get_art($num)) { + $$mid0 = $mid; + return $num; + } + } + # very unlikely: warn "<$mid> reused for mismatched content\n"; diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 84080ba..2d4c6f4 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -121,7 +121,7 @@ sub _remove_spam { eval { my $im = _importer_for($self, $ibx); $im->remove($mime, 'spam'); - if (my $scrub = $ibx->filter) { + if (my $scrub = $ibx->filter($im)) { my $scrubbed = $scrub->scrub($mime, 1); $scrubbed or return; $scrubbed == REJECT() and return; @@ -159,7 +159,8 @@ sub _try_path { my $v = $mime->header_obj->header_raw($wm->[0]); next unless ($v && $v =~ $wm->[1]); } - if (my $scrub = $ibx->filter) { + + if (my $scrub = $ibx->filter($im)) { my $ret = $scrub->scrub($mime) or next; $ret == REJECT() and next; $mime = $ret; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 183b915..7486059 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -81,6 +81,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message $! = 65; # EX_DATAERR 5.6.0 data format error die $filter->err, "\n"; } # else { accept +$filter = undef; PublicInbox::MDA->set_list_headers($mime, $dst); my $im = $dst->importer(0); diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t new file mode 100644 index 0000000..cb6da4b --- /dev/null +++ b/t/mda_filter_rubylang.t @@ -0,0 +1,69 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use PublicInbox::MIME; +use PublicInbox::Config; +my @mods = qw(DBD::SQLite Search::Xapian IPC::Run); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for mda_filter_rubylang.t" if $@; +} + +use_ok 'PublicInbox::V2Writable'; +my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $pi_config = "$tmpdir/pi_config"; +local $ENV{PI_CONFIG} = $pi_config; +my $mda = 'blib/script/public-inbox-mda'; +my @cfg = ('git', 'config', "--file=$pi_config"); +is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); + +for my $v (qw(V1 V2)) { + my @warn; + $SIG{__WARN__} = sub { push @warn, @_ }; + my $cfgpfx = "publicinbox.$v"; + my $mainrepo = "$tmpdir/$v"; + my $addr = "test-$v\@example.com"; + my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo, + "http://example.com/$v", $addr); + is(system(@cmd), 0, 'public-inbox init OK'); + if ($v eq 'V1') { + is(system('blib/script/public-inbox-index', $mainrepo), 0); + } + is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); + is(system(@cfg, "$cfgpfx.altid", + 'serial:alerts:file=msgmap.sqlite3'), 0); + + for my $i (1..2) { + local $ENV{ORIGINAL_RECIPIENT} = $addr; + my $msg = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +something +EOF + ok(IPC::Run::run([$mda], \"$msg"), 'message delivered'); + } + my $config = PublicInbox::Config->new; + my $ibx = $config->lookup_name($v); + + # make sure all serials are searchable: + my ($tot, $msgs); + for my $i (1..2) { + ($tot, $msgs) = $ibx->search->query("alerts:$i"); + is($tot, 1, "got one result for alerts:$i"); + is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); + } + is_deeply([], \@warn, 'no warnings'); + + # TODO: public-inbox-learn doesn't know about filters + # (but -watch does) +} + +done_testing(); diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t new file mode 100644 index 0000000..3256555 --- /dev/null +++ b/t/watch_filter_rubylang.t @@ -0,0 +1,109 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use PublicInbox::MIME; +use PublicInbox::Config; +my @mods = qw(Filesys::Notify::Simple DBD::SQLite Search::Xapian); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for watch_filter_rubylang_v2.t" if $@; +} + +use_ok 'PublicInbox::V2Writable'; +use_ok 'PublicInbox::WatchMaildir'; +use_ok 'PublicInbox::Emergency'; +my $tmpdir = tempdir('watch-XXXXXX', TMPDIR => 1, CLEANUP => 1); +local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; + +for my $v (qw(V1 V2)) { + my @warn; + $SIG{__WARN__} = sub { push @warn, @_ }; + my $cfgpfx = "publicinbox.$v"; + my $mainrepo = "$tmpdir/$v"; + my $maildir = "$tmpdir/md-$v"; + my $spamdir = "$tmpdir/spam-$v"; + my $addr = "test-$v\@example.com"; + my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo, + "http://example.com/$v", $addr); + is(system(@cmd), 0, 'public-inbox init OK'); + if ($v eq 'V1') { + is(system('blib/script/public-inbox-index', $mainrepo), 0); + } + PublicInbox::Emergency->new($spamdir); + + for my $i (1..15) { + my $msg = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +something +EOF + PublicInbox::Emergency->new($maildir)->prepare(\$msg); + } + + my $spam = < +Date: Sat, 05 Jan 2019 04:19:17 +0000 + +spam +EOF + PublicInbox::Emergency->new($maildir)->prepare(\"$spam"); + + my %orig = ( + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $mainrepo, + "$cfgpfx.watch" => "maildir:$maildir", + "$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang', + "$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3', + "publicinboxwatch.watchspam" => "maildir:$spamdir", + ); + my $config = PublicInbox::Config->new({%orig}); + my $ibx = $config->lookup_name($v); + ok($ibx, 'found inbox by name'); + + my $w = PublicInbox::WatchMaildir->new($config); + for my $i (1..2) { + $w->scan('full'); + } + + # make sure all serials are searchable: + my ($tot, $msgs); + for my $i (1..15) { + ($tot, $msgs) = $ibx->search->query("alerts:$i"); + is($tot, 1, "got one result for alerts:$i"); + is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); + } + ($tot, undef) = $ibx->search->query('b:spam'); + is($tot, 1, 'got spam message'); + + my $nr = unlink <$maildir/new/*>; + is(16, $nr); + { + PublicInbox::Emergency->new($spamdir)->prepare(\$spam); + my @new = glob("$spamdir/new/*"); + my @p = split(m!/+!, $new[0]); + ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); + is(unlink($new[0]), 1); + } + $w->scan('full'); + + $config = PublicInbox::Config->new({%orig}); + $ibx = $config->lookup_name($v); + ($tot, undef) = $ibx->search->reopen->query('b:spam'); + is($tot, 0, 'spam removed'); + + is_deeply([], \@warn, 'no warnings'); +} + +done_testing(); -- EW