user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] filter/rubylang: fix SQLite DB lifetime problems
@ 2019-01-05 11:09 Eric Wong
  2019-01-07  5:22 ` [2/1 PATCH] t/mda_filter_rubylang.t: set PI_EMERGENCY for -mda Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2019-01-05 11:09 UTC (permalink / raw)
  To: meta

Clearly the AltId stuff was never tested for v2.  Ensure
this tricky filter (which reuses Msgmap to avoid introducing
new serial numbers) doesn't trigger deadlocks SQLite due
to opening a DB for writing multiple times.

I went through several iterations of this change before
going with this one, which is the least intrusive I could
fine.
---
 MANIFEST                         |   2 +
 lib/PublicInbox/InboxWritable.pm |  14 +++-
 lib/PublicInbox/V2Writable.pm    |  13 ++++
 lib/PublicInbox/WatchMaildir.pm  |   5 +-
 script/public-inbox-mda          |   1 +
 t/mda_filter_rubylang.t          |  69 +++++++++++++++++++
 t/watch_filter_rubylang.t        | 109 +++++++++++++++++++++++++++++++
 7 files changed, 208 insertions(+), 5 deletions(-)
 create mode 100644 t/mda_filter_rubylang.t
 create mode 100644 t/watch_filter_rubylang.t

diff --git a/MANIFEST b/MANIFEST
index d56cd85..e4f3df8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -175,6 +175,7 @@ t/init.t
 t/linkify.t
 t/main-bin/spamc
 t/mda.t
+t/mda_filter_rubylang.t
 t/mid.t
 t/mime.t
 t/msg_iter.t
@@ -212,5 +213,6 @@ t/v2mirror.t
 t/v2reindex.t
 t/v2writable.t
 t/view.t
+t/watch_filter_rubylang.t
 t/watch_maildir.t
 t/watch_maildir_v2.t
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 87c9ada..2f1ca6f 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -46,9 +46,16 @@ sub importer {
 }
 
 sub filter {
-	my ($self) = @_;
+	my ($self, $im) = @_;
 	my $f = $self->{filter};
 	if ($f && $f =~ /::/) {
+		# v2 keeps msgmap open, which causes conflicts for filters
+		# such as PublicInbox::Filter::RubyLang which overload msgmap
+		# for a predictable serial number.
+		if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) {
+			$im->done;
+		}
+
 		my @args = (-inbox => $self);
 		# basic line splitting, only
 		# Perhaps we can have proper quote splitting one day...
@@ -100,7 +107,7 @@ ($)
 sub import_maildir {
 	my ($self, $dir) = @_;
 	my $im = $self->importer(1);
-	my $filter = $self->filter;
+
 	foreach my $sub (qw(cur new tmp)) {
 		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
 	}
@@ -109,7 +116,8 @@ sub import_maildir {
 		while (defined(my $fn = readdir($dh))) {
 			next unless is_maildir_basename($fn);
 			my $mime = maildir_file_load("$dir/$fn") or next;
-			if ($filter) {
+
+			if (my $filter = $self->filter($im)) {
 				my $ret = $filter->scrub($mime) or return;
 				return if $ret == REJECT();
 				$mime = $ret;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 0731964..93babed 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -163,6 +163,19 @@ sub num_for {
 			return if $existing;
 		}
 
+		# AltId may pre-populate article numbers (e.g. X-Mail-Count
+		# or NNTP article number), use that article number if it's
+		# not in Over.
+		my $altid = $self->{-inbox}->{altid};
+		if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) {
+			my $num = $self->{mm}->num_for($mid);
+
+			if (defined $num && !$self->{over}->get_art($num)) {
+				$$mid0 = $mid;
+				return $num;
+			}
+		}
+
 		# very unlikely:
 		warn "<$mid> reused for mismatched content\n";
 
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 84080ba..2d4c6f4 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -121,7 +121,7 @@ sub _remove_spam {
 		eval {
 			my $im = _importer_for($self, $ibx);
 			$im->remove($mime, 'spam');
-			if (my $scrub = $ibx->filter) {
+			if (my $scrub = $ibx->filter($im)) {
 				my $scrubbed = $scrub->scrub($mime, 1);
 				$scrubbed or return;
 				$scrubbed == REJECT() and return;
@@ -159,7 +159,8 @@ sub _try_path {
 			my $v = $mime->header_obj->header_raw($wm->[0]);
 			next unless ($v && $v =~ $wm->[1]);
 		}
-		if (my $scrub = $ibx->filter) {
+
+		if (my $scrub = $ibx->filter($im)) {
 			my $ret = $scrub->scrub($mime) or next;
 			$ret == REJECT() and next;
 			$mime = $ret;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 183b915..7486059 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -81,6 +81,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
 	$! = 65; # EX_DATAERR 5.6.0 data format error
 	die $filter->err, "\n";
 } # else { accept
+$filter = undef;
 
 PublicInbox::MDA->set_list_headers($mime, $dst);
 my $im = $dst->importer(0);
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
new file mode 100644
index 0000000..cb6da4b
--- /dev/null
+++ b/t/mda_filter_rubylang.t
@@ -0,0 +1,69 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(DBD::SQLite Search::Xapian IPC::Run);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for mda_filter_rubylang.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $pi_config = "$tmpdir/pi_config";
+local $ENV{PI_CONFIG} = $pi_config;
+my $mda = 'blib/script/public-inbox-mda';
+my @cfg = ('git', 'config', "--file=$pi_config");
+is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
+
+for my $v (qw(V1 V2)) {
+	my @warn;
+	$SIG{__WARN__} = sub { push @warn, @_ };
+	my $cfgpfx = "publicinbox.$v";
+	my $mainrepo = "$tmpdir/$v";
+	my $addr = "test-$v\@example.com";
+	my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+		"http://example.com/$v", $addr);
+	is(system(@cmd), 0, 'public-inbox init OK');
+	if ($v eq 'V1') {
+		is(system('blib/script/public-inbox-index', $mainrepo), 0);
+	}
+	is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
+	is(system(@cfg, "$cfgpfx.altid",
+		'serial:alerts:file=msgmap.sqlite3'), 0);
+
+	for my $i (1..2) {
+		local $ENV{ORIGINAL_RECIPIENT} = $addr;
+		my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+		ok(IPC::Run::run([$mda], \"$msg"), 'message delivered');
+	}
+	my $config = PublicInbox::Config->new;
+	my $ibx = $config->lookup_name($v);
+
+	# make sure all serials are searchable:
+	my ($tot, $msgs);
+	for my $i (1..2) {
+		($tot, $msgs) = $ibx->search->query("alerts:$i");
+		is($tot, 1, "got one result for alerts:$i");
+		is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+	}
+	is_deeply([], \@warn, 'no warnings');
+
+	# TODO: public-inbox-learn doesn't know about filters
+	# (but -watch does)
+}
+
+done_testing();
diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t
new file mode 100644
index 0000000..3256555
--- /dev/null
+++ b/t/watch_filter_rubylang.t
@@ -0,0 +1,109 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(Filesys::Notify::Simple DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for watch_filter_rubylang_v2.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+use_ok 'PublicInbox::WatchMaildir';
+use_ok 'PublicInbox::Emergency';
+my $tmpdir = tempdir('watch-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+local $ENV{PI_CONFIG} = "$tmpdir/pi_config";
+
+for my $v (qw(V1 V2)) {
+	my @warn;
+	$SIG{__WARN__} = sub { push @warn, @_ };
+	my $cfgpfx = "publicinbox.$v";
+	my $mainrepo = "$tmpdir/$v";
+	my $maildir = "$tmpdir/md-$v";
+	my $spamdir = "$tmpdir/spam-$v";
+	my $addr = "test-$v\@example.com";
+	my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+		"http://example.com/$v", $addr);
+	is(system(@cmd), 0, 'public-inbox init OK');
+	if ($v eq 'V1') {
+		is(system('blib/script/public-inbox-index', $mainrepo), 0);
+	}
+	PublicInbox::Emergency->new($spamdir);
+
+	for my $i (1..15) {
+		my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+		PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+	}
+
+	my $spam = <<EOF;
+From: spammer\@example.com
+To: $addr
+Subject: spam
+X-Mail-Count: 99
+Message-Id: <a.99\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+spam
+EOF
+	PublicInbox::Emergency->new($maildir)->prepare(\"$spam");
+
+	my %orig = (
+		"$cfgpfx.address" => $addr,
+		"$cfgpfx.mainrepo" => $mainrepo,
+		"$cfgpfx.watch" => "maildir:$maildir",
+		"$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang',
+		"$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3',
+		"publicinboxwatch.watchspam" => "maildir:$spamdir",
+	);
+	my $config = PublicInbox::Config->new({%orig});
+	my $ibx = $config->lookup_name($v);
+	ok($ibx, 'found inbox by name');
+
+	my $w = PublicInbox::WatchMaildir->new($config);
+	for my $i (1..2) {
+		$w->scan('full');
+	}
+
+	# make sure all serials are searchable:
+	my ($tot, $msgs);
+	for my $i (1..15) {
+		($tot, $msgs) = $ibx->search->query("alerts:$i");
+		is($tot, 1, "got one result for alerts:$i");
+		is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+	}
+	($tot, undef) = $ibx->search->query('b:spam');
+	is($tot, 1, 'got spam message');
+
+	my $nr = unlink <$maildir/new/*>;
+	is(16, $nr);
+	{
+		PublicInbox::Emergency->new($spamdir)->prepare(\$spam);
+		my @new = glob("$spamdir/new/*");
+		my @p = split(m!/+!, $new[0]);
+		ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S"));
+		is(unlink($new[0]), 1);
+	}
+	$w->scan('full');
+
+	$config = PublicInbox::Config->new({%orig});
+	$ibx = $config->lookup_name($v);
+	($tot, undef) = $ibx->search->reopen->query('b:spam');
+	is($tot, 0, 'spam removed');
+
+	is_deeply([], \@warn, 'no warnings');
+}
+
+done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-01-07  5:22 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-05 11:09 [PATCH] filter/rubylang: fix SQLite DB lifetime problems Eric Wong
2019-01-07  5:22 ` [2/1 PATCH] t/mda_filter_rubylang.t: set PI_EMERGENCY for -mda Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).