user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 22/27] watchmaildir: support v2 repositories
Date: Mon, 19 Mar 2018 08:14:54 +0000	[thread overview]
Message-ID: <20180319081459.10645-23-e@80x24.org> (raw)
In-Reply-To: <20180319081459.10645-1-e@80x24.org>

Unfortunately this gives up some minor performance tweaks we
made to avoid reforking import processes.
---
 MANIFEST                        |   1 +
 lib/PublicInbox/WatchMaildir.pm |  29 +++++-----
 t/watch_maildir_v2.t            | 125 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 16 deletions(-)
 create mode 100644 t/watch_maildir_v2.t

diff --git a/MANIFEST b/MANIFEST
index 3b0b013..4346cd9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -182,3 +182,4 @@ t/utf8.mbox
 t/v2writable.t
 t/view.t
 t/watch_maildir.t
+t/watch_maildir_v2.t
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 3adebdd..2808b72 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -91,18 +91,6 @@ sub _done_for_now {
 	my ($self) = @_;
 	my $importers = $self->{importers};
 	foreach my $im (values %$importers) {
-		$im->barrier;
-	}
-
-	my $opendirs = $self->{opendirs};
-
-	# spamdir scanning means every importer remains open
-	my $spamdir = $self->{spamdir};
-	return if defined($spamdir) && $opendirs->{$spamdir};
-
-	foreach my $im (values %$importers) {
-		# not done if we're scanning
-		next if $opendirs->{$im->{git}->{git_dir}};
 		$im->done;
 	}
 }
@@ -267,10 +255,19 @@ sub _path_to_mime {
 sub _importer_for {
 	my ($self, $inbox) = @_;
 	my $im = $inbox->{-import} ||= eval {
-		my $git = $inbox->git;
-		my $name = $inbox->{name};
-		my $addr = $inbox->{-primary_address};
-		PublicInbox::Import->new($git, $name, $addr, $inbox);
+		my $v = $inbox->{version} || 1;
+		if ($v == 2) {
+			eval { require PublicInbox::V2Writable };
+			die "v2 not supported: $@\n" if $@;
+			PublicInbox::V2Writable->new($inbox);
+		} elsif ($v == 1) {
+			my $git = $inbox->git;
+			my $name = $inbox->{name};
+			my $addr = $inbox->{-primary_address};
+			PublicInbox::Import->new($git, $name, $addr, $inbox);
+		} else {
+			die "unsupported inbox version: $v\n";
+		}
 	};
 
 	my $importers = $self->{importers};
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
new file mode 100644
index 0000000..85130e3
--- /dev/null
+++ b/t/watch_maildir_v2.t
@@ -0,0 +1,125 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use Cwd;
+use PublicInbox::Config;
+my @mods = qw(Filesys::Notify::Simple PublicInbox::V2Writable);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for watch_maildir_v2.t" if $@;
+}
+
+my $tmpdir = tempdir('watch_maildir-v2-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $mainrepo = "$tmpdir/v2";
+my $maildir = "$tmpdir/md";
+my $spamdir = "$tmpdir/spam";
+use_ok 'PublicInbox::WatchMaildir';
+use_ok 'PublicInbox::Emergency';
+my $cfgpfx = "publicinbox.test";
+my $addr = 'test-public@example.com';
+my @cmd = ('blib/script/public-inbox-init', '-V2', 'test', $mainrepo,
+	'http://example.com/v2list', $addr);
+local $ENV{PI_CONFIG} = "$tmpdir/pi_config";
+is(system(@cmd), 0, 'public-inbox init OK');
+
+my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: spam
+Message-Id: <a\@b.com>
+Date: Sat, 18 Jun 2016 00:00:00 +0000
+
+something
+EOF
+PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ok(POSIX::mkfifo("$maildir/cur/fifo", 0777),
+	'create FIFO to ensure we do not get stuck on it :P');
+my $sem = PublicInbox::Emergency->new($spamdir); # create dirs
+
+my $config = PublicInbox::Config->new({
+	"$cfgpfx.address" => $addr,
+	"$cfgpfx.mainrepo" => $mainrepo,
+	"$cfgpfx.watch" => "maildir:$maildir",
+	"$cfgpfx.filter" => 'PublicInbox::Filter::Vger',
+	"publicinboxlearn.watchspam" => "maildir:$spamdir",
+});
+my $ibx = $config->lookup_name('test');
+ok($ibx, 'found inbox by name');
+my $srch = $ibx->search;
+
+PublicInbox::WatchMaildir->new($config)->scan('full');
+my $res = $srch->reopen->query('');
+is($res->{total}, 1, 'got one revision');
+
+# my $git = PublicInbox::Git->new("$mainrepo/git/0.git");
+# my @list = $git->qx(qw(rev-list refs/heads/master));
+# is(scalar @list, 1, 'one revision in rev-list');
+
+my $write_spam = sub {
+	is(scalar glob("$spamdir/new/*"), undef, 'no spam existing');
+	$sem->prepare(\$msg);
+	$sem->commit;
+	my @new = glob("$spamdir/new/*");
+	is(scalar @new, 1);
+	my @p = split(m!/+!, $new[0]);
+	ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S"));
+	is(unlink($new[0]), 1);
+};
+$write_spam->();
+is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
+PublicInbox::WatchMaildir->new($config)->scan('full');
+is($srch->reopen->query('')->{total}, 0, 'deleted file');
+
+# check with scrubbing
+{
+	$msg .= qq(--
+To unsubscribe from this list: send the line "unsubscribe git" in
+the body of a message to majordomo\@vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
+	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+	PublicInbox::WatchMaildir->new($config)->scan('full');
+	$res = $srch->reopen->query('');
+	is($res->{total}, 1, 'got one file back');
+	my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]);
+	like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+
+	is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
+	$write_spam->();
+	PublicInbox::WatchMaildir->new($config)->scan('full');
+	$res = $srch->reopen->query('');
+	is($res->{total}, 0, 'inbox is empty again');
+}
+
+{
+	my $fail_bin = getcwd()."/t/fail-bin";
+	ok(-x "$fail_bin/spamc", "mock spamc exists");
+	my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock
+	local $ENV{PATH} = $fail_path;
+	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+	$config->{'publicinboxwatch.spamcheck'} = 'spamc';
+	{
+		local $SIG{__WARN__} = sub {}; # quiet spam check warning
+		PublicInbox::WatchMaildir->new($config)->scan('full');
+	}
+	$res = $srch->reopen->query('');
+	is($res->{total}, 0, 'inbox is still empty');
+	is(unlink(glob("$maildir/new/*")), 1);
+}
+
+{
+	my $main_bin = getcwd()."/t/main-bin";
+	ok(-x "$main_bin/spamc", "mock spamc exists");
+	my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
+	local $ENV{PATH} = $main_path;
+	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+	$config->{'publicinboxwatch.spamcheck'} = 'spamc';
+	PublicInbox::WatchMaildir->new($config)->scan('full');
+	$res = $srch->reopen->query('');
+	is($res->{total}, 1, 'inbox has one mail after spamc OK-ed a message');
+	my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]);
+	like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+}
+
+done_testing;
-- 
EW


  parent reply	other threads:[~2018-03-19  8:15 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-19  8:14 [PATCH 00/27] v2 public-inbox-watch support Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 01/27] content_id: use Sender header if From is not available Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 02/27] v2writable: support "barrier" operation to avoid reforking Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 03/27] use string ref for Email::Simple->new Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 04/27] v2writable: remove unnecessary idx_init call Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 05/27] searchidx: do not delete documents while iterating Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 06/27] search: allow ->reopen to be chainable Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 07/27] v2writable: implement remove correctly Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 08/27] skeleton: barrier init requires a lock Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 09/27] import: (v2) delete writes the blob into history in subdir Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 10/27] import: (v2): write deletes to a separate '_' subdirectory Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 11/27] import: implement barrier operation for v1 repos Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 12/27] mid: mid_mime uses v2-compatible mids function Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 13/27] watchmaildir: use content_digest to generate Message-Id Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 14/27] import: force Message-ID generation for v1 here Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 15/27] import: switch to URL-safe Base64 for Message-IDs Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 16/27] v2writable: test for idempotent removals Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 17/27] import: enable locking under v2 Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 18/27] index: s/GIT_DIR/REPO_DIR/ Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 19/27] Lock: new base class for writable lockers Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 20/27] t/watch_maildir: note the reason for FIFO creation Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 21/27] v2writable: ensure ->done is idempotent Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-03-19  8:14 ` [PATCH 23/27] searchidxpart: s/barrier/remote_barrier/ Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 24/27] v2writable: allow disabling parallelization Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 25/27] scripts/import_vger_from_mbox: filter out same headers as MDA Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 26/27] v2writable: add DEBUG_DIFF env support Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:14 ` [PATCH 27/27] v2writable: remove "resent" message for duplicate Message-IDs Eric Wong (Contractor, The Linux Foundation)
2018-03-19  8:18   ` SQUASH: " Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180319081459.10645-23-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).