about summary refs log tree commit homepage
path: root/lib/PublicInbox/Watch.pm
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-08-31 04:41:34 +0000
committerEric Wong <e@80x24.org>2020-09-01 00:19:17 +0000
commit94ae705673cb03045a109041eec9a6704b8a735b (patch)
tree22ac480aaaff37bfe00c401b10ca2249876e2903 /lib/PublicInbox/Watch.pm
parent3bfbaacac15a14a379e360dc2d6c5989f49c3769 (diff)
downloadpublic-inbox-94ae705673cb03045a109041eec9a6704b8a735b.tar.gz
This should further mitigate lock contention problems
when -watch is configured to watch on a Maildir for spam
while performing a large NNTP import.

There is now a small risk that a message won't get removed if
it's in the current (uncommitted) fast-import batch, but that is
unlikely given the batch size is now only 10 messages.

If that small window is hit, flipping the \Seen flag
(e.g. marking it unread, and then read again) will trigger
another removal attempt via IMAP or Maildir.
Diffstat (limited to 'lib/PublicInbox/Watch.pm')
-rw-r--r--lib/PublicInbox/Watch.pm31
1 files changed, 25 insertions, 6 deletions
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 5f786139..0bb92d0a 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -134,15 +134,34 @@ sub _done_for_now {
 sub remove_eml_i { # each_inbox callback
         my ($ibx, $arg) = @_;
         my ($self, $eml, $loc) = @$arg;
+
         eval {
-                my $im = _importer_for($self, $ibx);
-                $im->remove($eml, 'spam');
-                if (my $scrub = $ibx->filter($im)) {
-                        my $scrubbed = $scrub->scrub($eml, 1);
-                        if ($scrubbed && $scrubbed != REJECT) {
-                                $im->remove($scrubbed, 'spam');
+                # try to avoid taking a lock or unnecessary spawning
+                my $im = $self->{importers}->{"$ibx"};
+                my $scrubbed;
+                if ((!$im || !$im->active) && $ibx->over) {
+                        if (content_exists($ibx, $eml)) {
+                                # continue
+                        } elsif (my $scrub = $ibx->filter($im)) {
+                                $scrubbed = $scrub->scrub($eml, 1);
+                                if ($scrubbed && $scrubbed != REJECT &&
+                                          !content_exists($ibx, $scrubbed)) {
+                                        return;
+                                }
+                        } else {
+                                return;
                         }
                 }
+
+                $im //= _importer_for($self, $ibx); # may spawn fast-import
+                $im->remove($eml, 'spam');
+                $scrubbed //= do {
+                        my $scrub = $ibx->filter($im);
+                        $scrub ? $scrub->scrub($eml, 1) : undef;
+                };
+                if ($scrubbed && $scrubbed != REJECT) {
+                        $im->remove($scrubbed, 'spam');
+                }
         };
         if ($@) {
                 warn "error removing spam at: $loc from $ibx->{name}: $@\n";