diff options
author | Eric Wong <e@yhbt.net> | 2020-08-31 04:41:34 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-09-01 00:19:17 +0000 |
commit | 94ae705673cb03045a109041eec9a6704b8a735b (patch) | |
tree | 22ac480aaaff37bfe00c401b10ca2249876e2903 /lib/PublicInbox/Watch.pm | |
parent | 3bfbaacac15a14a379e360dc2d6c5989f49c3769 (diff) | |
download | public-inbox-94ae705673cb03045a109041eec9a6704b8a735b.tar.gz |
This should further mitigate lock contention problems when -watch is configured to watch on a Maildir for spam while performing a large NNTP import. There is now a small risk a message won't get removed if it's in the current (uncommitted) fast-import batch, but this is unlikely given the batch size is now only 10 messages. If that small window is hit, flipping the \Seen flag (e.g. marking it unread, and then read again) will trigger another removal attempt via IMAP or Maildir.
Diffstat (limited to 'lib/PublicInbox/Watch.pm')
-rw-r--r-- | lib/PublicInbox/Watch.pm | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 5f786139..0bb92d0a 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -134,15 +134,34 @@ sub _done_for_now { sub remove_eml_i { # each_inbox callback my ($ibx, $arg) = @_; my ($self, $eml, $loc) = @$arg; + eval { - my $im = _importer_for($self, $ibx); - $im->remove($eml, 'spam'); - if (my $scrub = $ibx->filter($im)) { - my $scrubbed = $scrub->scrub($eml, 1); - if ($scrubbed && $scrubbed != REJECT) { - $im->remove($scrubbed, 'spam'); + # try to avoid taking a lock or unnecessary spawning + my $im = $self->{importers}->{"$ibx"}; + my $scrubbed; + if ((!$im || !$im->active) && $ibx->over) { + if (content_exists($ibx, $eml)) { + # continue + } elsif (my $scrub = $ibx->filter($im)) { + $scrubbed = $scrub->scrub($eml, 1); + if ($scrubbed && $scrubbed != REJECT && + !content_exists($ibx, $scrubbed)) { + return; + } + } else { + return; } } + + $im //= _importer_for($self, $ibx); # may spawn fast-import + $im->remove($eml, 'spam'); + $scrubbed //= do { + my $scrub = $ibx->filter($im); + $scrub ? $scrub->scrub($eml, 1) : undef; + }; + if ($scrubbed && $scrubbed != REJECT) { + $im->remove($scrubbed, 'spam'); + } }; if ($@) { warn "error removing spam at: $loc from $ibx->{name}: $@\n"; |