From 40782735e74e427997b5b900d60cc07597e330c3 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 24 Jun 2016 01:15:16 +0000
Subject: watch_maildir: implement optional spam checking

Mailing lists I watch and mirror may not have the best spam
filtering, and an extra layer should not hurt.
---
 lib/PublicInbox/Import.pm       |  6 +++++-
 lib/PublicInbox/WatchMaildir.pm | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 5ffc26ef..27f36a7e 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -140,7 +140,7 @@ sub remove {
 
 # returns undef on duplicate
 sub add {
-	my ($self, $mime) = @_; # mime = Email::MIME
+	my ($self, $mime, $check_cb) = @_; # mime = Email::MIME
 
 	my $from = $mime->header('From');
 	my ($email) = ($from =~ /([^<\s]+\@[^>\s]+)/g);
@@ -170,6 +170,10 @@ sub add {
 	# kill potentially confusing/misleading headers
 	$mime->header_set($_) for qw(bytes lines content-length status);
 
+	if ($check_cb) {
+		$mime = $check_cb->($mime) or return;
+	}
+
 	$mime = $mime->as_string;
 	my $blob = $self->{mark}++;
 	print $w "blob\nmark :$blob\ndata ", length($mime), "\n" or wfail;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c1fe81ec..72bd3d08 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -13,7 +13,9 @@ use PublicInbox::Spawn qw(spawn);
 
 sub new {
 	my ($class, $config) = @_;
-	my (%mdmap, @mdir);
+	my (%mdmap, @mdir, $spamc);
+
+	# XXX is "publicinboxlearn" really a good namespace for this?
 	my $k = 'publicinboxlearn.watchspam';
 	if (my $spamdir = $config->{$k}) {
 		if ($spamdir =~ s/\Amaildir://) {
@@ -26,6 +28,21 @@ sub new {
 			warn "unsupported $k=$spamdir\n";
 		}
 	}
+
+	$k = 'publicinboxwatch.spamcheck';
+	my $spamcheck = $config->{$k};
+	if ($spamcheck) {
+		if ($spamcheck eq 'spamc') {
+			$spamcheck = 'PublicInbox::Spamcheck::Spamc';
+		}
+		if ($spamcheck =~ /::/) {
+			eval "require $spamcheck";
+			$spamcheck = _spamcheck_cb($spamcheck->new);
+		} else {
+			warn "unsupported $k=$spamcheck\n";
+			$spamcheck = undef;
+		}
+	}
 	foreach $k (keys %$config) {
 		$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
 		my $name = $1;
@@ -52,6 +69,7 @@ sub new {
 	my $mdre = join('|', map { quotemeta($_) } @mdir);
 	$mdre = qr!\A($mdre)/!;
 	bless {
+		spamcheck => $spamcheck,
 		mdmap => \%mdmap,
 		mdir => \@mdir,
 		mdre => $mdre,
@@ -136,7 +154,7 @@ sub _try_path {
 	}
 
 	_force_mid($mime);
-	$im->add($mime);
+	$im->add($mime, $self->{spamcheck});
 }
 
 sub watch {
@@ -208,4 +226,16 @@ sub _scrubber_for {
 	undef;
 }
 
+sub _spamcheck_cb {
+	my ($sc) = @_;
+	sub {
+		my ($mime) = @_;
+		my $tmp = '';
+		if ($sc->spamcheck($mime, \$tmp)) {
+			return Email::MIME->new(\$tmp);
+		}
+		undef;
+	}
+}
+
 1;
-- 
cgit v1.2.3-24-ge0c7