diff options
Diffstat (limited to 'script')
-rw-r--r--[-rwxr-xr-x] | script/public-inbox-learn | 76 | ||||
-rwxr-xr-x | script/public-inbox-mda | 110 |
2 files changed, 111 insertions, 75 deletions
diff --git a/script/public-inbox-learn b/script/public-inbox-learn index c4c4d4b9..3073294a 100755..100644 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -4,7 +4,7 @@ # # Used for training spam (via SpamAssassin) and removing messages from a # public-inbox -my $usage = "$0 (spam|ham) < /path/to/message"; +my $usage = "$0 <spam|ham|rm> </path/to/message"; use strict; use warnings; use PublicInbox::Config; @@ -39,47 +39,26 @@ my $mime = PublicInbox::MIME->new(eval { $data }); -# get all recipients -my %dests; -foreach my $h (qw(Cc To)) { - my $val = $mime->header($h) or next; - foreach my $email (PublicInbox::Address::emails($val)) { - $dests{lc($email)} = 1; - } -} - -if ($train eq 'spam') { - $pi_config->each_inbox(sub { - my ($ibx) = @_; - $ibx = PublicInbox::InboxWritable->new($ibx); - my $im = $ibx->importer(0); - $im->remove($mime, 'spam'); - $im->done; - }); -} - -require PublicInbox::MDA if $train eq "ham"; +sub remove_or_add ($$$) { + my ($ibx, $train, $addr) = @_; -# n.b. message may be cross-posted to multiple public-inboxes -foreach my $recipient (keys %dests) { - my $dst = $pi_config->lookup($recipient) or next; # We do not touch GIT_COMMITTER_* env here so we can track # who trained the message. - $dst->{name} = $ENV{GIT_COMMITTER_NAME} || $dst->{name}; - $dst->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} || $recipient; - $dst = PublicInbox::InboxWritable->new($dst); - my $im = $dst->importer(0); + $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; + $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; + $ibx = PublicInbox::InboxWritable->new($ibx); + my $im = $ibx->importer(0); - if ($train eq "spam" || $train eq "rm") { + if ($train eq "rm") { # This needs to be idempotent, as my inotify trainer # may train for each cross-posted message, and this # script already learns for every list in # ~/.public-inbox/config $im->remove($mime, $train); - } else { # $train eq "ham" + } elsif ($train eq "ham") { # no checking for spam here, we assume the message has # been reviewed by a human at this point: - PublicInbox::MDA->set_list_headers($mime, $dst); + PublicInbox::MDA->set_list_headers($mime, $ibx); # Ham messages are trained when they're marked into # a SEEN state, so this is idempotent: @@ -88,6 +67,41 @@ foreach my $recipient (keys %dests) { $im->done; } +# spam is removed from all known inboxes since it is often Bcc:-ed +if ($train eq 'spam') { + $pi_config->each_inbox(sub { + my ($ibx) = @_; + $ibx = PublicInbox::InboxWritable->new($ibx); + my $im = $ibx->importer(0); + $im->remove($mime, 'spam'); + $im->done; + }); +} else { + require PublicInbox::MDA; + + # get all recipients + my %dests; # address => <PublicInbox::Inbox|0(false)> + for ($mime->header('Cc'), $mime->header('To')) { + foreach my $addr (PublicInbox::Address::emails($_)) { + $addr = lc($addr); + $dests{$addr} //= $pi_config->lookup($addr) // 0; + } + } + + # n.b. message may be cross-posted to multiple public-inboxes + my %seen; + while (my ($addr, $ibx) = each %dests) { + next unless ref($ibx); # $ibx may be 0 + next if $seen{"$ibx"}++; + remove_or_add($ibx, $train, $addr); + } + my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime); + for my $ibx (@$dests) { + next if !$seen{"$ibx"}++; + remove_or_add($ibx, $train, $ibx->{-primary_address}); + } +} + if ($err) { warn $err; exit 1; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 584218b5..dca8a0ea 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -37,28 +37,38 @@ my $config = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::get($config, $key, $default); -my $dst; +my $dests = []; my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { - $dst = $config->lookup($recipient); # first check + my $ibx = $config->lookup($recipient); # first check + push @$dests, $ibx if $ibx; } -if (!defined $dst) { - my $list_id = $simple->header('List-Id'); - if (defined $list_id && $list_id =~ /<[ \t]*(.+)?[ \t]*>/) { - $dst = $config->lookup_list_id($1); - } - if (!defined $dst && !defined $recipient) { +if (!scalar(@$dests)) { + $dests = PublicInbox::MDA->inboxes_for_list_id($config, $simple); + if (!scalar(@$dests) && !defined($recipient)) { die "ORIGINAL_RECIPIENT not defined in ENV\n"; } - defined $dst or do_exit(67); # EX_NOUSER 5.1.1 user unknown + scalar(@$dests) or do_exit(67); # EX_NOUSER 5.1.1 user unknown } -$dst->{inboxdir} or do_exit(67); -$dst = PublicInbox::InboxWritable->new($dst); -# pre-check, MDA has stricter rules than an importer might; -if ($precheck && !PublicInbox::MDA->precheck($simple, $dst->{address})) { - do_exit(0); -} +my $err; +@$dests = grep { + my $ibx = PublicInbox::InboxWritable->new($_); + eval { $ibx->assert_usable_dir }; + if ($@) { + warn $@; + $err = 1; + 0; + # pre-check, MDA has stricter rules than an importer might; + } elsif ($precheck) { + !!PublicInbox::MDA->precheck($simple, $ibx->{address}); + } else { + 1; + } +} @$dests; + +do_exit(67) if $err && scalar(@$dests) == 0; + $simple = undef; my $spam_ok; if ($spamc) { @@ -74,39 +84,51 @@ if ($spamc) { my $fh = $emm->fh; read($fh, $str, -s $fh); } - -my $mime = PublicInbox::MIME->new(\$str); do_exit(0) unless $spam_ok; -my $fcfg = $dst->{filter} || ''; -# -mda defaults to the strict base filter -if ($fcfg eq '') { - $dst->{filter} = 'PublicInbox::Filter::Base'; -} elsif ($fcfg eq 'scrub') { # legacy alias, undocumented, remove? - $dst->{filter} = 'PublicInbox::Filter::Mirror'; +# -mda defaults to the strict base filter which we won't use anywhere else +sub mda_filter_adjust ($) { + my ($ibx) = @_; + my $fcfg = $ibx->{filter} || ''; + if ($fcfg eq '') { + $ibx->{filter} = 'PublicInbox::Filter::Base'; + } elsif ($fcfg eq 'scrub') { # legacy alias, undocumented, remove? + $ibx->{filter} = 'PublicInbox::Filter::Mirror'; + } +} + +my @rejects; +for my $ibx (@$dests) { + mda_filter_adjust($ibx); + my $filter = $ibx->filter; + my $mime = PublicInbox::MIME->new($str); + my $ret = $filter->delivery($mime); + if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message + $mime = $ret; + } elsif ($ret == PublicInbox::Filter::Base::IGNORE) { + next; # nothing, keep looping + } elsif ($ret == PublicInbox::Filter::Base::REJECT) { + push @rejects, $filter->err; + next; + } + + PublicInbox::MDA->set_list_headers($mime, $ibx); + my $im = $ibx->importer(0); + if (defined $im->add($mime)) { + # ->abort is idempotent, no emergency if a single + # destination succeeds + $emm->abort; + } else { # v1-only + my $mid = $mime->header_obj->header_raw('Message-ID'); + # this message is similar to what ssoma-mda shows: + print STDERR "CONFLICT: Message-ID: $mid exists\n"; + } + $im->done; } -my $filter = $dst->filter; -my $ret = $filter->delivery($mime); -if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message - $mime = $ret; -} elsif ($ret == PublicInbox::Filter::Base::IGNORE) { - do_exit(0); # chuck it to emergency -} elsif ($ret == PublicInbox::Filter::Base::REJECT) { - $! = 65; # EX_DATAERR 5.6.0 data format error - die $filter->err, "\n"; -} # else { accept -$filter = undef; -PublicInbox::MDA->set_list_headers($mime, $dst); -my $im = $dst->importer(0); -if (defined $im->add($mime)) { - $emm = $emm->abort; -} else { - # this message is similar to what ssoma-mda shows: - print STDERR "CONFLICT: Message-ID: ", - $mime->header_obj->header_raw('Message-ID'), - " exists\n"; +if (scalar(@rejects) && scalar(@rejects) == scalar(@$dests)) { + $! = 65; # EX_DATAERR 5.6.0 data format error + die join("\n", @rejects, ''); } -$im->done; do_exit(0); |