From 975a1b174d40fd957ed869e9ff68f3c6dd6f37df Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 Jun 2016 00:14:29 +0000 Subject: mda: hook up new filter functionality This removes the Email::Filter dependency as well as the signature-breaking scrubber code. We now prefer to reject unacceptable messages and grudgingly (and blindly) mirror messages we're not the primary endpoint for. --- script/public-inbox-learn | 6 +-- script/public-inbox-mda | 131 ++++++++++++++++++++++++---------------------- 2 files changed, 69 insertions(+), 68 deletions(-) (limited to 'script') diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 783cf03a..817fd5e9 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -55,11 +55,7 @@ foreach my $h (qw(Cc To)) { } } -if ($train eq "ham") { - require PublicInbox::MDA; - require PublicInbox::Filter; - PublicInbox::Filter->run($mime); -} +require PublicInbox::MDA if $train eq "ham"; # n.b. message may be cross-posted to multiple public-inboxes foreach my $recipient (keys %dests) { diff --git a/script/public-inbox-mda b/script/public-inbox-mda index ff2835da..63096fee 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -6,97 +6,102 @@ use strict; use warnings; my $usage = 'public-inbox-mda < rfc2822_message'; +my ($ems, $emm); -use Email::Filter; +sub do_exit { + my ($code) = shift; + $emm = $ems = undef; # trigger DESTROY + exit $code; +} + +use Email::Simple; use Email::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect -use IPC::Run qw(run); use PublicInbox::MDA; -use PublicInbox::Filter; use PublicInbox::Config; use PublicInbox::Import; use PublicInbox::Git; +use PublicInbox::Emergency; +use PublicInbox::Filter::Base; +use PublicInbox::Spawn qw(popen_rd); # n.b: hopefully we can setup the emergency path without bailing due to # user error, we really want to setup the emergency destination ASAP # in case there's bugs in our 
code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; - -# this reads the message from stdin -my $filter = Email::Filter->new(emergency => $emergency); +$ems = PublicInbox::Emergency->new($emergency); +my $str = eval { local $/; <STDIN> }; +$ems->prepare(\$str); +my $simple = Email::Simple->new(\$str); my $config = PublicInbox::Config->new; my $recipient = $ENV{ORIGINAL_RECIPIENT}; defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n"; my $dst = $config->lookup($recipient); # first check -defined $dst or exit(1); -my $main_repo = $dst->{mainrepo} or exit(1); -my $filtered; # string dest +defined $dst or do_exit(1); +my $main_repo = $dst->{mainrepo} or do_exit(1); + +# pre-check, MDA has stricter rules than an importer might; +do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address}); -if (PublicInbox::MDA->precheck($filter->simple, $dst->{address}) && - do_spamc($filter->simple, \$filtered)) { - # update our message with SA headers (in case our filter rejects it) - my $msg = Email::MIME->new(\$filtered); - $filtered = undef; - $filter->simple($msg); +$str = ''; +my $spam_ok = do_spamc($ems->fh, \$str); +$simple = undef; +$emm = PublicInbox::Emergency->new($emergency); +$emm->prepare(\$str); +$ems = $ems->abort; +my $mime = Email::MIME->new(\$str); +$str = ''; +do_exit(0) unless $spam_ok; - my $filter_arg; - my $fcfg = $dst->{filter}; - if (!defined $fcfg || $filter eq 'reject') { - $filter_arg = $filter; - } elsif ($fcfg eq 'scrub') { - $filter_arg = undef; # the default for legacy versions - } else { - warn "publicinbox.$dst->{name}.filter=$fcfg invalid\n"; - warn "must be either 'scrub' or 'reject' (the default)\n"; - } +my $fcfg = $dst->{filter} || ''; +my $filter; +if ($fcfg eq 'scrub') { # TODO: + require PublicInbox::Filter::Mirror; + $filter = PublicInbox::Filter::Mirror->new; +} else { + $filter = PublicInbox::Filter::Base->new; +} - if (PublicInbox::Filter->run($msg, $filter_arg)) { - # run
spamc again on the HTML-free message - if (do_spamc($msg, \$filtered)) { - $msg = Email::MIME->new(\$filtered); - PublicInbox::MDA->set_list_headers($msg, $dst); - $filter->simple($msg); +my $ret = $filter->delivery($mime); +if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message + $mime = $ret; +} elsif ($ret == PublicInbox::Filter::Base::IGNORE) { + do_exit(0); # chuck it to emergency +} elsif ($ret == PublicInbox::Filter::Base::REJECT) { + $! = $ret; + die $filter->err, "\n"; +} # else { accept - END { - index_sync($main_repo) if ($? == 0); - }; - my $git = PublicInbox::Git->new($main_repo); - my $im = PublicInbox::Import->new($git, - $dst->{name}, $recipient); - if (defined $im->add($msg)) { - $im->done; - $filter->ignore; # exits - } - # this message is similar to what ssoma-mda shows: - print STDERR "CONFLICT: Message-ID: ", - $msg->header_obj->header_raw('Message-ID'), - " exists\n"; - } - } +PublicInbox::MDA->set_list_headers($mime, $dst); +END { index_sync($main_repo) if $? == 0 }; +my $git = PublicInbox::Git->new($main_repo); +my $im = PublicInbox::Import->new($git, $dst->{name}, $recipient); +if (defined $im->add($mime)) { + $im->done; + $emm = $emm->abort; } else { - # Ensure emergency spam gets spamassassin headers. - # This makes it easier to prioritize obvious spam from less obvious - if (defined($filtered) && $filtered ne '') { - my $drop = Email::MIME->new(\$filtered); - $filtered = undef; - $filter->simple($drop); - } + # this message is similar to what ssoma-mda shows: + print STDERR "CONFLICT: Message-ID: ", + $mime->header_obj->header_raw('Message-ID'), + " exists\n"; } -exit 0; # goes to emergency +do_exit(0); # we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers -# not using Email::Filter->pipe here since we want the stdout of -# the command even on failure (spamc will set $? on error). 
sub do_spamc { - my ($msg, $out) = @_; - eval { - my $orig = $msg->as_string; - run([qw/spamc -E --headers/], \$orig, $out); - }; + my ($in, $out) = @_; + my $rdr = { 0 => fileno($in) }; + my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr); + my $r; + do { + $r = sysread($fh, $$out, 65536, length($$out)); + } while (defined($r) && $r != 0); + close $fh or die "close failed: $!\n"; + waitpid($pid, 0); - return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1; + ($? || $$out eq '') ? 0 : 1; } sub index_sync { -- cgit v1.2.3-24-ge0c7