about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-06-15 00:14:29 +0000
committer Eric Wong <e@80x24.org> 2016-06-15 00:30:04 +0000
commit 975a1b174d40fd957ed869e9ff68f3c6dd6f37df (patch)
tree a6484ef33025577dfe9fc1e5d3d092bade5d9cff /script
parent c1abb946e53e4179666ebb290e31c2d9ddc40711 (diff)
download public-inbox-975a1b174d40fd957ed869e9ff68f3c6dd6f37df.tar.gz
This removes the Email::Filter dependency as well as the
signature-breaking scrubber code.  We now prefer to
reject unacceptable messages and grudgingly (and blindly)
mirror messages we're not the primary endpoint for.
Diffstat (limited to 'script')
-rwxr-xr-x script/public-inbox-learn 6
-rwxr-xr-x script/public-inbox-mda 131
2 files changed, 69 insertions, 68 deletions
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 783cf03a..817fd5e9 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -55,11 +55,7 @@ foreach my $h (qw(Cc To)) {
         }
 }
 
-if ($train eq "ham") {
-        require PublicInbox::MDA;
-        require PublicInbox::Filter;
-        PublicInbox::Filter->run($mime);
-}
+require PublicInbox::MDA if $train eq "ham";
 
 # n.b. message may be cross-posted to multiple public-inboxes
 foreach my $recipient (keys %dests) {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index ff2835da..63096fee 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -6,97 +6,102 @@
 use strict;
 use warnings;
 my $usage = 'public-inbox-mda < rfc2822_message';
+my ($ems, $emm);
 
-use Email::Filter;
+sub do_exit {
+        my ($code) = shift;
+        $emm = $ems = undef; # trigger DESTROY
+        exit $code;
+}
+
+use Email::Simple;
 use Email::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
-use IPC::Run qw(run);
 use PublicInbox::MDA;
-use PublicInbox::Filter;
 use PublicInbox::Config;
 use PublicInbox::Import;
 use PublicInbox::Git;
+use PublicInbox::Emergency;
+use PublicInbox::Filter::Base;
+use PublicInbox::Spawn qw(popen_rd);
 
 # n.b: hopefully we can setup the emergency path without bailing due to
 # user error, we really want to setup the emergency destination ASAP
 # in case there's bugs in our code or user error.
 my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
-
-# this reads the message from stdin
-my $filter = Email::Filter->new(emergency => $emergency);
+$ems = PublicInbox::Emergency->new($emergency);
+my $str = eval { local $/; <STDIN> };
+$ems->prepare(\$str);
+my $simple = Email::Simple->new(\$str);
 my $config = PublicInbox::Config->new;
 
 my $recipient = $ENV{ORIGINAL_RECIPIENT};
 defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n";
 my $dst = $config->lookup($recipient); # first check
-defined $dst or exit(1);
-my $main_repo = $dst->{mainrepo} or exit(1);
-my $filtered; # string dest
+defined $dst or do_exit(1);
+my $main_repo = $dst->{mainrepo} or do_exit(1);
+
+# pre-check, MDA has stricter rules than an importer might;
+do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address});
 
-if (PublicInbox::MDA->precheck($filter->simple, $dst->{address}) &&
-    do_spamc($filter->simple, \$filtered)) {
-        # update our message with SA headers (in case our filter rejects it)
-        my $msg = Email::MIME->new(\$filtered);
-        $filtered = undef;
-        $filter->simple($msg);
+$str = '';
+my $spam_ok = do_spamc($ems->fh, \$str);
+$simple = undef;
+$emm = PublicInbox::Emergency->new($emergency);
+$emm->prepare(\$str);
+$ems = $ems->abort;
+my $mime = Email::MIME->new(\$str);
+$str = '';
+do_exit(0) unless $spam_ok;
 
-        my $filter_arg;
-        my $fcfg = $dst->{filter};
-        if (!defined $fcfg || $filter eq 'reject') {
-                $filter_arg = $filter;
-        } elsif ($fcfg eq 'scrub') {
-                $filter_arg = undef; # the default for legacy versions
-        } else {
-                warn "publicinbox.$dst->{name}.filter=$fcfg invalid\n";
-                warn "must be either 'scrub' or 'reject' (the default)\n";
-        }
+my $fcfg = $dst->{filter} || '';
+my $filter;
+if ($fcfg eq 'scrub') { # TODO:
+        require PublicInbox::Filter::Mirror;
+        $filter = PublicInbox::Filter::Mirror->new;
+} else {
+        $filter = PublicInbox::Filter::Base->new;
+}
 
-        if (PublicInbox::Filter->run($msg, $filter_arg)) {
-                # run spamc again on the HTML-free message
-                if (do_spamc($msg, \$filtered)) {
-                        $msg = Email::MIME->new(\$filtered);
-                        PublicInbox::MDA->set_list_headers($msg, $dst);
-                        $filter->simple($msg);
+my $ret = $filter->delivery($mime);
+if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
+        $mime = $ret;
+} elsif ($ret == PublicInbox::Filter::Base::IGNORE) {
+        do_exit(0); # chuck it to emergency
+} elsif ($ret == PublicInbox::Filter::Base::REJECT) {
+        $! = $ret;
+        die $filter->err, "\n";
+} # else { accept
 
-                        END {
-                                index_sync($main_repo) if ($? == 0);
-                        };
-                        my $git = PublicInbox::Git->new($main_repo);
-                        my $im = PublicInbox::Import->new($git,
-                                                $dst->{name}, $recipient);
-                        if (defined $im->add($msg)) {
-                                $im->done;
-                                $filter->ignore; # exits
-                        }
-                        # this message is similar to what ssoma-mda shows:
-                        print STDERR "CONFLICT: Message-ID: ",
-                                $msg->header_obj->header_raw('Message-ID'),
-                                " exists\n";
-                }
-        }
+PublicInbox::MDA->set_list_headers($mime, $dst);
+END { index_sync($main_repo) if $? == 0 };
+my $git = PublicInbox::Git->new($main_repo);
+my $im = PublicInbox::Import->new($git, $dst->{name}, $recipient);
+if (defined $im->add($mime)) {
+        $im->done;
+        $emm = $emm->abort;
 } else {
-        # Ensure emergency spam gets spamassassin headers.
-        # This makes it easier to prioritize obvious spam from less obvious
-        if (defined($filtered) && $filtered ne '') {
-                my $drop = Email::MIME->new(\$filtered);
-                $filtered = undef;
-                $filter->simple($drop);
-        }
+        # this message is similar to what ssoma-mda shows:
+        print STDERR "CONFLICT: Message-ID: ",
+                        $mime->header_obj->header_raw('Message-ID'),
+                        " exists\n";
 }
-exit 0; # goes to emergency
+do_exit(0);
 
 # we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
-# not using Email::Filter->pipe here since we want the stdout of
-# the command even on failure (spamc will set $? on error).
 sub do_spamc {
-        my ($msg, $out) = @_;
-        eval {
-                my $orig = $msg->as_string;
-                run([qw/spamc -E --headers/], \$orig, $out);
-        };
+        my ($in, $out) = @_;
+        my $rdr = { 0 => fileno($in) };
+        my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr);
+        my $r;
+        do {
+                $r = sysread($fh, $$out, 65536, length($$out));
+        } while (defined($r) && $r != 0);
+        close $fh or die "close failed: $!\n";
+        waitpid($pid, 0);
 
-        return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
+        ($? || $$out eq '') ? 0 : 1;
 }
 
 sub index_sync {