From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_05 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B36EB2018A for ; Fri, 24 Jun 2016 20:47:18 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/6] split out spamcheck/spamc to its own module. Date: Fri, 24 Jun 2016 20:47:14 +0000 Message-Id: <20160624204718.27540-2-e@80x24.org> In-Reply-To: <20160624204718.27540-1-e@80x24.org> References: <20160624204718.27540-1-e@80x24.org> List-Id: This should hopefully make it easier to try other anti-spam systems (or none at all) in the future. --- MANIFEST | 2 + lib/PublicInbox/Spamcheck/Spamc.pm | 94 ++++++++++++++++++++++++++++++++++++++ script/public-inbox-learn | 21 +++------ script/public-inbox-mda | 23 ++-------- t/spamcheck_spamc.t | 49 ++++++++++++++++++++ 5 files changed, 156 insertions(+), 33 deletions(-) create mode 100644 lib/PublicInbox/Spamcheck/Spamc.pm create mode 100644 t/spamcheck_spamc.t diff --git a/MANIFEST b/MANIFEST index bc7d54c..834cb5d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -69,6 +69,7 @@ lib/PublicInbox/Search.pm lib/PublicInbox/SearchIdx.pm lib/PublicInbox/SearchMsg.pm lib/PublicInbox/SearchView.pm +lib/PublicInbox/Spamcheck/Spamc.pm lib/PublicInbox/Spawn.pm lib/PublicInbox/SpawnPP.pm lib/PublicInbox/Thread.pm @@ -133,6 +134,7 @@ t/psgi_attach.t t/psgi_mount.t t/qspawn.t t/search.t +t/spamcheck_spamc.t t/spawn.t t/utf8.mbox t/view.t diff --git a/lib/PublicInbox/Spamcheck/Spamc.pm b/lib/PublicInbox/Spamcheck/Spamc.pm new file mode 100644 index 0000000..312e52d --- /dev/null +++ b/lib/PublicInbox/Spamcheck/Spamc.pm @@ -0,0 +1,94 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +package PublicInbox::Spamcheck::Spamc; +use strict; +use warnings; +use PublicInbox::Spawn qw(popen_rd spawn); +use IO::File; +use Fcntl qw(:DEFAULT SEEK_SET); + +sub new { + my ($class) = @_; + bless { + checkcmd => [qw(spamc -E --headers)], + hamcmd => [qw(spamc -L ham)], + spamcmd => [qw(spamc -L spam)], + }, $class; +} + +sub spamcheck { + my ($self, $msg, $out) = @_; + + my $tmp; + my $fd = _msg_to_fd($self, $msg, \$tmp); + my $rdr = { 0 => $fd }; + my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr); + defined $pid or die "failed to popen_rd spamc: $!\n"; + my $r; + unless (ref $out) { + my $buf = ''; + $out = \$buf; + } + do { + $r = sysread($fh, $$out, 65536, length($$out)); + } while (defined($r) && $r != 0); + defined $r or die "read failed: $!"; + close $fh or die "close failed: $!"; + waitpid($pid, 0); + ($? || $$out eq '') ? 0 : 1; +} + +sub hamlearn { + my ($self, $msg, $rdr) = @_; + _learn($self, $msg, $rdr, 'hamcmd'); +} + +sub spamlearn { + my ($self, $msg, $rdr) = @_; + _learn($self, $msg, $rdr, 'spamcmd'); +} + +sub _learn { + my ($self, $msg, $rdr, $field) = @_; + $rdr ||= {}; + $rdr->{1} ||= $self->_devnull; + $rdr->{2} ||= $self->_devnull; + my $tmp; + $rdr->{0} = _msg_to_fd($self, $msg, \$tmp); + my $pid = spawn($self->{$field}, undef, $rdr); + waitpid($pid, 0); + !$?; +} + +sub _devnull { + my ($self) = @_; + my $fd = $self->{-devnullfd}; + return $fd if defined $fd; + open my $fh, '+>', '/dev/null' or + die "failed to open /dev/null: $!"; + $self->{-devnull} = $fh; + $self->{-devnullfd} = fileno($fh); +} + +sub _msg_to_fd { + my ($self, $msg, $tmpref) = @_; + my $tmpfh; + my $fd; + if (my $ref = ref($msg)) { + + return $msg->fileno if $ref ne 'SCALAR' && $msg->can('fileno'); + + $tmpfh = IO::File->new_tmpfile; + $tmpfh->autoflush(1); + $msg = \($msg->as_string) if $ref ne 'SCALAR'; + print $tmpfh $$msg or die "failed to print: $!"; + sysseek($tmpfh, 0, SEEK_SET) or + die "sysseek(fh) failed: $!"; + $$tmpref = $tmpfh; + + return fileno($tmpfh); + } + $msg; +} + +1; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index b05ef05..7ef2a31 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -14,12 +14,13 @@ use Email::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect use PublicInbox::Address; -use PublicInbox::Spawn qw(spawn); +use PublicInbox::Spamcheck::Spamc; my $train = shift or die "usage: $usage\n"; if ($train !~ /\A(?:ham|spam)\z/) { die "`$train' not recognized.\nusage: $usage\n"; } +my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_config = PublicInbox::Config->new; my $err; my $mime = Email::MIME->new(eval { @@ -27,19 +28,11 @@ my $mime = Email::MIME->new(eval { my $data = scalar ; $data =~ s/\AFrom [^\r\n]*\r?\n//s; eval { - my @cmd = (qw(spamc -L), $train); - my ($r, $w); - pipe($r, $w) or die "pipe failed: $!"; - open my $null, '>', '/dev/null' or - die "failed to open /dev/null: $!"; - my $nullfd = fileno($null); - my %rdr = (0 => fileno($r), 1 => $nullfd, 2 => $nullfd); - my $pid = spawn(\@cmd, undef, \%rdr); - close $null; - close $r or die "close \$r failed: $!"; - print $w $data or die "print \$w failed: $!"; - close $w or die "close \$w failed: $!"; - waitpid($pid, 0); + if ($train eq 'ham') { + $spamc->hamlearn(\$data); + } else { + $spamc->spamlearn(\$data); + } die "spamc failed with: $?\n" if $?; }; $err = $@; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 013642d..f739ad0 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -24,7 +24,7 @@ use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Emergency; use PublicInbox::Filter::Base; -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Spamcheck::Spamc; # n.b: hopefully we can setup the emergency path without bailing due to # user error, we really want to setup the emergency destination ASAP @@ -44,9 +44,9 @@ my $main_repo = $dst->{mainrepo} or do_exit(1); # pre-check, MDA has stricter rules than an importer might; do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address}); - +my $spamc = PublicInbox::Spamcheck::Spamc->new; $str = ''; -my $spam_ok = do_spamc($ems->fh, \$str); +my $spam_ok = $spamc->spamcheck($ems->fh, \$str); $simple = undef; $emm = PublicInbox::Emergency->new($emergency); $emm->prepare(\$str); @@ -90,20 +90,5 @@ if (defined $im->add($mime)) { $mime->header_obj->header_raw('Message-ID'), " exists\n"; } -do_exit(0); - -# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers -sub do_spamc { - my ($in, $out) = @_; - my $rdr = { 0 => fileno($in) }; - my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr); - defined $pid or die "failed to popen_rd spamc: $!\n"; - my $r; - do { - $r = sysread($fh, $$out, 65536, length($$out)); - } while (defined($r) && $r != 0); - close $fh or die "close failed: $!\n"; - waitpid($pid, 0); - ($? || $$out eq '') ? 0 : 1; -} +do_exit(0); diff --git a/t/spamcheck_spamc.t b/t/spamcheck_spamc.t new file mode 100644 index 0000000..65ac5c2 --- /dev/null +++ b/t/spamcheck_spamc.t @@ -0,0 +1,49 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Cwd; +use Email::Simple; +use IO::File; +use File::Temp qw/tempdir/; +use Fcntl qw(:DEFAULT SEEK_SET); +my $tmpdir = tempdir('spamcheck_spamc-XXXXXX', TMPDIR => 1, CLEANUP => 1); + +use_ok 'PublicInbox::Spamcheck::Spamc'; +my $spamc = PublicInbox::Spamcheck::Spamc->new; +$spamc->{checkcmd} = [qw(cat)]; + +{ + open my $fh, '+>', "$tmpdir/file" or die "open failed: $!"; + ok(!$spamc->spamcheck($fh), 'empty '.ref($fh)); +} +ok(!$spamc->spamcheck(IO::File->new_tmpfile), 'IO::File->new_tmpfile'); + +my $dst = ''; +my $src = <<'EOF'; +Date: Thu, 01 Jan 1970 00:00:00 +0000 +To: +From: +Subject: test +Message-ID: + +EOF +ok($spamc->spamcheck(Email::Simple->new($src), \$dst), 'Email::Simple works'); +is($dst, $src, 'input == output'); + +$dst = ''; +$spamc->{checkcmd} = ['sh', '-c', 'cat; false']; +ok(!$spamc->spamcheck(Email::Simple->new($src), \$dst), 'Failed check works'); +is($dst, $src, 'input == output for spammy example'); + +for my $l (qw(ham spam)) { + my $file = "$tmpdir/$l.out"; + $spamc->{$l.'cmd'} = ['tee', $file ]; + my $method = $l.'learn'; + ok($spamc->$method(Email::Simple->new($src)), "$method OK"); + open my $fh, '<', $file or die "failed to open $file: $!"; + is(eval { local $/, <$fh> }, $src, "$l command ran alright"); +} + +done_testing();