user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/6] split out spamcheck/spamc to its own module.
  @ 2016-06-24 20:47  7% ` Eric Wong
  0 siblings, 0 replies; 1+ results
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
  To: meta

This should hopefully make it easier to try other anti-spam
systems (or none at all) in the future.
---
 MANIFEST                           |  2 +
 lib/PublicInbox/Spamcheck/Spamc.pm | 94 ++++++++++++++++++++++++++++++++++++++
 script/public-inbox-learn          | 21 +++------
 script/public-inbox-mda            | 23 ++--------
 t/spamcheck_spamc.t                | 49 ++++++++++++++++++++
 5 files changed, 156 insertions(+), 33 deletions(-)
 create mode 100644 lib/PublicInbox/Spamcheck/Spamc.pm
 create mode 100644 t/spamcheck_spamc.t

diff --git a/MANIFEST b/MANIFEST
index bc7d54c..834cb5d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -69,6 +69,7 @@ lib/PublicInbox/Search.pm
 lib/PublicInbox/SearchIdx.pm
 lib/PublicInbox/SearchMsg.pm
 lib/PublicInbox/SearchView.pm
+lib/PublicInbox/Spamcheck/Spamc.pm
 lib/PublicInbox/Spawn.pm
 lib/PublicInbox/SpawnPP.pm
 lib/PublicInbox/Thread.pm
@@ -133,6 +134,7 @@ t/psgi_attach.t
 t/psgi_mount.t
 t/qspawn.t
 t/search.t
+t/spamcheck_spamc.t
 t/spawn.t
 t/utf8.mbox
 t/view.t
diff --git a/lib/PublicInbox/Spamcheck/Spamc.pm b/lib/PublicInbox/Spamcheck/Spamc.pm
new file mode 100644
index 0000000..312e52d
--- /dev/null
+++ b/lib/PublicInbox/Spamcheck/Spamc.pm
@@ -0,0 +1,94 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spamcheck::Spamc;
+use strict;
+use warnings;
+use PublicInbox::Spawn qw(popen_rd spawn);
+use IO::File;
+use Fcntl qw(:DEFAULT SEEK_SET);
+
+sub new {
+	my ($class) = @_;
+	bless {
+		checkcmd => [qw(spamc -E --headers)],
+		hamcmd => [qw(spamc -L ham)],
+		spamcmd => [qw(spamc -L spam)],
+	}, $class;
+}
+
+sub spamcheck {
+	my ($self, $msg, $out) = @_;
+
+	my $tmp;
+	my $fd = _msg_to_fd($self, $msg, \$tmp);
+	my $rdr = { 0 => $fd };
+	my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
+	defined $pid or die "failed to popen_rd spamc: $!\n";
+	my $r;
+	unless (ref $out) {
+		my $buf = '';
+		$out = \$buf;
+	}
+	do {
+		$r = sysread($fh, $$out, 65536, length($$out));
+	} while (defined($r) && $r != 0);
+	defined $r or die "read failed: $!";
+	close $fh or die "close failed: $!";
+	waitpid($pid, 0);
+	($? || $$out eq '') ? 0 : 1;
+}
+
+sub hamlearn {
+	my ($self, $msg, $rdr) = @_;
+	_learn($self, $msg, $rdr, 'hamcmd');
+}
+
+sub spamlearn {
+	my ($self, $msg, $rdr) = @_;
+	_learn($self, $msg, $rdr, 'spamcmd');
+}
+
+sub _learn {
+	my ($self, $msg, $rdr, $field) = @_;
+	$rdr ||= {};
+	$rdr->{1} ||= $self->_devnull;
+	$rdr->{2} ||= $self->_devnull;
+	my $tmp;
+	$rdr->{0} = _msg_to_fd($self, $msg, \$tmp);
+	my $pid = spawn($self->{$field}, undef, $rdr);
+	waitpid($pid, 0);
+	!$?;
+}
+
+sub _devnull {
+	my ($self) = @_;
+	my $fd = $self->{-devnullfd};
+	return $fd if defined $fd;
+	open my $fh, '+>', '/dev/null' or
+				die "failed to open /dev/null: $!";
+	$self->{-devnull} = $fh;
+	$self->{-devnullfd} = fileno($fh);
+}
+
+sub _msg_to_fd {
+	my ($self, $msg, $tmpref) = @_;
+	my $tmpfh;
+	my $fd;
+	if (my $ref = ref($msg)) {
+
+		return $msg->fileno if $ref ne 'SCALAR' && $msg->can('fileno');
+
+		$tmpfh = IO::File->new_tmpfile;
+		$tmpfh->autoflush(1);
+		$msg = \($msg->as_string) if $ref ne 'SCALAR';
+		print $tmpfh $$msg or die "failed to print: $!";
+		sysseek($tmpfh, 0, SEEK_SET) or
+			die "sysseek(fh) failed: $!";
+		$$tmpref = $tmpfh;
+
+		return fileno($tmpfh);
+	}
+	$msg;
+}
+
+1;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index b05ef05..7ef2a31 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -14,12 +14,13 @@ use Email::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
-use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Spamcheck::Spamc;
 my $train = shift or die "usage: $usage\n";
 if ($train !~ /\A(?:ham|spam)\z/) {
 	die "`$train' not recognized.\nusage: $usage\n";
 }
 
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
 my $pi_config = PublicInbox::Config->new;
 my $err;
 my $mime = Email::MIME->new(eval {
@@ -27,19 +28,11 @@ my $mime = Email::MIME->new(eval {
 	my $data = scalar <STDIN>;
 	$data =~ s/\AFrom [^\r\n]*\r?\n//s;
 	eval {
-		my @cmd = (qw(spamc -L), $train);
-		my ($r, $w);
-		pipe($r, $w) or die "pipe failed: $!";
-		open my $null, '>', '/dev/null' or
-					die "failed to open /dev/null: $!";
-		my $nullfd = fileno($null);
-		my %rdr = (0 => fileno($r), 1 => $nullfd, 2 => $nullfd);
-		my $pid = spawn(\@cmd, undef, \%rdr);
-		close $null;
-		close $r or die "close \$r failed: $!";
-		print $w $data or die "print \$w failed: $!";
-		close $w or die "close \$w failed: $!";
-		waitpid($pid, 0);
+		if ($train eq 'ham') {
+			$spamc->hamlearn(\$data);
+		} else {
+			$spamc->spamlearn(\$data);
+		}
 		die "spamc failed with: $?\n" if $?;
 	};
 	$err = $@;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 013642d..f739ad0 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -24,7 +24,7 @@ use PublicInbox::Import;
 use PublicInbox::Git;
 use PublicInbox::Emergency;
 use PublicInbox::Filter::Base;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spamcheck::Spamc;
 
 # n.b: hopefully we can setup the emergency path without bailing due to
 # user error, we really want to setup the emergency destination ASAP
@@ -44,9 +44,9 @@ my $main_repo = $dst->{mainrepo} or do_exit(1);
 
 # pre-check, MDA has stricter rules than an importer might;
 do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address});
-
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
 $str = '';
-my $spam_ok = do_spamc($ems->fh, \$str);
+my $spam_ok = $spamc->spamcheck($ems->fh, \$str);
 $simple = undef;
 $emm = PublicInbox::Emergency->new($emergency);
 $emm->prepare(\$str);
@@ -90,20 +90,5 @@ if (defined $im->add($mime)) {
 			$mime->header_obj->header_raw('Message-ID'),
 			" exists\n";
 }
-do_exit(0);
-
-# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
-sub do_spamc {
-	my ($in, $out) = @_;
-	my $rdr = { 0 => fileno($in) };
-	my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr);
-	defined $pid or die "failed to popen_rd spamc: $!\n";
-	my $r;
-	do {
-		$r = sysread($fh, $$out, 65536, length($$out));
-	} while (defined($r) && $r != 0);
-	close $fh or die "close failed: $!\n";
-	waitpid($pid, 0);
 
-	($? || $$out eq '') ? 0 : 1;
-}
+do_exit(0);
diff --git a/t/spamcheck_spamc.t b/t/spamcheck_spamc.t
new file mode 100644
index 0000000..65ac5c2
--- /dev/null
+++ b/t/spamcheck_spamc.t
@@ -0,0 +1,49 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Cwd;
+use Email::Simple;
+use IO::File;
+use File::Temp qw/tempdir/;
+use Fcntl qw(:DEFAULT SEEK_SET);
+my $tmpdir = tempdir('spamcheck_spamc-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+
+use_ok 'PublicInbox::Spamcheck::Spamc';
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
+$spamc->{checkcmd} = [qw(cat)];
+
+{
+	open my $fh, '+>', "$tmpdir/file" or die "open failed: $!";
+	ok(!$spamc->spamcheck($fh), 'empty '.ref($fh));
+}
+ok(!$spamc->spamcheck(IO::File->new_tmpfile), 'IO::File->new_tmpfile');
+
+my $dst = '';
+my $src = <<'EOF';
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+To: <e@example.com>
+From: <e@example.com>
+Subject: test
+Message-ID: <testmessage@example.com>
+
+EOF
+ok($spamc->spamcheck(Email::Simple->new($src), \$dst), 'Email::Simple works');
+is($dst, $src, 'input == output');
+
+$dst = '';
+$spamc->{checkcmd} = ['sh', '-c', 'cat; false'];
+ok(!$spamc->spamcheck(Email::Simple->new($src), \$dst), 'Failed check works');
+is($dst, $src, 'input == output for spammy example');
+
+for my $l (qw(ham spam)) {
+	my $file = "$tmpdir/$l.out";
+	$spamc->{$l.'cmd'} = ['tee', $file ];
+	my $method = $l.'learn';
+	ok($spamc->$method(Email::Simple->new($src)), "$method OK");
+	open my $fh, '<', $file or die "failed to open $file: $!";
+	is(eval { local $/, <$fh> }, $src, "$l command ran alright");
+}
+
+done_testing();

^ permalink raw reply related	[relevance 7%]

Results 1-1 of 1 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2016-06-24 20:47     [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
2016-06-24 20:47  7% ` [PATCH 2/6] split out spamcheck/spamc to its own module Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).