public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob b6098669fc3c41a0584c76912b4f0c21cad8f5e9 1971 bytes (raw)
$ git show v1.4.0:lib/PublicInbox/Spamcheck/Spamc.pm	# shows this blob on the CLI

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
 
# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# Default spam filter class for wrapping spamc(1)
package PublicInbox::Spamcheck::Spamc;
use strict;
use warnings;
use PublicInbox::Spawn qw(popen_rd spawn);
use IO::Handle;
use Fcntl qw(SEEK_SET);

sub new {
	my ($class) = @_;
	bless {
		checkcmd => [qw(spamc -E --headers)],
		hamcmd => [qw(spamc -L ham)],
		spamcmd => [qw(spamc -L spam)],
	}, $class;
}

sub spamcheck {
	my ($self, $msg, $out) = @_;

	my $rdr = { 0 => _msg_to_fh($self, $msg) };
	my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
	my $r;
	unless (ref $out) {
		my $buf = '';
		$out = \$buf;
	}
again:
	do {
		$r = sysread($fh, $$out, 65536, length($$out));
	} while (defined($r) && $r != 0);
	unless (defined $r) {
		goto again if $!{EINTR};
		die "read failed: $!";
	}
	close $fh or die "close failed: $!";
	waitpid($pid, 0);
	($? || $$out eq '') ? 0 : 1;
}

sub hamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'hamcmd');
}

sub spamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'spamcmd');
}

sub _learn {
	my ($self, $msg, $rdr, $field) = @_;
	$rdr ||= {};
	$rdr->{0} = _msg_to_fh($self, $msg);
	$rdr->{1} ||= $self->_devnull;
	$rdr->{2} ||= $self->_devnull;
	my $pid = spawn($self->{$field}, undef, $rdr);
	waitpid($pid, 0);
	!$?;
}

sub _devnull {
	my ($self) = @_;
	$self->{-devnull} //= do {
		open my $fh, '+>', '/dev/null' or
				die "failed to open /dev/null: $!";
		$fh
	}
}

sub _msg_to_fh {
	my ($self, $msg) = @_;
	if (my $ref = ref($msg)) {
		my $fd = eval { fileno($msg) };
		return $msg if defined($fd) && $fd >= 0;

		open(my $tmpfh, '+>', undef) or die "failed to open: $!";
		$tmpfh->autoflush(1);
		$msg = \($msg->as_string) if $ref ne 'SCALAR';
		print $tmpfh $$msg or die "failed to print: $!";
		sysseek($tmpfh, 0, SEEK_SET) or
			die "sysseek(fh) failed: $!";

		return $tmpfh;
	}
	$msg;
}

1;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git