From 61f05bf5869c3f471a16926b1a837ab0d95fb095 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Wed, 15 Jun 2016 00:14:27 +0000
Subject: filter: begin work on a new filter API

This filter API should be independent of Email::Filter and
hopefully less intrusive to long running processes.
---
 lib/PublicInbox/Filter/Base.pm   | 122 +++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Filter/Mirror.pm |  13 +++++
 lib/PublicInbox/Filter/Vger.pm   |  37 ++++++++++++
 3 files changed, 172 insertions(+)
 create mode 100644 lib/PublicInbox/Filter/Base.pm
 create mode 100644 lib/PublicInbox/Filter/Mirror.pm
 create mode 100644 lib/PublicInbox/Filter/Vger.pm

(limited to 'lib')

diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
new file mode 100644
index 00000000..0991e874
--- /dev/null
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -0,0 +1,122 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+#
+# base class for creating per-list or per-project filters
+package PublicInbox::Filter::Base;
+use strict;
+use warnings;
+use PublicInbox::MsgIter;
+use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+
+my $NO_HTML = '*** We only accept plain-text mail, no HTML ***';
+
+# Defaults applied by new() for any key the caller did not pass at all.
+# reject_suffix holds filename suffix globs, reject_type holds
+# "TYPE_GLOB:MESSAGE" strings (the message is optional).
+our %DEFAULTS = (
+    reject_suffix => [ qw(exe bat cmd com pif scr vbs cpl zip) ],
+    reject_type => [ "text/html:$NO_HTML", "text/xhtml:$NO_HTML",
+        'application/vnd.ms-*:No proprietary data formats' ],
+);
+
+# fallback filename pattern for delivery() when reject_suffix is undef
+our $INVALID_FN = qr/\0/;
+
+# result codes; ACCEPT returns its argument (if given) instead of 1
+sub REJECT () { 100 }
+sub ACCEPT { scalar @_ > 1 ? $_[1] : 1 }
+sub IGNORE () { 0 }
+
+# glob characters translated to regexp syntax by glob2pat
+my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
+
+# Converts a shell-style glob to regexp source: characters listed in
+# %patmap are translated, every other character is quoted literally.
+sub glob2pat {
+    my ($glob) = @_;
+    $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
+    $glob;
+}
+
+# Constructor, %opts may contain:
+#   reject_suffix - array ref of suffix globs; undef disables suffix
+#                   matching (delivery then only uses $INVALID_FN)
+#   reject_type   - array ref of "TYPE_GLOB:MESSAGE"; undef disables it
+sub new {
+    my ($class, %opts) = @_;
+    my $self = bless { err => '', %opts }, $class;
+    foreach my $f (qw(reject_suffix reject_type)) {
+        # allow undef:
+        $self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
+    }
+    if (defined $self->{reject_suffix}) {
+        my $tmp = $self->{reject_suffix};
+        $tmp = join('|', map { glob2pat($_) } @$tmp);
+        $self->{reject_suffix} = qr/\.($tmp)\s*\z/i;
+    }
+    my $rt = [];
+    if (defined $self->{reject_type}) {
+        my $tmp = $self->{reject_type};
+        @$rt = map {
+            my ($type, $msg) = split(':', $_, 2);
+            $type = lc $type;
+            $msg ||= "Unacceptable Content-Type: $type";
+            my $re = glob2pat($type);
+            [ qr/\b$re\b/i, $msg ];
+        } @$tmp;
+    }
+    $self->{reject_type} = $rt;
+    $self;
+}
+
+# records $reason (readable via err) and returns REJECT
+sub reject ($$) {
+    my ($self, $reason) = @_;
+    $self->{err} = $reason;
+    REJECT;
+}
+
+# returns the reason recorded by reject (new() initializes it to '')
+sub err ($) { $_[0]->{err} }
+
+# for MDA
+# Passes $mime to msg_iter and rejects the message if any part seen by
+# the callback matches a reject_type pattern or has a filename matching
+# reject_suffix.  Returns REJECT (reasons available via err) or ACCEPT.
+sub delivery {
+    my ($self, $mime) = @_;
+
+    my $rt = $self->{reject_type};
+    my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
+    my (%sfx, %type);
+
+    msg_iter($mime, sub {
+        my ($part) = @{$_[0]};
+
+        my $ct = $part->content_type || 'text/plain';
+        foreach my $p (@$rt) {
+            if ($ct =~ $p->[0]) {
+                $type{$p->[1]} = 1;
+            }
+        }
+
+        my $fn = $part->filename;
+        if (defined($fn) && $fn =~ $reject_suffix) {
+            # $INVALID_FN has no capture group, so $1 is
+            # undef when it is the pattern that matched
+            $sfx{defined($1) ? $1 : ''} = 1;
+        }
+    });
+
+    my @r;
+    if (keys %type) {
+        push @r, sort keys %type;
+    }
+    if (keys %sfx) {
+        push @r, 'Rejected suffix(es): '.join(', ', sort keys %sfx);
+    }
+
+    @r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Mirror.pm b/lib/PublicInbox/Filter/Mirror.pm
new file mode 100644
index 00000000..d9940889
--- /dev/null
+++ b/lib/PublicInbox/Filter/Mirror.pm
@@ -0,0 +1,13 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# Dumb filter for blindly accepting everything
+package PublicInbox::Filter::Mirror;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+# accepts every message without inspecting it; returns ACCEPT's default (1)
+sub delivery { $_[0]->ACCEPT };
+
+1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
new file mode 100644
index 00000000..9498081a
--- /dev/null
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -0,0 +1,37 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# Filter for vger.kernel.org list trailer
+package PublicInbox::Filter::Vger;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+use Email::MIME; # delivery calls Email::MIME->new
+
+my $l0 = qr/-+/; # older messages only had one '-'
+my $l1 =
+    qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
+my $l2 = qr/the body of a message to majordomo\@vger\.kernel\.org/;
+my $l3 =
+    qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;
+
+# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
+my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
+
+# Strips the list trailer from the end of the raw message and returns
+# ACCEPT with the resulting message: a new Email::MIME object when a
+# trailer was removed, the original $mime otherwise.
+sub delivery {
+    my ($self, $mime) = @_;
+    my $s = $mime->as_string;
+
+    # the vger appender seems to only work on the raw string,
+    # so in multipart (e.g. GPG-signed) messages, the list trailer
+    # becomes invisible to MIME-aware email clients.
+    if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
+        $mime = Email::MIME->new(\$s);
+    }
+    $self->ACCEPT($mime);
+}
+
+1;
-- 
cgit v1.2.3-24-ge0c7