about summary refs log tree commit homepage
path: root/lib/PublicInbox/Filter
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-06-15 00:14:27 +0000
committer Eric Wong <e@80x24.org> 2016-06-15 00:15:11 +0000
commit 61f05bf5869c3f471a16926b1a837ab0d95fb095 (patch)
tree 41841dbde442f896ca5ba0495284cbbd6de784ac /lib/PublicInbox/Filter
parent 41aa4756879765d0c6d4d6e8754e0037b1a56fcc (diff)
download public-inbox-61f05bf5869c3f471a16926b1a837ab0d95fb095.tar.gz
This filter API should be independent of Email::Filter and
hopefully less intrusive to long running processes.
Diffstat (limited to 'lib/PublicInbox/Filter')
-rw-r--r-- lib/PublicInbox/Filter/Base.pm 100
-rw-r--r-- lib/PublicInbox/Filter/Mirror.pm 12
-rw-r--r-- lib/PublicInbox/Filter/Vger.pm 33
3 files changed, 145 insertions, 0 deletions
diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
new file mode 100644
index 00000000..0991e874
--- /dev/null
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -0,0 +1,100 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# base class for creating per-list or per-project filters
+package PublicInbox::Filter::Base;
+use strict;
+use warnings;
+use PublicInbox::MsgIter;
+use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+
+# bounce text shared by the two HTML content-type rules below
+my $NO_HTML = '*** We only accept plain-text mail, no HTML ***';
+
+# Fallback settings; new() copies an entry only when the caller did not
+# pass that key at all.
+#   reject_suffix: globs for attachment filename extensions
+#   reject_type:   "content-type-glob:message" strings
+our %DEFAULTS = (
+        reject_suffix => [
+                'exe', 'bat', 'cmd', 'com', 'pif', 'scr', 'vbs', 'cpl', 'zip',
+        ],
+        reject_type => [
+                'text/html:' . $NO_HTML,
+                'text/xhtml:' . $NO_HTML,
+                'application/vnd.ms-*:No proprietary data formats',
+        ],
+);
+
+# delivery() matches filenames against this when reject_suffix is unset
+our $INVALID_FN = qr/\0/;
+
+# Result codes for filters.
+sub REJECT () { 100 }
+
+# Called as a method: yields the argument following the invocant when
+# one was passed (even a false one), otherwise a plain 1.
+sub ACCEPT {
+        return $_[1] if @_ > 1;
+        1;
+}
+
+sub IGNORE () { 0 }
+
+# Translation table for glob2pat(): each glob metacharacter and the
+# regexp fragment it becomes.  '-' is passed through untouched so that
+# ranges inside a bracket expression ("[a-z]") remain ranges; escaping
+# it would reduce "[a-z]" to the three literal characters a, - and z.
+# Outside of brackets an unescaped '-' is an ordinary literal anyway.
+my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']', '-' => '-');
+
+# Convert a shell-style glob into regexp source text (a plain string,
+# not a compiled qr// object).  Characters listed in %patmap are
+# translated, every other character is escaped so it only matches
+# itself.  No anchors are added; callers supply their own.
+sub glob2pat {
+        my ($glob) = @_;
+        $glob =~ s!(.)!exists $patmap{$1} ? $patmap{$1} : quotemeta($1)!ge;
+        $glob;
+}
+
+# Constructor.  %opts become fields of the object; two keys are special:
+#   reject_suffix - array ref of filename-extension globs
+#   reject_type   - array ref of "content-type-glob[:message]" strings
+# A key that is absent is filled in from %DEFAULTS.  A key that is
+# present but undef switches that check off.
+# On return, reject_suffix holds a compiled regexp (or undef) and
+# reject_type an array ref of [ regexp, message ] pairs.
+sub new {
+        my ($class, %opts) = @_;
+        my $self = bless { err => '', %opts }, $class;
+        foreach my $f (qw(reject_suffix reject_type)) {
+                # "exists", not "defined": an explicit undef must stick
+                $self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
+        }
+
+        my $suffixes = $self->{reject_suffix};
+        if (defined $suffixes && @$suffixes) {
+                my $alt = join('|', map { glob2pat($_) } @$suffixes);
+                # the capture group reports which suffix matched
+                $self->{reject_suffix} = qr/\.($alt)\s*\z/i;
+        } else {
+                # Never compile an empty list: the alternation would be
+                # empty and the pattern would reject every filename
+                # that merely ends in a dot.
+                $self->{reject_suffix} = undef;
+        }
+
+        my $types = $self->{reject_type};
+        $types = [] unless defined $types;
+        my @rules;
+        foreach my $spec (@$types) {
+                my ($type, $msg) = split(':', $spec, 2);
+                $type = lc $type;
+                $msg ||= "Unacceptable Content-Type: $type";
+                my $re = glob2pat($type);
+                push @rules, [ qr/\b$re\b/i, $msg ];
+        }
+        $self->{reject_type} = \@rules;
+        $self;
+}
+
+# Store $why as the filter's error text (readable through err()) and
+# hand back the REJECT code.
+sub reject ($$) {
+        my ($filter, $why) = @_;
+        $filter->{err} = $why;
+        return REJECT;
+}
+
+# Accessor for the error text kept in the object's "err" field.
+sub err ($) {
+        my ($self) = @_;
+        $self->{err};
+}
+
+# for MDA
+#
+# Inspect every MIME part of $mime (visited via msg_iter) against the
+# reject_type and reject_suffix fields of the filter.
+# Returns the result of reject() when at least one part matched, with
+# the newline-joined reasons retrievable through err(); otherwise
+# returns the result of ACCEPT.
+sub delivery {
+        my ($self, $mime) = @_;
+
+        my $rt = $self->{reject_type};
+        # without a suffix rule, only $INVALID_FN filenames are refused
+        my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
+        my (%sfx, %type);
+        my $invalid_fn = 0;
+
+        msg_iter($mime, sub {
+                my ($part) = @{$_[0]};
+
+                # parts without a Content-Type are treated as text/plain
+                my $ct = $part->content_type || 'text/plain';
+                foreach my $p (@$rt) {
+                        # $p is a [ regexp, message ] pair
+                        $type{$p->[1]} = 1 if $ct =~ $p->[0];
+                }
+
+                my $fn = $part->filename;
+                if (defined($fn) && $fn =~ $reject_suffix) {
+                        if (defined $1) {
+                                # fold case so "EXE" and "exe" are
+                                # reported once
+                                $sfx{lc $1} = 1;
+                        } else {
+                                # the pattern captured nothing (as with
+                                # the $INVALID_FN fallback), so there
+                                # is no suffix to report
+                                $invalid_fn = 1;
+                        }
+                }
+        });
+
+        my @r;
+        push @r, sort keys %type;
+        if (keys %sfx) {
+                push @r, 'Rejected suffix(es): '.join(', ', sort keys %sfx);
+        }
+        push @r, 'Rejected invalid filename' if $invalid_fn;
+
+        @r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Mirror.pm b/lib/PublicInbox/Filter/Mirror.pm
new file mode 100644
index 00000000..d9940889
--- /dev/null
+++ b/lib/PublicInbox/Filter/Mirror.pm
@@ -0,0 +1,12 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Dumb filter for blindly accepting everything
+package PublicInbox::Filter::Mirror;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+# Accept unconditionally; the message itself is never looked at.
+sub delivery {
+        my ($self) = @_;
+        $self->ACCEPT;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
new file mode 100644
index 00000000..9498081a
--- /dev/null
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for vger.kernel.org list trailer
+package PublicInbox::Filter::Vger;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+# The list trailer, one pattern per line of it.
+my $dash = qr/-+/; # older messages only had one '-'
+my $unsub =
+ qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
+my $body = qr/the body of a message to majordomo\@vger\.kernel\.org/;
+my $info =
+  qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;
+
+# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
+my $faq = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
+
+# the complete trailer, anchored to the very end of the raw message
+my $trailer = qr/$dash\n$unsub\n$body\n$info\n($faq\n)?\z/;
+
+# Cut the list trailer off the end of the message when it is present,
+# re-parse the shortened text, and accept the resulting message.
+# A message without the trailer is accepted unchanged.
+sub delivery {
+        my ($self, $mime) = @_;
+        my $raw = $mime->as_string;
+
+        # the vger appender seems to only work on the raw string,
+        # so in multipart (e.g. GPG-signed) messages, the list trailer
+        # becomes invisible to MIME-aware email clients.
+        $mime = Email::MIME->new(\$raw) if $raw =~ s/$trailer//;
+        $self->ACCEPT($mime);
+}
+
+1;