user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/9] big mda filter changes
@ 2016-06-15  0:37  7% Eric Wong
  2016-06-15  0:37  5% ` [PATCH 7/9] filter: begin work on a new filter API Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2016-06-15  0:37 UTC (permalink / raw)
  To: meta

Eric Wong (9):
      drop dependency on File::Path::Expand
      t/feed.t: make IPC::Run usage optional
      learn: remove IPC::Run dependency
      t/mda.t: remove senseless use of Email::Filter
      t/mda: use only Maildir for testing
      mda: precheck no longer depends on Email::Filter
      filter: begin work on a new filter API
      emergency: implement new emergency Maildir delivery
      mda: hook up new filter functionality

 INSTALL                          |   3 -
 Makefile.PL                      |   2 -
 lib/PublicInbox/Config.pm        |   3 +-
 lib/PublicInbox/Emergency.pm     |  96 +++++++++++
 lib/PublicInbox/Filter.pm        | 232 ---------------------------
 lib/PublicInbox/Filter/Base.pm   | 100 ++++++++++++
 lib/PublicInbox/Filter/Mirror.pm |  12 ++
 lib/PublicInbox/Filter/Vger.pm   |  33 ++++
 lib/PublicInbox/MDA.pm           |  11 +-
 script/public-inbox-learn        |  42 +++--
 script/public-inbox-mda          | 135 ++++++++--------
 t/emergency.t                    |  53 ++++++
 t/feed.t                         |  18 +--
 t/filter.t                       | 337 ---------------------------------------
 t/filter_base.t                  |  81 ++++++++++
 t/filter_mirror.t                |  40 +++++
 t/filter_vger.t                  |  46 ++++++
 t/mda.t                          |  79 ++-------
 t/precheck.t                     |  14 +-
 19 files changed, 586 insertions(+), 751 deletions(-)

 Note to self: get "git apply" to work on --irreversible-delete patches


^ permalink raw reply	[relevance 7%]

* [PATCH 7/9] filter: begin work on a new filter API
  2016-06-15  0:37  7% [PATCH 0/9] big mda filter changes Eric Wong
@ 2016-06-15  0:37  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2016-06-15  0:37 UTC (permalink / raw)
  To: meta

This filter API should be independent of Email::Filter and
hopefully less intrusive to long-running processes.
---
 lib/PublicInbox/Filter/Base.pm   | 100 +++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Filter/Mirror.pm |  12 +++++
 lib/PublicInbox/Filter/Vger.pm   |  33 +++++++++++++
 t/filter_base.t                  |  81 +++++++++++++++++++++++++++++++
 t/filter_mirror.t                |  40 ++++++++++++++++
 t/filter_vger.t                  |  46 ++++++++++++++++++
 6 files changed, 312 insertions(+)
 create mode 100644 lib/PublicInbox/Filter/Base.pm
 create mode 100644 lib/PublicInbox/Filter/Mirror.pm
 create mode 100644 lib/PublicInbox/Filter/Vger.pm
 create mode 100644 t/filter_base.t
 create mode 100644 t/filter_mirror.t
 create mode 100644 t/filter_vger.t

diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
new file mode 100644
index 0000000..0991e87
--- /dev/null
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -0,0 +1,100 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# base class for creating per-list or per-project filters
+package PublicInbox::Filter::Base;
+use strict;
+use warnings;
+use PublicInbox::MsgIter;
+use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+
+my $NO_HTML = '*** We only accept plain-text mail, no HTML ***';
+our %DEFAULTS = (
+	reject_suffix => [ qw(exe bat cmd com pif scr vbs cpl zip) ],
+	reject_type => [ "text/html:$NO_HTML", "text/xhtml:$NO_HTML",
+		'application/vnd.ms-*:No proprietary data formats' ],
+);
+our $INVALID_FN = qr/\0/;
+
+sub REJECT () { 100 }
+sub ACCEPT { scalar @_ > 1 ? $_[1] : 1 }
+sub IGNORE () { 0 }
+
+my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
+sub glob2pat {
+	my ($glob) = @_;
+        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
+        $glob;
+}
+
+sub new {
+	my ($class, %opts) = @_;
+	my $self = bless { err => '', %opts }, $class;
+	foreach my $f (qw(reject_suffix reject_type)) {
+		# allow undef:
+		$self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
+	}
+	if (defined $self->{reject_suffix}) {
+		my $tmp = $self->{reject_suffix};
+		$tmp = join('|', map { glob2pat($_) } @$tmp);
+		$self->{reject_suffix} = qr/\.($tmp)\s*\z/i;
+	}
+	my $rt = [];
+	if (defined $self->{reject_type}) {
+		my $tmp = $self->{reject_type};
+		@$rt = map {
+			my ($type, $msg) = split(':', $_, 2);
+			$type = lc $type;
+			$msg ||= "Unacceptable Content-Type: $type";
+			my $re = glob2pat($type);
+			[ qr/\b$re\b/i, $msg ];
+		} @$tmp;
+	}
+	$self->{reject_type} = $rt;
+	$self;
+}
+
+sub reject ($$) {
+	my ($self, $reason) = @_;
+	$self->{err} = $reason;
+	REJECT;
+}
+
+sub err ($) { $_[0]->{err} }
+
+# for MDA
+sub delivery {
+	my ($self, $mime) = @_;
+
+	my $rt = $self->{reject_type};
+	my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
+	my (%sfx, %type);
+
+	msg_iter($mime, sub {
+		my ($part, $depth, @idx) = @{$_[0]};
+
+		my $ct = $part->content_type || 'text/plain';
+		foreach my $p (@$rt) {
+			if ($ct =~ $p->[0]) {
+				$type{$p->[1]} = 1;
+			}
+		}
+
+		my $fn = $part->filename;
+		if (defined($fn) && $fn =~ $reject_suffix) {
+			$sfx{$1} = 1;
+		}
+	});
+
+	my @r;
+	if (keys %type) {
+		push @r, sort keys %type;
+	}
+	if (keys %sfx) {
+		push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
+	}
+
+	@r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Mirror.pm b/lib/PublicInbox/Filter/Mirror.pm
new file mode 100644
index 0000000..d994088
--- /dev/null
+++ b/lib/PublicInbox/Filter/Mirror.pm
@@ -0,0 +1,12 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Dumb filter for blindly accepting everything
+package PublicInbox::Filter::Mirror;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+sub delivery { $_[0]->ACCEPT };
+
+1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
new file mode 100644
index 0000000..9498081
--- /dev/null
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for vger.kernel.org list trailer
+package PublicInbox::Filter::Vger;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l0 = qr/-+/; # older messages only had one '-'
+my $l1 =
+ qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
+my $l2 = qr/the body of a message to majordomo\@vger\.kernel\.org/;
+my $l3 =
+  qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;
+
+# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
+my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
+
+sub delivery {
+	my ($self, $mime) = @_;
+	my $s = $mime->as_string;
+
+	# the vger appender seems to only work on the raw string,
+	# so in multipart (e.g. GPG-signed) messages, the list trailer
+	# becomes invisible to MIME-aware email clients.
+	if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
+		$mime = Email::MIME->new(\$s);
+	}
+	$self->ACCEPT($mime);
+}
+
+1;
diff --git a/t/filter_base.t b/t/filter_base.t
new file mode 100644
index 0000000..ee5c730
--- /dev/null
+++ b/t/filter_base.t
@@ -0,0 +1,81 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Base';
+
+{
+	my $f = PublicInbox::Filter::Base->new;
+	ok($f, 'created stock object');
+	ok(defined $f->{reject_suffix}, 'rejected suffix redefined');
+	is(ref($f->{reject_suffix}), 'Regexp', 'reject_suffix should be a RE');
+}
+
+{
+	my $f = PublicInbox::Filter::Base->new(reject_suffix => undef);
+	ok($f, 'created base object q/o reject_suffix');
+	ok(!defined $f->{reject_suffix}, 'reject_suffix not defined');
+}
+
+{
+	my $f = PublicInbox::Filter::Base->new;
+	my $html_body = "<html><body>hi</body></html>";
+	my $parts = [
+		Email::MIME->create(
+			attributes => {
+				content_type => 'text/xhtml; charset=UTF-8',
+				encoding => 'base64',
+			},
+			body => $html_body,
+		),
+		Email::MIME->create(
+			attributes => {
+				content_type => 'text/plain',
+				encoding => 'quoted-printable',
+			},
+			body => 'hi = "bye"',
+		)
+	];
+	my $email = Email::MIME->create(
+		header_str => [
+		  From => 'a@example.com',
+		  Subject => 'blah',
+		  'Content-Type' => 'multipart/alternative'
+		],
+		parts => $parts,
+	);
+	is($f->delivery($email), 100, "xhtml rejected");
+}
+
+{
+	my $f = PublicInbox::Filter::Base->new;
+	my $parts = [
+		Email::MIME->create(
+			attributes => {
+				content_type => 'application/vnd.ms-excel',
+				encoding => 'base64',
+			},
+			body => 'junk',
+		),
+		Email::MIME->create(
+			attributes => {
+				content_type => 'text/plain',
+				encoding => 'quoted-printable',
+			},
+			body => 'junk',
+		)
+	];
+	my $email = Email::MIME->create(
+		header_str => [
+		  From => 'a@example.com',
+		  Subject => 'blah',
+		  'Content-Type' => 'multipart/mixed'
+		],
+		parts => $parts,
+	);
+	is($f->delivery($email), 100, 'proprietary format rejected on glob');
+}
+
+done_testing();
diff --git a/t/filter_mirror.t b/t/filter_mirror.t
new file mode 100644
index 0000000..01be282
--- /dev/null
+++ b/t/filter_mirror.t
@@ -0,0 +1,40 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Mirror';
+
+my $f = PublicInbox::Filter::Mirror->new;
+ok($f, 'created PublicInbox::Filter::Mirror object');
+{
+	my $html_body = "<html><body>hi</body></html>";
+	my $parts = [
+		Email::MIME->create(
+			attributes => {
+				content_type => 'text/html; charset=UTF-8',
+				encoding => 'base64',
+			},
+			body => $html_body,
+		),
+		Email::MIME->create(
+			attributes => {
+				content_type => 'text/plain',
+				encoding => 'quoted-printable',
+			},
+			body => 'hi = "bye"',
+		)
+	];
+	my $email = Email::MIME->create(
+		header_str => [
+		  From => 'a@example.com',
+		  Subject => 'blah',
+		  'Content-Type' => 'multipart/alternative'
+		],
+		parts => $parts,
+	);
+	is($f->ACCEPT, $f->delivery($email), 'accept any trash that comes');
+}
+
+done_testing();
diff --git a/t/filter_vger.t b/t/filter_vger.t
new file mode 100644
index 0000000..83a4c9e
--- /dev/null
+++ b/t/filter_vger.t
@@ -0,0 +1,46 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Vger';
+
+my $f = PublicInbox::Filter::Vger->new;
+ok($f, 'created PublicInbox::Filter::Vger object');
+{
+	my $lkml = <<'EOF';
+From: foo@example.com
+Subject: test
+
+keep this
+--
+To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html
+Please read the FAQ at  http://www.tux.org/lkml/
+EOF
+
+	my $mime = Email::MIME->new($lkml);
+	$mime = $f->delivery($mime);
+	is("keep this\n", $mime->body, 'normal message filtered OK');
+}
+
+{
+	my $no_nl = <<'EOF';
+From: foo@example.com
+Subject: test
+
+OSX users :P--
+To unsubscribe from this list: send the line "unsubscribe git" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html
+EOF
+
+	my $mime = Email::MIME->new($no_nl);
+	$mime = $f->delivery($mime);
+	is('OSX users :P', $mime->body, 'missing trailing LF in original OK');
+}
+
+
+done_testing();

^ permalink raw reply related	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2016-06-15  0:37  7% [PATCH 0/9] big mda filter changes Eric Wong
2016-06-15  0:37  5% ` [PATCH 7/9] filter: begin work on a new filter API Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).