user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH v2] add filter for RubyLang lists
  2017-06-21 23:33  5% [REJECT] add filter for RubyLang lists Eric Wong
  2017-06-22  0:31  7% ` Eric Wong
@ 2017-06-22  7:11  5% ` Eric Wong
  1 sibling, 0 replies; 3+ results
From: Eric Wong @ 2017-06-22  7:11 UTC (permalink / raw)
  To: meta

Unfortunately, it appears we have to reject this and instead add
support for filtering at View time(*), due to DKIM signatures in
messages from ruby-lang.org.

(*) which may not be worth it
---
 MANIFEST                           |  1 +
 lib/PublicInbox/AltId.pm           |  4 +--
 lib/PublicInbox/Filter/RubyLang.pm | 63 ++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WatchMaildir.pm    |  2 +-
 4 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 lib/PublicInbox/Filter/RubyLang.pm

diff --git a/MANIFEST b/MANIFEST
index d0b7f2b..c7c4a92 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/RubyLang.pm
 lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
 lib/PublicInbox/GetlineBody.pm
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6fdc3a2..73fecd5 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -9,7 +9,7 @@ use URI::Escape qw(uri_unescape);
 # spec: TYPE:PREFIX:param1=value1&param2=value2&...
 # Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
 sub new {
-	my ($class, $inbox, $spec) = @_;
+	my ($class, $inbox, $spec, $writable) = @_;
 	my ($type, $prefix, $query) = split(/:/, $spec, 3);
 	$type eq 'serial' or die "non-serial not supported, yet\n";
 
@@ -25,7 +25,7 @@ sub new {
 		$f = "$inbox->{mainrepo}/public-inbox/$f";
 	}
 	bless {
-		mm_alt => PublicInbox::Msgmap->new_file($f),
+		mm_alt => PublicInbox::Msgmap->new_file($f, $writable),
 		xprefix => 'X'.uc($prefix),
 	}, $class;
 }
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
new file mode 100644
index 0000000..ec4bc32
--- /dev/null
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -0,0 +1,63 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for lists.ruby-lang.org trailers
+package PublicInbox::Filter::RubyLang;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l1 = qr/Unsubscribe:\s
+	<mailto:ruby-\w+-request\@ruby-lang\.org\?subject=unsubscribe>/x;
+my $l2 = qr{<http://lists\.ruby-lang\.org/cgi-bin/mailman/options/ruby-\w+>};
+
+sub new {
+	my ($class, %opts) = @_;
+	my $altid = delete $opts{-altid};
+	my $self = $class->SUPER::new(%opts);
+	# altid = serial:ruby-core:file=msgmap.sqlite3
+	if ($altid) {
+		require PublicInbox::MID; # mid_clean
+		my $ibx = $self->{-inbox};
+		require PublicInbox::AltId;
+		$self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
+	}
+	$self;
+}
+
+sub scrub {
+	my ($self, $mime) = @_;
+	# no msg_iter here, that is only for read-only access
+	$mime->walk_parts(sub {
+		my ($part) = $_[0];
+		my $ct = $part->content_type;
+		if (!$ct || $ct =~ m{\btext/plain\b}i) {
+			my $s = eval { $part->body_str };
+			if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
+				$part->body_str_set($s);
+			}
+		}
+	});
+	my $altid = $self->{-altid};
+	if ($altid) {
+		my $hdr = $mime->header_obj;
+		my $mid = $hdr->header_raw('Message-ID');
+		unless (defined $mid) {
+			return $self->REJECT('Message-Id missing');
+		}
+		my $n = $hdr->header_raw('X-Mail-Count');
+		if (!defined($n) || $n !~ /\A\s*\d+\s*\z/) {
+			return $self->REJECT('X-Mail-Count not numeric');
+		}
+		$mid = PublicInbox::MID::mid_clean($mid);
+		$altid->{mm_alt}->mid_set($n, $mid);
+	}
+	$self->ACCEPT($mime);
+}
+
+sub delivery {
+	my ($self, $mime) = @_;
+	$self->scrub($mime);
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c436742..8588f16 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -242,7 +242,7 @@ sub _scrubber_for {
 	my ($inbox) = @_;
 	my $f = $inbox->{filter};
 	if ($f && $f =~ /::/) {
-		my @args;
+		my @args = (-inbox => $inbox);
 		# basic line splitting, only
 		# Perhaps we can have proper quote splitting one day...
 		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
-- 
EW


^ permalink raw reply related	[relevance 5%]

* Re: [REJECT] add filter for RubyLang lists
  2017-06-21 23:33  5% [REJECT] add filter for RubyLang lists Eric Wong
@ 2017-06-22  0:31  7% ` Eric Wong
  2017-06-22  7:11  5% ` [PATCH v2] " Eric Wong
  1 sibling, 0 replies; 3+ results
From: Eric Wong @ 2017-06-22  0:31 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> Unfortunately, it appears we have to reject this and instead add
> support for filtering at View time(*), due to DKIM signatures in
> messages from ruby-lang.org.

Never mind, I'll apply this patch.  DKIM signatures are only
added by Redmine, and are invalid by the time a message hits ruby-core.

^ permalink raw reply	[relevance 7%]

* [REJECT] add filter for RubyLang lists
@ 2017-06-21 23:33  5% Eric Wong
  2017-06-22  0:31  7% ` Eric Wong
  2017-06-22  7:11  5% ` [PATCH v2] " Eric Wong
  0 siblings, 2 replies; 3+ results
From: Eric Wong @ 2017-06-21 23:33 UTC (permalink / raw)
  To: meta

Unfortunately, it appears we have to reject this and instead add
support for filtering at View time(*), due to DKIM signatures in
messages from ruby-lang.org.

(*) which may not be worth it
---
 MANIFEST                           |  1 +
 lib/PublicInbox/AltId.pm           |  4 +--
 lib/PublicInbox/Filter/RubyLang.pm | 59 ++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WatchMaildir.pm    |  2 +-
 4 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 lib/PublicInbox/Filter/RubyLang.pm

diff --git a/MANIFEST b/MANIFEST
index d0b7f2b..c7c4a92 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/RubyLang.pm
 lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
 lib/PublicInbox/GetlineBody.pm
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6fdc3a2..73fecd5 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -9,7 +9,7 @@ use URI::Escape qw(uri_unescape);
 # spec: TYPE:PREFIX:param1=value1&param2=value2&...
 # Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
 sub new {
-	my ($class, $inbox, $spec) = @_;
+	my ($class, $inbox, $spec, $writable) = @_;
 	my ($type, $prefix, $query) = split(/:/, $spec, 3);
 	$type eq 'serial' or die "non-serial not supported, yet\n";
 
@@ -25,7 +25,7 @@ sub new {
 		$f = "$inbox->{mainrepo}/public-inbox/$f";
 	}
 	bless {
-		mm_alt => PublicInbox::Msgmap->new_file($f),
+		mm_alt => PublicInbox::Msgmap->new_file($f, $writable),
 		xprefix => 'X'.uc($prefix),
 	}, $class;
 }
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
new file mode 100644
index 0000000..a0e6d7b
--- /dev/null
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -0,0 +1,59 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for lists.ruby-lang.org trailers
+package PublicInbox::Filter::RubyLang;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l1 = qr/Unsubscribe:\s
+	<mailto:ruby-\w+-request\@ruby-lang\.org\?subject=unsubscribe>/x;
+my $l2 = qr{<http://lists\.ruby-lang\.org/cgi-bin/mailman/options/ruby-\w+>};
+
+sub new {
+	my ($class, %opts) = @_;
+	my $altid = delete $opts{-altid};
+	my $self = $class->SUPER::new(%opts);
+	# altid = serial:ruby-core:file=msgmap.sqlite3
+	if ($altid) {
+		require PublicInbox::MID; # mid_clean
+		my $ibx = $self->{-inbox};
+		require PublicInbox::AltId;
+		$self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
+	}
+	$self;
+}
+
+sub scrub {
+	my ($self, $mime) = @_;
+	# no msg_iter here, that is only for read-only access
+	$mime->walk_parts(sub {
+		my ($part) = $_[0];
+		my $ct = $part->content_type;
+		if (!$ct || $ct =~ m{\btext/plain\b}i) {
+			my $s = eval { $part->body_str };
+			if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
+				$part->body_str_set($s);
+			}
+		}
+	});
+	my $altid = $self->{-altid};
+	if ($altid) {
+		my $hdr = $mime->header_obj;
+		my $n = $hdr->header_raw('X-Mail-Count');
+		my $mid = $hdr->header_raw('Message-ID');
+		if (defined $n && defined $mid && $n =~ /\A\s*\d+\s*\z/) {
+			$mid = PublicInbox::MID::mid_clean($mid);
+			$altid->{mm_alt}->mid_set($n, $mid);
+		}
+	}
+	$self->ACCEPT($mime);
+}
+
+sub delivery {
+	my ($self, $mime) = @_;
+	$self->scrub($mime);
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c436742..8588f16 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -242,7 +242,7 @@ sub _scrubber_for {
 	my ($inbox) = @_;
 	my $f = $inbox->{filter};
 	if ($f && $f =~ /::/) {
-		my @args;
+		my @args = (-inbox => $inbox);
 		# basic line splitting, only
 		# Perhaps we can have proper quote splitting one day...
 		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
-- 
EW


^ permalink raw reply related	[relevance 5%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2017-06-21 23:33  5% [REJECT] add filter for RubyLang lists Eric Wong
2017-06-22  0:31  7% ` Eric Wong
2017-06-22  7:11  5% ` [PATCH v2] " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).