From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4596020282; Thu, 22 Jun 2017 07:11:36 +0000 (UTC) Date: Thu, 22 Jun 2017 07:11:36 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH v2] add filter for RubyLang lists Message-ID: <20170622071136.GA8242@dcvr> References: <20170621233349.GA11714@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20170621233349.GA11714@dcvr> List-Id: Unfortunately, it appears we have to reject this and instead add support for filtering at View time(*), due to DKIM signatures in messages from ruby-lang.org. (*) which may not be worth it --- MANIFEST | 1 + lib/PublicInbox/AltId.pm | 4 +-- lib/PublicInbox/Filter/RubyLang.pm | 63 ++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/WatchMaildir.pm | 2 +- 4 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 lib/PublicInbox/Filter/RubyLang.pm diff --git a/MANIFEST b/MANIFEST index d0b7f2b..c7c4a92 100644 --- a/MANIFEST +++ b/MANIFEST @@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Filter/Base.pm lib/PublicInbox/Filter/Mirror.pm +lib/PublicInbox/Filter/RubyLang.pm lib/PublicInbox/Filter/SubjectTag.pm lib/PublicInbox/Filter/Vger.pm lib/PublicInbox/GetlineBody.pm diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm index 6fdc3a2..73fecd5 100644 --- a/lib/PublicInbox/AltId.pm +++ b/lib/PublicInbox/AltId.pm @@ -9,7 +9,7 @@ use URI::Escape qw(uri_unescape); # spec: TYPE:PREFIX:param1=value1&param2=value2&... 
# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3 sub new { - my ($class, $inbox, $spec) = @_; + my ($class, $inbox, $spec, $writable) = @_; my ($type, $prefix, $query) = split(/:/, $spec, 3); $type eq 'serial' or die "non-serial not supported, yet\n"; @@ -25,7 +25,7 @@ sub new { $f = "$inbox->{mainrepo}/public-inbox/$f"; } bless { - mm_alt => PublicInbox::Msgmap->new_file($f), + mm_alt => PublicInbox::Msgmap->new_file($f, $writable), xprefix => 'X'.uc($prefix), }, $class; } diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm new file mode 100644 index 0000000..ec4bc32 --- /dev/null +++ b/lib/PublicInbox/Filter/RubyLang.pm @@ -0,0 +1,63 @@ +# Copyright (C) 2017 all contributors +# License: AGPL-3.0+ + +# Filter for lists.ruby-lang.org trailers +package PublicInbox::Filter::RubyLang; +use base qw(PublicInbox::Filter::Base); +use strict; +use warnings; + +my $l1 = qr/Unsubscribe:\s + /x; +my $l2 = qr{}; + +sub new { + my ($class, %opts) = @_; + my $altid = delete $opts{-altid}; + my $self = $class->SUPER::new(%opts); + # altid = serial:ruby-core:file=msgmap.sqlite3 + if ($altid) { + require PublicInbox::MID; # mid_clean + my $ibx = $self->{-inbox}; + require PublicInbox::AltId; + $self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1); + } + $self; +} + +sub scrub { + my ($self, $mime) = @_; + # no msg_iter here, that is only for read-only access + $mime->walk_parts(sub { + my ($part) = $_[0]; + my $ct = $part->content_type; + if (!$ct || $ct =~ m{\btext/plain\b}i) { + my $s = eval { $part->body_str }; + if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) { + $part->body_str_set($s); + } + } + }); + my $altid = $self->{-altid}; + if ($altid) { + my $hdr = $mime->header_obj; + my $mid = $hdr->header_raw('Message-ID'); + unless (defined $mid) { + return $self->REJECT('Message-Id missing'); + } + my $n = $hdr->header_raw('X-Mail-Count'); + if (!defined($n) || $n !~ /\A\s*\d+\s*\z/) { + return $self->REJECT('X-Mail-Count not 
numeric'); + } + $mid = PublicInbox::MID::mid_clean($mid); + $altid->{mm_alt}->mid_set($n, $mid); + } + $self->ACCEPT($mime); +} + +sub delivery { + my ($self, $mime) = @_; + $self->scrub($mime); +} + +1; diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index c436742..8588f16 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -242,7 +242,7 @@ sub _scrubber_for { my ($inbox) = @_; my $f = $inbox->{filter}; if ($f && $f =~ /::/) { - my @args; + my @args = (-inbox => $inbox); # basic line splitting, only # Perhaps we can have proper quote splitting one day... ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/; -- EW