From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2BBB41F45B for ; Fri, 28 Dec 2018 19:17:36 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] add filter for gmane archives Date: Fri, 28 Dec 2018 19:17:36 +0000 Message-Id: <20181228191736.3627-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Extracted from import_slrnspool, since some spools get converted to mbox or what not. --- lib/PublicInbox/Filter/Gmane.pm | 41 +++++++++++++++++++++++++++++++++ scripts/import_slrnspool | 24 ++++--------------- 2 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 lib/PublicInbox/Filter/Gmane.pm diff --git a/lib/PublicInbox/Filter/Gmane.pm b/lib/PublicInbox/Filter/Gmane.pm new file mode 100644 index 0000000..f9042df --- /dev/null +++ b/lib/PublicInbox/Filter/Gmane.pm @@ -0,0 +1,41 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ + +# Filter for importing some archives from gmane +package PublicInbox::Filter::Gmane; +use base qw(PublicInbox::Filter::Base); +use strict; +use warnings; + +sub scrub { + my ($self, $mime) = @_; + my $hdr = $mime->header_obj; + + # gmane rewrites Received headers, which increases spamminess + # Some older archives set Original-To + foreach my $x (qw(Received To)) { + my @h = $hdr->header_raw("Original-$x"); + if (@h) { + $hdr->header_set($x, @h); + $hdr->header_set("Original-$x"); + } + } + + # Approved triggers for the SA HEADER_SPAM rule, + # X-From is gmane specific + foreach my $drop (qw(Approved X-From)) { + $hdr->header_set($drop); + } + + # appears to be an old gmane bug: + $hdr->header_set('connect()'); + + $self->ACCEPT($mime); +} + +sub delivery { + my ($self, $mime) = @_; + $self->scrub($mime); +} + +1; diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool index 1a7d77a..e7ea45c 100755 --- a/scripts/import_slrnspool +++ b/scripts/import_slrnspool @@ -35,6 +35,9 @@ if (($ibx->{version} || 1) == 2) { $ibx->{-primary_address}); } +$ibx->{filter} ||= 'PublicInbox::Filter::Gmane'; +my $filter = $ibx->filter; + sub key { "publicinbox.$ibx->{name}.importslrnspoolstate"; } @@ -68,26 +71,7 @@ for (; $exit == 0 && $n < $max; $n++) { print STDERR $fn, "\n"; my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> }); - my $hdr = $mime->header_obj; - - # gmane rewrites Received headers, which increases spamminess - # Some older archives set Original-To - foreach my $x (qw(Received To)) { - my @h = $hdr->header_raw("Original-$x"); - if (@h) { - $hdr->header_set($x, @h); - $hdr->header_set("Original-$x"); - } - } - - # Approved triggers for the SA HEADER_SPAM rule, - # X-From is gmane specific - foreach my $drop (qw(Approved X-From)) { - $hdr->header_set($drop); - } - - # appears to be an old gmane bug: - $hdr->header_set('connect()'); + $filter->scrub($mime); $im->add($mime); $ok = $n + 1; -- EW