From 7715c70e5a8667fbd9eade0cffb6ab05a714dd5d Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 28 Dec 2018 19:17:36 +0000
Subject: add filter for gmane archives

Extracted from import_slrnspool, since some spools get converted
to mbox or what not.
---
 MANIFEST                        |  1 +
 lib/PublicInbox/Filter/Gmane.pm | 41 +++++++++++++++++++++++++++++++++++++++++
 scripts/import_slrnspool        | 24 ++++--------------------
 3 files changed, 46 insertions(+), 20 deletions(-)
 create mode 100644 lib/PublicInbox/Filter/Gmane.pm

diff --git a/MANIFEST b/MANIFEST
index a455dbaf..dc851676 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -58,6 +58,7 @@ lib/PublicInbox/EvCleanup.pm
 lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
+lib/PublicInbox/Filter/Gmane.pm
 lib/PublicInbox/Filter/Mirror.pm
 lib/PublicInbox/Filter/RubyLang.pm
 lib/PublicInbox/Filter/SubjectTag.pm
diff --git a/lib/PublicInbox/Filter/Gmane.pm b/lib/PublicInbox/Filter/Gmane.pm
new file mode 100644
index 00000000..f9042dfc
--- /dev/null
+++ b/lib/PublicInbox/Filter/Gmane.pm
@@ -0,0 +1,41 @@
+# Copyright (C) 2018 all contributors
+# License: AGPL-3.0+
+
+# Filter for importing some archives from gmane
+package PublicInbox::Filter::Gmane;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+sub scrub {
+	my ($self, $mime) = @_;
+	my $hdr = $mime->header_obj;
+
+	# gmane rewrites Received headers, which increases spamminess
+	# Some older archives set Original-To
+	foreach my $x (qw(Received To)) {
+		my @h = $hdr->header_raw("Original-$x");
+		if (@h) {
+			$hdr->header_set($x, @h);
+			$hdr->header_set("Original-$x");
+		}
+	}
+
+	# Approved triggers for the SA HEADER_SPAM rule,
+	# X-From is gmane specific
+	foreach my $drop (qw(Approved X-From)) {
+		$hdr->header_set($drop);
+	}
+
+	# appears to be an old gmane bug:
+	$hdr->header_set('connect()');
+
+	$self->ACCEPT($mime);
+}
+
+sub delivery {
+	my ($self, $mime) = @_;
+	$self->scrub($mime);
+}
+
+1;
diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
index 1a7d77a4..e7ea45c1 100755
--- a/scripts/import_slrnspool
+++ b/scripts/import_slrnspool
@@ -35,6 +35,9 @@ if (($ibx->{version} || 1) == 2) {
 					$ibx->{-primary_address});
 }
 
+$ibx->{filter} ||= 'PublicInbox::Filter::Gmane';
+my $filter = $ibx->filter;
+
 sub key {
 	"publicinbox.$ibx->{name}.importslrnspoolstate";
 }
@@ -68,26 +71,7 @@ for (; $exit == 0 && $n < $max; $n++) {
 	print STDERR $fn, "\n";
 
 	my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
-	my $hdr = $mime->header_obj;
-
-	# gmane rewrites Received headers, which increases spamminess
-	# Some older archives set Original-To
-	foreach my $x (qw(Received To)) {
-		my @h = $hdr->header_raw("Original-$x");
-		if (@h) {
-			$hdr->header_set($x, @h);
-			$hdr->header_set("Original-$x");
-		}
-	}
-
-	# Approved triggers for the SA HEADER_SPAM rule,
-	# X-From is gmane specific
-	foreach my $drop (qw(Approved X-From)) {
-		$hdr->header_set($drop);
-	}
-
-	# appears to be an old gmane bug:
-	$hdr->header_set('connect()');
+	$filter->scrub($mime);
 	$im->add($mime);
 
 	$ok = $n + 1;
-- 
cgit v1.2.3-24-ge0c7