From 7715c70e5a8667fbd9eade0cffb6ab05a714dd5d Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 28 Dec 2018 19:17:36 +0000
Subject: add filter for gmane archives

Extracted from import_slrnspool, since some spools get converted
to mbox or what not.
---
 scripts/import_slrnspool | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

(limited to 'scripts/import_slrnspool')

diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
index 1a7d77a4..e7ea45c1 100755
--- a/scripts/import_slrnspool
+++ b/scripts/import_slrnspool
@@ -35,6 +35,9 @@ if (($ibx->{version} || 1) == 2) {
 					$ibx->{-primary_address});
 }
 
+$ibx->{filter} ||= 'PublicInbox::Filter::Gmane';
+my $filter = $ibx->filter;
+
 sub key {
 	"publicinbox.$ibx->{name}.importslrnspoolstate";
 }
@@ -68,26 +71,7 @@ for (; $exit == 0 && $n < $max; $n++) {
 	print STDERR $fn, "\n";
 
 	my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
-	my $hdr = $mime->header_obj;
-
-	# gmane rewrites Received headers, which increases spamminess
-	# Some older archives set Original-To
-	foreach my $x (qw(Received To)) {
-		my @h = $hdr->header_raw("Original-$x");
-		if (@h) {
-			$hdr->header_set($x, @h);
-			$hdr->header_set("Original-$x");
-		}
-	}
-
-	# Approved triggers for the SA HEADER_SPAM rule,
-	# X-From is gmane specific
-	foreach my $drop (qw(Approved X-From)) {
-		$hdr->header_set($drop);
-	}
-
-	# appears to be an old gmane bug:
-	$hdr->header_set('connect()');
+	$filter->scrub($mime);
 	$im->add($mime);
 
 	$ok = $n + 1;
-- 
cgit v1.2.3-24-ge0c7