From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net
X-Spam-Level:
X-Spam-ASN:
X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00,
	URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2
X-Original-To: meta@public-inbox.org
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 80C471F7B4;
	Sun, 11 Jan 2015 04:32:21 +0000 (UTC)
Date: Sun, 11 Jan 2015 04:32:21 +0000
From: Eric Wong
To: meta@public-inbox.org
Subject: [PATCH] scripts/import_slrnspool: new incremental importer
Message-ID: <20150111043221.GA24712@dcvr.yhbt.net>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
List-Id:

This allows incremental imports of slrn spools, ideal for tracking
lists via gmane.
---
 scripts/import_slrnspool | 116 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100755 scripts/import_slrnspool

diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
new file mode 100755
index 0000000..560c08c
--- /dev/null
+++ b/scripts/import_slrnspool
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2015, all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Incremental (or one-shot) importer of a slrnpull news spool
+#
+# Walks the numbered article files in the spool directory given on the
+# command line, adjusts a few headers, and pipes each article to
+# public-inbox-mda.  The number of the next article to import is saved
+# with "git config" after every article so that a later run resumes
+# where the previous one stopped.
+=begin usage
+	export ORIGINAL_RECIPIENT=address@example.com
+	public-inbox-init $LISTNAME $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT
+	./import_slrnspool SLRNPULL_ROOT/news/foo/bar
+=cut
+use strict;
+use warnings;
+use PublicInbox::Config;
+use Email::Filter;
+use Email::LocalDelivery;
+
+# Returns the usage message: "Usage:\n" followed by the lines of the
+# "=begin usage" block of this script.  The script is read directly
+# instead of shelling out to head(1), so a script path containing
+# spaces or shell metacharacters is handled correctly.
+sub usage {
+	my $text = "Usage:\n";
+	open(my $self_fh, '<', $0) or return $text;
+	my $in_usage = 0;
+	while (my $line = <$self_fh>) {
+		if ($line =~ /\A=begin\s+usage\b/) {
+			$in_usage = 1;
+			next;
+		}
+		last if $in_usage && $line =~ /\A=cut\b/;
+		$text .= $line if $in_usage;
+	}
+	close($self_fh);
+	return $text;
+}
+
+my $spool = shift @ARGV or die usage();
+my $recipient = $ENV{ORIGINAL_RECIPIENT};
+defined $recipient or die usage();
+my @mda = qw(public-inbox-mda);
+my $config = PublicInbox::Config->new;
+my $cfg = $config->lookup($recipient);
+if (!defined $cfg) {
+	print STDERR "no publicinbox config found for $recipient\n";
+	exit(1);
+}
+
+# Returns the article number to start importing from: the
+# importslrnspoolstate value of the given config entry, or 1 when
+# that value is unset (or false).
+sub get_min {
+	my ($cfg) = @_;
+	return $cfg->{importslrnspoolstate} || 1;
+}
+
+# Stores $num as publicinbox.<listname>.importslrnspoolstate by running
+# "git config" on PublicInbox::Config->default_file.
+# Dies if the command does not exit successfully.
+sub set_min {
+	my ($cfg, $num) = @_;
+	my $f = PublicInbox::Config->default_file;
+	my @cmd = (qw/git config/, "--file=$f",
+		"publicinbox.$cfg->{listname}.importslrnspoolstate", $num);
+	system(@cmd) == 0 or die join(' ', @cmd). " failed: $?\n";
+}
+
+# Resume from the saved state.  $cfg must be passed explicitly: get_min
+# reads its own argument, so calling it with no arguments always
+# yielded 1 and restarted the import from the first article.
+my $n = get_min($cfg);
+
+# The scan stops when $n reaches $max; every article that can be opened
+# moves $max to $max_gap past that article's number.
+my $max_gap = 10000;
+my $max = $n + $max_gap;
+
+for (; $n < $max; $n++) {
+	my $fn = "$spool/$n";
+	print STDERR $fn, "\n";
+	open(my $fh, '<', $fn) or next;
+	$max = $n + $max_gap;
+
+	# Slurp in scalar context so exactly one value follows "data =>",
+	# even for an empty file.
+	my $raw = do { local $/; <$fh> };
+	close($fh);
+	my $f = Email::Filter->new(data => $raw);
+	my $s = $f->simple;
+
+	# gmane rewrites Received headers, which increases spamminess
+	my @h = $s->header('Original-Received');
+	if (@h) {
+		$s->header_set('Received', @h);
+		$s->header_set('Original-Received');
+	}
+
+	# triggers for the SA HEADER_SPAM rule
+	foreach my $drop (qw(Approved)) { $s->header_set($drop) }
+
+	# appears to be an old gmane bug:
+	$s->header_set('connect()');
+
+	# Clear Email::Filter's exit flag so pipe() returns to this loop
+	# rather than ending the process after the first delivery.
+	$f->exit(0);
+	$f->pipe(@mda);
+
+	# Record the next article number so a rerun resumes after this one.
+	set_min($cfg, $n + 1);
+}
-- 
EW