From 599246c3aa88ca925f854281297410b73fd6f129 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Mon, 21 Apr 2014 08:07:53 +0000
Subject: new scripts for importing slrn spools and maildirs

The old import_gmane_spool script was inflexible, since we may
import from maildir archives as well, so get everything into
maildir, first.
---
 scripts/import_gmane_spool | 61 ----------------------------------------------
 scripts/import_maildir     | 52 +++++++++++++++++++++++++++++++++++++++
 scripts/slrnspool2maildir  | 45 ++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 61 deletions(-)
 delete mode 100755 scripts/import_gmane_spool
 create mode 100755 scripts/import_maildir
 create mode 100755 scripts/slrnspool2maildir

(limited to 'scripts')

diff --git a/scripts/import_gmane_spool b/scripts/import_gmane_spool
deleted file mode 100755
index 3cda0bf6..00000000
--- a/scripts/import_gmane_spool
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# One-off script to convert an slrnpull news spool from gmane, usage:
-=begin usage
-	mkdir -p $HOME/.public-inbox
-	MAINREPO=/path/to/your/repo.git
-	export ORIGINAL_RECIPIENT='list@example.com'
-	git init --bare $MAINREPO
-	export GIT_CONFIG=$HOME/.public-inbox/config
-	git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
-	git config publicinbox.$LISTNAME.mainrepo $MAINREPO
-	unset GIT_CONFIG
-	./import_gmane_spool SLRNPULL_ROOT/news/foo/bar
-=cut
-use strict;
-use warnings;
-use Email::Simple;
-use PublicInbox::Filter;
-use PublicInbox::Config;
-use IPC::Run qw(run);
-sub usage { "Usage:\n".join("",grep(/\t/, `head -n 24 $0`)) }
-my $spool = shift @ARGV or die usage();
-defined $ENV{ORIGINAL_RECIPIENT} or die usage();
-my @args = ('public-inbox-mda');
-
-chdir $spool or die "chdir $spool failed: $!\n";
-
-foreach my $n (sort { $a <=> $b } grep(/\d+\z/, glob("*"))) {
-	if (open my $fh, '<', $n) {
-		my $s = eval {
-			local $/;
-			Email::Simple->new(<$fh>);
-		};
-
-		# gmane rewrites Received headers, which increases spamminess
-		my @h = $s->header("Original-Received");
-		if (@h) {
-			$s->header_set("Received", @h);
-			$s->header_set("Original-Received");
-		}
-
-		# this is needed for "git rev-list --since=..." to work
-		local $ENV{GIT_COMMITTER_DATE} = $s->header('Date');
-
-		# triggers for the SA HEADER_SPAM rule
-		foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-		# appears to be an old gmane bug:
-		$s->header_set("connect()");
-
-		my $orig = $s->as_string;
-		close $fh or die "close failed: $!\n";
-		eval { run(\@args, \$orig) };
-		die "fail $n: $?\n" if $?;
-		die "fail $n: $@\n" if $@;
-	} else {
-		warn "Failed to open $n: $!\n";
-	}
-}
diff --git a/scripts/import_maildir b/scripts/import_maildir
new file mode 100755
index 00000000..aaabe80d
--- /dev/null
+++ b/scripts/import_maildir
@@ -0,0 +1,52 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014, Eric Wong and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Script to import a Maildir into a public-inbox
+=begin usage
+	mkdir -p $HOME/.public-inbox
+	MAINREPO=/path/to/your/repo.git
+	export ORIGINAL_RECIPIENT='list@example.com'
+	git init --bare $MAINREPO
+	export GIT_CONFIG=$HOME/.public-inbox/config
+	git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
+	git config publicinbox.$LISTNAME.mainrepo $MAINREPO
+	unset GIT_CONFIG
+	./import_maildir /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Date::Parse qw/str2time/;
+use IPC::Run qw/run/;
+sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) }
+my $dir = shift @ARGV or die usage();
+defined $ENV{ORIGINAL_RECIPIENT} or die usage();
+my @mda = qw(public-inbox-mda);
+foreach my $sub (qw(cur new tmp)) {
+	-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
+}
+
+my @msgs;
+foreach my $sub (qw(cur new)) {
+	foreach my $fn (glob("$dir/$sub/*")) {
+		open my $fh, '<', $fn or next;
+		my $f = Email::Filter->new(data => eval { local $/; <$fh> });
+		my $date = $f->simple->header('Date');
+		my $t = eval { str2time($date) };
+		$f->exit(0); # clear the exit flag so ignore() returns here
+		$f->ignore;
+		defined $t or next; # skip messages without a parseable Date
+		my @fn = split(m!/!, $fn);
+		push @msgs, [ $t, "$sub/" . pop @fn, $date ];
+	}
+}
+
+@msgs = sort { $b->[0] <=> $a->[0] } @msgs; # newest first; pop => oldest
+while (my $ary = pop @msgs) {
+	my $fn = "$dir/$ary->[1]";
+	local $ENV{GIT_COMMITTER_DATE} = $ary->[2]; # this preserves timezone
+	run(\@mda, '<', $fn) or warn "public-inbox-mda failed on $fn: $?\n";
+}
+
+1;
diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
new file mode 100755
index 00000000..fb7a6f25
--- /dev/null
+++ b/scripts/slrnspool2maildir
@@ -0,0 +1,45 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013, Eric Wong and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# One-off script to convert an slrnpull news spool to Maildir
+=begin usage
+	./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Email::LocalDelivery;
+sub usage { "Usage:\n".join('',grep(/\t/, `head -n 8 $0`)) }
+my $spool = shift @ARGV or die usage();
+my $dir = shift @ARGV or die usage();
+-d $dir or die "$dir is not a directory\n";
+$dir .= '/' unless $dir =~ m!/\z!;
+foreach my $sub (qw(cur new tmp)) {
+	-d "$dir/$sub" or mkdir "$dir/$sub" or die "mkdir $dir/$sub failed: $!\n";
+}
+
+foreach my $n (grep(/\d+\z/, glob("$spool/*"))) {
+	if (open my $fh, '<', $n) {
+		my $f = Email::Filter->new(data => eval { local $/; <$fh> });
+		my $s = $f->simple;
+
+		# gmane rewrites Received headers, which increases spamminess
+		my @h = $s->header('Original-Received');
+		if (@h) {
+			$s->header_set('Received', @h);
+			$s->header_set('Original-Received');
+		}
+
+		# triggers for the SA HEADER_SPAM rule
+		foreach my $drop (qw(Approved)) { $s->header_set($drop) }
+
+		# appears to be an old gmane bug:
+		$s->header_set('connect()');
+
+		$f->exit(0); # clear the exit flag so accept() returns here
+		$f->accept($dir);
+	} else {
+		warn "Failed to open $n: $!\n";
+	}
+}
--
cgit v1.2.3-24-ge0c7