about summary refs log tree commit homepage
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/README 2
-rwxr-xr-x scripts/dc-dlvr 6
-rw-r--r-- scripts/dupe-finder 6
-rwxr-xr-x scripts/import_maildir 27
-rwxr-xr-x scripts/import_slrnspool 34
-rw-r--r-- scripts/import_vger_from_mbox 13
-rwxr-xr-x scripts/slrnspool2maildir 89
-rwxr-xr-x scripts/ssoma-replay 11
-rwxr-xr-x scripts/xhdr-num2mid 2
9 files changed, 104 insertions, 86 deletions
diff --git a/scripts/README b/scripts/README
index 3b9c37da..7ffbd93c 100644
--- a/scripts/README
+++ b/scripts/README
@@ -1,5 +1,5 @@
 This directory contains informal scripts and random tools used
-in the development of public-inbox.  Some only exist only for
+in the development of public-inbox.  Some only exist for
 historical purposes, and some may not work anymore.
 
 See the "script/" directory (not "scripts/") for supported and
diff --git a/scripts/dc-dlvr b/scripts/dc-dlvr
index 90aab73b..ef6033b9 100755
--- a/scripts/dc-dlvr
+++ b/scripts/dc-dlvr
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Copyright (C) 2008-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2008-2021 all contributors <meta@public-inbox.org>
 # License: GPL-3.0+ <http://www.gnu.org/licenses/gpl-3.0.txt>
 # This is installed as /etc/dc-dcvr on my system
 # to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION"
@@ -47,9 +47,9 @@ then
         rm_list="$rm_list $PREMSG"
         set +e
         mv -f $TMPMSG $PREMSG
-        $spamc -E --headers <$PREMSG >$TMPMSG
+        $spamc -E <$PREMSG >$TMPMSG
 else
-        $spamc -E --headers <$CDMSG >$TMPMSG
+        $spamc -E <$CDMSG >$TMPMSG
 fi
 err=$?
 
diff --git a/scripts/dupe-finder b/scripts/dupe-finder
index 6f873b6e..d9744fcb 100644
--- a/scripts/dupe-finder
+++ b/scripts/dupe-finder
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # ad-hoc tool for finding duplicates, unstable!
@@ -21,8 +21,8 @@ if (index($repo, '@') > 0) {
 }
 $ibx or die "No inbox";
 $ibx->search or die "search not available for inbox";
-my $dbh = $ibx->search->{over_ro}->connect;
-my $over = PublicInbox::Over->new($dbh->sqlite_db_filename);
+my $over = $ibx->over;
+my $dbh = $over->dbh;
 
 sub emit ($) {
         my ($nums) = @_;
diff --git a/scripts/import_maildir b/scripts/import_maildir
index fbf3f649..7228a3ad 100755
--- a/scripts/import_maildir
+++ b/scripts/import_maildir
@@ -1,22 +1,29 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2014, Eric Wong <e@80x24.org> and all contributors
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# Script to import a Maildir into a public-inbox
 =begin usage
+Ancient script to import a Maildir into a v1 public-inbox
+
+        # this is only if you want a v1 inbox
         export GIT_DIR=/path/to/your/repo.git
         export GIT_AUTHOR_EMAIL='list@example.com'
         export GIT_AUTHOR_NAME='list name'
         ./import_maildir /path/to/maildir/
+
+For v2 (strongly recommended), use:
+
+        lei convert /path/to/maildir -o /path/to/v2-inbox
+        # (and `lei daemon-kill' if you don't want the daemon to linger)
 =cut
-use strict;
-use warnings;
-use Email::Simple;
+use v5.12;
 use Date::Parse qw/str2time/;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
 use PublicInbox::Git;
 use PublicInbox::Import;
-sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) }
+sub usage {
+        open my $fh, '<', __FILE__;
+        ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
 my $dir = shift @ARGV or die usage();
 my $git_dir = `git rev-parse --git-dir`;
 chomp $git_dir;
@@ -28,7 +35,7 @@ my @msgs;
 foreach my $sub (qw(cur new)) {
         foreach my $fn (glob("$dir/$sub/*")) {
                 open my $fh, '<', $fn or next;
-                my $s = Email::Simple->new(eval { local $/; <$fh> });
+                my $s = PublicInbox::Eml->new(do { local $/; <$fh> });
                 my $date = $s->header('Date');
                 my $t = eval { str2time($date) };
                 defined $t or next;
@@ -45,7 +52,7 @@ my $im = PublicInbox::Import->new($git, $name, $email);
 while (my $ary = pop @msgs) {
         my $fn = "$dir/$ary->[1]";
         open my $fh, '<', $fn or next;
-        my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
+        my $mime = PublicInbox::Eml->new(do { local $/; <$fh> });
         $im->add($mime);
 }
 $im->done;
diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
index e569d004..81df6c2e 100755
--- a/scripts/import_slrnspool
+++ b/scripts/import_slrnspool
@@ -1,20 +1,30 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# Incremental (or one-shot) importer of a slrnpull news spool
 =begin usage
+Incremental (or one-shot) importer of a slrnpull news spool.
+
+Since the news spool can appear as an MH folder, you may also use
+lei from public-inbox 2.0+ to convert it:
+
+        lei convert mh:$SLRNPULL_ROOT/news/foo/bar -o v2:/path/to/inbox/
+        # (and `lei daemon-kill' if you don't want the daemon to linger)
+
+But if you want to use this script:
+
         export ORIGINAL_RECIPIENT=address@example.com
-        public-inbox-init $INBOX $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT
-        ./import_slrnspool SLRNPULL_ROOT/news/foo/bar
+        public-inbox-init -V2 $INBOX $INBOX_DIR $HTTP_URL $ORIGINAL_RECIPIENT
+        ./import_slrnspool $SLRNPULL_ROOT/news/foo/bar
 =cut
-use strict;
-use warnings;
+use v5.12;
 use PublicInbox::Config;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
 use PublicInbox::Import;
 use PublicInbox::Git;
-sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) }
+sub usage {
+        open my $fh, '<', __FILE__;
+        ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
 my $exit = 0;
 my $sighandler = sub { $exit = 1 };
 $SIG{INT} = $sighandler;
@@ -22,8 +32,8 @@ $SIG{TERM} = $sighandler;
 my $spool = shift @ARGV or die usage();
 my $recipient = $ENV{ORIGINAL_RECIPIENT};
 defined $recipient or die usage();
-my $config = PublicInbox::Config->new;
-my $ibx = $config->lookup($recipient);
+my $cfg = PublicInbox::Config->new;
+my $ibx = $cfg->lookup($recipient);
 my $git = $ibx->git;
 my $im;
 if ($ibx->version == 2) {
@@ -70,7 +80,7 @@ for (; $exit == 0 && $n < $max; $n++) {
         $max = $n + $max_gap;
         print STDERR $fn, "\n";
 
-        my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
+        my $mime = PublicInbox::Eml->new(do { local $/; <$fh> });
         $filter->scrub($mime);
         $im->add($mime);
 
diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox
index 0e5ba6b4..40ccf50b 100644
--- a/scripts/import_vger_from_mbox
+++ b/scripts/import_vger_from_mbox
@@ -1,21 +1,20 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use warnings;
+# consider `lei convert' instead since it handles more formats
+use v5.12;
 use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
-use PublicInbox::MIME;
 use PublicInbox::InboxWritable;
-use PublicInbox::Import;
-use PublicInbox::MDA;
 my $usage = "usage: $0 NAME EMAIL DIR <MBOX\n";
 my $dry_run;
 my $version = 2;
 my $variant = 'mboxrd';
+my $filter = 'PublicInbox::Filter::Vger';
 my %opts = (
         'n|dry-run' => \$dry_run,
         'V|version=i' => \$version,
         'F|format=s' => \$variant,
+        'filter=s' => \$filter,
 );
 GetOptions(%opts) or die $usage;
 if ($variant ne 'mboxrd' && $variant ne 'mboxo') {
@@ -29,7 +28,7 @@ my $ibx = {
         name => $name,
         version => $version,
         address => [ $email ],
-        filter => 'PublicInbox::Filter::Vger',
+        filter => $filter,
 };
 $ibx = PublicInbox::Inbox->new($ibx);
 unless ($dry_run) {
diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
index 0c21806a..ba0729ec 100755
--- a/scripts/slrnspool2maildir
+++ b/scripts/slrnspool2maildir
@@ -1,50 +1,55 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# One-off script to convert an slrnpull news spool to Maildir
 =begin usage
+One-off script to convert an slrnpull spool from gmane to Maildir
+Note: this contains Gmane-specific header munging to workaround
+the munging done by Gmane.
+
         ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
-=cut
-use strict;
-use warnings;
-use Email::Filter;
-use Email::LocalDelivery;
-sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) }
-my $spool = shift @ARGV or die usage();
-my $dir = shift @ARGV or die usage();
--d $dir or die "$dir is not a directory\n";
-$dir .= '/' unless $dir =~ m!/\z!;
-foreach my $sub (qw(cur new tmp)) {
-        my $nd = "$dir/$sub";
-        -d $nd and next;
-        mkdir $nd or die "mkdir $nd failed: $!\n";
-}
 
-foreach my $n (grep(/\d+\z/, glob("$spool/*"))) {
-        if (open my $fh, '<', $n) {
-                my $f = Email::Filter->new(data => eval { local $/; <$fh> });
-                my $s = $f->simple;
+A generic replacement w/o Gmane-specific munging could treat
+the slrnpull spool as an MH folder with lei:
 
-                # gmane rewrites Received headers, which increases spamminess
-                # Some older archives set Original-To
-                foreach my $x (qw(Received To)) {
-                        my @h = $s->header("Original-$x");
-                        if (@h) {
-                                $s->header_set($x, @h);
-                                $s->header_set("Original-$x");
-                        }
+        lei convert mh:SLRNPULL_ROOT/news/foo/bar -o /path/to/maildir
+        # (and `lei daemon-kill' if you don't want the daemon to linger)
+=cut
+use v5.12;
+use autodie;
+# warning: unstable internal APIs:
+use PublicInbox::Eml;
+use PublicInbox::LeiToMail;
+use PublicInbox::MHreader;
+use PublicInbox::IO qw(read_all);
+use File::Path qw(make_path);
+use File::Spec ();
+sub usage {
+        open my $fh, '<', __FILE__;
+        ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
+my $spool = shift @ARGV or die usage();
+my $dst = shift @ARGV or die usage();
+$dst .= '/' unless $dst =~ m!/\z!;
+File::Path::make_path(map { $dst.$_ } qw(tmp new cur));
+$dst = File::Spec->rel2abs($dst).'/';
+opendir my $cwdfh, '.';
+my $mhr = PublicInbox::MHreader->new($spool, $cwdfh);
+my $smsg;
+$mhr->mh_each_eml(sub {
+        my ($d, $n, $kw, $eml) = @_;
+        # gmane rewrites Received headers, which increases spamminess
+        # Some older archives set Original-To
+        for my $x (qw(Received To)) {
+                my @h = $eml->header_raw("Original-$x");
+                if (@h) {
+                        $eml->header_set($x, @h);
+                        $eml->header_set("Original-$x");
                 }
-
-                # triggers for the SA HEADER_SPAM rule
-                foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-                # appears to be an old gmane bug:
-                $s->header_set('connect()');
-
-                $f->exit(0);
-                $f->accept($dir);
-        } else {
-                warn "Failed to open $n: $!\n";
         }
-}
+        # `Approved' triggers the SA HEADER_SPAM rule
+        # `connect()' appears to be an old gmane bug:
+        $eml->header_set($_) for ('Approved', 'connect()');
+        my $buf = $eml->as_string;
+        $smsg->{blob} = $n;
+        PublicInbox::LeiToMail::_buf2maildir($dst, \$buf, $smsg, 'new/');
+});
diff --git a/scripts/ssoma-replay b/scripts/ssoma-replay
index 46b15d7e..70d0081d 100755
--- a/scripts/ssoma-replay
+++ b/scripts/ssoma-replay
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # A work-in-progress, but one day I hope this script is no longer
@@ -29,11 +29,8 @@ use strict;
 use Email::Simple;
 use URI::Escape qw/uri_escape_utf8/;
 use File::Temp qw/tempfile/;
-my ($fh, $filename) = tempfile('ssoma-replay-XXXXXXXX', TMPDIR => 1);
-my $msg = eval {
-        local $/;
-        Email::Simple->new(<STDIN>);
-};
+my ($fh, $filename) = tempfile('ssoma-replay-XXXX', TMPDIR => 1);
+my $msg = Email::Simple->new(do { local $/; <STDIN> });
 select $fh;
 
 # Note: the archive URL makes assumptions about where the
@@ -52,7 +49,7 @@ if (defined $list_id) {
         if (defined $domain) {
                 $archive_url = "https://$domain/$user/";
                 my $mid = $header_obj->header('Message-Id');
-                if ($mid =~ /\A<(.+)>\z/) {
+                if ($mid =~ /<[ \t]*([^>]+)?[ \t]*>/s) {
                         $mid = $1;
                 }
                 $mid = uri_escape_utf8($mid,
diff --git a/scripts/xhdr-num2mid b/scripts/xhdr-num2mid
index 19f5d0e0..3ca33f5d 100755
--- a/scripts/xhdr-num2mid
+++ b/scripts/xhdr-num2mid
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # Useful for mapping article IDs from existing NNTP servers to MIDs
 use strict;