diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/README | 2 | ||||
-rwxr-xr-x | scripts/dc-dlvr | 6 | ||||
-rw-r--r-- | scripts/dupe-finder | 6 | ||||
-rwxr-xr-x | scripts/import_maildir | 27 | ||||
-rwxr-xr-x | scripts/import_slrnspool | 34 | ||||
-rw-r--r-- | scripts/import_vger_from_mbox | 13 | ||||
-rwxr-xr-x | scripts/slrnspool2maildir | 89 | ||||
-rwxr-xr-x | scripts/ssoma-replay | 11 | ||||
-rwxr-xr-x | scripts/xhdr-num2mid | 2 |
9 files changed, 104 insertions, 86 deletions
diff --git a/scripts/README b/scripts/README index 3b9c37da..7ffbd93c 100644 --- a/scripts/README +++ b/scripts/README @@ -1,5 +1,5 @@ This directory contains informal scripts and random tools used -in the development of public-inbox. Some only exist only for +in the development of public-inbox. Some only exist for historical purposes, and some may not work anymore. See the "script/" directory (not "scripts/") for supported and diff --git a/scripts/dc-dlvr b/scripts/dc-dlvr index 90aab73b..ef6033b9 100755 --- a/scripts/dc-dlvr +++ b/scripts/dc-dlvr @@ -1,5 +1,5 @@ #!/bin/sh -# Copyright (C) 2008-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2008-2021 all contributors <meta@public-inbox.org> # License: GPL-3.0+ <http://www.gnu.org/licenses/gpl-3.0.txt> # This is installed as /etc/dc-dcvr on my system # to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION" @@ -47,9 +47,9 @@ then rm_list="$rm_list $PREMSG" set +e mv -f $TMPMSG $PREMSG - $spamc -E --headers <$PREMSG >$TMPMSG + $spamc -E <$PREMSG >$TMPMSG else - $spamc -E --headers <$CDMSG >$TMPMSG + $spamc -E <$CDMSG >$TMPMSG fi err=$? diff --git a/scripts/dupe-finder b/scripts/dupe-finder index 6f873b6e..d9744fcb 100644 --- a/scripts/dupe-finder +++ b/scripts/dupe-finder @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # ad-hoc tool for finding duplicates, unstable! @@ -21,8 +21,8 @@ if (index($repo, '@') > 0) { } $ibx or die "No inbox"; $ibx->search or die "search not available for inbox"; -my $dbh = $ibx->search->{over_ro}->connect; -my $over = PublicInbox::Over->new($dbh->sqlite_db_filename); +my $over = $ibx->over; +my $dbh = $over->dbh; sub emit ($) { my ($nums) = @_; diff --git a/scripts/import_maildir b/scripts/import_maildir index fbf3f649..7228a3ad 100755 --- a/scripts/import_maildir +++ b/scripts/import_maildir @@ -1,22 +1,29 @@ #!/usr/bin/perl -w -# Copyright (C) 2014, Eric Wong <e@80x24.org> and all contributors +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# -# Script to import a Maildir into a public-inbox =begin usage +Ancient script to import a Maildir into a v1 public-inbox + + # this is only if you want a v1 inbox export GIT_DIR=/path/to/your/repo.git export GIT_AUTHOR_EMAIL='list@example.com' export GIT_AUTHOR_NAME='list name' ./import_maildir /path/to/maildir/ + +For v2 (strongly recommended), use: + + lei convert /path/to/maildir -o /path/to/v2-inbox + # (and `lei daemon-kill' if you don't want the daemon to linger) =cut -use strict; -use warnings; -use Email::Simple; +use v5.12; use Date::Parse qw/str2time/; -use PublicInbox::MIME; +use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; -sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) } +sub usage { + open my $fh, '<', __FILE__; + ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>); +} my $dir = shift @ARGV or die usage(); my $git_dir = `git rev-parse --git-dir`; chomp $git_dir; @@ -28,7 +35,7 @@ my @msgs; foreach my $sub (qw(cur new)) { foreach my $fn (glob("$dir/$sub/*")) { open my $fh, '<', $fn or next; - my $s = Email::Simple->new(eval { local $/; <$fh> }); + my $s = PublicInbox::Eml->new(do { local $/; <$fh> }); my $date = $s->header('Date'); my $t = eval { str2time($date) }; defined $t or next; @@ -45,7 +52,7 @@ my $im = PublicInbox::Import->new($git, $name, $email); while (my $ary = pop @msgs) { my $fn = "$dir/$ary->[1]"; open my $fh, '<', $fn or next; - my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> }); + my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $im->add($mime); } $im->done; diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool index e569d004..81df6c2e 100755 --- a/scripts/import_slrnspool +++ b/scripts/import_slrnspool @@ -1,20 +1,30 @@ #!/usr/bin/perl -w -# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# -# Incremental (or one-shot) importer of a slrnpull news spool =begin usage +Incremental (or one-shot) importer of a slrnpull news spool. + +Since the news spool can appear as an MH folder, you may also use +lei from public-inbox 2.0+ to convert it: + + lei convert mh:$SLRNPULL_ROOT/news/foo/bar -o v2:/path/to/inbox/ + # (and `lei daemon-kill' if you don't want the daemon to linger) + +But if you want to use this script: + export ORIGINAL_RECIPIENT=address@example.com - public-inbox-init $INBOX $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT - ./import_slrnspool SLRNPULL_ROOT/news/foo/bar + public-inbox-init -V2 $INBOX $INBOX_DIR $HTTP_URL $ORIGINAL_RECIPIENT + ./import_slrnspool $SLRNPULL_ROOT/news/foo/bar =cut -use strict; -use warnings; +use v5.12; use PublicInbox::Config; -use PublicInbox::MIME; +use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::Git; -sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) } +sub usage { + open my $fh, '<', __FILE__; + ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>); +} my $exit = 0; my $sighandler = sub { $exit = 1 }; $SIG{INT} = $sighandler; @@ -22,8 +32,8 @@ $SIG{TERM} = $sighandler; my $spool = shift @ARGV or die usage(); my $recipient = $ENV{ORIGINAL_RECIPIENT}; defined $recipient or die usage(); -my $config = PublicInbox::Config->new; -my $ibx = $config->lookup($recipient); +my $cfg = PublicInbox::Config->new; +my $ibx = $cfg->lookup($recipient); my $git = $ibx->git; my $im; if ($ibx->version == 2) { @@ -70,7 +80,7 @@ for (; $exit == 0 && $n < $max; $n++) { $max = $n + $max_gap; print STDERR $fn, "\n"; - my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> }); + my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $filter->scrub($mime); $im->add($mime); diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox index 0e5ba6b4..40ccf50b 100644 --- a/scripts/import_vger_from_mbox +++ b/scripts/import_vger_from_mbox @@ -1,21 +1,20 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use warnings; +# consider `lei convert' instead since it handles more formats +use v5.12; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; -use PublicInbox::MIME; use PublicInbox::InboxWritable; -use PublicInbox::Import; -use PublicInbox::MDA; my $usage = "usage: $0 NAME EMAIL DIR <MBOX\n"; my $dry_run; my $version = 2; my $variant = 'mboxrd'; +my $filter = 'PublicInbox::Filter::Vger'; my %opts = ( 'n|dry-run' => \$dry_run, 'V|version=i' => \$version, 'F|format=s' => \$variant, + 'filter=s' => \$filter, ); GetOptions(%opts) or die $usage; if ($variant ne 'mboxrd' && $variant ne 'mboxo') { @@ -29,7 +28,7 @@ my $ibx = { name => $name, version => $version, address => [ $email ], - filter => 'PublicInbox::Filter::Vger', + filter => $filter, }; $ibx = PublicInbox::Inbox->new($ibx); unless ($dry_run) { diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir index 0c21806a..ba0729ec 100755 --- a/scripts/slrnspool2maildir +++ b/scripts/slrnspool2maildir @@ -1,50 +1,55 @@ #!/usr/bin/perl -w -# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# -# One-off script to convert an slrnpull news spool to Maildir =begin usage +One-off script to convert an slrnpull spool from gmane to Maildir +Note: this contains Gmane-specific header munging to workaround +the munging done by Gmane. + ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/ -=cut -use strict; -use warnings; -use Email::Filter; -use Email::LocalDelivery; -sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) } -my $spool = shift @ARGV or die usage(); -my $dir = shift @ARGV or die usage(); --d $dir or die "$dir is not a directory\n"; -$dir .= '/' unless $dir =~ m!/\z!; -foreach my $sub (qw(cur new tmp)) { - my $nd = "$dir/$sub"; - -d $nd and next; - mkdir $nd or die "mkdir $nd failed: $!\n"; -} -foreach my $n (grep(/\d+\z/, glob("$spool/*"))) { - if (open my $fh, '<', $n) { - my $f = Email::Filter->new(data => eval { local $/; <$fh> }); - my $s = $f->simple; +A generic replacement w/o Gmane-specific munging could treat +the slrnpull spool as an MH folder with lei: - # gmane rewrites Received headers, which increases spamminess - # Some older archives set Original-To - foreach my $x (qw(Received To)) { - my @h = $s->header("Original-$x"); - if (@h) { - $s->header_set($x, @h); - $s->header_set("Original-$x"); - } + lei convert mh:SLRNPULL_ROOT/news/foo/bar -o /path/to/maildir + # (and `lei daemon-kill' if you don't want the daemon to linger) +=cut +use v5.12; +use autodie; +# warning: unstable internal APIs: +use PublicInbox::Eml; +use PublicInbox::LeiToMail; +use PublicInbox::MHreader; +use PublicInbox::IO qw(read_all); +use File::Path qw(make_path); +use File::Spec (); +sub usage { + open my $fh, '<', __FILE__; + ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>); +} +my $spool = shift @ARGV or die usage(); +my $dst = shift @ARGV or die usage(); +$dst .= '/' unless $dst =~ m!/\z!; +File::Path::make_path(map { $dst.$_ } qw(tmp new cur)); +$dst = File::Spec->rel2abs($dst).'/'; +opendir my $cwdfh, '.'; +my $mhr = PublicInbox::MHreader->new($spool, $cwdfh); +my $smsg; +$mhr->mh_each_eml(sub { + my ($d, $n, $kw, $eml) = @_; + # gmane rewrites Received headers, which increases spamminess + # Some older archives set Original-To + for my $x (qw(Received To)) { + my @h = $eml->header_raw("Original-$x"); + if (@h) { + $eml->header_set($x, @h); + $eml->header_set("Original-$x"); } - - # triggers for the SA HEADER_SPAM rule - foreach my $drop (qw(Approved)) { $s->header_set($drop) } - - # appears to be an old gmane bug: - $s->header_set('connect()'); - - $f->exit(0); - $f->accept($dir); - } else { - warn "Failed to open $n: $!\n"; } -} + # `Approved' triggers the SA HEADER_SPAM rule + # `connect()' appears to be an old gmane bug: + $eml->header_set($_) for ('Approved', 'connect()'); + my $buf = $eml->as_string; + $smsg->{blob} = $n; + PublicInbox::LeiToMail::_buf2maildir($dst, \$buf, $smsg, 'new/'); +}); diff --git a/scripts/ssoma-replay b/scripts/ssoma-replay index 46b15d7e..70d0081d 100755 --- a/scripts/ssoma-replay +++ b/scripts/ssoma-replay @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # A work-in-progress, but one day I hope this script is no longer @@ -29,11 +29,8 @@ use strict; use Email::Simple; use URI::Escape qw/uri_escape_utf8/; use File::Temp qw/tempfile/; -my ($fh, $filename) = tempfile('ssoma-replay-XXXXXXXX', TMPDIR => 1); -my $msg = eval { - local $/; - Email::Simple->new(<STDIN>); -}; +my ($fh, $filename) = tempfile('ssoma-replay-XXXX', TMPDIR => 1); +my $msg = Email::Simple->new(do { local $/; <STDIN> }); select $fh; # Note: the archive URL makes assumptions about where the @@ -52,7 +49,7 @@ if (defined $list_id) { if (defined $domain) { $archive_url = "https://$domain/$user/"; my $mid = $header_obj->header('Message-Id'); - if ($mid =~ /\A<(.+)>\z/) { + if ($mid =~ /<[ \t]*([^>]+)?[ \t]*>/s) { $mid = $1; } $mid = uri_escape_utf8($mid, diff --git a/scripts/xhdr-num2mid b/scripts/xhdr-num2mid index 19f5d0e0..3ca33f5d 100755 --- a/scripts/xhdr-num2mid +++ b/scripts/xhdr-num2mid @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Useful for mapping article IDs from existing NNTP servers to MIDs use strict; |