about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
Diffstat (limited to 'script')
-rwxr-xr-x script/lei 144
-rwxr-xr-x script/public-inbox-cindex 102
-rwxr-xr-x script/public-inbox-clone 70
-rwxr-xr-x script/public-inbox-compact 53
-rwxr-xr-x script/public-inbox-convert 176
-rwxr-xr-x script/public-inbox-edit 73
-rwxr-xr-x script/public-inbox-extindex 91
-rwxr-xr-x script/public-inbox-fetch 39
-rwxr-xr-x script/public-inbox-httpd 56
-rwxr-xr-x script/public-inbox-imapd 8
-rwxr-xr-x script/public-inbox-index 155
-rwxr-xr-x script/public-inbox-init 240
-rwxr-xr-x [-rw-r--r--] script/public-inbox-learn 59
-rwxr-xr-x script/public-inbox-mda 63
-rwxr-xr-x script/public-inbox-netd 6
-rwxr-xr-x script/public-inbox-nntpd 15
-rwxr-xr-x script/public-inbox-pop3d 8
-rwxr-xr-x script/public-inbox-purge 28
-rwxr-xr-x script/public-inbox-watch 72
-rwxr-xr-x script/public-inbox-xcpdb 77
-rwxr-xr-x script/public-inbox.cgi 9
21 files changed, 1164 insertions, 380 deletions
diff --git a/script/lei b/script/lei
new file mode 100755
index 00000000..087afc33
--- /dev/null
+++ b/script/lei
@@ -0,0 +1,144 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un);
+use PublicInbox::CmdIPC4;
+my $narg = 5; # embedded in the socket name and handed to lazy_start below
+my $sock;
+my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4');
+my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { # 1st choice
+        require PublicInbox::Syscall; # 2nd choice
+        $recv_cmd = PublicInbox::Syscall->can('recv_cmd4');
+        PublicInbox::Syscall->can('send_cmd4');
+} // do { # last resort; PERL_INLINE_DIRECTORY is set and created first
+        my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
+                        $ENV{XDG_CACHE_HOME} //
+                        ( ($ENV{HOME} // '/nonexistent').'/.cache' )
+                        ).'/public-inbox/inline-c';
+        if (!-d $inline_dir) {
+                require File::Path;
+                File::Path::make_path($inline_dir);
+        }
+        require PublicInbox::Spawn; # takes ~50ms even if built *sigh*
+        $recv_cmd = PublicInbox::Spawn->can('recv_cmd4');
+        PublicInbox::Spawn->can('send_cmd4');
+} // die 'please install Inline::C or Socket::MsgHdr';
+
+my %pids;
+my $sigchld = sub {
+        my $flags = scalar(@_) ? POSIX::WNOHANG() : 0;
+        for my $pid (keys %pids) {
+                delete($pids{$pid}) if waitpid($pid, $flags) == $pid;
+        }
+};
+my @parent; # set to the original PID once we continue in a forked child
+my $exec_cmd = sub { # args: \@fds, argv count, argv..., then KEY=VALUE env
+        my ($fds, $argc, @argv) = @_;
+        my $parent = $$;
+        require POSIX;
+        my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO});
+        my @rdr;
+        for my $fd (@$fds) {
+                open(my $newfh, '+<&=', $fd) or die "open +<&=$fd: $!";
+                push @rdr, shift(@old), $newfh; # undef past the 3rd FD
+        }
+        my $do_exec = sub { # never returns: exec or _exit(1)
+                my @non_std; # ex. $op_p from lei_edit_search
+                while (my ($io, $newfh) = splice(@rdr, 0, 2)) {
+                        my $old_io = !!$io; # false when no std handle is left
+                        open $io, '+<&', $newfh or die "open +<&: $!";
+                        push @non_std, $io unless $old_io;
+                }
+                if (@non_std) {
+                        require Fcntl;
+                        fcntl($_, Fcntl::F_SETFD(), 0) for @non_std; # no cloexec
+                }
+                my %env = map { split(/=/, $_, 2) } splice(@argv, $argc);
+                @ENV{keys %env} = values %env;
+                umask 077;
+                exec(@argv);
+                warn "exec: @argv: $!\n";
+                POSIX::_exit(1);
+        };
+        $SIG{CHLD} = $sigchld;
+        my $pid = fork // die "fork: $!";
+        if ($pid == 0) {
+                $do_exec->() if $fds->[1]; # git-credential, pager
+
+                # parent backgrounds on MUA
+                POSIX::setsid() > 0 or die "setsid: $!";
+                @parent = ($parent);
+                return; # continue $recv_cmd in background
+        }
+        if ($fds->[1]) {
+                $pids{$pid} = undef; # reaped later by $sigchld
+        } else {
+                $do_exec->(); # MUA reuses stdout
+        }
+};
+
+my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei';
+if ($runtime_dir eq '/lei') { # XDG_RUNTIME_DIR unset or empty
+        require File::Spec;
+        $runtime_dir = File::Spec->tmpdir."/lei-$<"; # per-UID fallback
+}
+unless (-d $runtime_dir) {
+        require File::Path;
+        File::Path::make_path($runtime_dir, { mode => 0700 });
+}
+my $path = "$runtime_dir/$narg.seq.sock";
+my $addr = pack_sockaddr_un($path);
+socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!";
+unless (connect($sock, $addr)) { # start the daemon if not started
+        local $ENV{PERL5LIB} = join(':', @INC);
+        open(my $daemon, '-|', $^X, $^W ? ('-w') : (),
+                qw[-MPublicInbox::LEI -e PublicInbox::LEI::lazy_start(@ARGV)],
+                $path, $! + 0, $narg) or die "popen: $!"; # errno of connect
+        while (<$daemon>) { warn $_ } # EOF when STDERR is redirected
+        close($daemon) or warn <<"";
+lei-daemon could not start, exited with \$?=$?
+
+        # try connecting again anyways, unlink+bind may be racy
+        connect($sock, $addr) or die <<"";
+connect($path): $! (after attempted daemon start)
+
+}
+# (Socket::MsgHdr|Inline::C), $sock are all available:
+open my $dh, '<', '.' or die "open(.) $!"; # cwd handle, sent with stdio
+my $buf = join("\0", scalar(@ARGV), @ARGV); # argc, then argv, NUL-separated
+while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } # then the environment
+$buf .= "\0\0";
+$send_cmd->($sock, [0, 1, 2, fileno($dh)], $buf, 0) or die "sendmsg: $!";
+$SIG{TSTP} = sub { send($sock, 'STOP', 0); kill 'STOP', $$ }; # tell peer first
+$SIG{CONT} = sub { send($sock, 'CONT', 0) };
+
+my $x_it_code = 0; # low 7 bits: signal to die by; bits above 8: exit code
+while (1) {
+        my (@fds) = $recv_cmd->($sock, my $buf, 4096 * 33);
+        die "recvmsg: $!" if scalar(@fds) == 1 && !defined($fds[0]);
+        last if $buf eq ''; # empty message ends the loop
+        if ($buf =~ /\Aexec (.+)\z/) {
+                $exec_cmd->(\@fds, split(/\0/, $1));
+        } elsif ($buf eq '-WINCH') {
+                kill($buf, @parent); # for MUA
+        } elsif ($buf eq 'umask') {
+                send($sock, 'u'.pack('V', umask), 0) or die "send: $!"
+        } elsif ($buf =~ /\Ax_it ([0-9]+)\z/) {
+                $x_it_code ||= $1 + 0; # first nonzero status wins
+                last;
+        } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) {
+                $x_it_code ||= $1 + 0; # recorded, but keep receiving
+        } elsif ($buf eq 'wait') {
+                $sigchld->(); # no args: blocking waitpid on tracked PIDs
+        } else {
+                $sigchld->();
+                die $buf; # unrecognized message
+        }
+}
+$sigchld->();
+if (my $sig = ($x_it_code & 127)) {
+        kill $sig, $$; # send that signal to ourselves
+        sleep(1) while 1; # no self-pipe/signalfd, here, so we loop
+}
+exit($x_it_code >> 8);
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
new file mode 100755
index 00000000..dd00623a
--- /dev/null
+++ b/script/public-inbox-cindex
@@ -0,0 +1,102 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]...
+usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT
+
+  Create and update search indices for code repos
+
+  -d EXTDIR           use EXTDIR instead of GIT_DIR/public-inbox-cindex
+  --no-fsync          speed up indexing, risk corruption on power outage
+  -L LEVEL            `medium', or `full' (default: medium)
+  --project-list=FILE use a cgit/gitweb-compatible list of projects
+  --update | -u       update previously-indexed code repos with `-d'
+  --jobs=NUM          set or disable parallelization (NUM=0)
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index commit diffs larger than the given size
+  --prune             prune old repos and commits
+  --reindex           reindex previously indexed repos
+  --verbose | -v      increase verbosity (may be repeated)
+
+BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
+See public-inbox-cindex(1) man page for full documentation.
+EOF
+my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden
+GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
+                indexlevel|index-level|L=s join:s@
+                batch_size|batch-size=s max_size|max-size=s
+                include|I=s@ only=s@ all show:s@
+                project-list=s exclude=s@ project-root|r=s
+                git-dir|g=s@
+                sort-parallel=s sort-compress-program=s sort-buffer-size=s
+                d=s update|u scan! prune dry-run|n C=s@ help|h))
+        or die $help; # bad options: usage via die
+if ($opt->{help}) { print $help; exit 0 }; # --help: usage on stdout
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
+require IO::Handle;
+STDOUT->autoflush(1);
+STDERR->autoflush(1);
+$SIG{USR1} = 'IGNORE'; # to be overridden in cidx_sync
+$SIG{PIPE} = 'IGNORE';
+# require lazily to speed up --help
+require PublicInbox::Admin;
+PublicInbox::Admin::do_chdir(delete $opt->{C}); # -C is removed from $opt
+my $cfg = $opt->{-pi_cfg} = PublicInbox::Config->new;
+my $cidx_dir = $opt->{d}; # -d EXTDIR; undef means per-repo indices below
+PublicInbox::Admin::require_or_die('Xapian');
+PublicInbox::Admin::progress_prepare($opt);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+%ENV = (%ENV, %$env) if $env; # $env entries override existing variables
+
+my @git_dirs;
+require PublicInbox::CodeSearchIdx; # unstable internal API
+if (@ARGV) { # positional arguments are rejected
+        my @g = map { "-g $_" } @ARGV;
+        die <<EOM;
+Specify git directories with `-g' (or --git-dir=): @g
+Or use --project-list=... and --project-root=...
+EOM
+} elsif (defined(my $pl = $opt->{'project-list'})) {
+        my $pfx = $opt->{'project-root'} // die <<EOM;
+PROJECT_ROOT required for --project-list
+EOM
+        $opt->{'git-dir'} and die <<EOM;
+--project-list does not accept additional --git-dir directories
+(@{$opt->{'git-dir'}})
+EOM
+        open my $fh, '<', $pl or die "open($pl): $!\n";
+        chomp(@git_dirs = <$fh>); # one entry per line of the list file
+        $pfx .= '/';
+        $pfx =~ tr!/!/!s; # squeeze runs of slashes
+        substr($_, 0, 0, $pfx) for @git_dirs; # prepend the project root
+} elsif (my $gd = $opt->{'git-dir'}) {
+        @git_dirs = @$gd;
+} elsif (grep defined, @$opt{qw(show update prune)}) { # no git dirs needed
+} else {
+        warn "No --git-dir= nor --project-list= + --project-root= specified\n";
+        die $help;
+}
+
+$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs;
+if (defined $cidx_dir) { # external index
+        die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/;
+        my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt);
+        @{$cidx->{git_dirs}} = @git_dirs; # may be empty
+        $cidx->cidx_run;
+} elsif (!@git_dirs) { # no -d and nothing to index
+        die $help
+} else {
+        die <<EOM if $opt->{update};
+--update requires `-d EXTDIR'
+EOM
+        for my $gd (@git_dirs) { # one index inside each git directory
+                my $cd = "$gd/public-inbox-cindex";
+                my $cidx = PublicInbox::CodeSearchIdx->new($cd, { %$opt });
+                $cidx->{-cidx_internal} = 1;
+                @{$cidx->{git_dirs}} = ($gd);
+                $cidx->cidx_run;
+        }
+}
diff --git a/script/public-inbox-clone b/script/public-inbox-clone
new file mode 100755
index 00000000..c3e64485
--- /dev/null
+++ b/script/public-inbox-clone
@@ -0,0 +1,70 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Wrapper to git clone remote public-inboxes
+use v5.12;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $opt = {};
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-clone [OPTIONS] INBOX_URL [INBOX_DIR]
+       public-inbox-clone [OPTIONS] ROOT_URL [DESTINATION]
+
+  clone remote public-inboxes or grokmirror manifests
+
+options:
+
+  --epoch=RANGE       range of v2 epochs to clone (e.g `2..5', `~0', `~1..')
+  --torsocks VAL      whether or not to wrap git and curl commands with
+                      torsocks (default: `auto')
+                      Must be one of: `auto', `no' or `yes'
+  --dry-run | -n      show what would be cloned without cloning
+  --verbose | -v      increase verbosity (may be repeated)
+    --quiet | -q      disable progress reporting
+    -C DIR            chdir to specified directory
+
+See public-inbox-clone(1) man page for --manifest, --remote-manifest,
+--objstore, --project-list, --post-update-hook, --include, --exclude,
+--prune, --keep-going, --jobs, --inbox-config
+EOF
+
+# cgit calls it `project-list', grokmirror calls it `projectslist',
+# support both :/
+GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@
+        inbox-config=s inbox-version=i objstore=s manifest=s
+        remote-manifest=s project-list|projectslist=s post-update-hook=s@
+        prune|p keep-going|k exit-code purge
+        dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help;
+if ($opt->{help}) { print $help; exit };
+require PublicInbox::Admin; # loads Config
+PublicInbox::Admin::do_chdir(delete $opt->{C});
+PublicInbox::Admin::setup_signals();
+$SIG{PIPE} = 'IGNORE';
+
+my ($url, $dst, $extra) = @ARGV; # exactly one or two positional arguments
+die $help if !defined($url) || defined($extra);
+defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!) or die $help; # no basename
+index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'";
+
+# n.b. this is still a truckload of code...
+require File::Spec;
+require PublicInbox::LEI;
+require PublicInbox::LeiExternal;
+require PublicInbox::LeiMirror;
+
+$url = PublicInbox::LeiExternal::ext_canonicalize($url);
+my $lei = bless { # keys 0..3 hold stdin, stdout, stderr and a cwd handle
+        env => \%ENV, opt => $opt, cmd => 'public-inbox-clone',
+        0 => *STDIN{GLOB}, 2 => *STDERR{GLOB},
+}, 'PublicInbox::LEI';
+open $lei->{1}, '+<&=', 1 or die "dup: $!";
+open $lei->{3}, '<', '.' or die "open . $!"; # 3-arg form, as script/lei does
+my $mrr = bless {
+        lei => $lei,
+        src => $url,
+        dst => File::Spec->canonpath($dst),
+}, 'PublicInbox::LeiMirror';
+
+$? = 0;
+$mrr->do_mirror;
+$mrr->can('_wq_done_wait')->($$, $mrr, $lei);
+exit(($lei->{child_error} // 0) >> 8); # exit code is the high byte
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 5c681466..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,19 +1,44 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use warnings;
+use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::InboxWritable;
-use PublicInbox::Xapcmd;
-use PublicInbox::Admin;
+my $opt = { compact => 1, -coarse_lock => 1,
+        -eidx_ok => 1, -cidx_ok => 1 };
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
+
+  Compact Xapian DBs in an inbox
+
+options:
+
+  --all               compact all configured inboxes
+  --jobs=NUM          control parallelization
+
+See public-inbox-compact(1) man page for full documentation.
+EOF
+GetOptions($opt, qw(all C=s@ help|h),
+        # compact options:
+        qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F),
+) or die $help;
+if ($opt->{help}) { print $help; exit 0 }; # --help: usage on stdout
+
+require PublicInbox::Admin;
 PublicInbox::Admin::require_or_die('-index');
-my $usage = "Usage: public-inbox-compact INBOX_DIR\n";
-my $opt = { compact => 1, -coarse_lock => 1 };
-GetOptions($opt, @PublicInbox::Xapcmd::COMPACT_OPT) or
-        die "bad command-line args\n$usage";
-my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage;
-foreach (@ibxs) {
-        my $ibx = PublicInbox::InboxWritable->new($_);
+PublicInbox::Admin::do_chdir(delete $opt->{C}); # -C is removed from $opt
+PublicInbox::Admin::progress_prepare($opt);
+
+require PublicInbox::InboxWritable;
+require PublicInbox::Xapcmd;
+my $cfg = PublicInbox::Config->new;
+my ($ibxs, $eidxs, $cidxs) =
+        PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } # none
+for my $ibx (@$ibxs) {
+        $ibx = PublicInbox::InboxWritable->new($ibx); # replaces the list entry
         PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
 }
+for my $ibxish (@$eidxs, @$cidxs) { # extindex and cindex directories
+        my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+        PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
+}
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index e13c13f4..713c2881 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -1,106 +1,126 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
-use warnings;
+use v5.10.1;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::InboxWritable;
-use PublicInbox::Config;
-use PublicInbox::Admin;
-use PublicInbox::V2Writable;
-use PublicInbox::Git;
-use PublicInbox::Spawn qw(spawn);
-use Cwd 'abs_path';
-use File::Copy 'cp'; # preserves permissions:
-my $usage = "Usage: public-inbox-convert OLD NEW\n";
-my $jobs;
-my $index = 1;
-my %opts = (
-        '--jobs|j=i' => \$jobs,
-        '--index!' => \$index,
-);
-GetOptions(%opts) or die "bad command-line args\n$usage";
-my $old_dir = shift(@ARGV) or die $usage;
-my $new_dir = shift(@ARGV) or die $usage;
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-convert [options] OLD NEW
+
+  convert v1 format inboxes to v2
+
+options:
+
+  --no-index          do not index after conversion
+  --jobs=NUM          set shards (NUM=0)
+  --verbose | -v      increase verbosity (may be repeated)
+
+index options (see public-inbox-index(1) man page for full description):
+
+  --no-fsync          speed up indexing, risk corruption on power outage
+  -L LEVEL            `basic', `medium', or `full' (default: full)
+  --compact | -c      run public-inbox-compact(1) after indexing
+  --sequential-shard  index Xapian shards sequentially for slow storage
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index messages larger than the given size
+
+See public-inbox-convert(1) man page for full documentation.
+EOF
+
+my $opt = {
+        index => 1,
+        # index defaults:
+        quiet => -1, compact => 0, maxsize => undef, fsync => 1,
+        reindex => 1, # we always reindex
+};
+GetOptions($opt, qw(jobs|j=i index! help|h C=s@),
+                # index options
+                qw(verbose|v+ rethread compact|c+ fsync|sync!
+                indexlevel|index-level|L=s max_size|max-size=s
+                batch_size|batch-size=s
+                sequential-shard|seq-shard
+                )) or die $help;
+if ($opt->{help}) { print $help; exit 0 };
+require PublicInbox::Admin;
+PublicInbox::Admin::do_chdir(delete $opt->{C});
+my $old_dir = shift(@ARGV) // '';
+my $new_dir = shift(@ARGV) // '';
+die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq '');
 die "$new_dir exists\n" if -d $new_dir;
 die "$old_dir not a directory\n" unless -d $old_dir;
-my $config = eval { PublicInbox::Config->new };
-$old_dir = abs_path($old_dir);
-my $old;
-if ($config) {
-        $config->each_inbox(sub {
-                $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
-        });
-}
-unless ($old) {
+
+require PublicInbox::Config;
+require PublicInbox::InboxWritable;
+
+my $cfg = PublicInbox::Config->new;
+my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg);
+@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n",
+                map { "\t$_->{inboxdir}\n" } @old;
+my $old = PublicInbox::InboxWritable->new($old[0]);
+if (delete $old->{-unconfigured}) {
         warn "W: $old_dir not configured in " .
                 PublicInbox::Config::default_file() . "\n";
-        $old = {
-                inboxdir => $old_dir,
-                name => 'ignored',
-                address => [ 'old@example.com' ],
-        };
-        $old = PublicInbox::Inbox->new($old);
-}
-$old = PublicInbox::InboxWritable->new($old);
-if ($old->version >= 2) {
-        die "Only conversion from v1 inboxes is supported\n";
 }
+die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
 
-$old->{indexlevel} //= PublicInbox::Admin::detect_indexlevel($old);
-if ($index) {
+my $detected = $old->detect_indexlevel;
+$old->{indexlevel} //= $detected;
+my $env;
+if ($opt->{'index'}) {
         my $mods = {};
         PublicInbox::Admin::scan_ibx_modules($mods, $old);
         PublicInbox::Admin::require_or_die(keys %$mods);
+        PublicInbox::Admin::progress_prepare($opt);
+        $env = PublicInbox::Admin::index_prepare($opt, $cfg);
 }
-
+local %ENV = (%$env, %ENV) if $env;
 my $new = { %$old };
-$new->{inboxdir} = abs_path($new_dir);
+$new->{inboxdir} = PublicInbox::Config::rel2abs_collapsed($new_dir);
 $new->{version} = 2;
-$new = PublicInbox::InboxWritable->new($new);
+$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
+$new->{-no_fsync} = 1 if !$opt->{fsync};
 my $v2w;
-$old->umask_prepare;
 
 sub link_or_copy ($$) {
         my ($src, $dst) = @_;
         link($src, $dst) and return;
         $!{EXDEV} or warn "link $src, $dst failed: $!, trying cp\n";
-        cp($src, $dst) or die "cp $src, $dst failed: $!\n";
+        require File::Copy; # preserves permissions:
+        File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n";
 }
 
-$old->with_umask(sub {
+{
+        my $restore = $old->with_umask;
         my $old_cfg = "$old->{inboxdir}/config";
         local $ENV{GIT_CONFIG} = $old_cfg;
         my $new_cfg = "$new->{inboxdir}/all.git/config";
-        $v2w = PublicInbox::V2Writable->new($new, 1);
-        $v2w->init_inbox($jobs);
+        $v2w = $new->importer(1);
+        $v2w->init_inbox(delete $opt->{jobs});
         unlink $new_cfg;
         link_or_copy($old_cfg, $new_cfg);
         if (my $alt = $new->{altid}) {
                 require PublicInbox::AltId;
                 foreach my $i (0..$#$alt) {
                         my $src = PublicInbox::AltId->new($old, $alt->[$i], 0);
-                        $src->mm_alt or next;
+                        $src = $src->mm_alt or next;
+                        $src = $src->{dbh}->sqlite_db_filename;
                         my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1);
-                        $dst = $dst->{filename};
-                        $src->mm_alt->{dbh}->sqlite_backup_to_file($dst);
+                        $dst->mm_alt->{dbh}->sqlite_backup_from_file($src);
                 }
         }
         my $desc = "$old->{inboxdir}/description";
         link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc;
         my $clone = "$old->{inboxdir}/cloneurl";
-        if (-e $clone) {
-                warn <<"";
+        warn <<"" if -e $clone;
 $clone may not be valid after migrating to v2, not copying
 
-        }
-});
+}
 my $state = '';
 my $head = $old->{ref_head} || 'HEAD';
-my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
-$v2w->idx_init;
+my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head);
+$v2w->idx_init($opt);
 my $im = $v2w->importer;
-my ($r, $w) = $im->gfi_start;
+my $io = $im->gfi_start;
 my $h = '[0-9a-f]';
 my %D;
 my $last;
@@ -110,23 +130,17 @@ while (<$rd>) {
         } elsif (/^commit /) {
                 $state = 'commit';
         } elsif (/^data ([0-9]+)/) {
-                my $len = $1;
-                $w->print($_) or $im->wfail;
-                while ($len) {
-                        my $n = read($rd, my $tmp, $len) or die "read: $!";
-                        warn "$n != $len\n" if $n != $len;
-                        $len -= $n;
-                        $w->print($tmp) or $im->wfail;
-                }
+                print $io $_ or $im->wfail;
+                print $io PublicInbox::IO::read_all($rd, $1) or $im->wfail;
                 next;
         } elsif ($state eq 'commit') {
                 if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) {
                         my ($mark, $path) = ($1, $2);
                         $D{$path} = $mark;
                         if ($last && $last ne 'm') {
-                                $w->print("D $last\n") or $im->wfail;
+                                print $io "D $last\n" or $im->wfail;
                         }
-                        $w->print("M 100644 :$mark m\n") or $im->wfail;
+                        print $io "M 100644 :$mark m\n" or $im->wfail;
                         $last = 'm';
                         next;
                 }
@@ -134,31 +148,31 @@ while (<$rd>) {
                         my $mark = delete $D{$1};
                         defined $mark or die "undeleted path: $1\n";
                         if ($last && $last ne 'd') {
-                                $w->print("D $last\n") or $im->wfail;
+                                print $io "D $last\n" or $im->wfail;
                         }
-                        $w->print("M 100644 :$mark d\n") or $im->wfail;
+                        print $io "M 100644 :$mark d\n" or $im->wfail;
                         $last = 'd';
                         next;
                 }
         }
         last if $_ eq "done\n";
-        $w->print($_) or $im->wfail;
+        print $io $_ or $im->wfail;
 }
-$w = $r = undef;
-close $rd or die "close fast-export: $!\n";
-waitpid($pid, 0) or die "waitpid failed: $!\n";
-$? == 0 or die "fast-export failed: $?\n";
+$rd->close or die "fast-export: \$?=$? \$!=$!\n";
+$io = undef;
 $v2w->done;
-if (my $mm = $old->mm) {
+if (my $old_mm = $old->mm) {
         $old->cleanup;
-        $mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3");
+        $old_mm = $old_mm->{dbh}->sqlite_db_filename;
 
         # we want to trigger a reindex, not a from scratch index if
         # we're reusing the msgmap from an existing v1 installation.
-        $v2w->idx_init;
-        my $epoch0 = PublicInbox::Git->new($v2w->git_init(0));
+        $v2w->idx_init($opt);
+        $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm);
+
+        my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0));
         chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head));
         $v2w->last_epoch_commit(0, $cmt);
 }
-$v2w->index_sync({reindex => 1}) if $index;
+$v2w->index_sync($opt) if delete $opt->{'index'};
 $v2w->done;
diff --git a/script/public-inbox-edit b/script/public-inbox-edit
index ae5d8289..88115d7c 100755
--- a/script/public-inbox-edit
+++ b/script/public-inbox-edit
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used for editing messages in a public-inbox.
@@ -9,20 +9,36 @@ use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 use PublicInbox::AdminEdit;
 use File::Temp 0.19 (); # 0.19 for TMPDIR
-use PublicInbox::ContentId qw(content_id);
+use PublicInbox::ContentHash qw(content_hash);
 use PublicInbox::MID qw(mid_clean mids);
 PublicInbox::Admin::check_require('-index');
-use PublicInbox::MIME;
-use PublicInbox::InboxWritable;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::Import;
 
-my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]";
+my $help = <<'EOF';
+usage: public-inbox-edit -m MESSAGE-ID [--all] [INBOX_DIRS]
+
+  destructively edit messages in a public inbox
+
+options:
+
+  --all               edit all configured inboxes
+  -m MESSAGE-ID       edit the message with a given Message-ID
+  -F FILE             edit the message matching the contents of FILE
+  --force             forcibly edit even if Message-ID is ambiguous
+  --raw               do not perform "From " line escaping
+
+See public-inbox-edit(1) man page for full documentation.
+EOF
+
 my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 };
-my @opt = qw(mid|m=s file|F=s raw);
-GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or
-        die "bad command-line args\n$usage\n";
+my @opt = qw(mid|m=s file|F=s raw C=s@);
+GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help;
+if ($opt->{help}) { print $help; exit 0 };
+PublicInbox::Admin::do_chdir(delete $opt->{C});
 
-my $cfg = eval { PublicInbox::Config->new };
+my $cfg = PublicInbox::Config->new;
 my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f"
 unless (defined $editor) {
         my $k = 'publicinbox.mailEditor';
@@ -43,7 +59,7 @@ if (defined $mid && defined $file) {
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
 PublicInbox::AdminEdit::check_editable(\@ibxs);
 
-my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ]
+my $found = {}; # chash => [ [ibx, smsg] [, [ibx, smsg] ] ]
 
 sub find_mid ($$$) {
         my ($found, $mid, $ibxs) = @_;
@@ -52,10 +68,10 @@ sub find_mid ($$$) {
                 my ($id, $prev);
                 while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
                         my $ref = $ibx->msg_by_smsg($smsg);
-                        my $mime = PublicInbox::MIME->new($ref);
-                        my $cid = content_id($mime);
+                        my $mime = PublicInbox::Eml->new($ref);
+                        my $chash = content_hash($mime);
                         my $tuple = [ $ibx, $smsg ];
-                        push @{$found->{$cid} ||= []}, $tuple
+                        push @{$found->{$chash} ||= []}, $tuple
                 }
                 PublicInbox::InboxWritable::cleanup($ibx);
         }
@@ -92,13 +108,11 @@ Multiple messages with different content found matching
                 warn "Will edit all of them\n";
         }
 } else {
-        open my $fh, '<', $file or die "open($file) failed: $!";
-        my $orig = do { local $/; <$fh> };
-        my $mime = PublicInbox::MIME->new(\$orig);
-        my $mids = mids($mime->header_obj);
+        my $eml = eml_from_path($file) or die "open($file) failed: $!";
+        my $mids = mids($eml);
         find_mid($found, $_, \@ibxs) for (@$mids); # populates $found
-        my $cid = content_id($mime);
-        my $to_edit = $found->{$cid};
+        my $chash = content_hash($eml);
+        my $to_edit = $found->{$chash};
         unless ($to_edit) {
                 my $nr = scalar(keys %$found);
                 if ($nr > 0) {
@@ -116,11 +130,11 @@ $mids
                 }
                 exit 1;
         }
-        $found = { $cid => $to_edit };
+        $found = { $chash => $to_edit };
 }
 
 my %tmpopt = (
-        TEMPLATE => 'public-inbox-edit-XXXXXX',
+        TEMPLATE => 'public-inbox-edit-XXXX',
         TMPDIR => 1,
         SUFFIX => $opt->{raw} ? '.eml' : '.mbox',
 );
@@ -170,11 +184,10 @@ retry_edit:
         # rename/relink $edit_fn
         open my $new_fh, '<', $edit_fn or
                 die "can't read edited file ($edit_fn): $!\n";
-        my $new_raw = do { local $/; <$new_fh> };
+        my $new_raw = PublicInbox::IO::read_all $new_fh;
 
         if (!$opt->{raw}) {
-                # get rid of the From we added
-                $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+                PublicInbox::Eml::strip_from($new_raw);
 
                 # check if user forgot to purge (in mutt) after editing
                 if ($new_raw =~ /^From /sm) {
@@ -206,8 +219,8 @@ W: possible message boundary splitting error
                 $new_raw =~ s/^>(>*From )/$1/gm;
         }
 
-        my $new_mime = PublicInbox::MIME->new(\$new_raw);
-        my $old_mime = PublicInbox::MIME->new($old_raw);
+        my $new_mime = PublicInbox::Eml->new(\$new_raw);
+        my $old_mime = PublicInbox::Eml->new($old_raw);
 
         # make sure we don't compare unwanted headers, since mutt adds
         # Content-Length, Status, and Lines headers:
@@ -216,10 +229,10 @@ W: possible message boundary splitting error
 
         # allow changing Received: and maybe other headers which can
         # contain sensitive info.
-        my $nhdr = $new_mime->header_obj;
-        my $ohdr = $old_mime->header_obj;
-        if (($nhdr->as_string eq $ohdr->as_string) &&
-            (content_id($new_mime) eq content_id($old_mime))) {
+        my $nhdr = $new_mime->header_obj->as_string;
+        my $ohdr = $old_mime->header_obj->as_string;
+        if (($nhdr eq $ohdr) &&
+            (content_hash($new_mime) eq content_hash($old_mime))) {
                 warn "No change detected to:\n", show_cmd($ibx, $smsg);
 
                 next unless $opt->{verbose};
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
new file mode 100755
index 00000000..2e5a5d2c
--- /dev/null
+++ b/script/public-inbox-extindex
@@ -0,0 +1,91 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...]
+
+  Create and update external (detached) search indices
+
+  --no-fsync          speed up indexing, risk corruption on power outage
+  --watch             run persistently and watch for inbox updates
+  -L LEVEL            `medium' or `full' (default: full)
+  --all               index all configured inboxes
+  --jobs=NUM          set or disable parallelization (NUM=0)
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index messages larger than the given size
+  --gc                perform garbage collection instead of indexing
+  --dedupe[=MSGID]    fix prior deduplication errors (may be repeated)
+  --reindex           index previously indexed inboxes
+  --fast              only reindex unseen/stale messages
+  --verbose | -v      increase verbosity (may be repeated)
+  --dry-run | -n      dry-run on --dedupe
+
+BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
+See public-inbox-extindex(1) man page for full documentation.
+EOF
+my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; # defaults
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
+                fsync|sync! fast dangerous
+                indexlevel|index-level|L=s max_size|max-size=s
+                batch_size|batch-size=s
+                dedupe:s@ gc commit-interval=i watch scan! dry-run|n
+                multi-pack-index! all C=s@ help|h))
+        or die $help; # unparseable switches: show usage and fail
+if ($opt->{help}) { print $help; exit 0 };
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
+require IO::Handle;
+STDOUT->autoflush(1); # flush after every write
+STDERR->autoflush(1);
+local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync
+# require lazily to speed up --help
+require PublicInbox::Admin;
+PublicInbox::Admin::do_chdir(delete $opt->{C}); # -C is removed from $opt
+my $cfg = PublicInbox::Config->new;
+my $eidx_dir = shift(@ARGV); # optional EXTINDEX_DIR argument
+unless (defined $eidx_dir) { # no directory given on the command line
+        if ($opt->{all} && $cfg->ALL) { # --all: use topdir of $cfg->ALL
+                $eidx_dir = $cfg->ALL->{topdir};
+        } else {
+                die "E: $help";
+        }
+}
+my @ibxs; # stays empty with --gc
+if ($opt->{gc}) { # --gc takes no inbox paths and excludes the switches below
+        die "E: inbox paths must not be specified with --gc\n" if @ARGV;
+        for my $sw (qw(all watch dry-run dedupe)) {
+                die "E: --$sw is not compatible with --gc\n" if $opt->{$sw};
+        }
+} else {
+        @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+}
+$opt->{'dry-run'} && !$opt->{dedupe} and
+        die "E: --dry-run only affects --dedupe\n";
+$opt->{fast} && !$opt->{reindex} and
+        die "E: --fast only affects --reindex\n";
+
+PublicInbox::Admin::require_or_die(qw(-search));
+PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
+PublicInbox::Admin::progress_prepare($opt);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+local %ENV = (%ENV, %$env) if $env; # overlay entries returned by index_prepare
+require PublicInbox::ExtSearchIdx;
+my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt);
+if ($opt->{gc}) {
+        $eidx->attach_config($cfg); # no explicit inbox list
+        $eidx->eidx_gc($opt);
+} else {
+        if ($opt->{all}) {
+                $eidx->attach_config($cfg); # no explicit inbox list
+        } else {
+                $eidx->attach_config($cfg, \@ibxs); # only the resolved inboxes
+        }
+        if ($opt->{watch}) { # --watch: run persistently (see usage text)
+                $cfg = undef; # save memory only after SIGHUP
+                $eidx->eidx_watch($opt);
+        } else {
+                $eidx->eidx_sync($opt);
+        }
+}
diff --git a/script/public-inbox-fetch b/script/public-inbox-fetch
new file mode 100755
index 00000000..6fd15328
--- /dev/null
+++ b/script/public-inbox-fetch
@@ -0,0 +1,39 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Wrapper to git fetch remote public-inboxes
+use v5.12;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $opt = {}; # filled in by GetOptions below
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-fetch -C DESTINATION
+
+  fetch remote public-inboxes
+
+options:
+
+  --torsocks VAL      whether or not to wrap git and curl commands with
+                      torsocks (default: `auto')
+                      Must be one of: `auto', `no' or `yes'
+  -T NAME             Name of remote(s) to try (may be repeated)
+                      default: `origin' and `_grokmirror'
+  --exit-code         exit with 127 if no updates
+  --verbose | -v      increase verbosity (may be repeated)
+    --quiet | -q      decrease verbosity
+    -C DIR            chdir to specified directory
+EOF
+GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ try-remote|T=s@
+        prune|p
+        no-torsocks torsocks=s exit-code)) or die $help;
+if ($opt->{help}) { print $help; exit }; # --help goes to stdout, status 0
+require PublicInbox::Fetch; # loads Admin
+PublicInbox::Admin::do_chdir(delete $opt->{C}); # -C is removed from $opt
+PublicInbox::Admin::setup_signals();
+$SIG{PIPE} = 'IGNORE'; # get EPIPE from writes instead of dying on SIGPIPE
+
+my $lei = bless { # hand-built PublicInbox::LEI object: env, options, stdio
+        env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch',
+        0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB},
+}, 'PublicInbox::LEI';
+PublicInbox::Fetch->do_fetch($lei, '.'); # '.' is the cwd after any -C
+exit(($lei->{child_error} // 0) >> 8); # unset child_error means exit 0
diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd
index 09da505e..caceae20 100755
--- a/script/public-inbox-httpd
+++ b/script/public-inbox-httpd
@@ -1,56 +1,8 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Standalone HTTP server for public-inbox.
-use strict;
+use v5.12;
 use PublicInbox::Daemon;
-BEGIN {
-        for (qw(Plack::Builder Plack::Util)) {
-                eval("require $_") or die "E: Plack is required for $0\n";
-        }
-        Plack::Builder->import;
-        require PublicInbox::HTTP;
-        require PublicInbox::HTTPD;
-}
-my %httpds;
-my $app;
-my $refresh = sub {
-        if (@ARGV) {
-                eval { $app = Plack::Util::load_psgi(@ARGV) };
-                if ($@) {
-                        die $@,
-"$0 runs in /, command-line paths must be absolute\n";
-                }
-        } else {
-                require PublicInbox::WWW;
-                my $www = PublicInbox::WWW->new;
-                $www->preload;
-                $app = builder {
-                        eval {
-                                enable 'Deflater',
-                                        content_type => [ qw(
-                                                text/html
-                                                text/plain
-                                                application/atom+xml
-                                                )]
-                        };
-
-                        eval { enable 'ReverseProxy' };
-                        $@ and warn
-"Plack::Middleware::ReverseProxy missing,\n",
-"URL generation for redirects may be wrong if behind a reverse proxy\n";
-
-                        enable 'Head';
-                        sub { $www->call(@_) };
-                };
-        }
-};
-
-PublicInbox::Daemon::run('0.0.0.0:8080', $refresh,
-        sub ($$$) { # post_accept
-                my ($client, $addr, $srv) = @_;
-                my $fd = fileno($srv);
-                my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app);
-                PublicInbox::HTTP->new($client, $addr, $h),
-        });
+PublicInbox::Daemon::run('http://0.0.0.0:8080');
diff --git a/script/public-inbox-imapd b/script/public-inbox-imapd
new file mode 100755
index 00000000..0c96cdbb
--- /dev/null
+++ b/script/public-inbox-imapd
@@ -0,0 +1,8 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Standalone read-only IMAP server for public-inbox.
+use v5.12;
+use PublicInbox::Daemon;
+PublicInbox::Daemon::run('imap://0.0.0.0:143'); # NOTE(review): presumably the default listen URL -- confirm in PublicInbox::Daemon
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 2c7c4f13..a13e44bf 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -1,35 +1,156 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# Basic tool to create a Xapian search index for a git repository
-# configured for public-inbox.
+# Basic tool to create a Xapian search index for a public-inbox.
 # Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
 # highly recommended: eatmydata public-inbox-index INBOX_DIR
 
 use strict;
-use warnings;
+use v5.10.1;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $usage = "public-inbox-index INBOX_DIR";
-use PublicInbox::Admin;
-PublicInbox::Admin::require_or_die('-index');
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-index [options] INBOX_DIR
+
+  Create and update per-inbox search indices
+
+options:
+
+  --no-fsync          speed up indexing, risk corruption on power outage
+  -L LEVEL            `basic', `medium', or `full' (default: full)
+  -E EXTINDEX         update extindex (default: `all')
+  --all               index all configured inboxes
+  --compact | -c      run public-inbox-compact(1) after indexing
+  --sequential-shard  index Xapian shards sequentially for slow storage
+  --jobs=NUM          set or disable parallelization (NUM=0)
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index messages larger than the given size
+  --reindex           index previously indexed data (if upgrading)
+  --since=DATE        limit --reindex to changes after DATE
+  --until=DATE        limit --reindex to changes before DATE
+  --rethread          regenerate thread IDs (if upgrading, use sparingly)
+  --prune             prune git storage on discontiguous history
+  --verbose | -v      increase verbosity (may be repeated)
 
-my $opt = { quiet => -1 };
-GetOptions($opt, qw(verbose|v+ reindex jobs|j=i prune indexlevel|L=s))
-        or die "bad command-line args\n$usage";
-die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0;
+BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
+See public-inbox-index(1) man page for full documentation.
+EOF
+my $opt = {
+        quiet => -1, compact => 0, max_size => undef, fsync => 1,
+        'update-extindex' => [], # ":s@" optional arg sets '' if no arg given
+};
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
+                fsync|sync! xapian_only|xapian-only dangerous
+                indexlevel|index-level|L=s max_size|max-size=s
+                batch_size|batch-size=s
+                since|after=s until|before=s
+                sequential-shard|seq-shard
+                multi-pack-index!
+                no-update-extindex update-extindex|E=s@
+                fast-noop|F skip-docdata all C=s@ help|h))
+        or die $help;
+if ($opt->{help}) { print $help; exit 0 };
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
+if ($opt->{xapian_only} && !$opt->{reindex}) {
+        die "--xapian-only requires --reindex\n";
+}
+if ($opt->{reindex} && delete($opt->{'fast-noop'})) {
+        warn "--fast-noop ignored with --reindex\n";
+}
 
+# require lazily to speed up --help
+require PublicInbox::Admin;
+PublicInbox::Admin::require_or_die('-index');
+PublicInbox::Admin::do_chdir(delete $opt->{C});
 
-my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV);
+my $cfg = PublicInbox::Config->new; # Config is loaded by Admin
+$opt->{-use_cwd} = 1;
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
 PublicInbox::Admin::require_or_die('-index');
-unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 }
+unless (@ibxs) { print STDERR $help; exit 1 }
+require PublicInbox::InboxWritable;
+
+my (@eidx, %eidx_seen);
+my $update_extindex = $opt->{'update-extindex'};
+if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
+        # extindex and normal inboxes may have different owners
+        push(@$update_extindex, 'all') if -w $ALL->{topdir};
+}
+@$update_extindex = () if $opt->{'no-update-extindex'};
+if (scalar @$update_extindex) {
+        PublicInbox::Admin::require_or_die('-search');
+        require PublicInbox::ExtSearchIdx;
+}
+for my $ei_name (@$update_extindex) {
+        my $es = $cfg->lookup_ei($ei_name);
+        my $topdir;
+        if (!$es && -d $ei_name) { # allow dirname or config section name
+                $topdir = $ei_name;
+        } elsif ($es) {
+                $topdir = $es->{topdir};
+        } else {
+                die "extindex `$ei_name' not configured or found\n";
+        }
+        my $o = { %$opt };
+        delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic';
+        $eidx_seen{$topdir} //=
+                push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o));
+}
 my $mods = {};
+my @eidx_unconfigured;
 foreach my $ibx (@ibxs) {
+        $ibx = PublicInbox::InboxWritable->new($ibx);
+        # detect_indexlevel may also set $ibx->{-skip_docdata}
+        my $detected = $ibx->detect_indexlevel;
         # XXX: users can shoot themselves in the foot, with opt->{indexlevel}
-        $ibx->{indexlevel} //= $opt->{indexlevel} //
-                        PublicInbox::Admin::detect_indexlevel($ibx);
+        $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
+                        'full' : $detected);
         PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
+        if (@eidx && $ibx->{-unconfigured}) {
+                push @eidx_unconfigured, "  $ibx->{inboxdir}\n";
+        }
 }
+warn <<EOF if @eidx_unconfigured;
+The following inboxes are unconfigured and will not be updated in
+@$update_extindex:\n@eidx_unconfigured
+EOF
+
+$opt->{compact} = 0 if !$mods->{'Xapian'}; # (or old Search::Xapian)
 
 PublicInbox::Admin::require_or_die(keys %$mods);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+local %ENV = (%ENV, %$env) if $env;
+PublicInbox::Xapcmd::check_compact() if $opt->{compact};
 PublicInbox::Admin::progress_prepare($opt);
-PublicInbox::Admin::index_inbox($_, undef, $opt) for @ibxs;
+for my $ibx (@ibxs) {
+        if ($opt->{compact} >= 2) {
+                PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt});
+        }
+        $ibx->{-no_fsync} = 1 if !$opt->{fsync};
+        $ibx->{-dangerous} = 1 if $opt->{dangerous};
+        $ibx->{-skip_docdata} //= $opt->{'skip-docdata'};
+
+        my $ibx_opt = $opt;
+        if (defined(my $s = $ibx->{lc('indexSequentialShard')})) {
+                defined(my $v = $cfg->git_bool($s)) or die <<EOL;
+publicInbox.$ibx->{name}.indexSequentialShard not boolean
+EOL
+                $ibx_opt = { %$opt, 'sequential-shard' => $v };
+        }
+        my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+        last if $ibx_opt->{quit};
+        if (my $copt = $opt->{compact_opt}) {
+                local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'};
+                PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
+        }
+        last if $ibx_opt->{quit};
+        next if $ibx->{-unconfigured} || !$nidx;
+        for my $eidx (@eidx) {
+                $eidx->attach_inbox($ibx);
+        }
+}
+my $pr = $opt->{-progress};
+for my $eidx (@eidx) {
+        $pr->("indexing $eidx->{topdir} ...\n") if $pr;
+        $eidx->eidx_sync($opt);
+        last if $opt->{quit};
+}
diff --git a/script/public-inbox-init b/script/public-inbox-init
index 10d3ad45..cf6443f7 100755
--- a/script/public-inbox-init
+++ b/script/public-inbox-init
@@ -1,58 +1,115 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# Initializes a public-inbox, basically a wrapper for git-init(1)
 use strict;
-use warnings;
-sub usage {
-        print STDERR <<EOF;
-Usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..]
-EOF
-        exit 1;
-}
+use v5.10.1;
 use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
-use PublicInbox::Admin;
-PublicInbox::Admin::require_or_die('-base');
-use PublicInbox::Config;
-use PublicInbox::InboxWritable;
-use PublicInbox::Import;
-use File::Temp qw/tempfile/;
-use PublicInbox::Lock;
-use File::Basename qw/dirname/;
-use File::Path qw/mkpath/;
+use autodie qw(open chmod close rename);
 use Fcntl qw(:DEFAULT);
-use Cwd qw/abs_path/;
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..]
+
+  Initialize a public-inbox
+
+required arguments:
+
+  NAME                the name of the inbox
+  INBOX_DIR           pathname of the inbox
+  HTTP_URL            HTTP (or HTTPS) URL
+  ADDRESS             email address(es), may be specified multiple times
+
+options:
+
+  -V2                 use scalable public-inbox-v2-format(5)
+  -L LEVEL            index level `basic', `medium', or `full' (default: full)
+  --ng NEWSGROUP      set NNTP newsgroup name
+  -c KEY=VALUE        set additional config option(s)
+  --skip-artnum=NUM   NNTP article numbers to skip
+  --skip-epoch=NUM    epochs to skip (-V2 only)
+  -j JOBS             number of indexing jobs (-V2 only), (default: 4)
+
+See public-inbox-init(1) man page for full documentation.
+EOF
 
-my $version = undef;
-my $indexlevel = undef;
-my $skip_epoch;
+require PublicInbox::Admin;
+PublicInbox::Admin::require_or_die('-base');
+
+my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help);
+my $skip_docdata;
+my $ng = '';
+my (@c_extra, @chdir);
 my %opts = (
         'V|version=i' => \$version,
-        'L|indexlevel=s' => \$indexlevel,
+        'L|index-level|indexlevel=s' => \$indexlevel,
         'S|skip|skip-epoch=i' => \$skip_epoch,
+        'skip-artnum=i' => \$skip_artnum,
+        'j|jobs=i' => \$jobs,
+        'ng|newsgroup=s' => \$ng,
+        'skip-docdata' => \$skip_docdata,
+        'help|h' => \$show_help,
+        'c=s@' => \@c_extra,
+        'C=s@' => \@chdir,
 );
-GetOptions(%opts) or usage();
-PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel;
-my $name = shift @ARGV or usage();
-my $inboxdir = shift @ARGV or usage();
-my $http_url = shift @ARGV or usage();
+my $usage_cb = sub {
+        print STDERR $help;
+        exit 1;
+};
+GetOptions(%opts) or $usage_cb->();
+if ($show_help) { print $help; exit 0 };
+my $name = shift @ARGV or $usage_cb->();
+my $inboxdir = shift @ARGV or $usage_cb->();
+my $http_url = shift @ARGV or $usage_cb->();
 my (@address) = @ARGV;
-@address or usage();
-my %seen;
+@address or $usage_cb->();
+PublicInbox::Admin::do_chdir(\@chdir);
+
+@c_extra = map {
+        my ($k, $v) = split(/=/, $_, 2);
+        defined($v) or die "Usage: -c KEY=VALUE\n";
+        $k =~ /\A[a-z]+\z/i or die "$k contains invalid characters\n";
+        $k = lc($k);
+        if ($k eq 'newsgroup') {
+                die "newsgroup already set ($ng)\n" if $ng ne '';
+                $ng = $v;
+                ();
+        } elsif ($k eq 'address') {
+                push @address, $v; # for conflict checking
+                ();
+        } elsif ($k =~ /\A(?:inboxdir|mainrepo)\z/) {
+                die "$k not allowed via -c $_\n"
+        } elsif ($k eq 'indexlevel') {
+                defined($indexlevel) and
+                        die "indexlevel already set ($indexlevel)\n";
+                $indexlevel = $v;
+                ();
+        } else {
+                $_
+        }
+} @c_extra;
+
+PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel;
+
+$ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and
+        die "--newsgroup `$ng' is not valid\n";
+($ng =~ m!\A\.! || $ng =~ m!\.\z!) and
+        die "--newsgroup `$ng' must not start or end with `.'\n";
 
+require PublicInbox::Config;
 my $pi_config = PublicInbox::Config->default_file;
-my $dir = dirname($pi_config);
-mkpath($dir); # will croak on fatal errors
+my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!);
+require File::Path;
+File::Path::mkpath($dir); # will croak on fatal errors
 
 # first, we grab a flock to prevent simultaneous public-inbox-init
 # processes from trampling over each other, or exiting with 255 on
 # O_EXCL failure below.  This gets unlocked automatically on exit:
+require PublicInbox::Lock;
 my $lock_obj = { lock_path => "$pi_config.flock" };
 PublicInbox::Lock::lock_acquire($lock_obj);
 
 # git-config will operate on this (and rename on success):
-my ($fh, $pi_config_tmp) = tempfile('pi-init-XXXXXXXX', DIR => $dir);
+require File::Temp;
+my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir);
 
 # Now, we grab another lock to use git-config(1) locking, so it won't
 # wait on the lock, unlike some of our internal flock()-based locks.
@@ -64,22 +121,18 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do {
         warn "could not open config file: $lockfile: $!\n";
         exit(255);
 };
-my $auto_unlink = UnlinkMe->new($lockfile);
-my $perm;
+require PublicInbox::OnDestroy;
+my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile });
+my $perm = 0644 & ~umask;
+my %seen;
 if (-e $pi_config) {
-        open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n";
-        my @st = stat($oh);
+        require PublicInbox::IO;
+        open(my $oh, '<', $pi_config);
+        my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n";
         $perm = $st[2];
-        defined $perm or die "(f)stat failed on $pi_config: $!\n";
-        chmod($perm & 07777, $fh) or
-                die "(f)chmod failed on future $pi_config: $!\n";
-        my $old;
-        {
-                local $/;
-                $old = <$oh>;
-        }
-        print $fh $old or die "failed to write: $!\n";
-        close $oh or die "failed to close $pi_config: $!\n";
+        chmod($perm & 07777, $fh);
+        print $fh PublicInbox::IO::read_all($oh);
+        close $oh;
 
         # yes, this conflict checking is racy if multiple instances of this
         # script are run by the same $PI_DIR
@@ -103,36 +156,35 @@ if (-e $pi_config) {
         exit(1) if $conflict;
 
         my $ibx = $cfg->lookup_name($name);
-        if ($ibx) {
-                if (!defined($indexlevel) && $ibx->{indexlevel}) {
-                        $indexlevel = $ibx->{indexlevel};
-                }
-        }
+        $indexlevel //= $ibx->{indexlevel} if $ibx;
 }
-close $fh or die "failed to close $pi_config_tmp: $!\n";
+my $pi_config_tmp = $fh->filename;
+close($fh);
 
 my $pfx = "publicinbox.$name";
 my @x = (qw/git config/, "--file=$pi_config_tmp");
 
-$inboxdir = abs_path($inboxdir);
+$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir);
+die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0;
+
 if (-f "$inboxdir/inbox.lock") {
         if (!defined $version) {
                 $version = 2;
         } elsif ($version != 2) {
-                die "$inboxdir is a -V2 repo, -V$version specified\n"
+                die "$inboxdir is a -V2 inbox, -V$version specified\n"
         }
 } elsif (-d "$inboxdir/objects") {
         if (!defined $version) {
                 $version = 1;
         } elsif ($version != 1) {
-                die "$inboxdir is a -V1 repo, -V$version specified\n"
+                die "$inboxdir is a -V1 inbox, -V$version specified\n"
         }
 }
 
 $version = 1 unless defined $version;
 
 if ($version == 1 && defined $skip_epoch) {
-        die "--skip-epoch is only supported for -V2 repos\n";
+        die "--skip-epoch is only supported for -V2 inboxes\n";
 }
 
 my $ibx = PublicInbox::Inbox->new({
@@ -144,42 +196,56 @@ my $ibx = PublicInbox::Inbox->new({
 });
 
 my $creat_opt = {};
-PublicInbox::InboxWritable->new($ibx, $creat_opt)->init_inbox(0, $skip_epoch);
-
-# needed for git prior to v2.1.0
-umask(0077) if defined $perm;
-
-foreach my $addr (@address) {
-        next if $seen{lc($addr)};
-        PublicInbox::Import::run_die([@x, "--add", "$pfx.address", $addr]);
+if (defined $jobs) {
+        die "--jobs is only supported for -V2 inboxes\n" if $version == 1;
+        die "--jobs=$jobs must be >= 1\n" if $jobs <= 0;
+        $creat_opt->{nproc} = $jobs;
 }
-PublicInbox::Import::run_die([@x, "$pfx.url", $http_url]);
-PublicInbox::Import::run_die([@x, "$pfx.inboxdir", $inboxdir]);
 
-if (defined($indexlevel)) {
-        PublicInbox::Import::run_die([@x, "$pfx.indexlevel", $indexlevel]);
+require PublicInbox::InboxWritable;
+$ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt);
+if ($skip_docdata) {
+        $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb
+        $ibx->{indexlevel} eq 'basic' and
+                die "--skip-docdata ignored with --indexlevel=basic\n";
+        $ibx->{-skip_docdata} = $skip_docdata;
 }
+$ibx->init_inbox(0, $skip_epoch, $skip_artnum);
 
-# needed for git prior to v2.1.0
-if (defined $perm) {
-        chmod($perm & 07777, $pi_config_tmp) or
-                        die "(f)chmod failed on future $pi_config: $!\n";
+my $f = "$inboxdir/description";
+if (sysopen $fh, $f, O_CREAT|O_EXCL|O_WRONLY) {
+        print $fh "public inbox for $address[0]\n";
+        close $fh;
 }
 
-rename $pi_config_tmp, $pi_config or
-        die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n";
-$auto_unlink->DESTROY;
+# needed for git prior to v2.1.0
+umask(0077);
 
-package UnlinkMe;
-use strict;
+require PublicInbox::Spawn;
+PublicInbox::Spawn->import(qw(run_die));
 
-sub new {
-        my ($klass, $file) = @_;
-        bless { file => $file }, $klass;
+foreach my $addr (@address) {
+        next if $seen{lc($addr)};
+        run_die([@x, "--add", "$pfx.address", $addr]);
 }
+run_die([@x, "$pfx.url", $http_url]);
+run_die([@x, "$pfx.inboxdir", $inboxdir]);
 
-sub DESTROY {
-        my $f = delete($_[0]->{file});
-        unlink($f) if defined($f);
+if (defined($indexlevel)) {
+        run_die([@x, "$pfx.indexlevel", $indexlevel]);
+}
+run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne '';
+
+for my $kv (@c_extra) {
+        my ($k, $v) = split(/=/, $kv, 2);
+        # git 2.30+ has --fixed-value for idempotent invocations,
+        # but that's too new to depend on in 2021.  Perl quotemeta
+        # seems compatible enough for POSIX ERE which git uses
+        my $re = '^'.quotemeta($v).'$';
+        run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]);
 }
-1;
+
+# needed for git prior to v2.1.0
+chmod($perm & 07777, $pi_config_tmp);
+rename $pi_config_tmp, $pi_config;
+undef $auto_unlink; # trigger ->DESTROY
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 0d6c989b..a955cdf6 100644..100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -1,29 +1,49 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used for training spam (via SpamAssassin) and removing messages from a
 # public-inbox
-my $usage = "$0 <spam|ham|rm> </path/to/message";
+my $help = <<EOF;
+usage: public-inbox-learn [OPTIONS] [spam|ham|rm] </path/to/RFC2822_message
+
+required action argument:
+
+   spam  unindex the message and train as spam
+     rm  remove the message without training as spam
+    ham  index the message (based on To:/Cc: headers) and train as ham
+
+options:
+
+  --all  scan all inboxes on `rm'
+
+See public-inbox-learn(1) man page for full documentation.
+EOF
 use strict;
-use warnings;
 use PublicInbox::Config;
 use PublicInbox::InboxWritable;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
 use PublicInbox::Address;
 use PublicInbox::Spamcheck::Spamc;
-my $train = shift or die "usage: $usage\n";
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my %opt = (all => 0);
+GetOptions(\%opt, qw(all help|h)) or die $help;
+use PublicInbox::Import;
+
+my $train = shift or die $help;
 if ($train !~ /\A(?:ham|spam|rm)\z/) {
-        die "`$train' not recognized.\nusage: $usage\n";
+        die "`$train' not recognized.\n$help";
 }
+die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm';
 
 my $spamc = PublicInbox::Spamcheck::Spamc->new;
-my $pi_config = PublicInbox::Config->new;
+my $pi_cfg = PublicInbox::Config->new;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
+PublicInbox::Import::load_config($pi_cfg);
 my $err;
-my $mime = PublicInbox::MIME->new(eval {
-        local $/;
-        my $data = scalar <STDIN>;
-        $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $mime = PublicInbox::Eml->new(do{
+        my $data = PublicInbox::IO::read_all \*STDIN;
+        PublicInbox::Eml::strip_from($data);
 
         if ($train ne 'rm') {
                 eval {
@@ -36,7 +56,7 @@ my $mime = PublicInbox::MIME->new(eval {
                 };
                 $err = $@;
         }
-        $data
+        \$data
 });
 
 sub remove_or_add ($$$$) {
@@ -47,6 +67,7 @@ sub remove_or_add ($$$$) {
         $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
         $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr;
         $ibx = PublicInbox::InboxWritable->new($ibx);
+        $ibx->{indexlevel} = $ibx->detect_indexlevel;
         my $im = $ibx->importer(0);
 
         if ($train eq "rm") {
@@ -68,12 +89,12 @@ sub remove_or_add ($$$$) {
 }
 
 # spam is removed from all known inboxes since it is often Bcc:-ed
-if ($train eq 'spam') {
-        $pi_config->each_inbox(sub {
+if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) {
+        $pi_cfg->each_inbox(sub {
                 my ($ibx) = @_;
                 $ibx = PublicInbox::InboxWritable->new($ibx);
                 my $im = $ibx->importer(0);
-                $im->remove($mime, 'spam');
+                $im->remove($mime, $train);
                 $im->done;
         });
 } else {
@@ -84,7 +105,7 @@ if ($train eq 'spam') {
         for ($mime->header('Cc'), $mime->header('To')) {
                 foreach my $addr (PublicInbox::Address::emails($_)) {
                         $addr = lc($addr);
-                        $dests{$addr} //= $pi_config->lookup($addr) // 0;
+                        $dests{$addr} //= $pi_cfg->lookup($addr) // 0;
                 }
         }
 
@@ -92,12 +113,12 @@ if ($train eq 'spam') {
         my %seen;
         while (my ($addr, $ibx) = each %dests) {
                 next unless ref($ibx); # $ibx may be 0
-                next if $seen{"$ibx"}++;
+                next if $seen{0 + $ibx}++;
                 remove_or_add($ibx, $train, $mime, $addr);
         }
-        my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime);
+        my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime);
         for my $ibx (@$dests) {
-                next if !$seen{"$ibx"}++;
+                next if $seen{0 + $ibx}++;
                 remove_or_add($ibx, $train, $mime, $ibx->{-primary_address});
         }
 }
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index f37c7492..b463b07b 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -1,13 +1,29 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Mail delivery agent for public-inbox, run from your MTA upon mail delivery
+my $help = <<EOF;
+usage: public-inbox-mda [OPTIONS] </path/to/RFC2822_message
+
+options:
+
+  --no-precheck  skip internal checks for spam messages
+
+See public-inbox-mda(1) man page for full documentation.
+EOF
 use strict;
-use warnings;
-my $usage = 'public-inbox-mda [OPTIONS] < rfc2822_message';
-my $precheck = grep(/\A--no-precheck\z/, @ARGV) ? 0 : 1;
-my ($ems, $emm);
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my ($ems, $emm, $show_help);
+my $precheck = 1;
+use PublicInbox::Import;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # does this need a CLI switch?
+GetOptions('precheck!' => \$precheck, 'help|h' => \$show_help) or
+        do { print STDERR $help; exit 1 };
+if ($show_help) {
+        print $help;
+        exit;
+}
 
 my $do_exit = sub {
         my ($code) = shift;
@@ -15,8 +31,7 @@ my $do_exit = sub {
         exit $code;
 };
 
-use Email::Simple;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
 use PublicInbox::MDA;
 use PublicInbox::Config;
 use PublicInbox::Emergency;
@@ -24,29 +39,32 @@ use PublicInbox::Filter::Base;
 use PublicInbox::InboxWritable;
 use PublicInbox::Spamcheck;
 
-# n.b: hopefully we can setup the emergency path without bailing due to
-# user error, we really want to setup the emergency destination ASAP
+# n.b.: Hopefully we can set up the emergency path without bailing due to
+# user error, we really want to set up the emergency destination ASAP
 # in case there's bugs in our code or user error.
 my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
 $ems = PublicInbox::Emergency->new($emergency);
-my $str = eval { local $/; <STDIN> };
-$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $str = PublicInbox::IO::read_all \*STDIN;
+PublicInbox::Eml::strip_from($str);
 $ems->prepare(\$str);
-my $simple = Email::Simple->new(\$str);
-my $config = PublicInbox::Config->new;
+my $eml = PublicInbox::Eml->new(\$str);
+my $cfg = PublicInbox::Config->new;
 my $key = 'publicinboxmda.spamcheck';
 my $default = 'PublicInbox::Spamcheck::Spamc';
-my $spamc = PublicInbox::Spamcheck::get($config, $key, $default);
+my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default);
 my $dests = [];
+PublicInbox::Import::load_config($cfg, $do_exit);
+
 my $recipient = $ENV{ORIGINAL_RECIPIENT};
 if (defined $recipient) {
-        my $ibx = $config->lookup($recipient); # first check
+        my $ibx = $cfg->lookup($recipient); # first check
         push @$dests, $ibx if $ibx;
 }
 if (!scalar(@$dests)) {
-        $dests = PublicInbox::MDA->inboxes_for_list_id($config, $simple);
+        $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml);
         if (!scalar(@$dests) && !defined($recipient)) {
-                die "ORIGINAL_RECIPIENT not defined in ENV\n";
+                warn "ORIGINAL_RECIPIENT not defined in ENV\n";
+                $do_exit->(67); # EX_NOUSER
         }
         scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown
 }
@@ -61,7 +79,7 @@ my $err;
                 0;
         # pre-check, MDA has stricter rules than an importer might;
         } elsif ($precheck) {
-                !!PublicInbox::MDA->precheck($simple, $ibx->{address});
+                !!PublicInbox::MDA->precheck($eml, $ibx->{address});
         } else {
                 1;
         }
@@ -69,7 +87,7 @@ my $err;
 
 $do_exit->(67) if $err && scalar(@$dests) == 0;
 
-$simple = undef;
+$eml = undef;
 my $spam_ok;
 if ($spamc) {
         $str = '';
@@ -101,9 +119,10 @@ my @rejects;
 for my $ibx (@$dests) {
         mda_filter_adjust($ibx);
         my $filter = $ibx->filter;
-        my $mime = PublicInbox::MIME->new($str);
+        my $mime = PublicInbox::Eml->new($str);
         my $ret = $filter->delivery($mime);
-        if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
+        if (ref($ret) && ($ret->isa('PublicInbox::Eml') ||
+                        $ret->isa('Email::MIME'))) { # filter altered message
                 $mime = $ret;
         } elsif ($ret == PublicInbox::Filter::Base::IGNORE) {
                 next; # nothing, keep looping
@@ -119,7 +138,7 @@ for my $ibx (@$dests) {
                 # destination succeeds
                 $emm->abort;
         } else { # v1-only
-                my $mid = $mime->header_obj->header_raw('Message-ID');
+                my $mid = $mime->header_raw('Message-ID');
                 # this message is similar to what ssoma-mda shows:
                 print STDERR "CONFLICT: Message-ID: $mid exists\n";
         }
diff --git a/script/public-inbox-netd b/script/public-inbox-netd
new file mode 100755
index 00000000..e8b1ca69
--- /dev/null
+++ b/script/public-inbox-netd
@@ -0,0 +1,6 @@
+#!/usr/bin/perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use PublicInbox::Daemon;
+PublicInbox::Daemon::run();
diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd
index ce42de2d..aca27383 100755
--- a/script/public-inbox-nntpd
+++ b/script/public-inbox-nntpd
@@ -1,15 +1,8 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Standalone NNTP server for public-inbox.
-use strict;
-use warnings;
+use v5.12;
 use PublicInbox::Daemon;
-use PublicInbox::NNTP; # need to call import
-use PublicInbox::NNTPD;
-my $nntpd = PublicInbox::NNTPD->new;
-PublicInbox::Daemon::run('0.0.0.0:119',
-        sub { $nntpd->refresh_groups }, # refresh
-        sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }, # post_accept
-        $nntpd);
+PublicInbox::Daemon::run('nntp://0.0.0.0:119');
diff --git a/script/public-inbox-pop3d b/script/public-inbox-pop3d
new file mode 100755
index 00000000..ec944aee
--- /dev/null
+++ b/script/public-inbox-pop3d
@@ -0,0 +1,8 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Standalone POP3 server for public-inbox.
+use v5.12;
+use PublicInbox::Daemon;
+PublicInbox::Daemon::run('pop3://0.0.0.0:110');
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index c9b69c3d..618cfec4 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used for purging messages entirely from a public-inbox.  Currently
@@ -10,23 +10,35 @@ use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 use PublicInbox::AdminEdit;
 PublicInbox::Admin::check_require('-index');
 use PublicInbox::Filter::Base qw(REJECT);
-use PublicInbox::MIME;
+use PublicInbox::Eml;
 require PublicInbox::V2Writable;
 
-my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
+my $help = <<EOF;
+usage: public-inbox-purge [--all] [INBOX_DIRS] </path/to/message
+
+  erase message entirely from an inbox (including history)
+
+options:
+
+  --all               purge from all configured inboxes
+
+See public-inbox-purge(1) man page for full documentation.
+EOF
+
 my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 };
-GetOptions($opt, @PublicInbox::AdminEdit::OPT) or
-        die "bad command-line args\n$usage\n";
+GetOptions($opt, @PublicInbox::AdminEdit::OPT, 'C=s@') or die $help;
+if ($opt->{help}) { print $help; exit 0 };
 
+PublicInbox::Admin::do_chdir(delete $opt->{C});
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
 PublicInbox::AdminEdit::check_editable(\@ibxs);
 
-my $data = do { local $/; scalar <STDIN> };
-$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $data = PublicInbox::IO::read_all \*STDIN;
+PublicInbox::Eml::strip_from($data);
 my $n_purged = 0;
 
 foreach my $ibx (@ibxs) {
-        my $mime = PublicInbox::MIME->new($data);
+        my $mime = PublicInbox::Eml->new($data);
         my $v2w = PublicInbox::V2Writable->new($ibx, 0);
 
         my $commits = $v2w->purge($mime) || [];
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
index 645abeda..9bcd42ed 100755
--- a/script/public-inbox-watch
+++ b/script/public-inbox-watch
@@ -1,26 +1,62 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+my $help = <<EOF;
+usage: public-inbox-watch
+
+See public-inbox-watch(1) man page for full documentation.
+EOF
+
 use strict;
-use warnings;
-use PublicInbox::WatchMaildir;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use IO::Handle; # ->autoflush
+use PublicInbox::Watch;
+use PublicInbox::Import;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
 use PublicInbox::Config;
-my ($config, $watch_md);
+use PublicInbox::DS;
+my $do_scan = 1;
+GetOptions('scan!' => \$do_scan, # undocumented, testing only
+        'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 };
+if ($show_help) { print $help; exit 0 };
+PublicInbox::DS::block_signals();
+STDOUT->autoflush(1);
+STDERR->autoflush(1);
+local $0 = $0; # local since this script may be eval-ed
+my $watch = PublicInbox::Watch->new(PublicInbox::Config->new);
 my $reload = sub {
-        $config = PublicInbox::Config->new;
-        $watch_md->quit if $watch_md;
-        $watch_md = PublicInbox::WatchMaildir->new($config);
+        my $prev = $watch or return; # SIGQUIT issued
+        $watch->quit;
+        $watch = PublicInbox::Watch->new(PublicInbox::Config->new);
+        if ($watch) {
+                $watch->{sig} = $prev->{sig}; # prevent redundant signalfd
+                warn "# reloaded\n";
+        } else {
+                warn("E: reloading failed\n");
+                $watch = $prev;
+        }
 };
-$reload->();
-if ($watch_md) {
-        my $scan = sub { $watch_md->trigger_scan('full') if $watch_md };
-        $SIG{HUP} = $reload;
-        $SIG{USR1} = $scan;
-        $SIG{ALRM} = sub { $SIG{ALRM} = 'DEFAULT'; $scan->() };
-        $SIG{QUIT} = $SIG{TERM} = $SIG{INT} = sub {
-                $watch_md->quit if $watch_md;
-                $watch_md = undef;
+
+if ($watch) {
+        my $scan = sub {
+                return if !$watch;
+                warn "# scanning\n";
+                $watch->trigger_scan('full');
         };
-        alarm(1);
-        $watch_md->watch while ($watch_md);
+        my $quit = sub { # may be called in IMAP/NNTP children
+                $watch->quit if $watch;
+                $watch = undef;
+                $0 .= ' quitting';
+        };
+        my $sig = {
+                HUP => $reload,
+                USR1 => $scan,
+                CHLD => \&PublicInbox::DS::enqueue_reap,
+        };
+        $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit;
+        local @SIG{keys %$sig} = values(%$sig); # for non-signalfd/kqueue
+
+        # --no-scan is only intended for testing atm, undocumented.
+        PublicInbox::DS::requeue($scan) if $do_scan;
+        $watch->watch($sig) while ($watch);
 }
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 2b9f032c..fac54559 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -1,19 +1,70 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# xcpdb: Xapian copy database, a wrapper around Xapian's copydatabase(1)
+use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::InboxWritable;
-use PublicInbox::Xapcmd;
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-xcpdb [options] <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
+
+  upgrade or reshard Xapian DB(s) used by public-inbox
+
+options:
+
+  --compact | -c      run public-inbox-compact(1) after indexing
+  --all               copy all configured inboxes
+  --reshard=NUM       change the number of shards
+  --jobs=NUM          limit parallelism to NUM jobs
+  --verbose | -v      increase verbosity (may be repeated)
+  --sequential-shard  copy+index Xapian shards sequentially (for slow HDD)
+
+index options (see public-inbox-index(1) man page for full description):
+
+  --no-fsync          speed up indexing, risk corruption on power outage
+  --batch-size=BYTES  flush changes to OS after a given number of bytes
+  --max-size=BYTES    do not index messages larger than the given size
+
+See public-inbox-xcpdb(1) man page for full documentation.
+EOF
+my $opt = { quiet => -1, compact => 0, fsync => 1,
+        -eidx_ok => 1, -cidx_ok => 1 };
+GetOptions($opt, qw(
+        fsync|sync! compact|c reshard|R=i
+        max_size|max-size=s batch_size|batch-size=s
+        sequential-shard|seq-shard
+        jobs|j=i quiet|q verbose|v
+        blocksize|b=s no-full|n fuller|F
+        all C=s@ help|h)) or die $help;
+if ($opt->{help}) { print $help; exit 0 };
+
 use PublicInbox::Admin;
 PublicInbox::Admin::require_or_die('-search');
-my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n";
-my $opt = {};
-my @opt = (qw(compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT);
-GetOptions($opt, @opt) or die "bad command-line args\n$usage";
-my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage;
-foreach (@ibxs) {
-        my $ibx = PublicInbox::InboxWritable->new($_);
-        # we rely on --no-renumber to keep docids synched to NNTP
+PublicInbox::Admin::do_chdir(delete $opt->{C});
+
+require PublicInbox::Config;
+my $cfg = PublicInbox::Config->new;
+my ($ibxs, $eidxs, $cidxs) =
+        PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
+my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg);
+
+# we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't
+# know sizes, only doccounts
+$opt->{-idx_env} = $idx_env;
+
+if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) {
+        warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n";
+        $opt->{jobs} = 0;
+}
+
+require PublicInbox::InboxWritable;
+require PublicInbox::Xapcmd;
+# we rely on --no-renumber to keep docids synched for NNTP(artnum) + IMAP(UID)
+for my $ibx (@$ibxs) {
+        $ibx = PublicInbox::InboxWritable->new($ibx);
         PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt);
 }
+
+for my $ibxish (@$eidxs, @$cidxs) {
+        my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+        PublicInbox::Xapcmd::run($ibxish, 'cpdb', $opt);
+}
diff --git a/script/public-inbox.cgi b/script/public-inbox.cgi
index c766483a..3a430d5b 100755
--- a/script/public-inbox.cgi
+++ b/script/public-inbox.cgi
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ or later <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Enables using PublicInbox::WWW as a CGI script
@@ -13,14 +13,7 @@ BEGIN {
         PublicInbox::WWW->preload if $ENV{MOD_PERL};
 }
 my $www = PublicInbox::WWW->new;
-my $have_deflater = eval { require Plack::Middleware::Deflater; 1 };
 my $app = builder {
-        if ($have_deflater) {
-                enable 'Deflater',
-                        content_type => [ 'text/html', 'text/plain',
-                                        'application/atom+xml' ];
-        }
-
         # Enable to ensure redirects and Atom feed URLs are generated
         # properly when running behind a reverse proxy server which
         # sets the X-Forwarded-Proto request header.