about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Address.pm20
-rw-r--r--lib/PublicInbox/AddressPP.pm33
-rw-r--r--lib/PublicInbox/Admin.pm265
-rw-r--r--lib/PublicInbox/AdminEdit.pm11
-rw-r--r--lib/PublicInbox/AltId.pm2
-rw-r--r--lib/PublicInbox/Aspawn.pm34
-rw-r--r--lib/PublicInbox/AutoReap.pm33
-rw-r--r--lib/PublicInbox/Cgit.pm63
-rw-r--r--lib/PublicInbox/CidxComm.pm28
-rw-r--r--lib/PublicInbox/CidxLogP.pm28
-rw-r--r--lib/PublicInbox/CidxXapHelperAux.pm48
-rw-r--r--lib/PublicInbox/CmdIPC4.pm56
-rw-r--r--lib/PublicInbox/CodeSearch.pm372
-rw-r--r--lib/PublicInbox/CodeSearchIdx.pm1391
-rw-r--r--lib/PublicInbox/Compat.pm24
-rw-r--r--lib/PublicInbox/CompressNoop.pm4
-rw-r--r--lib/PublicInbox/Config.pm525
-rw-r--r--lib/PublicInbox/ConfigIter.pm9
-rw-r--r--lib/PublicInbox/ContentDigestDbg.pm22
-rw-r--r--lib/PublicInbox/ContentHash.pm60
-rw-r--r--lib/PublicInbox/DS.pm699
-rw-r--r--lib/PublicInbox/DSKQXS.pm78
-rw-r--r--lib/PublicInbox/DSPoll.pm65
-rw-r--r--lib/PublicInbox/DSdeflate.pm (renamed from lib/PublicInbox/NNTPdeflate.pm)23
-rw-r--r--lib/PublicInbox/Daemon.pm733
-rw-r--r--lib/PublicInbox/DirIdle.pm77
-rw-r--r--lib/PublicInbox/DummyInbox.pm8
-rw-r--r--lib/PublicInbox/EOFpipe.pm17
-rw-r--r--lib/PublicInbox/Emergency.pm85
-rw-r--r--lib/PublicInbox/Eml.pm100
-rw-r--r--lib/PublicInbox/EmlContentFoo.pm2
-rw-r--r--lib/PublicInbox/Epoll.pm26
-rw-r--r--lib/PublicInbox/ExtMsg.pm73
-rw-r--r--lib/PublicInbox/ExtSearch.pm134
-rw-r--r--lib/PublicInbox/ExtSearchIdx.pm1429
-rw-r--r--lib/PublicInbox/FakeImport.pm26
-rw-r--r--lib/PublicInbox/FakeInotify.pm183
-rw-r--r--lib/PublicInbox/Feed.pm52
-rw-r--r--lib/PublicInbox/Fetch.pm225
-rw-r--r--lib/PublicInbox/Filter/Base.pm2
-rw-r--r--lib/PublicInbox/Filter/Gmane.pm2
-rw-r--r--lib/PublicInbox/Filter/Mirror.pm2
-rw-r--r--lib/PublicInbox/Filter/RubyLang.pm31
-rw-r--r--lib/PublicInbox/Filter/SubjectTag.pm2
-rw-r--r--lib/PublicInbox/Filter/Vger.pm4
-rw-r--r--lib/PublicInbox/Gcf2.pm141
-rw-r--r--lib/PublicInbox/Gcf2Client.pm77
-rw-r--r--lib/PublicInbox/GetlineBody.pm46
-rw-r--r--lib/PublicInbox/GetlineResponse.pm40
-rw-r--r--lib/PublicInbox/Git.pm702
-rw-r--r--lib/PublicInbox/GitAsyncCat.pm110
-rw-r--r--lib/PublicInbox/GitCredential.pm49
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm84
-rw-r--r--lib/PublicInbox/GzipFilter.pm155
-rw-r--r--lib/PublicInbox/HTTP.pm177
-rw-r--r--lib/PublicInbox/HTTPD.pm79
-rw-r--r--lib/PublicInbox/HTTPD/Async.pm110
-rw-r--r--lib/PublicInbox/HlMod.pm40
-rw-r--r--lib/PublicInbox/Hval.pm54
-rw-r--r--lib/PublicInbox/IMAP.pm328
-rw-r--r--lib/PublicInbox/IMAPClient.pm13
-rw-r--r--lib/PublicInbox/IMAPD.pm147
-rw-r--r--lib/PublicInbox/IMAPTracker.pm11
-rw-r--r--lib/PublicInbox/IMAPdeflate.pm126
-rw-r--r--lib/PublicInbox/IMAPsearchqp.pm16
-rw-r--r--lib/PublicInbox/IO.pm152
-rw-r--r--lib/PublicInbox/IPC.pm452
-rw-r--r--lib/PublicInbox/IdxStack.pm36
-rw-r--r--lib/PublicInbox/Import.pm311
-rw-r--r--lib/PublicInbox/In2Tie.pm4
-rw-r--r--lib/PublicInbox/In3Event.pm24
-rw-r--r--lib/PublicInbox/In3Watch.pm20
-rw-r--r--lib/PublicInbox/Inbox.pm450
-rw-r--r--lib/PublicInbox/InboxIdle.pm60
-rw-r--r--lib/PublicInbox/InboxWritable.pm222
-rw-r--r--lib/PublicInbox/Inotify.pm47
-rw-r--r--lib/PublicInbox/Inotify3.pm115
-rw-r--r--lib/PublicInbox/InputPipe.pm52
-rw-r--r--lib/PublicInbox/Isearch.pm141
-rw-r--r--lib/PublicInbox/KQNotify.pm84
-rw-r--r--lib/PublicInbox/LEI.pm1605
-rw-r--r--lib/PublicInbox/LI2Wrap.pm20
-rw-r--r--lib/PublicInbox/LeiALE.pm106
-rw-r--r--lib/PublicInbox/LeiAddExternal.pm72
-rw-r--r--lib/PublicInbox/LeiAddWatch.pm42
-rw-r--r--lib/PublicInbox/LeiAuth.pm72
-rw-r--r--lib/PublicInbox/LeiBlob.pm179
-rw-r--r--lib/PublicInbox/LeiConfig.pm55
-rw-r--r--lib/PublicInbox/LeiConvert.pm87
-rw-r--r--lib/PublicInbox/LeiCurl.pm85
-rw-r--r--lib/PublicInbox/LeiDedupe.pm137
-rw-r--r--lib/PublicInbox/LeiEditSearch.pm76
-rw-r--r--lib/PublicInbox/LeiExportKw.pm147
-rw-r--r--lib/PublicInbox/LeiExternal.pm119
-rw-r--r--lib/PublicInbox/LeiFinmsg.pm22
-rw-r--r--lib/PublicInbox/LeiForgetExternal.pm40
-rw-r--r--lib/PublicInbox/LeiForgetMailSync.pm26
-rw-r--r--lib/PublicInbox/LeiForgetSearch.pm79
-rw-r--r--lib/PublicInbox/LeiHelp.pm103
-rw-r--r--lib/PublicInbox/LeiImport.pm163
-rw-r--r--lib/PublicInbox/LeiImportKw.pm53
-rw-r--r--lib/PublicInbox/LeiIndex.pm46
-rw-r--r--lib/PublicInbox/LeiInit.pm40
-rw-r--r--lib/PublicInbox/LeiInput.pm544
-rw-r--r--lib/PublicInbox/LeiInspect.pm293
-rw-r--r--lib/PublicInbox/LeiLcat.pm160
-rw-r--r--lib/PublicInbox/LeiLsExternal.pm34
-rw-r--r--lib/PublicInbox/LeiLsLabel.pm17
-rw-r--r--lib/PublicInbox/LeiLsMailSource.pm126
-rw-r--r--lib/PublicInbox/LeiLsMailSync.pm31
-rw-r--r--lib/PublicInbox/LeiLsSearch.pm109
-rw-r--r--lib/PublicInbox/LeiLsWatch.pm15
-rw-r--r--lib/PublicInbox/LeiMailDiff.pm44
-rw-r--r--lib/PublicInbox/LeiMailSync.pm712
-rw-r--r--lib/PublicInbox/LeiMirror.pm1386
-rw-r--r--lib/PublicInbox/LeiNoteEvent.pm133
-rw-r--r--lib/PublicInbox/LeiOverview.pm263
-rw-r--r--lib/PublicInbox/LeiP2q.pm198
-rw-r--r--lib/PublicInbox/LeiPmdir.pm54
-rw-r--r--lib/PublicInbox/LeiQuery.pm241
-rw-r--r--lib/PublicInbox/LeiRediff.pm299
-rw-r--r--lib/PublicInbox/LeiRefreshMailSync.pm109
-rw-r--r--lib/PublicInbox/LeiReindex.pm49
-rw-r--r--lib/PublicInbox/LeiRemote.pm84
-rw-r--r--lib/PublicInbox/LeiRm.pm30
-rw-r--r--lib/PublicInbox/LeiRmWatch.pm31
-rw-r--r--lib/PublicInbox/LeiSavedSearch.pm304
-rw-r--r--lib/PublicInbox/LeiSearch.pm187
-rw-r--r--lib/PublicInbox/LeiSelfSocket.pm37
-rw-r--r--lib/PublicInbox/LeiStore.pm667
-rw-r--r--lib/PublicInbox/LeiStoreErr.pm62
-rw-r--r--lib/PublicInbox/LeiSucks.pm74
-rw-r--r--lib/PublicInbox/LeiTag.pm105
-rw-r--r--lib/PublicInbox/LeiToMail.pm878
-rw-r--r--lib/PublicInbox/LeiUp.pm222
-rw-r--r--lib/PublicInbox/LeiViewText.pm308
-rw-r--r--lib/PublicInbox/LeiWatch.pm12
-rw-r--r--lib/PublicInbox/LeiXSearch.pm660
-rw-r--r--lib/PublicInbox/Limiter.pm50
-rw-r--r--lib/PublicInbox/Linkify.pm29
-rw-r--r--lib/PublicInbox/Listener.pm58
-rw-r--r--lib/PublicInbox/Lock.pm65
-rw-r--r--lib/PublicInbox/MDA.pm6
-rw-r--r--lib/PublicInbox/MHreader.pm104
-rw-r--r--lib/PublicInbox/MID.pm33
-rw-r--r--lib/PublicInbox/MailDiff.pm137
-rw-r--r--lib/PublicInbox/ManifestJsGz.pm154
-rw-r--r--lib/PublicInbox/Mbox.pm249
-rw-r--r--lib/PublicInbox/MboxGz.pm14
-rw-r--r--lib/PublicInbox/MboxLock.pm134
-rw-r--r--lib/PublicInbox/MboxReader.pm188
-rw-r--r--lib/PublicInbox/MdirReader.pm108
-rw-r--r--lib/PublicInbox/MdirSort.pm46
-rw-r--r--lib/PublicInbox/MiscIdx.pm161
-rw-r--r--lib/PublicInbox/MiscSearch.pm154
-rw-r--r--lib/PublicInbox/MsgIter.pm16
-rw-r--r--lib/PublicInbox/MsgTime.pm49
-rw-r--r--lib/PublicInbox/Msgmap.pm145
-rw-r--r--lib/PublicInbox/MultiGit.pm145
-rw-r--r--lib/PublicInbox/NDC_PP.pm34
-rw-r--r--lib/PublicInbox/NNTP.pm771
-rw-r--r--lib/PublicInbox/NNTPD.pm76
-rw-r--r--lib/PublicInbox/NetNNTPSocks.pm36
-rw-r--r--lib/PublicInbox/NetReader.pm896
-rw-r--r--lib/PublicInbox/NetWriter.pm67
-rw-r--r--lib/PublicInbox/NewsWWW.pm44
-rw-r--r--lib/PublicInbox/OnDestroy.pm31
-rw-r--r--lib/PublicInbox/Over.pm118
-rw-r--r--lib/PublicInbox/OverIdx.pm265
-rw-r--r--lib/PublicInbox/POP3.pm428
-rw-r--r--lib/PublicInbox/POP3D.pm277
-rw-r--r--lib/PublicInbox/PktOp.pm72
-rw-r--r--lib/PublicInbox/ProcessPipe.pm43
-rw-r--r--lib/PublicInbox/Qspawn.pm436
-rw-r--r--lib/PublicInbox/Reply.pm22
-rw-r--r--lib/PublicInbox/RepoAtom.pm126
-rw-r--r--lib/PublicInbox/RepoList.pm39
-rw-r--r--lib/PublicInbox/RepoSnapshot.pm85
-rw-r--r--lib/PublicInbox/RepoTree.pm99
-rw-r--r--lib/PublicInbox/SHA.pm67
-rw-r--r--lib/PublicInbox/SaPlugin/ListMirror.pm10
-rw-r--r--lib/PublicInbox/SaPlugin/ListMirror.pod29
-rw-r--r--lib/PublicInbox/Search.pm606
-rw-r--r--lib/PublicInbox/SearchIdx.pm735
-rw-r--r--lib/PublicInbox/SearchIdxShard.pm166
-rw-r--r--lib/PublicInbox/SearchQuery.pm24
-rw-r--r--lib/PublicInbox/SearchThread.pm133
-rw-r--r--lib/PublicInbox/SearchView.pm168
-rw-r--r--lib/PublicInbox/Select.pm43
-rw-r--r--lib/PublicInbox/SharedKV.pm184
-rw-r--r--lib/PublicInbox/Sigfd.pm49
-rw-r--r--lib/PublicInbox/Smsg.pm69
-rw-r--r--lib/PublicInbox/SolverGit.pm180
-rw-r--r--lib/PublicInbox/Spamcheck.pm14
-rw-r--r--lib/PublicInbox/Spamcheck/Spamc.pm23
-rw-r--r--lib/PublicInbox/Spawn.pm469
-rw-r--r--lib/PublicInbox/SpawnPP.pm81
-rw-r--r--lib/PublicInbox/Syscall.pm660
-rw-r--r--lib/PublicInbox/TLS.pm30
-rw-r--r--lib/PublicInbox/TailNotify.pm97
-rw-r--r--lib/PublicInbox/TestCommon.pm851
-rw-r--r--lib/PublicInbox/Tmpfile.pm15
-rw-r--r--lib/PublicInbox/URIimap.pm118
-rw-r--r--lib/PublicInbox/URInntps.pm18
-rw-r--r--lib/PublicInbox/Umask.pm70
-rw-r--r--lib/PublicInbox/Unsubscribe.pm22
-rw-r--r--lib/PublicInbox/UserContent.pm12
-rw-r--r--lib/PublicInbox/V2Writable.pm798
-rw-r--r--lib/PublicInbox/View.pm651
-rw-r--r--lib/PublicInbox/ViewDiff.pm178
-rw-r--r--lib/PublicInbox/ViewVCS.pm662
-rw-r--r--lib/PublicInbox/WQBlocked.pm48
-rw-r--r--lib/PublicInbox/WQWorker.pm33
-rw-r--r--lib/PublicInbox/WWW.pm193
-rw-r--r--lib/PublicInbox/WWW.pod18
-rw-r--r--lib/PublicInbox/Watch.pm940
-rw-r--r--lib/PublicInbox/WwwAltId.pm32
-rw-r--r--lib/PublicInbox/WwwAtomStream.pm48
-rw-r--r--lib/PublicInbox/WwwAttach.pm51
-rw-r--r--lib/PublicInbox/WwwCoderepo.pm377
-rw-r--r--lib/PublicInbox/WwwHighlight.pm9
-rw-r--r--lib/PublicInbox/WwwListing.pm223
-rw-r--r--lib/PublicInbox/WwwStatic.pm41
-rw-r--r--lib/PublicInbox/WwwStream.pm278
-rw-r--r--lib/PublicInbox/WwwText.pm448
-rw-r--r--lib/PublicInbox/WwwTopics.pm85
-rw-r--r--lib/PublicInbox/XapClient.pm50
-rw-r--r--lib/PublicInbox/XapHelper.pm340
-rw-r--r--lib/PublicInbox/XapHelperCxx.pm141
-rw-r--r--lib/PublicInbox/Xapcmd.pm318
-rw-r--r--lib/PublicInbox/XhcMset.pm51
-rw-r--r--lib/PublicInbox/XhcMsetIterator.pm20
-rw-r--r--lib/PublicInbox/gcf2_libgit2.h2
-rw-r--r--lib/PublicInbox/xap_helper.h1134
-rw-r--r--lib/PublicInbox/xh_cidx.h277
-rw-r--r--lib/PublicInbox/xh_mset.h53
236 files changed, 34032 insertions, 8053 deletions
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm
index f413c2f6..3a59945c 100644
--- a/lib/PublicInbox/Address.pm
+++ b/lib/PublicInbox/Address.pm
@@ -1,8 +1,9 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Address;
-use strict;
-use warnings;
+use v5.12;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(pairs);
 
 sub xs_emails {
         grep { defined } map { $_->address() } parse_email_addresses($_[0])
@@ -17,17 +18,30 @@ sub xs_names {
         } parse_email_addresses($_[0]);
 }
 
+sub xs_pairs { # for JMAP, RFC 8621 section 4.1.2.3
+        [ map { # LHS (name) may be undef if there's an address
+                my @p = ($_->phrase // $_->comment, $_->address);
+                # show original if totally bogus:
+                $p[0] = $_->original unless defined $p[1];
+                \@p;
+        } parse_email_addresses($_[0]) ];
+}
+
 eval {
         require Email::Address::XS;
         Email::Address::XS->import(qw(parse_email_addresses));
         *emails = \&xs_emails;
         *names = \&xs_names;
+        *pairs = \&xs_pairs;
+        *objects = sub { Email::Address::XS->parse(@_) };
 };
 
 if ($@) {
         require PublicInbox::AddressPP;
         *emails = \&PublicInbox::AddressPP::emails;
         *names = \&PublicInbox::AddressPP::names;
+        *pairs = \&PublicInbox::AddressPP::pairs;
+        *objects = \&PublicInbox::AddressPP::objects;
 }
 
 1;
diff --git a/lib/PublicInbox/AddressPP.pm b/lib/PublicInbox/AddressPP.pm
index 74a82843..65ba36a9 100644
--- a/lib/PublicInbox/AddressPP.pm
+++ b/lib/PublicInbox/AddressPP.pm
@@ -1,7 +1,8 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::AddressPP;
 use strict;
+use v5.10.1; # TODO check regexps for unicode_strings compat
 
 # very loose regexes, here.  We don't need RFC-compliance,
 # just enough to make thing sanely displayable and pass to git
@@ -13,6 +14,7 @@ sub emails {
 }
 
 sub names {
+        # split by address and post-address comment
         my @p = split(/<?([^@<>]+)\@[\w\.\-]+>?\s*(\(.*?\))?(?:,\s*|\z)/,
                         $_[0]);
         my @ret;
@@ -35,4 +37,33 @@ sub names {
         @ret;
 }
 
+sub pairs { # for JMAP, RFC 8621 section 4.1.2.3
+        my ($s) = @_;
+        [ map {
+                my $addr = $_;
+                if ($s =~ s/\A\s*(.*?)\s*<\Q$addr\E>\s*(.*?)\s*(?:,|\z)// ||
+                    $s =~ s/\A\s*(.*?)\s*\Q$addr\E\s*(.*?)\s*(?:,|\z)//) {
+                        my ($phrase, $comment) = ($1, $2);
+                        $phrase =~ tr/\r\n\t / /s;
+                        $phrase =~ s/\A['"\s]*//;
+                        $phrase =~ s/['"\s]*\z//;
+                        $phrase =~ s/\s*<*\s*\z//;
+                        $phrase = undef if $phrase !~ /\S/;
+                        $comment = ($comment =~ /\((.*?)\)/) ? $1 : undef;
+                        [ $phrase // $comment, $addr ]
+                } else {
+                        ();
+                }
+        } emails($s) ];
+}
+
+# Mail::Address->name is inconsistent with Email::Address::XS, so we're
+# doing our own thing, here:
+sub objects { map { bless $_, __PACKAGE__ } @{pairs($_[0])} }
+
+# OO API for objects() results
+sub user { (split(/@/, $_[0]->[1]))[0] }
+sub host { (split(/@/, $_[0]->[1]))[1] }
+sub name { $_[0]->[0] // user($_[0]) }
+
 1;
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index fb88e621..a1b1fc07 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -1,20 +1,21 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # common stuff for administrative command-line tools
 # Unstable internal API
 package PublicInbox::Admin;
-use strict;
+use v5.12;
 use parent qw(Exporter);
-use Cwd qw(abs_path);
-use POSIX ();
-our @EXPORT_OK = qw(resolve_repo_dir setup_signals);
+our @EXPORT_OK = qw(setup_signals fmt_localtime);
 use PublicInbox::Config;
 use PublicInbox::Inbox;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spawn qw(run_qx);
+use PublicInbox::Eml;
+*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
 
 sub setup_signals {
         my ($cb, $arg) = @_; # optional
+        require POSIX;
 
         # we call exit() here instead of _exit() so DESTROY methods
         # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
@@ -27,48 +28,68 @@ sub setup_signals {
         };
 }
 
-sub resolve_repo_dir {
-        my ($cd, $ver) = @_;
-        my $prefix = defined $cd ? $cd : './';
-        if (-d $prefix && -f "$prefix/inbox.lock") { # v2
-                $$ver = 2 if $ver;
-                return abs_path($prefix);
+sub resolve_any_idxdir ($$) {
+        my ($cd, $lock_bn) = @_;
+        my $try = $cd // '.';
+        my $root_dev_ino;
+        while (1) {
+                if (-f "$try/$lock_bn") { # inbox.lock, ei.lock, cidx.lock
+                        return rel2abs_collapsed($try);
+                } elsif (-d $try) {
+                        my @try = stat _;
+                        $root_dev_ino //= do {
+                                my @root = stat('/') or die "stat /: $!\n";
+                                "$root[0]\0$root[1]";
+                        };
+                        return undef if "$try[0]\0$try[1]" eq $root_dev_ino;
+                        $try .= '/..'; # continue, cd up
+                } else {
+                        die "`$try' is not a directory\n";
+                }
         }
-        my $cmd = [ qw(git rev-parse --git-dir) ];
-        my $fh = popen_rd($cmd, undef, {-C => $cd});
-        my $dir = do { local $/; <$fh> };
-        close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
-        chomp $dir;
-        $$ver = 1 if $ver;
-        return abs_path($cd) if ($dir eq '.' && defined $cd);
-        abs_path($dir);
 }
 
-# for unconfigured inboxes
-sub detect_indexlevel ($) {
-        my ($ibx) = @_;
+sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
 
-        my $over = $ibx->over;
-        my $srch = $ibx->search;
-        delete @$ibx{qw(over search)}; # don't leave open FDs lying around
+sub resolve_inboxdir {
+        my ($cd, $ver) = @_;
+        my $dir;
+        if (defined($dir = resolve_any_idxdir($cd, 'inbox.lock'))) { # try v2
+                $$ver = 2 if $ver;
+        } elsif (defined($dir = resolve_git_dir($cd))) { # try v1
+                $$ver = 1 if $ver;
+        } # else: not an inbox at all
+        $dir;
+}
 
-        # brand new or never before indexed inboxes default to full
-        return 'full' unless $over;
-        my $l = 'basic';
-        return $l unless $srch;
-        if (my $xdb = $srch->xdb) {
-                $l = 'full';
-                my $m = $xdb->get_metadata('indexlevel');
-                if ($m eq 'medium') {
-                        $l = $m;
-                } elsif ($m ne '') {
-                        warn <<"";
-$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m
+sub valid_pwd {
+        my $pwd = $ENV{PWD} // return;
+        my @st_pwd = stat $pwd or return;
+        my @st_cwd = stat '.' or die "stat(.): $!";
+        "@st_pwd[1,0]" eq "@st_cwd[1,0]" ? $pwd : undef;
+}
 
-                }
-                $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata');
+sub resolve_git_dir {
+        my ($cd) = @_; # cd may be `undef' for cwd
+        # try v1 bare git dirs
+        my $pwd = valid_pwd();
+        my $env;
+        defined($pwd) && substr($cd // '/', 0, 1) ne '/' and
+                $env->{PWD} = "$pwd/$cd";
+        my $cmd = [ qw(git rev-parse --git-dir) ];
+        my $dir = run_qx($cmd, $env, { -C => $cd });
+        die "error in @$cmd (cwd:${\($cd // '.')}): $?\n" if $?;
+        chomp $dir;
+        # --absolute-git-dir requires git v2.13.0+, and we want to
+        # respect symlinks when $ENV{PWD} if $ENV{PWD} ne abs_path('.')
+        # since we store absolute GIT_DIR paths in cindex.
+        if (substr($dir, 0, 1) ne '/') {
+                substr($cd // '/', 0, 1) eq '/' or
+                        $cd = File::Spec->rel2abs($cd, $pwd);
+                $dir = rel2abs_collapsed($dir, $cd);
         }
-        $l;
+        $dir;
 }
 
 sub unconfigured_ibx ($$) {
@@ -78,8 +99,8 @@ sub unconfigured_ibx ($$) {
                 name => $name,
                 address => [ "$name\@example.com" ],
                 inboxdir => $dir,
-                # TODO: consumers may want to warn on this:
-                #-unconfigured => 1,
+                # consumers (-convert) warn on this:
+                -unconfigured => 1,
         });
 }
 
@@ -93,63 +114,100 @@ sub resolve_inboxes ($;$$) {
                 $cfg or die "--all specified, but $cfgfile not readable\n";
                 @$argv and die "--all specified, but directories specified\n";
         }
-
+        my (@old, @ibxs, @eidx, @cidx);
+        if ($opt->{-cidx_ok}) {
+                require PublicInbox::CodeSearchIdx;
+                @$argv = grep {
+                        if (defined(my $d = resolve_cidxdir($_))) {
+                                push @cidx, PublicInbox::CodeSearchIdx->new(
+                                                        $d, $opt);
+                                undef;
+                        } else {
+                                1;
+                        }
+                } @$argv;
+        }
+        if ($opt->{-eidx_ok}) {
+                require PublicInbox::ExtSearchIdx;
+                @$argv = grep {
+                        if (defined(my $ei = resolve_eidxdir($_))) {
+                                $ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
+                                push @eidx, $ei;
+                                undef;
+                        } else {
+                                1;
+                        }
+                } @$argv;
+        }
         my $min_ver = $opt->{-min_inbox_version} || 0;
-        my (@old, @ibxs);
-        my %dir2ibx;
-        if ($cfg) {
+        # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+        # pathnames are not unique due to symlinks and bind mounts
+        if ($opt->{all}) {
                 $cfg->each_inbox(sub {
                         my ($ibx) = @_;
-                        my $path = abs_path($ibx->{inboxdir});
-                        if (defined($path)) {
-                                $dir2ibx{$path} = $ibx;
+                        if (-e $ibx->{inboxdir}) {
+                                push(@ibxs, $ibx) if $ibx->version >= $min_ver;
                         } else {
-                                warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+                                warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
                         }
                 });
-        }
-        if ($opt->{all}) {
-                my @all = values %dir2ibx;
-                @all = grep { $_->version >= $min_ver } @all;
-                push @ibxs, @all;
+                # TODO: no way to configure cindex in config file, yet
         } else { # directories specified on the command-line
-                my $i = 0;
                 my @dirs = @$argv;
-                push @dirs, '.' unless @dirs;
-                foreach (@dirs) {
-                        my $v;
-                        my $dir = resolve_repo_dir($_, \$v);
-                        if ($v < $min_ver) {
+                push @dirs, '.' if !@dirs && $opt->{-use_cwd};
+                my %s2i; # "st_dev\0st_ino" => array index
+                for (my $i = 0; $i <= $#dirs; $i++) {
+                        my $dir = $dirs[$i];
+                        my @st = stat($dir) or die "stat($dir): $!\n";
+                        $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver));
+                        if ($ver >= $min_ver) {
+                                $s2i{"$st[0]\0$st[1]"} //= $i;
+                        } else {
                                 push @old, $dir;
-                                next;
                         }
-                        my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
-                        $i++;
-                        push @ibxs, $ibx;
                 }
+                my $done = \'done';
+                eval {
+                        $cfg->each_inbox(sub {
+                                my ($ibx) = @_;
+                                return if $ibx->version < $min_ver;
+                                my $dir = $ibx->{inboxdir};
+                                if (my @s = stat $dir) {
+                                        my $i = delete($s2i{"$s[0]\0$s[1]"})
+                                                // return;
+                                        $ibxs[$i] = $ibx;
+                                        die $done if !keys(%s2i);
+                                } else {
+                                        warn "W: $ibx->{name} $dir: $!\n";
+                                }
+                        });
+                };
+                die $@ if $@ && $@ ne $done;
+                for my $i (sort { $a <=> $b } values %s2i) {
+                        $ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+                }
+                @ibxs = grep { defined } @ibxs; # duplicates are undef
         }
         if (@old) {
                 die "-V$min_ver inboxes not supported by $0\n\t",
                     join("\n\t", @old), "\n";
         }
-        @ibxs;
+        ($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+                                                : @ibxs;
 }
 
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
+my @base_mod = ();
 my @over_mod = qw(DBD::SQLite DBI);
 my %mod_groups = (
         -index => [ @base_mod, @over_mod ],
         -base => \@base_mod,
-        -search => [ @base_mod, @over_mod, 'Search::Xapian' ],
+        -search => [ @base_mod, @over_mod, 'Xapian' ],
 );
 
 sub scan_ibx_modules ($$) {
         my ($mods, $ibx) = @_;
         if (!$ibx->{indexlevel} || $ibx->{indexlevel} ne 'basic') {
-                $mods->{'Search::Xapian'} = 1;
+                $mods->{'Xapian'} = 1;
         } else {
                 $mods->{$_} = 1 foreach @over_mod;
         }
@@ -161,10 +219,10 @@ sub check_require {
         while (my $mod = shift @mods) {
                 if (my $groups = $mod_groups{$mod}) {
                         push @mods, @$groups;
-                } elsif ($mod eq 'Search::Xapian') {
+                } elsif ($mod eq 'Xapian') {
                         require PublicInbox::Search;
                         PublicInbox::Search::load_xapian() or
-                                $err->{'Search::Xapian || Xapian'} = $@;
+                                $err->{'Xapian || Search::Xapian'} = $@;
                 } else {
                         eval "require $mod";
                         $err->{$mod} = $@ if $@;
@@ -208,13 +266,19 @@ sub index_terminate {
 
 sub index_inbox {
         my ($ibx, $im, $opt) = @_;
+        require PublicInbox::InboxWritable;
         my $jobs = delete $opt->{jobs} if $opt;
         if (my $pr = $opt->{-progress}) {
                 $pr->("indexing $ibx->{inboxdir} ...\n");
         }
-        local %SIG = %SIG;
+        local @SIG{keys %SIG} = values %SIG;
         setup_signals(\&index_terminate, $ibx);
-        if (ref($ibx) && $ibx->version == 2) {
+        my $idx = { current_info => $ibx->{inboxdir} };
+        local $SIG{__WARN__} = sub {
+                return if PublicInbox::Eml::warn_ignore(@_);
+                warn($idx->{current_info}, ': ', @_);
+        };
+        if ($ibx->version == 2) {
                 eval { require PublicInbox::V2Writable };
                 die "v2 requirements not met: $@\n" if $@;
                 $ibx->{-creat_opt}->{nproc} = $jobs;
@@ -225,25 +289,23 @@ sub index_inbox {
                         } else {
                                 my $n = $v2w->{shards};
                                 if ($jobs < ($n + 1) && !$opt->{reshard}) {
-                                        warn
-"Unable to respect --jobs=$jobs on index, inbox was created with $n shards\n";
+                                        warn <<EOM;
+Unable to respect --jobs=$jobs on index, inbox was created with $n shards
+EOM
                                 }
                         }
                 }
-                my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
-                local $SIG{__WARN__} = sub {
-                        $warn_cb->($v2w->{current_info}, ': ', @_);
-                };
-                $v2w->index_sync($opt);
+                $idx = $v2w;
         } else {
                 require PublicInbox::SearchIdx;
-                my $s = PublicInbox::SearchIdx->new($ibx, 1);
-                $s->index_sync($opt);
+                $idx = PublicInbox::SearchIdx->new($ibx, 1);
         }
+        $idx->index_sync($opt);
+        $idx->{nidx} // 0; # returns number processed
 }
 
-sub progress_prepare ($) {
-        my ($opt) = @_;
+sub progress_prepare ($;$) {
+        my ($opt, $dst) = @_;
 
         # public-inbox-index defaults to quiet, -xcpdb and -compact do not
         if (defined($opt->{quiet}) && $opt->{quiet} < 0) {
@@ -255,7 +317,8 @@ sub progress_prepare ($) {
                 $opt->{1} = $null; # suitable for spawn() redirect
         } else {
                 $opt->{verbose} ||= 1;
-                $opt->{-progress} = sub { print STDERR @_ };
+                $dst //= *STDERR{GLOB};
+                $opt->{-progress} = sub { print $dst '# ', @_ };
         }
 }
 
@@ -295,15 +358,35 @@ sub index_prepare ($$) {
         $opt->{batch_size} and
                 $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' };
 
-        for my $k (qw(sequential_shard)) {
+        for my $k (qw(sequential-shard)) {
                 my $git_key = "publicInbox.index".ucfirst($k);
-                $git_key =~ s/_([a-z])/\U$1/g;
+                $git_key =~ s/-([a-z])/\U$1/g;
                 defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next;
                 defined(my $v = $cfg->git_bool($s))
                                         or die "`$git_key=$s' not boolean\n";
                 $opt->{$k} = $v;
         }
+        for my $k (qw(since until)) {
+                my $v = $opt->{$k} // next;
+                $opt->{reindex} or die "--$k=$v requires --reindex\n";
+        }
         $env;
 }
 
+sub do_chdir ($) {
+        my $chdir = $_[0] // return;
+        for my $d (@$chdir) {
+                next if $d eq ''; # same as git(1)
+                chdir $d or die "cd $d: $!";
+        }
+}
+
+sub fmt_localtime ($) {
+        require POSIX;
+        my @lt = localtime $_[0];
+        my (undef, $M, $H, $d, $m, $Y) = @lt;
+        sprintf('%u-%02u-%02u % 2u:%02u ', $Y + 1900, $m + 1, $d, $H, $M)
+                .POSIX::strftime('%z', @lt);
+}
+
 1;
diff --git a/lib/PublicInbox/AdminEdit.pm b/lib/PublicInbox/AdminEdit.pm
index 4448dcc2..654141a7 100644
--- a/lib/PublicInbox/AdminEdit.pm
+++ b/lib/PublicInbox/AdminEdit.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # common stuff between -edit, -purge (and maybe -learn in the future)
@@ -19,16 +19,17 @@ sub check_editable ($) {
                 }
 
                 # Undefined indexlevel, so `full'...
-                # Search::Xapian exists and the DB can be read, at least, fine
+                # Xapian exists and the DB can be read, at least, fine
                 $ibx->search and next;
 
                 # it's possible for a Xapian directory to exist,
-                # but Search::Xapian to go missing/broken.
+                # but Xapian to go missing/broken.
                 # Make sure it's purged in that case:
                 $ibx->over or die "no over.sqlite3 in $ibx->{inboxdir}\n";
 
-                # $ibx->{search} is populated by $ibx->over call
-                my $xdir_ro = $ibx->{search}->xdir(1);
+                require PublicInbox::Search;
+                my $xdir_ro = PublicInbox::Search->new($ibx)->xdir(1);
+
                 my $nshard = 0;
                 foreach my $shard (<$xdir_ro/*>) {
                         if (-d $shard && $shard =~ m!/[0-9]+\z!) {
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6d16242a..80757ceb 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Used for giving serial numbers to messages.  This can be tied to
diff --git a/lib/PublicInbox/Aspawn.pm b/lib/PublicInbox/Aspawn.pm
new file mode 100644
index 00000000..49f8651a
--- /dev/null
+++ b/lib/PublicInbox/Aspawn.pm
@@ -0,0 +1,34 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# async system()/qx() which takes callback
+package PublicInbox::Aspawn;
+use v5.12;
+use parent qw(Exporter);
+use PublicInbox::DS qw(awaitpid);
+use PublicInbox::Spawn qw(spawn);
+our @EXPORT_OK = qw(run_await);
+
+sub _await_cb { # awaitpid cb
+        my ($pid, $cmd, $env, $opt, $cb, @args) = @_;
+        PublicInbox::Spawn::read_out_err($opt);
+        if ($? && !$opt->{quiet}) {
+                my ($status, $sig) = ($? >> 8, $? & 127);
+                my $msg = '';
+                $msg .= " (-C=$opt->{-C})" if defined $opt->{-C};
+                $msg .= " status=$status" if $status;
+                $msg .= " signal=$sig" if $sig;
+                warn "E: @$cmd", $msg, "\n";
+        }
+        $cb->($pid, $cmd, $env, $opt, @args) if $cb;
+}
+
+sub run_await {
+        my ($cmd, $env, $opt, $cb, @args) = @_;
+        $opt->{1} //= \(my $out);
+        my $pid = spawn($cmd, $env, $opt);
+        awaitpid($pid, \&_await_cb, $cmd, $env, $opt, $cb, @args);
+        awaitpid($pid); # synchronous for non-$in_loop
+}
+
+1;
diff --git a/lib/PublicInbox/AutoReap.pm b/lib/PublicInbox/AutoReap.pm
new file mode 100644
index 00000000..ae4984b8
--- /dev/null
+++ b/lib/PublicInbox/AutoReap.pm
@@ -0,0 +1,33 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# automatically kill + reap children when this goes out-of-scope
+package PublicInbox::AutoReap;
+use v5.12;
+
+sub new {
+        my (undef, $pid, $cb) = @_;
+        bless { pid => $pid, cb => $cb, owner => $$ }, __PACKAGE__
+}
+
+sub kill {
+        my ($self, $sig) = @_;
+        CORE::kill($sig // 'TERM', $self->{pid});
+}
+
+sub join {
+        my ($self, $sig) = @_;
+        my $pid = delete $self->{pid} or return;
+        $self->{cb}->() if defined $self->{cb};
+        CORE::kill($sig, $pid) if defined $sig;
+        my $r = waitpid($pid, 0);
+        $r == $pid or die "BUG? waitpid($pid) => $r (\$?=$? \$!=$!)";
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        return if $self->{owner} != $$;
+        $self->join('TERM');
+}
+
+1;
diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index fb0d0e60..78fc9ca0 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # wrapper for cgit(1) and git-http-backend(1) for browsing and
@@ -6,7 +6,8 @@
 # directive to be set in the public-inbox config file.
 
 package PublicInbox::Cgit;
-use strict;
+use v5.12;
+use parent qw(PublicInbox::WwwCoderepo);
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::Git;
 # not bothering with Exporter for a one-off
@@ -16,9 +17,9 @@ use PublicInbox::Qspawn;
 use PublicInbox::WwwStatic qw(r);
 
 sub locate_cgit ($) {
-        my ($pi_config) = @_;
-        my $cgit_bin = $pi_config->{'publicinbox.cgitbin'};
-        my $cgit_data = $pi_config->{'publicinbox.cgitdata'};
+        my ($pi_cfg) = @_;
+        my $cgit_bin = $pi_cfg->{'publicinbox.cgitbin'};
+        my $cgit_data = $pi_cfg->{'publicinbox.cgitdata'};
 
         # /var/www/htdocs/cgit is the default install path from cgit.git
         # /usr/{lib,share}/cgit is where Debian puts cgit
@@ -40,10 +41,9 @@ sub locate_cgit ($) {
                 if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) {
                         unshift @dirs, $1 if -d $1;
                 }
-                foreach my $d (@dirs) {
-                        my $f = "$d/cgit.css";
-                        next unless -f $f;
-                        $cgit_data = $d;
+                for (@dirs) {
+                        next unless -f "$_/cgit.css";
+                        $cgit_data = $_;
                         last;
                 }
         }
@@ -51,30 +51,20 @@ sub locate_cgit ($) {
 }
 
 sub new {
-        my ($class, $pi_config) = @_;
-        my ($cgit_bin, $cgit_data) = locate_cgit($pi_config);
-
+        my ($class, $pi_cfg) = @_;
+        my ($cgit_bin, $cgit_data) = locate_cgit($pi_cfg);
+        $cgit_bin // return; # fall back in WWW->cgit
         my $self = bless {
                 cmd => [ $cgit_bin ],
                 cgit_data => $cgit_data,
-                pi_config => $pi_config,
+                pi_cfg => $pi_cfg,
+                cgitrc => $pi_cfg->{'publicinbox.cgitrc'} // $ENV{CGIT_CONFIG},
         }, $class;
 
-        $pi_config->fill_all; # fill in -code_repos mapped to inboxes
-
         # some cgit repos may not be mapped to inboxes, so ensure those exist:
-        my $code_repos = $pi_config->{-code_repos};
-        foreach my $k (keys %$pi_config) {
-                $k =~ /\Acoderepo\.(.+)\.dir\z/ or next;
-                my $dir = $pi_config->{$k};
-                $code_repos->{$1} ||= PublicInbox::Git->new($dir);
-        }
-        while (my ($nick, $repo) = each %$code_repos) {
-                $self->{"\0$nick"} = $repo;
-        }
-        my $cgit_static = $pi_config->{-cgit_static};
-        my $static = join('|', map { quotemeta $_ } keys %$cgit_static);
-        $self->{static} = qr/\A($static)\z/;
+        PublicInbox::WwwCoderepo::prepare_coderepos($self);
+        my $s = join('|', map { quotemeta } keys %{$pi_cfg->{-cgit_static}});
+        $self->{static} = qr/\A($s)\z/;
         $self;
 }
 
@@ -95,14 +85,14 @@ my @PASS_ENV = qw(
 my $parse_cgi_headers = \&PublicInbox::GitHTTPBackend::parse_cgi_headers;
 
 sub call {
-        my ($self, $env) = @_;
+        my ($self, $env, $ctx) = @_; # $ctx is optional, used by WWW
         my $path_info = $env->{PATH_INFO};
         my $cgit_data;
 
         # handle requests without spawning cgit iff possible:
         if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
                 my ($nick, $path) = ($1, $2);
-                if (my PublicInbox::Git $git = $self->{"\0$nick"}) {
+                if (my $git = $self->{pi_cfg}->get_coderepo($nick)) {
                         return serve($env, $git, $path);
                 }
         } elsif ($path_info =~ m!$self->{static}! &&
@@ -111,17 +101,14 @@ sub call {
                 return PublicInbox::WwwStatic::response($env, [], $f);
         }
 
-        my $cgi_env = { PATH_INFO => $path_info };
-        foreach (@PASS_ENV) {
-                defined(my $v = $env->{$_}) or next;
-                $cgi_env->{$_} = $v;
-        }
-        $cgi_env->{'HTTPS'} = 'on' if $env->{'psgi.url_scheme'} eq 'https';
+        my %cgi_env = (CGIT_CONFIG => $self->{cgitrc}, PATH_INFO => $path_info);
+        @cgi_env{@PASS_ENV} = @$env{@PASS_ENV}; # spawn ignores undef vals
+        $cgi_env{HTTPS} = 'on' if $env->{'psgi.url_scheme'} eq 'https';
 
         my $rdr = input_prepare($env) or return r(500);
-        my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr);
-        my $limiter = $self->{pi_config}->limiter('-cgit');
-        $qsp->psgi_return($env, $limiter, $parse_cgi_headers);
+        my $qsp = PublicInbox::Qspawn->new($self->{cmd}, \%cgi_env, $rdr);
+        my $limiter = $self->{pi_cfg}->limiter('-cgit');
+        $qsp->psgi_yield($env, $limiter, $parse_cgi_headers, $ctx);
 }
 
 1;
diff --git a/lib/PublicInbox/CidxComm.pm b/lib/PublicInbox/CidxComm.pm
new file mode 100644
index 00000000..80a235e9
--- /dev/null
+++ b/lib/PublicInbox/CidxComm.pm
@@ -0,0 +1,28 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Waits for initial comm(1) output for PublicInbox::CodeSearchIdx.
+# The initial output from `comm' can take a while to generate because
+# it needs to wait on:
+# `git cat-file --batch-all-objects --batch-check --unordered | sort'
+# We still rely on blocking reads, here, since comm should be fast once
+# it's seeing input.  (`--unordered | sort' is intentional for HDDs)
+package PublicInbox::CidxComm;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub new {
+        my ($cls, $rd, $cidx, $drs) = @_;
+        my $self = bless { cidx => $cidx, drs => $drs }, $cls;
+        $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $rd = $self->{sock} // return warn('BUG?: no {sock}');
+        $self->close; # EPOLL_CTL_DEL
+        delete($self->{cidx})->cidx_read_comm($rd, delete $self->{drs});
+}
+
+1;
diff --git a/lib/PublicInbox/CidxLogP.pm b/lib/PublicInbox/CidxLogP.pm
new file mode 100644
index 00000000..5ea675bf
--- /dev/null
+++ b/lib/PublicInbox/CidxLogP.pm
@@ -0,0 +1,28 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Waits for initial `git log -p' output for PublicInbox::CodeSearchIdx.
+# The initial output from `git log -p' can take a while to generate,
+# CodeSearchIdx can process prune work while it's happening.  Once
+# `git log -p' starts generating output, it should be able to keep
+# up with Xapian indexing, so we still rely on blocking reads to simplify
+# cidx_read_log_p
+package PublicInbox::CidxLogP;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub new {
+        my ($cls, $rd, $cidx, $git, $roots) = @_;
+        my $self = bless { cidx => $cidx, git => $git, roots => $roots }, $cls;
+        $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $rd = $self->{sock} // return warn('BUG?: no {sock}');
+        $self->close; # EPOLL_CTL_DEL
+        delete($self->{cidx})->cidx_read_log_p($self, $rd);
+}
+
+1;
diff --git a/lib/PublicInbox/CidxXapHelperAux.pm b/lib/PublicInbox/CidxXapHelperAux.pm
new file mode 100644
index 00000000..91c9b021
--- /dev/null
+++ b/lib/PublicInbox/CidxXapHelperAux.pm
@@ -0,0 +1,48 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Intended for PublicInbox::DS::event_loop for -cindex --associate,
+# this reports auxilliary status while dumping
+package PublicInbox::CidxXapHelperAux;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN);
+
+# rpipe connects to req->fp[1] in xap_helper.h
+sub new {
+        my ($cls, $rpipe, $cidx, $pfx) = @_;
+        my $self = bless { cidx => $cidx, pfx => $pfx }, $cls;
+        $rpipe->blocking(0);
+        $self->SUPER::new($rpipe, EPOLLIN);
+}
+
+sub event_step {
+        my ($self) = @_; # xap_helper.h is line-buffered
+        my $buf = delete($self->{buf}) // '';
+        my $n = sysread($self->{sock}, $buf, 65536, length($buf));
+        if (!defined($n)) {
+                return if $!{EAGAIN};
+                die "sysread: $!";
+        }
+        my $pfx = $self->{pfx};
+        if ($n == 0) {
+                warn "BUG? $pfx buf=$buf" if $buf ne '';
+                if (delete $self->{cidx}->{PENDING}->{$pfx}) {
+                        warn "BUG? $pfx did not get mset.size";
+                        $self->{cidx}->index_next;
+                }
+                return $self->close;
+        }
+        my @lines = split(/^/m, $buf);
+        $self->{buf} = pop @lines if substr($lines[-1], -1) ne "\n";
+        for my $l (@lines) {
+                if ($l =~ /\Amset\.size=[0-9]+ nr_out=[0-9]+\n\z/) {
+                        delete $self->{cidx}->{PENDING}->{$pfx};
+                        $self->{cidx}->index_next;
+                }
+                chomp $l;
+                $self->{cidx}->progress("$pfx $l");
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/CmdIPC4.pm b/lib/PublicInbox/CmdIPC4.pm
new file mode 100644
index 00000000..fc77bd03
--- /dev/null
+++ b/lib/PublicInbox/CmdIPC4.pm
@@ -0,0 +1,56 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# callers should use PublicInbox::CmdIPC4->can('send_cmd4') (or recv_cmd4)
+# first choice for script/lei front-end and 2nd choice for lei backend
+# libsocket-msghdr-perl is in Debian but not many other distros as of 2021.
+package PublicInbox::CmdIPC4;
+use v5.12;
+use Socket qw(SOL_SOCKET SCM_RIGHTS);
+
+sub sendmsg_retry ($) {
+        return 1 if $!{EINTR};
+        return unless ($!{ENOMEM} || $!{ENOBUFS} || $!{ETOOMANYREFS});
+        return if --$_[0] < 0;
+        warn "# sleeping on sendmsg: $! ($_[0] tries left)\n";
+        select(undef, undef, undef, 0.1);
+        1;
+}
+
+BEGIN { eval {
+require Socket::MsgHdr; # XS
+no warnings 'once';
+
+# any number of FDs per-sendmsg(2) + buffer
+*send_cmd4 = sub ($$$$;$) { # (sock, fds, buf, flags) = @_;
+        my ($sock, $fds, undef, $flags, $tries) = @_;
+        $tries //= 50;
+        my $mh = Socket::MsgHdr->new(buf => $_[2]);
+        $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds));
+        my $s;
+        do {
+                $s = Socket::MsgHdr::sendmsg($sock, $mh, $flags);
+        } while (!defined($s) && sendmsg_retry($tries));
+        $s;
+};
+
+*recv_cmd4 = sub ($$$) {
+        my ($s, undef, $len) = @_; # $_[1] = destination buffer
+        my $mh = Socket::MsgHdr->new(buflen => $len, controllen => 256);
+        my $r;
+        do {
+                $r = Socket::MsgHdr::recvmsg($s, $mh, 0);
+        } while (!defined($r) && $!{EINTR});
+        if (!defined($r)) {
+                $_[1] = '';
+                return (undef);
+        }
+        $_[1] = $mh->buf;
+        return () if $r == 0;
+        my (undef, undef, $data) = $mh->cmsghdr;
+        defined($data) ? unpack('i' x (length($data) / 4), $data) : ();
+};
+
+} } # /eval /BEGIN
+
+1;
diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
new file mode 100644
index 00000000..e5fa4480
--- /dev/null
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -0,0 +1,372 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# read-only external index for coderepos
+# currently, it only indexes commits and repository metadata
+# (pathname, root commits); not blob contents
+package PublicInbox::CodeSearch;
+use v5.12;
+use parent qw(PublicInbox::Search);
+use PublicInbox::Config;
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
+use PublicInbox::Compat qw(uniqstr);
+use Compress::Zlib qw(uncompress);
+use constant {
+        AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail)
+        CT => 1, # commit time (Unix time stamp, like TS/rt: in mail)
+        CIDX_SCHEMA_VER => 1, # brand new schema for code search
+        # for repos (`Tr'), CT(col=1) is used for the latest tip commit time
+        # in refs/{heads,tags}.  AT(col=0) may be used to store disk usage
+        # in the future, but disk usage calculation is espensive w/ alternates
+};
+our @CODE_NRP;
+our @CODE_VMAP = (
+        [ AT, 'd:' ], # mairix compat
+        [ AT, 'dt:' ], # public-inbox mail compat
+        [ CT, 'ct:' ],
+);
+
+# note: the non-X term prefix allocations are shared with Xapian omega,
+# see xapian-applications/omega/docs/termprefixes.rst
+# bool_pfx_internal:
+#        type => 'T', # 'c' - commit, 'r' - repo GIT_DIR
+#        tags are not indexed, only normal branches (refs/heads/*), not hidden
+#        'P' # (pathname) GIT_DIR # uniq
+#        'G' # (group) root commit (may have multiple roots)
+my %bool_pfx_external = (
+        oid => 'Q', # type:commit - git OID hex (40|64)-byte SHA-(1|256)
+                # type:repo - rel2abs_collapsed(GIT_DIR)
+        parent => 'XP',
+        %PublicInbox::Search::PATCH_BOOL_COMMON,
+);
+
+my %prob_prefix = ( # copied from PublicInbox::Search
+        # do we care about committer? or partial commit OID via Xapian?
+        # o => 'XQ', # 'oid:' (bool) is exact, 'o:' (prob) can do partial
+        %PublicInbox::Search::PATCH_PROB_COMMON,
+
+        # default:
+        '' => 'S A XQUOT XFN ' . $PublicInbox::Search::NON_QUOTED_BODY
+);
+
+sub new {
+        my ($cls, $dir, $cfg) = @_;
+        # can't have a PublicInbox::Config here due to circular refs
+        bless { topdir => $dir, xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
+                -cfg_f => $cfg->{-f} }, $cls;
+}
+
+sub join_data_key ($) { "join:$_[0]->{-cfg_f}" }
+
+sub join_data {
+        my ($self) = @_;
+        my $key = join_data_key($self);
+        my $cur = $self->xdb->get_metadata($key) or return;
+        $cur = eval { PublicInbox::Config::json()->decode(uncompress($cur)) };
+        warn "E: $@ (corrupt metadata in `$key' key?)" if $@;
+        my @m = grep { ref($cur->{$_}) ne 'ARRAY' } qw(ekeys roots ibx2root);
+        if (@m) {
+                warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} missing: @m
+EOM
+                undef;
+        } elsif (@{$cur->{ekeys}} < @{$cur->{ibx2root}}) {
+                warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} mismatched ekeys and ibx2root
+EOM
+                undef;
+        } else {
+                $cur;
+        }
+}
+
+sub qparse_new ($) {
+        my ($self) = @_;
+        my $qp = $self->qp_init_common;
+        my $cb = $qp->can('add_valuerangeprocessor') //
+                $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
+        if (!@CODE_NRP) {
+                @CODE_NRP = map {
+                        $PublicInbox::Search::NVRP->new(@$_)
+                } @CODE_VMAP;
+        }
+        $cb->($qp, $_) for @CODE_NRP;
+        while (my ($name, $pfx) = each %bool_pfx_external) {
+                $qp->add_boolean_prefix($name, $_) for split(/ /, $pfx);
+        }
+        while (my ($name, $pfx) = each %prob_prefix) {
+                $qp->add_prefix($name, $_) for split(/ /, $pfx);
+        }
+        $qp;
+}
+
+sub generate_cxx () { # generates snippet for xap_helper.h
+        my $ret = <<EOM;
+# line ${\__LINE__} "${\__FILE__}"
+static NRP *code_nrp[${\scalar(@CODE_VMAP)}];
+static void code_nrp_init(void)
+{
+EOM
+        for (0..$#CODE_VMAP) {
+                my $x = $CODE_VMAP[$_];
+                $ret .= qq{\tcode_nrp[$_] = new NRP($x->[0], "$x->[1]");\n}
+        }
+$ret .= <<EOM;
+}
+
+# line ${\__LINE__} "${\__FILE__}"
+static void qp_init_code_search(Xapian::QueryParser *qp)
+{
+        for (size_t i = 0; i < MY_ARRAY_SIZE(code_nrp); i++)
+                qp->ADD_RP(code_nrp[i]);
+EOM
+        for my $name (sort keys %bool_pfx_external) {
+                for (split(/ /, $bool_pfx_external{$name})) {
+                        $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
+                }
+        }
+        for my $name (sort keys %prob_prefix) {
+                for (split(/ /, $prob_prefix{$name})) {
+                        $ret .= qq{\tqp->add_prefix("$name", "$_");\n}
+                }
+        }
+        $ret .= "}\n";
+}
+
+# returns a Xapian::Query to filter by roots
+sub roots_filter { # retry_reopen callback
+        my ($self, $git_dir) = @_;
+        my $xdb = $self->xdb;
+        my $P = 'P'.$git_dir;
+        my ($cur, $end) = ($xdb->postlist_begin($P), $xdb->postlist_end($P));
+        if ($cur == $end) {
+                warn "W: $git_dir not indexed?\n";
+                return;
+        }
+        my @roots = xap_terms('G', $xdb, $cur->get_docid);
+        if (!@roots) {
+                warn "W: $git_dir has no root commits?\n";
+                return;
+        }
+        my $q = $PublicInbox::Search::X{Query}->new('G'.shift(@roots));
+        for my $r (@roots) {
+                $q = $PublicInbox::Search::X{Query}->new(
+                                        PublicInbox::Search::OP_OR(),
+                                        $q, 'G'.$r);
+        }
+        $q;
+}
+
+sub mset {
+        my ($self, $qry_str, $opt) = @_;
+        my $qp = $self->{qp} //= qparse_new($self);
+        my $qry = $qp->parse_query($qry_str, $self->{qp_flags});
+
+        # limit to commits with shared roots
+        if (defined(my $git_dir = $opt->{git_dir})) {
+                my $rf = retry_reopen($self, \&roots_filter, $git_dir)
+                        or return;
+
+                $qry = $PublicInbox::Search::X{Query}->new(
+                                PublicInbox::Search::OP_FILTER(),
+                                $qry, $rf);
+        }
+
+        # we only want commits:
+        $qry = $PublicInbox::Search::X{Query}->new(
+                                PublicInbox::Search::OP_FILTER(),
+                                $qry, 'T'.'c');
+        $self->do_enquire($qry, $opt, CT);
+}
+
+sub roots2paths { # for diagnostics
+        my ($self) = @_;
+        my $cur = $self->xdb->allterms_begin('G');
+        my $end = $self->{xdb}->allterms_end('G');
+        my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r');
+        my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+        $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+        $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+        my %ret;
+        for (; $cur != $end; $cur++) {
+                my $G_oidhex = $cur->get_termname;
+                my $qry = $PublicInbox::Search::X{Query}->new(
+                                PublicInbox::Search::OP_FILTER(),
+                                $qrepo, $G_oidhex);
+                $enq->set_query($qry);
+                my ($size, $off, $lim) = (0, 0, 100000);
+                my $dirs = $ret{substr($G_oidhex, 1)} = [];
+                do {
+                        my $mset = $enq->get_mset($off += $size, $lim);
+                        for my $x ($mset->items) {
+                                push @$dirs, xap_terms('P', $x->get_document);
+                        }
+                        $size = $mset->size;
+                } while ($size);
+                @$dirs = sort(uniqstr(@$dirs));
+        }
+        \%ret;
+}
+
+sub docids_of_git_dir ($$) {
+        my ($self, $git_dir) = @_;
+        my @ids = $self->docids_by_postlist('P'.$git_dir);
+        warn <<"" if @ids > 1;
+BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir}
+
+        @ids;
+}
+
+sub root_oids ($$) {
+        my ($self, $git_dir) = @_;
+        my @ids = docids_of_git_dir $self, $git_dir or warn <<"";
+BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
+
+        my @ret = map { xap_terms('G', $self->xdb, $_) } @ids;
+        @ret = uniqstr(@ret) if @ids > 1;
+        @ret;
+}
+
+sub paths2roots {
+        my ($self, $paths) = @_;
+        my %ret;
+        if ($paths) {
+                for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) }
+        } else {
+                my $tmp = roots2paths($self);
+                for my $root_oidhex (keys %$tmp) {
+                        my $paths = delete $tmp->{$root_oidhex};
+                        push @{$ret{$_}}, $root_oidhex for @$paths;
+                }
+                @$_ = sort(@$_) for values %ret;
+        }
+        \%ret;
+}
+
+sub load_ct { # retry_reopen cb
+        my ($self, $git_dir) = @_;
+        my @ids = docids_of_git_dir $self, $git_dir or return;
+        for (@ids) {
+                my $doc = $self->get_doc($_) // next;
+                return int_val($doc, CT);
+        }
+}
+
+sub load_commit_times { # each_cindex callback
+        my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...]
+        my (@pending, $rec, $ct);
+        while ($rec = shift @$todo) {
+                $ct = $self->retry_reopen(\&load_ct, $rec->[1]->{git_dir});
+                if (defined $ct) {
+                        $rec->[0] = $ct;
+                } else { # may be in another cindex:
+                        push @pending, $rec;
+                }
+        }
+        @$todo = @pending;
+}
+
+sub load_coderepos { # each_cindex callback
+        my ($self, $pi_cfg) = @_;
+        my $name = $self->{name};
+        my $cfg_f = $pi_cfg->{-f};
+        my $lpfx = $self->{localprefix} or return warn <<EOM;
+W: cindex.$name.localprefix unset in $cfg_f, ignoring cindex.$name
+EOM
+        my $lre = join('|', map { $_ .= '/'; tr!/!/!s; quotemeta } @$lpfx);
+        $lre = qr!\A(?:$lre)!;
+        my $coderepos = $pi_cfg->{-coderepos};
+        my $nick_pfx = $name eq '' ? '' : "$name/";
+        my %dir2cr;
+        for my $p ($self->all_terms('P')) {
+                my $nick = $p;
+                $nick =~ s!$lre!$nick_pfx!s or next;
+                $dir2cr{$p} = $coderepos->{$nick} //= do {
+                        my $git = PublicInbox::Git->new($p);
+                        my %dedupe = ($nick => undef);
+                        ($git->{nick}) = keys %dedupe; # for git->pub_urls
+                        $git;
+                };
+        }
+        my $jd = $self->retry_reopen(\&join_data, $self) or return warn <<EOM;
+W: cindex.$name.topdir=$self->{topdir} has no usable join data for $cfg_f
+EOM
+        my ($ekeys, $roots, $ibx2root) = @$jd{qw(ekeys roots ibx2root)};
+        my $roots2paths = roots2paths($self);
+        my %dedupe; # 50x alloc reduction w/ lore + gko mirror (Mar 2024)
+        for my $root_offs (@$ibx2root) {
+                my $ekey = shift(@$ekeys) // die 'BUG: {ekeys} empty';
+                scalar(@$root_offs) or next;
+                my $ibx = $pi_cfg->lookup_eidx_key($ekey) // do {
+                        warn "W: `$ekey' gone from $cfg_f\n";
+                        next;
+                };
+                my $gits = $ibx->{-repo_objs} //= [];
+                my $cr_score = $ibx->{-cr_score} //= {};
+                my %ibx_p2g = map { $_->{git_dir} => $_ } @$gits;
+                my $ibx2self; # cindex has an association w/ inbox?
+                for (@$root_offs) { # sorted by $nr descending
+                        my ($nr, $root_off) = @$_;
+                        my $root_oid = $roots->[$root_off] // do {
+                                warn <<EOM;
+BUG: root #$root_off invalid in join data for `$ekey' with $cfg_f
+EOM
+                                next;
+                        };
+                        my $git_dirs = $roots2paths->{$root_oid};
+                        my @gits = map { $dir2cr{$_} // () } @$git_dirs;
+                        $cr_score->{$_->{nick}} //= $nr for @gits;
+                        @$git_dirs = grep { !$ibx_p2g{$_} } @$git_dirs;
+                        # @$git_dirs or warn "W: no matches for $root_oid\n";
+                        for (@$git_dirs) {
+                                if (my $git = $dir2cr{$_}) {
+                                        $ibx_p2g{$_} = $git;
+                                        $ibx2self = 1;
+                                        if (!$ibx->{-hide_www}) {
+                                                # don't stringify $nr directly
+                                                # to avoid long-lived PV
+                                                my $k = ($nr + 0)."\0".
+                                                        ($ibx + 0);
+                                                my $s = $dedupe{$k} //=
+                                                        [ $nr, $ibx->{name} ];
+                                                push @{$git->{ibx_score}}, $s;
+                                        }
+                                        push @$gits, $git;
+                                } else {
+                                        warn <<EOM;
+W: no coderepo available for $_ (localprefix=@$lpfx)
+EOM
+                                }
+                        }
+                }
+                if (@$gits) {
+                        push @{$ibx->{-csrch}}, $self if $ibx2self;
+                } else {
+                        delete $ibx->{-repo_objs};
+                        delete $ibx->{-cr_score};
+                }
+        }
+        for my $git (values %dir2cr) {
+                my $s = $git->{ibx_score};
+                @$s = sort { $b->[0] <=> $a->[0] } @$s if $s;
+        }
+        my $ALL = $pi_cfg->ALL or return;
+        my @alls_gits = sort {
+                scalar @{$b->{ibx_score} // []} <=>
+                        scalar @{$a->{ibx_score} // []}
+        } values %$coderepos;
+        my $gits = $ALL->{-repo_objs} //= [];
+        push @$gits, @alls_gits;
+        my $cr_score = $ALL->{-cr_score} //= {};
+        $cr_score->{$_->{nick}} //= scalar(@{$_->{ibx_score}//[]}) for @$gits;
+}
+
+sub repos_sorted {
+        my $pi_cfg = shift;
+        my @recs = map { [ 0, $_ ] } @_; # PublicInbox::Git objects
+        my @todo = @recs;
+        $pi_cfg->each_cindex(\&load_commit_times, \@todo);
+        @recs = sort { $b->[0] <=> $a->[0] } @recs; # sort by commit time
+}
+
+1;
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
new file mode 100644
index 00000000..6d777bf6
--- /dev/null
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -0,0 +1,1391 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# indexer for git coderepos, just commits and repo paths for now
+# this stores normalized absolute paths of indexed GIT_DIR inside
+# the DB itself and is designed to handle forks by designating roots
+# At minimum, it needs to have the pathnames of all git repos in
+# memory at runtime.  --join also requires all inbox pathnames to
+# be in memory (as it happens when loaded from ~/.public-inbox/config).
+#
+# Unlike mail search, docid isn't tied to NNTP artnum or IMAP UID,
+# there's no serial number dependency at all.  The first 32-bits of
+# the commit SHA-(1|256) is used to select a shard.
+#
+# We shard repos using the first 32-bits of sha256($ABS_GIT_DIR)
+#
+# --join associates root commits of coderepos to inboxes based on prefixes.
+#
+# Internally, each inbox is assigned a non-negative integer index ($IBX_OFF),
+# and each root commit object ID (SHA-1/SHA-256 hex) is also assigned
+# a non-negative integer index ($ROOT_COMMIT_OID_ID).
+#
+# join dumps to 2 intermediate files in $TMPDIR:
+#
+# * to_root_off - each line is of the format:
+#
+#        $PFX @ROOT_COMMIT_OID_OFFS
+#
+# * to_ibx_off - each line is of the format:
+#
+#        $PFX @IBX_OFFS
+#
+# $IBX_OFFS is a comma-delimited list of integers ($IBX_ID)
+# The $IBX_OFF here is ephemeral (per-join_data) and NOT related to
+# the `ibx_off' column of `over.sqlite3' for extindex.
+# @ROOT_COMMIT_OID_OFFS is space-delimited
+# In both cases, $PFX is typically the value of the 7-(hex)char dfpost
+# XDFPOST but it can be configured to use any combination of patchid,
+# dfpre, dfpost or dfblob.
+#
+# WARNING: this is vulnerable to arbitrary memory usage attacks if we
+# attempt to index or join against malicious coderepos with
+# thousands/millions of root commits.  Most coderepos have only one
+# root commit, some have several: git.git currently has 7,
+# torvalds/linux.git has 4.
+# --max-size= is required to keep memory usage reasonable for gigantic
+# commits.
+#
+# See PublicInbox::CodeSearch (read-only API) for more
+package PublicInbox::CodeSearchIdx;
+use v5.12;
+# parent order matters, we want ->DESTROY from IPC, not SearchIdx
+use parent qw(PublicInbox::CodeSearch PublicInbox::IPC PublicInbox::SearchIdx);
+use PublicInbox::DS qw(awaitpid);
+use PublicInbox::PktOp;
+use PublicInbox::IPC qw(nproc_shards);
+use POSIX qw(WNOHANG SEEK_SET strftime);
+use File::Path ();
+use File::Spec ();
+use List::Util qw(max);
+use PublicInbox::SHA qw(sha256_hex sha_all);
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::SearchIdx qw(add_val);
+use PublicInbox::Config qw(glob2re rel2abs_collapsed);
+use PublicInbox::Spawn qw(which spawn popen_rd);
+use PublicInbox::OnDestroy;
+use PublicInbox::CidxLogP;
+use PublicInbox::CidxComm;
+use PublicInbox::Git qw(%OFMT2HEXLEN);
+use PublicInbox::Compat qw(uniqstr);
+use PublicInbox::Aspawn qw(run_await);
+use Compress::Zlib qw(compress);
+use Carp qw(croak);
+use Time::Local qw(timegm);
+use autodie qw(close pipe open sysread seek sysseek send);
+our $DO_QUIT = 15; # signal number
+our (
+        $LIVE_JOBS, # integer
+        $GITS_NR, # number of coderepos
+        $MY_SIG, # like %SIG
+        $SIGSET,
+        $TXN_BYTES, # number of bytes in current shard transaction
+        $BATCH_BYTES,
+        @RDONLY_XDB, # Xapian::Database
+        @IDX_SHARDS, # clones of self
+        $MAX_SIZE,
+        $REINDEX, # PublicInbox::SharedKV
+        @GIT_DIR_GONE, # [ git_dir1, git_dir2 ]
+        $PRUNE_DONE, # marks off prune completions
+        $NCHANGE, # current number of changes
+        $NPROC,
+        $XHC, # XapClient
+        $REPO_CTX, # current repo being indexed in shards
+        $IDXQ, # PublicInbox::Git object arrayref
+        $SCANQ, # PublicInbox::Git object arrayref
+        %ALT_FH, # hexlen => tmp IO for TMPDIR git alternates
+        $TMPDIR, # File::Temp->newdir object for prune
+        @PRUNEQ, # GIT_DIRs to prepare for pruning
+        %TODO, @IBXQ, @IBX,
+        @JOIN, # join(1) command for --join
+        $CMD_ENV, # env for awk(1), comm(1), sort(1) commands during prune
+        @AWK, @COMM, @SORT, # awk(1), comm(1), sort(1) commands
+        %JOIN, # CLI --join= suboptions
+        @JOIN_PFX, # any combination of XDFID, XDFPRE, XDFPOST
+        @JOIN_DT, # YYYYmmddHHMMSS for dt:
+        $QRY_STR, # common query string for both code and inbox associations
+        $DUMP_IBX_WPIPE, # goes to sort(1)
+        $ANY_SHARD, # shard round-robin for scan fingerprinting
+        @OFF2ROOT,
+        $GIT_VER,
+        @NO_ABBREV,
+);
+
+# stop walking history if we see >$SEEN_MAX existing commits, this assumes
+# branches don't diverge by more than this number of commits...
+# git walks commits quickly if it doesn't have to read trees
+our $SEEN_MAX = 100000;
+
+# window for commits/emails to determine a inbox <-> coderepo association
+my $JOIN_WINDOW = 50000;
+
+our @PRUNE_BATCH = qw(cat-file --batch-all-objects --batch-check);
+
+# TODO: do we care about committer name + email? or tree OID?
+my @FMT = qw(H P ct an ae at s b); # (b)ody must be last
+
+# git log --stdin buffers all commits before emitting, thus --reverse
+# doesn't incur extra overhead.  We use --reverse to keep Xapian docids
+# increasing so we may be able to avoid sorting results in some cases
+my @LOG_STDIN = (qw(log --no-decorate --no-color --no-notes -p --stat -M
+        --reverse --stdin --no-walk=unsorted), '--pretty=format:%n%x00'.
+        join('%n', map { "%$_" } @FMT));
+
+sub new {
+        my (undef, $dir, $opt) = @_;
+        my $l = $opt->{indexlevel} // 'full';
+        $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
+                die "invalid indexlevel=$l\n";
+        $l eq 'basic' and die "E: indexlevel=basic not supported\n";
+        my $self = bless {
+                xpfx => "$dir/cidx".  PublicInbox::CodeSearch::CIDX_SCHEMA_VER,
+                cidx_dir => $dir,
+                creat => 1, # TODO: get rid of this, should be implicit
+                transact_bytes => 0, # for checkpoint
+                total_bytes => 0, # for lock_release
+                current_info => '',
+                parallel => 1,
+                -opt => $opt,
+                lock_path => "$dir/cidx.lock",
+        }, __PACKAGE__;
+        $self->{nshard} = count_shards($self) ||
+                nproc_shards({nproc => $opt->{jobs}});
+        $self->{-no_fsync} = 1 if !$opt->{fsync};
+        $self->{-dangerous} = 1 if $opt->{dangerous};
+        $self;
+}
+
+# This is similar to uniq(1) on the first column, but combines the
+# contents of subsequent columns using $OFS.
+our @UNIQ_FOLD = ($^X, $^W ? ('-w') : (), qw(-MList::Util=uniq -ane), <<'EOM');
+BEGIN { $ofs = $ENV{OFS} // ','; $apfx = '' }
+if ($F[0] eq $apfx) {
+        shift @F;
+        push @ids, @F;
+} else {
+        print $apfx.' '.join($ofs, uniq(@ids))."\n" if @ids;
+        ($apfx, @ids) = @F;
+}
+END { print $apfx.' '.join($ofs, uniq(@ids))."\n" if @ids }
+EOM
+
+# TODO: may be used for reshard/compact
+sub count_shards { scalar($_[0]->xdb_shards_flat) }
+
+sub update_commit ($$$) {
+        my ($self, $cmt, $roots) = @_; # fields from @FMT
+        my $x = 'Q'.$cmt->{H};
+        my ($docid, @extra) = sort { $a <=> $b } $self->docids_by_postlist($x);
+        @extra and warn "W: $cmt->{H} indexed multiple times, pruning ",
+                        join(', ', map { "#$_" } @extra), "\n";
+        $self->{xdb}->delete_document($_) for @extra;
+        my $doc = $PublicInbox::Search::X{Document}->new;
+        $doc->add_boolean_term($x);
+        $doc->add_boolean_term('G'.$_) for @$roots;
+        $doc->add_boolean_term('XP'.$_) for split(/ /, $cmt->{P});
+        $doc->add_boolean_term('T'.'c');
+
+        # Author-Time is compatible with dt: for mail search schema_version=15
+        add_val($doc, PublicInbox::CodeSearch::AT,
+                POSIX::strftime('%Y%m%d%H%M%S', gmtime($cmt->{at})));
+
+        # Commit-Time is the fallback used by rt: (TS) for mail search:
+        add_val($doc, PublicInbox::CodeSearch::CT, $cmt->{ct});
+
+        $self->term_generator->set_document($doc);
+
+        # email address is always indexed with positional data for usability
+        $self->index_phrase("$cmt->{an} <$cmt->{ae}>", 1, 'A');
+
+        $x = $cmt->{'s'};
+        $self->index_text($x, 1, 'S') if $x =~ /\S/s;
+        $doc->set_data($x); # subject is the first (and currently only) line
+
+        $x = delete $cmt->{b};
+        $self->index_body_text($doc, \$x) if $x =~ /\S/s;
+        defined($docid) ? $self->{xdb}->replace_document($docid, $doc) :
+                        $self->{xdb}->add_document($doc);
+}
+
+sub progress {
+        my ($self, @msg) = @_;
+        my $pr = $self->{-opt}->{-progress} or return;
+        $pr->($self->{git} ? ("$self->{git}->{git_dir}: ") : (), @msg, "\n");
+}
+
+sub check_objfmt_status ($$$) {
+        my ($git, $chld_err, $fmt) = @_;
+        my ($status, $sig) = ($chld_err >> 8, $chld_err & 127);
+        if (!$sig && $status == 1) { # unset, default is '' (SHA-1)
+                $fmt = 'sha1';
+        } elsif (!$sig && $status == 0) {
+                chomp($fmt ||= 'sha1');
+        }
+        $fmt // warn("git --git-dir=$git->{git_dir} config \$?=$chld_err");
+        $fmt;
+}
+
+sub store_repo { # wq_io_do, sends docid back
+        my ($self, $repo) = @_;
+        my $op_p = delete($self->{0}) // die 'BUG: no {0} op_p';
+        my $git = bless $repo, 'PublicInbox::Git';
+        my $rd = $git->popen(qw(config extensions.objectFormat));
+        $self->begin_txn_lazy;
+        $self->{xdb}->delete_document($_) for @{$repo->{to_delete}};
+        my $doc = $PublicInbox::Search::X{Document}->new;
+        add_val($doc, PublicInbox::CodeSearch::CT, $repo->{ct});
+        $doc->add_boolean_term("P$repo->{git_dir}");
+        $doc->add_boolean_term('T'.'r');
+        $doc->add_boolean_term('G'.$_) for @{$repo->{roots}};
+        $doc->set_data($repo->{fp}); # \n delimited
+        my $fmt = readline($rd);
+        $rd->close;
+        $fmt = check_objfmt_status $git, $?, $fmt;
+        $OFMT2HEXLEN{$fmt} // warn <<EOM; # store unknown formats anyways
+E: unknown extensions.objectFormat=$fmt in $repo->{git_dir}
+EOM
+        $doc->add_boolean_term('H'.$fmt);
+        my $did = $repo->{docid};
+        $did ? $self->{xdb}->replace_document($did, $doc)
+                : ($did = $self->{xdb}->add_document($doc));
+        send($op_p, "repo_stored $did", 0);
+}
+
+sub cidx_ckpoint ($;$) {
+        my ($self, $msg) = @_;
+        progress($self, $msg) if defined($msg);
+        $TXN_BYTES = $BATCH_BYTES; # reset
+        return if $PublicInbox::Search::X{CLOEXEC_UNSET};
+        $self->commit_txn_lazy;
+        $self->begin_txn_lazy;
+}
+
+sub truncate_cmt ($$) {
+        my ($cmt) = @_; # _[1] is $buf (giant)
+        my ($orig_len, $len);
+        $len = $orig_len = length($_[1]);
+        @$cmt{@FMT} = split(/\n/, $_[1], scalar(@FMT));
+        undef $_[1];
+        $len -= length($cmt->{b});
+
+        # try to keep the commit message body.
+        # n.b. this diffstat split may be unreliable but it's not worth
+        # perfection for giant commits:
+        my ($bdy) = split(/^---\n/sm, delete($cmt->{b}), 2);
+        if (($len + length($bdy)) <= $MAX_SIZE) {
+                $len += length($bdy);
+                $cmt->{b} = $bdy;
+                warn <<EOM;
+W: $cmt->{H}: truncated body ($orig_len => $len bytes)
+W: to be under --max-size=$MAX_SIZE
+EOM
+        } else {
+                $cmt->{b} = '';
+                warn <<EOM;
+W: $cmt->{H}: deleted body ($orig_len => $len bytes)
+W: to be under --max-size=$MAX_SIZE
+EOM
+        }
+        $len;
+}
+
+sub cidx_reap_log { # awaitpid cb
+        my ($pid, $cmd, $self, $op_p) = @_;
+        if (!$? || ($DO_QUIT && (($? & 127) == $DO_QUIT ||
+                                ($? & 127) == POSIX::SIGPIPE))) {
+                send($op_p, "shard_done $self->{shard}", 0);
+        } else {
+                warn "W: @$cmd (\$?=$?)\n";
+                $self->{xdb}->cancel_transaction;
+        }
+}
+
+sub shard_index { # via wq_io_do in IDX_SHARDS
+        my ($self, $git, $roots) = @_;
+
+        my $in = delete($self->{0}) // die 'BUG: no {0} input';
+        my $op_p = delete($self->{1}) // die 'BUG: no {1} op_p';
+        sysseek($in, 0, SEEK_SET);
+        my $cmd = $git->cmd(@NO_ABBREV, @LOG_STDIN);
+        my $rd = popen_rd($cmd, undef, { 0 => $in },
+                                \&cidx_reap_log, $cmd, $self, $op_p);
+        PublicInbox::CidxLogP->new($rd, $self, $git, $roots);
+        # CidxLogP->event_step will call cidx_read_log_p once there's input
+}
+
+# sharded reader for `git log --pretty=format: --stdin'
+sub cidx_read_log_p {
+        my ($self, $log_p, $rd) = @_;
+        my $git = delete $log_p->{git} // die 'BUG: no {git}';
+        local $self->{current_info} = "$git->{git_dir} [$self->{shard}]";
+        my $roots = delete $log_p->{roots} // die 'BUG: no {roots}';
+        # local-ized in parent before fork
+        $TXN_BYTES = $BATCH_BYTES;
+        local $self->{git} = $git; # for patchid
+        return if $DO_QUIT;
+        my $nr = 0;
+
+        # a patch may have \0, see c4201214cbf10636e2c1ab9131573f735b42c8d4
+        # in linux.git, so we use $/ = "\n\0" to check end-of-patch
+        my $FS = "\n\0";
+        my $len;
+        my $cmt = {};
+        local $/ = $FS;
+        my $buf = <$rd> // return; # leading $FS
+        $buf eq $FS or die "BUG: not LF-NUL: $buf\n";
+        $self->begin_txn_lazy;
+        while (!$DO_QUIT && defined($buf = <$rd>)) {
+                chomp($buf);
+                $/ = "\n";
+                $len = length($buf);
+                if (defined($MAX_SIZE) && $len > $MAX_SIZE) {
+                        $len = truncate_cmt($cmt, $buf);
+                } else {
+                        @$cmt{@FMT} = split(/\n/, $buf, scalar(@FMT));
+                }
+                if (($TXN_BYTES -= $len) <= 0) {
+                        cidx_ckpoint($self, "[$self->{shard}] $nr");
+                        $TXN_BYTES -= $len; # len may be huge, >TXN_BYTES;
+                }
+                update_commit($self, $cmt, $roots);
+                ++$nr;
+                cidx_ckpoint($self, "[$self->{shard}] $nr") if $TXN_BYTES <= 0;
+                $/ = $FS;
+        }
+        # return and wait for cidx_reap_log
+}
+
+sub shard_done { # called via PktOp on shard_index completion
+        my ($self, $repo_ctx, $on_destroy, $n) = @_;
+        $repo_ctx->{shard_ok}->{$n} = 1;
+}
+
+sub repo_stored {
+        my ($self, $repo_ctx, $drs, $did) = @_;
+        # check @IDX_SHARDS instead of DO_QUIT to avoid wasting prior work
+        # because shard_commit is fast
+        return unless @IDX_SHARDS;
+        $did > 0 or die "BUG: $repo_ctx->{repo}->{git_dir}: docid=$did";
+        my ($c, $p) = PublicInbox::PktOp->pair;
+        $c->{ops}->{shard_done} = [ $self, $repo_ctx,
+                                on_destroy(\&next_repos, $repo_ctx, $drs)];
+        # shard_done fires when all shards are committed
+        my @active = keys %{$repo_ctx->{active}};
+        $IDX_SHARDS[$_]->wq_io_do('shard_commit', [ $p->{op_p} ]) for @active;
+}
+
+sub prune_done { # called via prune_do completion
+        my ($self, $drs, $n) = @_;
+        return if $DO_QUIT || !$PRUNE_DONE;
+        die "BUG: \$PRUNE_DONE->[$n] already defined" if $PRUNE_DONE->[$n];
+        $PRUNE_DONE->[$n] = 1;
+        if (grep(defined, @$PRUNE_DONE) == @IDX_SHARDS) {
+                progress($self, 'prune done');
+                index_next($self); # may kick dump_roots_start
+        }
+}
+
+sub seen ($$) {
+        my ($xdb, $q) = @_; # $q = "Q$COMMIT_HASH"
+        for (1..100) {
+                my $ret = eval {
+                        $xdb->postlist_begin($q) != $xdb->postlist_end($q);
+                };
+                return $ret unless $@;
+                if (ref($@) =~ /\bDatabaseModifiedError\b/) {
+                        $xdb->reopen;
+                } else {
+                        Carp::croak($@);
+                }
+        }
+        Carp::croak('too many Xapian DB modifications in progress');
+}
+
+# used to select the shard for a GIT_DIR
+sub git_dir_hash ($) { hex(substr(sha256_hex($_[0]), 0, 8)) }
+
+sub _cb { # run_await cb
+        my ($pid, $cmd, undef, $opt, $cb, $self, $git, @arg) = @_;
+        return if $DO_QUIT;
+        return $cb->($opt, $self, $git, @arg) if $opt->{quiet};
+        $? ? ($git->{-cidx_err} = warn("W: @$cmd (\$?=$?)\n")) :
+                        $cb->($opt, $self, $git, @arg);
+}
+
+sub run_git {
+        my ($cmd, $opt, $cb, $self, $git, @arg) = @_;
+        run_await($git->cmd(@$cmd), undef, $opt, \&_cb, $cb, $self, $git, @arg)
+}
+
+# this is different from the grokmirror-compatible fingerprint since we
+# only care about --heads (branches) and --tags, and not even their names
+sub fp_start ($$) {
+        my ($self, $git) = @_;
+        return if $DO_QUIT;
+        open my $refs, '+>', undef;
+        $git->{-repo}->{refs} = $refs;
+        my ($c, $p) = PublicInbox::PktOp->pair;
+        my $next_on_err = on_destroy \&index_next, $self;
+        $c->{ops}->{fp_done} = [ $self, $git, $next_on_err ];
+        $IDX_SHARDS[++$ANY_SHARD % scalar(@IDX_SHARDS)]->wq_io_do('fp_async',
+                                        [ $p->{op_p}, $refs ], $git->{git_dir})
+}
+
+sub fp_async { # via wq_io_do in worker
+        my ($self, $git_dir) = @_;
+        my $op_p = delete $self->{0} // die 'BUG: no {0} op_p';
+        my $refs = delete $self->{1} // die 'BUG: no {1} refs';
+        my $git = PublicInbox::Git->new($git_dir);
+        run_git([qw(show-ref --heads --tags --hash)], { 1 => $refs },
+                \&fp_async_done, $self, $git, $op_p);
+}
+
+sub fp_async_done { # run_git cb from worker
+        my ($opt, $self, $git, $op_p) = @_;
+        my $refs = delete $opt->{1} // 'BUG: no {-repo}->{refs}';
+        sysseek($refs, 0, SEEK_SET);
+        send($op_p, 'fp_done '.sha_all(256, $refs)->hexdigest, 0);
+}
+
+sub fp_done { # called parent via PktOp by fp_async_done
+        my ($self, $git, $next_on_err, $hex) = @_;
+        $next_on_err->cancel;
+        return if $DO_QUIT;
+        $git->{-repo}->{fp} = $hex;
+        my $n = git_dir_hash($git->{git_dir}) % scalar(@RDONLY_XDB);
+        my $shard = bless { %$self, shard => $n }, ref($self);
+        $git->{-repo}->{shard_n} = $n;
+        delete @$shard{qw(lockfh lock_path)};
+        local $shard->{xdb} = $RDONLY_XDB[$n] // die "BUG: shard[$n] undef";
+        $shard->retry_reopen(\&check_existing, $self, $git);
+}
+
+sub check_existing { # retry_reopen callback
+        my ($shard, $self, $git) = @_;
+        my @docids = $shard->docids_of_git_dir($git->{git_dir});
+        my $docid = shift(@docids) // return prep_repo($self, $git); # new repo
+        my $doc = $shard->get_doc($docid) //
+                        die "BUG: no #$docid ($git->{git_dir})";
+        my $old_fp = $REINDEX ? "\0invalid" : $doc->get_data;
+        if ($old_fp eq $git->{-repo}->{fp}) { # no change
+                delete $git->{-repo};
+                return index_next($self);
+        }
+        $git->{-repo}->{docid} = $docid;
+        if (@docids) {
+                warn "BUG: $git->{git_dir} indexed multiple times, culling\n";
+                $git->{-repo}->{to_delete} = \@docids; # XXX needed?
+        }
+        prep_repo($self, $git);
+}
+
+sub partition_refs ($$$) {
+        my ($self, $git, $refs) = @_; # show-ref --heads --tags --hash output
+        sysseek($refs, 0, SEEK_SET);
+        my $rfh = $git->popen(qw(rev-list --stdin), undef, { 0 => $refs });
+        my $seen = 0;
+        my @shard_in = map {
+                $_->reopen;
+                open my $fh, '+>', undef;
+                $fh;
+        } @RDONLY_XDB;
+
+        my $n0 = $NCHANGE;
+        while (defined(my $cmt = <$rfh>)) {
+                chomp $cmt;
+                my $n = hex(substr($cmt, 0, 8)) % scalar(@RDONLY_XDB);
+                if ($REINDEX && $REINDEX->set_maybe(pack('H*', $cmt), '')) {
+                        say { $shard_in[$n] } $cmt;
+                        ++$NCHANGE;
+                } elsif (seen($RDONLY_XDB[$n], 'Q'.$cmt)) {
+                        last if ++$seen > $SEEN_MAX;
+                } else {
+                        say { $shard_in[$n] } $cmt;
+                        ++$NCHANGE;
+                        $seen = 0;
+                }
+                if ($DO_QUIT) {
+                        $rfh->close;
+                        return ();
+                }
+        }
+        $rfh->close;
+        return () if $DO_QUIT;
+        if (!$? || (($? & 127) == POSIX::SIGPIPE && $seen > $SEEN_MAX)) {
+                my $n = $NCHANGE - $n0;
+                progress($self, "$git->{git_dir}: $n commits") if $n;
+                return @shard_in;
+        }
+        die "git --git-dir=$git->{git_dir} rev-list: \$?=$?\n";
+}
+
+sub shard_commit { # via wq_io_do
+        my ($self) = @_;
+        my $op_p = delete($self->{0}) // die 'BUG: no {0} op_p';
+        $self->commit_txn_lazy;
+        send($op_p, "shard_done $self->{shard}", 0);
+}
+
+sub dump_roots_start {
+        my ($self, $do_join) = @_;
+        return if $DO_QUIT;
+        $XHC //= PublicInbox::XapClient::start_helper("-j$NPROC");
+        $do_join // die 'BUG: no $do_join';
+        progress($self, 'dumping IDs from coderepos');
+        local $self->{xdb};
+        @OFF2ROOT = $self->all_terms('G');
+        my $root2id = "$TMPDIR/root2id";
+        open my $fh, '>', $root2id;
+        my $nr = -1;
+        for (@OFF2ROOT) { print $fh $_, "\0", ++$nr, "\0" } # mmap-friendly
+        close $fh;
+        # dump_roots | sort -k1,1 | OFS=' ' uniq_fold >to_root_off
+        my ($sort_opt, $fold_opt);
+        pipe(local $sort_opt->{0}, my $sort_w);
+        pipe(local $fold_opt->{0}, local $sort_opt->{1});
+        my @sort = (@SORT, '-k1,1');
+        my $dst = "$TMPDIR/to_root_off";
+        open $fold_opt->{1}, '>', $dst;
+        my $fold_env = { %$CMD_ENV, OFS => ' ' };
+        run_await(\@sort, $CMD_ENV, $sort_opt, \&cmd_done, $do_join);
+        run_await(\@UNIQ_FOLD, $fold_env, $fold_opt, \&cmd_done, $do_join);
+        my $window = $JOIN{window} // $JOIN_WINDOW;
+        my @m = $window <= 0 ? () : ('-m', $window);
+        my @arg = ((map { ('-A', $_) } @JOIN_PFX), '-c',
+                @m, $root2id, $QRY_STR);
+        for my $d ($self->shard_dirs) {
+                pipe(my $err_r, my $err_w);
+                $XHC->mkreq([$sort_w, $err_w], qw(dump_roots -d), $d, @arg);
+                my $desc = "dump_roots $d";
+                $self->{PENDING}->{$desc} = $do_join;
+                PublicInbox::CidxXapHelperAux->new($err_r, $self, $desc);
+        }
+        progress($self, 'waiting on dump_roots sort');
+}
+
+sub dump_ibx { # sends to xap_helper.h
+        my ($self, $ibx_off) = @_;
+        my $ibx = $IBX[$ibx_off] // die "BUG: no IBX[$ibx_off]";
+        my $ekey = $ibx->eidx_key;
+        my $srch = $ibx->isrch or return warn <<EOM;
+W: $ekey not indexed for search
+EOM
+        # note: we don't send `-m MAX' to dump_ibx since we have to
+        # post-filter non-patch messages for now...
+        my @cmd = ('dump_ibx', $srch->xh_args,
+                        (map { ('-A', $_) } @JOIN_PFX), $ibx_off, $QRY_STR);
+        pipe(my $r, my $w);
+        $XHC->mkreq([$DUMP_IBX_WPIPE, $w], @cmd);
+        $self->{PENDING}->{$ekey} = $TODO{do_join};
+        PublicInbox::CidxXapHelperAux->new($r, $self, $ekey);
+}
+
+sub dump_ibx_start {
+        my ($self, $do_join) = @_;
+        return if $DO_QUIT;
+        $XHC //= PublicInbox::XapClient::start_helper("-j$NPROC");
+        my ($sort_opt, $fold_opt);
+        pipe(local $sort_opt->{0}, $DUMP_IBX_WPIPE);
+        pipe(local $fold_opt->{0}, local $sort_opt->{1});
+        my @sort = (@SORT, '-k1,1'); # sort only on JOIN_PFX
+        # pipeline: dump_ibx | sort -k1,1 | uniq_fold >to_ibx_off
+        open $fold_opt->{1}, '>', "$TMPDIR/to_ibx_off";
+        run_await(\@sort, $CMD_ENV, $sort_opt, \&cmd_done, $do_join);
+        run_await(\@UNIQ_FOLD, $CMD_ENV, $fold_opt, \&cmd_done, $do_join);
+}
+
+sub index_next ($) {
+        my ($self) = @_;
+        return if $DO_QUIT;
+        if ($IDXQ && @$IDXQ) {
+                index_repo($self, shift @$IDXQ);
+        } elsif ($SCANQ && @$SCANQ) {
+                fp_start $self, shift @$SCANQ;
+        } elsif ($TMPDIR) {
+                delete $TODO{dump_roots_start};
+                delete $TODO{dump_ibx_start}; # runs OnDestroy once
+                return dump_ibx($self, shift @IBXQ) if @IBXQ;
+                undef $DUMP_IBX_WPIPE; # done dumping inboxes
+                delete $TODO{do_join};
+        }
+        # else: wait for shards_active (post_loop_do) callback
+}
+
+sub next_repos { # OnDestroy cb
+        my ($repo_ctx, $drs) = @_;
+        my ($self, $repo, $active) = @$repo_ctx{qw(self repo active)};
+        progress($self, "$repo->{git_dir}: done");
+        return if $DO_QUIT || !$REPO_CTX;
+        my $n = grep { ! $repo_ctx->{shard_ok}->{$_} } keys %$active;
+        die "E: $repo->{git_dir} $n shards failed" if $n;
+        $REPO_CTX == $repo_ctx or die "BUG: $REPO_CTX != $repo_ctx";
+        $REPO_CTX = undef;
+        index_next($self);
+}
+
+sub index_done { # OnDestroy cb called when done indexing each code repo
+        my ($repo_ctx, $drs) = @_;
+        return if $DO_QUIT;
+        my ($self, $repo, $active) = @$repo_ctx{qw(self repo active)};
+        # $active may be undef here, but it's fine to vivify
+        my $n = grep { ! $repo_ctx->{shard_ok}->{$_} } keys %$active;
+        die "E: $repo->{git_dir} $n shards failed" if $n;
+        $repo_ctx->{shard_ok} = {}; # reset for future shard_done
+        $n = $repo->{shard_n};
+        $repo_ctx->{active}->{$n} = undef; # may vivify $repo_ctx->{active}
+        my ($c, $p) = PublicInbox::PktOp->pair;
+        $c->{ops}->{repo_stored} = [ $self, $repo_ctx, $drs ];
+        $IDX_SHARDS[$n]->wq_io_do('store_repo', [ $p->{op_p} ], $repo);
+        # repo_stored will fire once store_repo is done
+}
+
+sub index_repo {
+        my ($self, $git) = @_;
+        return if $DO_QUIT;
+        my $repo = $git->{-repo} // die 'BUG: no {-repo}';
+        return index_next($self) if $git->{-cidx_err};
+        if (!defined($repo->{ct})) {
+                warn "W: $git->{git_dir} has no commits, skipping\n";
+                return index_next($self);
+        }
+        return push(@$IDXQ, $git) if $REPO_CTX; # busy
+        delete $git->{-repo};
+        my $roots_fh = delete $repo->{roots_fh} // die 'BUG: no {roots_fh}';
+        seek($roots_fh, 0, SEEK_SET);
+        chomp(my @roots = PublicInbox::IO::read_all $roots_fh);
+        if (!@roots) {
+                warn("E: $git->{git_dir} has no root commits\n");
+                return index_next($self);
+        }
+        $repo->{roots} = \@roots;
+        local $self->{current_info} = $git->{git_dir};
+        my @shard_in = partition_refs($self, $git, delete($repo->{refs}));
+        $repo->{git_dir} = $git->{git_dir};
+        my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo };
+        delete $git->{-cidx_gits_fini}; # may fire gits_fini
+        my $drs = delete $git->{-cidx_dump_roots_start};
+        my $index_done = on_destroy \&index_done, $repo_ctx, $drs;
+        my ($c, $p) = PublicInbox::PktOp->pair;
+        $c->{ops}->{shard_done} = [ $self, $repo_ctx, $index_done ];
+        for my $n (0..$#shard_in) {
+                $shard_in[$n]->flush or die "flush shard[$n]: $!";
+                -s $shard_in[$n] or next;
+                last if $DO_QUIT;
+                $IDX_SHARDS[$n]->wq_io_do('shard_index',
+                                        [ $shard_in[$n], $p->{op_p} ],
+                                        $git, \@roots);
+                $repo_ctx->{active}->{$n} = undef;
+        }
+        # shard_done fires when shard_index is done
+}
+
+sub ct_fini { # run_git cb
+        my ($opt, $self, $git, $index_repo) = @_;
+        my ($ct) = split(/\s+/, ${$opt->{1}}); # drop TZ + LF
+        $git->{-repo}->{ct} = $ct + 0;
+}
+
+# TODO: also index gitweb.owner and the full fingerprint for grokmirror?
+sub prep_repo ($$) {
+        my ($self, $git) = @_;
+        return if $DO_QUIT;
+        my $index_repo = on_destroy \&index_repo, $self, $git;
+        my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}';
+        sysseek($refs, 0, SEEK_SET);
+        open my $roots_fh, '+>', undef;
+        $git->{-repo}->{roots_fh} = $roots_fh;
+        run_git([ qw(rev-list --stdin --max-parents=0) ],
+                { 0 => $refs, 1 => $roots_fh }, \&PublicInbox::Config::noop,
+                $self, $git, $index_repo);
+        run_git([ qw[for-each-ref --sort=-committerdate
+                --format=%(committerdate:raw) --count=1
+                refs/heads/ refs/tags/] ], undef, # capture like qx
+                \&ct_fini, $self, $git, $index_repo);
+}
+
+# for PublicInbox::SearchIdx `git patch-id' call and with_umask
+sub git { $_[0]->{git} }
+
+sub load_existing ($) { # for -u/--update
+        my ($self) = @_;
+        my $dirs = $self->{git_dirs} //= [];
+        if ($self->{-opt}->{update} || $self->{-opt}->{prune}) {
+                local $self->{xdb};
+                $self->xdb or
+                        die "E: $self->{cidx_dir} non-existent for --update\n";
+                my @cur = grep {
+                        if (-e $_) {
+                                1;
+                        } else {
+                                push @GIT_DIR_GONE, $_;
+                                undef;
+                        }
+                } $self->all_terms('P');
+                if (@GIT_DIR_GONE && !$self->{-opt}->{prune}) {
+                        warn "W: the following repos no longer exist:\n",
+                                (map { "W:\t$_\n" } @GIT_DIR_GONE),
+                                "W: use --prune to remove them from ",
+                                $self->{cidx_dir}, "\n";
+                }
+                push @$dirs, @cur;
+        }
+        @$dirs = uniqstr @$dirs;
+}
+
+# SIG handlers:
+sub shard_quit { $DO_QUIT = POSIX->can("SIG$_[0]")->() }
+sub shard_usr1 { $TXN_BYTES = -1 }
+
+sub cidx_init ($) {
+        my ($self) = @_;
+        my $dir = $self->{cidx_dir};
+        unless (-d $dir) {
+                warn "# creating $dir\n" if !$self->{-opt}->{quiet};
+                File::Path::mkpath($dir);
+        }
+        $self->lock_acquire;
+        my @shards;
+        my $l = $self->{indexlevel} //= $self->{-opt}->{indexlevel};
+
+        for my $n (0..($self->{nshard} - 1)) {
+                my $shard = bless { %$self, shard => $n }, ref($self);
+                delete @$shard{qw(lockfh lock_path)};
+                my $xdb = $shard->idx_acquire;
+                if (!$n) {
+                        if (($l // '') eq 'medium') {
+                                $xdb->set_metadata('indexlevel', $l);
+                        } elsif (($l // '') eq 'full') {
+                                $xdb->set_metadata('indexlevel', ''); # unset
+                        }
+                        $l ||= $xdb->get_metadata('indexlevel') || 'full';
+                }
+                $shard->{indexlevel} = $l;
+                $shard->idx_release;
+                $shard->wq_workers_start("cidx shard[$n]", 1, $SIGSET, {
+                        siblings => \@shards, # for ipc_atfork_child
+                }, \&shard_done_wait, $self);
+                push @shards, $shard;
+        }
+        $self->{indexlevel} //= $l;
+        # this warning needs to happen after idx_acquire
+        state $once;
+        warn <<EOM if $PublicInbox::Search::X{CLOEXEC_UNSET} && !$once++;
+W: Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks,
+W: memory usage may be high for large indexing runs
+EOM
+        @shards;
+}
+
+# called when all git coderepos are done
+sub gits_fini {
+        undef $GITS_NR;
+        PublicInbox::DS::enqueue_reap(); # kick @post_loop_do
+}
+
+sub scan_git_dirs ($) {
+        my ($self) = @_;
+        @$SCANQ = () unless $self->{-opt}->{scan};
+        $GITS_NR = @$SCANQ or return;
+        my $gits_fini = on_destroy \&gits_fini;
+        $_->{-cidx_gits_fini} = $gits_fini for @$SCANQ;
+        if (my $drs = $TODO{dump_roots_start}) {
+                $_->{-cidx_dump_roots_start} = $drs for @$SCANQ;
+        }
+        progress($self, "scanning $GITS_NR code repositories...");
+}
+
+sub prune_init { # via wq_io_do in IDX_SHARDS
+        my ($self) = @_;
+        $self->{nr_prune} = 0;
+        $TXN_BYTES = $BATCH_BYTES;
+        $self->begin_txn_lazy;
+}
+
+sub prune_one { # via wq_io_do in IDX_SHARDS
+        my ($self, $term) = @_;
+        my @docids = $self->docids_by_postlist($term);
+        for (@docids) {
+                $TXN_BYTES -= $self->{xdb}->get_doclength($_) * 42;
+                $self->{xdb}->delete_document($_);
+        }
+        ++$self->{nr_prune};
+        $TXN_BYTES < 0 and
+                cidx_ckpoint($self, "prune [$self->{shard}] $self->{nr_prune}");
+}
+
+sub prune_commit { # via wq_io_do in IDX_SHARDS
+        my ($self) = @_;
+        my $prune_op_p = delete $self->{0} // die 'BUG: no {0} op_p';
+        my $nr = delete $self->{nr_prune} // die 'BUG: nr_prune undef';
+        cidx_ckpoint($self, "prune [$self->{shard}] $nr done") if $nr;
+        send($prune_op_p, "prune_done $self->{shard}", 0);
+}
+
+sub shards_active { # post_loop_do
+        return if $DO_QUIT;
+        return if grep(defined, $PRUNE_DONE, $SCANQ, $IDXQ) != 3;
+        return 1 if grep(defined, @$PRUNE_DONE) != @IDX_SHARDS;
+        return 1 if $GITS_NR || scalar(@$IDXQ) || $REPO_CTX;
+        return 1 if @IBXQ || keys(%TODO);
+        for my $s (grep { $_->{-wq_s1} } @IDX_SHARDS) {
+                $s->{-cidx_quit} = 1 if defined($s->{-wq_s1});
+                $s->wq_close; # may recurse via awaitpid outside of event_loop
+        }
+        scalar(grep { $_->{-cidx_quit} } @IDX_SHARDS);
+}
+
+# signal handlers
+sub kill_shards { $_->wq_kill(@_) for (@IDX_SHARDS) }
+
+sub parent_quit {
+        $DO_QUIT = POSIX->can("SIG$_[0]")->();
+        $XHC = 0; # stops the process
+        kill_shards(@_);
+        warn "# SIG$_[0] received, quitting...\n";
+}
+
+sub prep_umask ($) {
+        my ($self) = @_;
+        if ($self->{-cidx_internal}) { # respect core.sharedRepository
+                @{$self->{git_dirs}} == 1 or die 'BUG: only for GIT_DIR';
+                local $self->{git} =
+                        PublicInbox::Git->new($self->{git_dirs}->[0]);
+                $self->with_umask;
+        } elsif (-d $self->{cidx_dir}) { # respect existing perms
+                my @st = stat(_);
+                my $um = (~$st[2] & 0777);
+                $self->{umask} = $um; # for SearchIdx->with_umask
+                umask == $um or progress($self, 'using umask from ',
+                                                $self->{cidx_dir}, ': ',
+                                                sprintf('0%03o', $um));
+                on_destroy \&CORE::umask, umask($um);
+        } else {
+                $self->{umask} = umask; # for SearchIdx->with_umask
+                undef;
+        }
+}
+
+sub prep_alternate_end ($$) {
+        my ($objdir, $fmt) = @_;
+        my $hexlen = $OFMT2HEXLEN{$fmt} // return warn <<EOM;
+E: ignoring objdir=$objdir, unknown extensions.objectFormat=$fmt
+EOM
+        unless ($ALT_FH{$hexlen}) {
+                require PublicInbox::Import;
+                my $git_dir = "$TMPDIR/hexlen$hexlen.git";
+                PublicInbox::Import::init_bare($git_dir, 'cidx-all', $fmt);
+                open $ALT_FH{$hexlen}, '>', "$git_dir/objects/info/alternates";
+        }
+        say { $ALT_FH{$hexlen} } $objdir;
+}
+
+sub store_objfmt { # via wq_do - make early cidx users happy
+        my ($self, $docid, $git_dir, $fmt) = @_;
+        $self->begin_txn_lazy;
+        my $doc = $self->get_doc($docid) // return
+                warn "BUG? #$docid for $git_dir missing";
+        my @p = xap_terms('P', $doc) or return
+                warn "BUG? #$docid for $git_dir has no P(ath)";
+        @p == 1 or return warn "BUG? #$docid $git_dir multi: @p";
+        $p[0] eq $git_dir or return warn "BUG? #$docid $git_dir != @p";
+        $doc->add_boolean_term('H'.$fmt);
+        $self->{xdb}->replace_document($docid, $doc);
+        # wait for prune_commit to commit...
+}
+
+# TODO: remove prep_alternate_read and store_objfmt 1-2 years after 2.0 is out
+# they are for compatibility with pre-release indices
+sub prep_alternate_read { # run_git cb for config extensions.objectFormat
+        my ($opt, $self, $git, $objdir, $docid, $shard_n, $run_prune) = @_;
+        return if $DO_QUIT;
+        my $chld_err = $?;
+        prep_alternate_start($self, shift(@PRUNEQ), $run_prune) if @PRUNEQ;
+        my $fmt = check_objfmt_status $git, $chld_err, ${$opt->{1}};
+        $IDX_SHARDS[$shard_n]->wq_do('store_objfmt', # async
+                                        $docid, $git->{git_dir}, $fmt);
+        prep_alternate_end $objdir, $fmt;
+}
+
+sub prep_alternate_start {
+        my ($self, $git, $run_prune) = @_;
+        local $self->{xdb};
+        my ($o, $n, @ids, @fmt);
+start:
+        $o = $git->git_path('objects');
+        while (!-d $o) {
+                $git = shift(@PRUNEQ) // return;
+                $o = $git->git_path('objects');
+        }
+        $n = git_dir_hash($git->{git_dir}) % scalar(@RDONLY_XDB);
+        $self->{xdb} = $RDONLY_XDB[$n] // croak("BUG: no shard[$n]");
+        @ids = $self->docids_by_postlist('P'.$git->{git_dir});
+        @fmt = @ids ? xap_terms('H', $self->{xdb}, $ids[0]) : ();
+        @fmt > 1 and warn "BUG? multi `H' for shard[$n] #$ids[0]: @fmt";
+
+        if (@fmt) { # cache hit
+                prep_alternate_end $o, $fmt[0];
+                $git = shift(@PRUNEQ) and goto start;
+        } else { # compatibility w/ early cidx format
+                run_git([qw(config extensions.objectFormat)], { quiet => 1 },
+                        \&prep_alternate_read, $self, $git, $o, $ids[0], $n,
+                        $run_prune);
+        }
+}
+
+sub cmd_done { # run_await cb for sort, xapian-delve, sed failures
+        my ($pid, $cmd, undef, undef, $run_on_destroy) = @_;
+        $? and die "fatal: @$cmd (\$?=$?)\n";
+        # $run_on_destroy calls do_join() or run_prune()
+}
+
+sub current_join_data ($) {
+        my ($self) = @_;
+        local $self->{xdb} = $RDONLY_XDB[0] // die 'BUG: shard[0] undef';
+        # we support multiple PI_CONFIG files for a cindex:
+        $self->join_data;
+}
+
+# combined previously stored stats with new
+sub score_old_join_data ($$$) {
+        my ($self, $score, $ekeys_new) = @_;
+        my $old = ($JOIN{reset} ? undef : current_join_data($self)) or return;
+        progress($self, 'merging old join data...');
+        my ($ekeys_old, $roots_old, $ibx2root_old) =
+                                        @$old{qw(ekeys roots ibx2root)};
+        # score: "ibx_off root_off" => nr
+        my $i = -1;
+        my %root2id_new = map { $_ => ++$i } @OFF2ROOT;
+        $i = -1;
+        my %ekey2id_new = map { $_ => ++$i } @$ekeys_new;
+        for my $ibx_off_old (0..$#$ibx2root_old) {
+                my $root_offs_old = $ibx2root_old->[$ibx_off_old];
+                my $ekey = $ekeys_old->[$ibx_off_old] // do {
+                        warn "W: no ibx #$ibx_off_old in old join data\n";
+                        next;
+                };
+                my $ibx_off_new = $ekey2id_new{$ekey} // do {
+                        warn "W: `$ekey' no longer exists\n";
+                        next;
+                };
+                for (@$root_offs_old) {
+                        my ($nr, $rid_old) = @$_;
+                        my $root_old = $roots_old->[$rid_old] // do {
+                                warn "W: no root #$rid_old in old data\n";
+                                next;
+                        };
+                        my $rid_new = $root2id_new{$root_old} // do {
+                                warn "W: root `$root_old' no longer exists\n";
+                                next;
+                        };
+                        $score->{"$ibx_off_new $rid_new"} += $nr;
+                }
+        }
+}
+
+sub metadata_set { # via wq_do
+        my ($self, $key, $val, $commit) = @_;
+        $self->begin_txn_lazy;
+        $self->{xdb}->set_metadata($key, $val);
+        $self->commit_txn_lazy if $commit || defined(wantarray);
+}
+
+# runs once all inboxes and shards are dumped via OnDestroy
+sub do_join {
+        my ($self) = @_;
+        return if $DO_QUIT;
+        $XHC = 0; # should not be recreated again
+        @IDX_SHARDS or return warn("# aborting on no shards\n");
+        unlink("$TMPDIR/root2id");
+        my @pending = keys %{$self->{PENDING}};
+        die "BUG: pending=@pending jobs not done\n" if @pending;
+        progress($self, 'joining...');
+        my @join = (@JOIN, 'to_ibx_off', 'to_root_off');
+        if (my $time = which('time')) { unshift @join, $time };
+        my $rd = popen_rd(\@join, $CMD_ENV, { -C => "$TMPDIR" });
+        my %score;
+        while (<$rd>) { # PFX ibx_offs root_off
+                chop eq "\n" or die "no newline from @join: <$_>";
+                my (undef, $ibx_offs, @root_offs) = split / /, $_;
+                for my $ibx_off (split(/,/, $ibx_offs)) {
+                        ++$score{"$ibx_off $_"} for @root_offs;
+                }
+        }
+        $rd->close or die "fatal: @join failed: \$?=$?";
+        my $nr = scalar(keys %score) or do {
+                delete $TODO{joining};
+                return progress($self, 'no potential new pairings');
+        };
+        progress($self, "$nr potential new pairings...");
+        my @ekeys = map { $_->eidx_key } @IBX;
+        score_old_join_data($self, \%score, \@ekeys);
+        my $new;
+        while (my ($k, $nr) = each %score) {
+                my ($ibx_off, $root_off) = split(/ /, $k);
+                my ($ekey, $root) = ($ekeys[$ibx_off], $OFF2ROOT[$root_off]);
+                progress($self, "$ekey => $root has $nr matches");
+                push @{$new->{ibx2root}->[$ibx_off]}, [ $nr, $root_off ];
+        }
+        for my $ary (values %$new) { # sort by nr (largest first)
+                for (@$ary) { @$_ = sort { $b->[0] <=> $a->[0] } @$_ }
+        }
+        $new->{ekeys} = \@ekeys;
+        $new->{roots} = \@OFF2ROOT;
+        $new->{dt} = \@JOIN_DT;
+        $new = compress(PublicInbox::Config::json()->encode($new));
+        my $key = $self->join_data_key;
+        my $wait = $IDX_SHARDS[0]->wq_do('metadata_set', $key, $new);
+        delete $TODO{joining};
+}
+
+sub require_progs {
+        my $op = shift;
+        while (my ($x, $argv) = splice(@_, 0, 2)) {
+                my $e = $x;
+                $e =~ tr/a-z-/A-Z_/;
+                my $c = $ENV{$e} // $x;
+                $argv->[0] //= which($c) // die "E: `$x' required for --$op\n";
+        }
+}
+
+sub init_join_postfork ($) {
+        my ($self) = @_;
+        return unless $self->{-opt}->{join};
+        require_progs('join', join => \@JOIN);
+        my $d2 = '([0-9]{2})';
+        my $dt_re = qr!([0-9]{4})$d2$d2$d2$d2$d2!;
+        if (my $cur = $JOIN{reset} ? undef : current_join_data($self)) {
+                if (($cur->{dt}->[1] // '') =~ m!\A$dt_re\z!o) {
+                        my ($Y, $m, $d, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
+                        my $t = timegm($S, $M, $H, $d, $m - 1, $Y);
+                        $t = strftime('%Y%m%d%H%M%S', gmtime($t + 1));
+                        $JOIN{dt} //= "$t..";
+                } else {
+                        warn <<EOM;
+BUG?: previous --join invocation did not store usable `dt' key
+EOM
+                }
+        }
+        if ($JOIN{aggressive}) {
+                $JOIN{window} //= -1;
+                $JOIN{dt} //= '..1.month.ago';
+        }
+        $QRY_STR = $JOIN{dt} // '1.year.ago..';
+        index($QRY_STR, '..') >= 0 or die "E: dt:$QRY_STR is not a range\n";
+        # Account for send->apply delay (torvalds/linux.git mean is ~20 days
+        # from Author to CommitDate in cases where CommitDate > AuthorDate
+        $QRY_STR .= '1.month.ago' if $QRY_STR =~ /\.\.\z/;
+        @{$self->{git_dirs} // []} or die "E: no coderepos to join\n";
+        @IBX or die "E: no inboxes to join\n";
+        my $approx_git = PublicInbox::Git->new($self->{git_dirs}->[0]); # ugh
+        substr($QRY_STR, 0, 0) = 'dt:';
+        $self->query_approxidate($approx_git, $QRY_STR); # in-place
+        ($JOIN_DT[1]) = ($QRY_STR =~ /\.\.([0-9]{14})\z/); # YYYYmmddHHMMSS
+        ($JOIN_DT[0]) = ($QRY_STR =~ /\Adt:([0-9]{14})/); # YYYYmmddHHMMSS
+        $JOIN_DT[0] //= '19700101'.'000000'; # git uses unsigned times
+        $TODO{do_join} = on_destroy \&do_join, $self;
+        $TODO{joining} = 1; # keep shards_active() happy
+        $TODO{dump_ibx_start} = on_destroy \&dump_ibx_start,
+                                        $self, $TODO{do_join};
+        $TODO{dump_roots_start} = on_destroy \&dump_roots_start,
+                                        $self, $TODO{do_join};
+        progress($self, "will join in $QRY_STR date range...");
+        my $id = -1;
+        @IBXQ = map { ++$id } @IBX;
+}
+
+sub init_prune ($) {
+        my ($self) = @_;
+        return (@$PRUNE_DONE = map { 1 } @IDX_SHARDS) if !$self->{-opt}->{prune};
+
+        # Dealing with millions of commits here at once, so use faster tools.
+        # xapian-delve is nearly an order-of-magnitude faster than Xapian Perl
+        # bindings.  sed/awk are faster than Perl for simple stream ops, and
+        # sort+comm are more memory-efficient with gigantic lists.
+        # pipeline: delve | sed | sort >indexed_commits
+        my @delve = (undef, qw(-A Q -1));
+        my @sed = (undef, '-ne', 's/^Q//p');
+        @COMM = (undef, qw(-2 -3 indexed_commits -));
+        @AWK = (undef, '$2 == "commit" { print $1 }'); # --batch-check output
+        require_progs('prune', 'xapian-delve' => \@delve, sed => \@sed,
+                        comm => \@COMM, awk => \@AWK);
+        for (0..$#IDX_SHARDS) { push @delve, "$self->{xpfx}/$_" }
+        my $run_prune = on_destroy \&run_prune, $self, $TODO{dump_roots_start};
+        my ($sort_opt, $sed_opt, $delve_opt);
+        pipe(local $sed_opt->{0}, local $delve_opt->{1});
+        pipe(local $sort_opt->{0}, local $sed_opt->{1});
+        open($sort_opt->{1}, '+>', "$TMPDIR/indexed_commits");
+        run_await([@SORT, '-u'], $CMD_ENV, $sort_opt, \&cmd_done, $run_prune);
+        run_await(\@sed, $CMD_ENV, $sed_opt, \&cmd_done, $run_prune);
+        run_await(\@delve, undef, $delve_opt, \&cmd_done, $run_prune);
+        @PRUNEQ = @$SCANQ;
+        for (1..$LIVE_JOBS) {
+                prep_alternate_start($self, shift(@PRUNEQ) // last, $run_prune);
+        }
+}
+
+sub dump_git_commits { # run_await cb
+        my ($pid, $cmd, undef, $batch_opt, $self) = @_;
+        (defined($pid) && $?) and die "E: @$cmd \$?=$?";
+        return if $DO_QUIT;
+        my ($hexlen) = keys(%ALT_FH) or return; # done, DESTROY batch_opt->{1}
+        close(delete $ALT_FH{$hexlen}); # flushes `say' buffer
+        progress($self, "preparing $hexlen-byte hex OID commits for prune...");
+        my $g = PublicInbox::Git->new("$TMPDIR/hexlen$hexlen.git");
+        run_await($g->cmd(@PRUNE_BATCH), undef, $batch_opt,
+                        \&dump_git_commits, $self);
+}
+
+sub run_prune { # OnDestroy when `git config extensions.objectFormat' are done
+        my ($self, $drs) = @_;
+        return if $DO_QUIT;
+        # setup the following pipeline: (
+        #        git --git-dir=hexlen40.git cat-file \
+        #                --batch-all-objects --batch-check &&
+        #        git --git-dir=hexlen64.git cat-file \
+        #                --batch-all-objects --batch-check
+        # ) | awk | sort | comm | cidx_read_comm()
+        my ($awk_opt, $sort_opt, $batch_opt);
+        my $comm_opt = { -C => "$TMPDIR" };
+        pipe(local $awk_opt->{0}, $batch_opt->{1});
+        pipe(local $sort_opt->{0}, local $awk_opt->{1});
+        pipe(local $comm_opt->{0}, local $sort_opt->{1});
+        run_await(\@AWK, $CMD_ENV, $awk_opt, \&cmd_done);
+        run_await([@SORT, '-u'], $CMD_ENV, $sort_opt, \&cmd_done);
+        my $comm_rd = popen_rd(\@COMM, $CMD_ENV, $comm_opt, \&cmd_done, \@COMM);
+        PublicInbox::CidxComm->new($comm_rd, $self, $drs); # ->cidx_read_comm
+        push @PRUNE_BATCH, '--buffer' if $GIT_VER ge v2.6;
+
+        # Yes, we pipe --unordered git output to sort(1) because sorting
+        # inside git leads to orders-of-magnitude slowdowns on rotational
+        # storage.  GNU sort(1) also works well on larger-than-memory
+        # datasets, and it's not worth eliding sort(1) for old git.
+        push @PRUNE_BATCH, '--unordered' if $GIT_VER ge v2.19;
+        warn(sprintf(<<EOM, $GIT_VER)) if $GIT_VER lt v2.19;
+W: git v2.19+ recommended for high-latency storage (have git v%vd)
+EOM
+        dump_git_commits(undef, undef, undef, $batch_opt, $self);
+}
+
+sub cidx_read_comm { # via PublicInbox::CidxComm::event_step
+        my ($self, $comm_rd, $drs) = @_;
+        return if $DO_QUIT;
+        progress($self, 'starting prune...');
+        $_->wq_do('prune_init') for @IDX_SHARDS;
+        while (defined(my $cmt = <$comm_rd>)) {
+                chop($cmt) eq "\n" or die "BUG: no LF in comm output ($cmt)";
+                my $n = hex(substr($cmt, 0, 8)) % scalar(@IDX_SHARDS);
+                $IDX_SHARDS[$n]->wq_do('prune_one', 'Q'.$cmt);
+                last if $DO_QUIT;
+        }
+        for my $git_dir (@GIT_DIR_GONE) {
+                my $n = git_dir_hash($git_dir) % scalar(@IDX_SHARDS);
+                $IDX_SHARDS[$n]->wq_do('prune_one', 'P'.$git_dir);
+                last if $DO_QUIT;
+        }
+        my ($c, $p) = PublicInbox::PktOp->pair;
+        $c->{ops}->{prune_done} = [ $self, $drs ];
+        $_->wq_io_do('prune_commit', [ $p->{op_p} ]) for @IDX_SHARDS;
+}
+
+sub init_join_prefork ($) {
+        my ($self) = @_;
+        my $subopt = $self->{-opt}->{join} // return;
+        %JOIN = map {
+                my ($k, $v) = split /:/, $_, 2;
+                $k => $v // 1;
+        } split(/,/, join(',', @$subopt));
+        require PublicInbox::CidxXapHelperAux;
+        require PublicInbox::XapClient;
+        my @unknown;
+        my $pfx = $JOIN{prefixes} // 'dfpost7';
+        for my $p (split /\+/, $pfx) {
+                my $n = '';
+                $p =~ s/([0-9]+)\z// and $n = $1;
+                my $v = $PublicInbox::Search::PATCH_BOOL_COMMON{$p} //
+                        push(@unknown, $p);
+                push(@JOIN_PFX, map { $_.$n } split(/ /, $v));
+        }
+        @unknown and die <<EOM;
+E: --join=prefixes= contains unsupported prefixes: @unknown
+EOM
+        @JOIN_PFX = uniqstr @JOIN_PFX;
+        my %incl = map {
+                if (-f "$_/inbox.lock" || -d "$_/public-inbox") {
+                        rel2abs_collapsed($_) => undef;
+                } else {
+                        warn "W: `$_' is not a public inbox, skipping\n";
+                        ();
+                }
+        } (@{$self->{-opt}->{include} // []});
+        my $all = $self->{-opt}->{all};
+        if (my $only = $self->{-opt}->{only}) {
+                die <<'' if $all;
+E: --all is incompatible with --only
+
+                $incl{rel2abs_collapsed($_)} = undef for @$only;
+        }
+        unless (keys(%incl)) {
+                $all = 1;
+                warn <<EOM unless $self->{opt}->{quiet};
+# --all implied since no inboxes were specified with --only or --include
+EOM
+        }
+        $self->{-opt}->{-pi_cfg}->each_inbox(\&_prep_ibx, $self, \%incl, $all);
+        my $nr = scalar(@IBX) or die "E: no inboxes to join with\n";
+        progress($self, "will join with $nr inboxes in ",
+                        $self->{-opt}->{-pi_cfg}->{-f}, " using: $pfx");
+}
+
+sub _prep_ibx { # each_inbox callback
+        my ($ibx, $self, $incl, $all) = @_;
+        ($all || exists($incl->{$ibx->{inboxdir}})) and push @IBX, $ibx;
+}
+
+sub show_json { # for diagnostics (unstable output)
+        my ($self) = @_;
+        my $s = $self->{-opt}->{show} or return; # for diagnostics
+        local $self->{xdb};
+        my %ret;
+        my @todo = @$s;
+        while (defined(my $f = shift @todo)) {
+                if ($f =~ /\A(?:roots2paths|paths2roots|join_data)\z/) {
+                        $ret{$f} = $self->$f;
+                } elsif ($f eq '') { # default --show (no args)
+                        push @todo, qw(roots2paths join_data);
+                } else {
+                        warn "E: cannot show `$f'\n";
+                }
+        }
+        my $json = ref(PublicInbox::Config::json())->new;
+        $json->utf8->canonical->pretty; # n.b. FS pathnames may not be UTF-8...
+        say $json->encode(\%ret);
+}
+
+sub do_inits { # called via PublicInbox::DS::add_timer
+        my ($self) = @_;
+        grep !!$_, @{$self->{-opt}}{qw(scan prune)} and
+                @$SCANQ = map PublicInbox::Git->new($_), @{$self->{git_dirs}};
+        init_join_postfork $self;
+        init_prune $self;
+        scan_git_dirs $self;
+        my $max = $TODO{do_join} ? max($LIVE_JOBS, $NPROC) : $LIVE_JOBS;
+        index_next($self) for (1..$max);
+}
+
+sub cidx_run { # main entry point
+        my ($self) = @_;
+        my $restore_umask = prep_umask($self);
+        local $SIGSET = PublicInbox::DS::block_signals(
+                                        POSIX::SIGTSTP, POSIX::SIGCONT);
+        my $restore = on_destroy \&PublicInbox::DS::sig_setmask, $SIGSET;
+        local $PRUNE_DONE = [];
+        local $IDXQ = [];
+        local $SCANQ = [];
+        local ($DO_QUIT, $REINDEX, $TXN_BYTES, @GIT_DIR_GONE, @PRUNEQ,
+                $REPO_CTX, %ALT_FH, $TMPDIR, @AWK, @COMM, $CMD_ENV,
+                %TODO, @IBXQ, @IBX, @JOIN, %JOIN, @JOIN_PFX, @NO_ABBREV,
+                @JOIN_DT, $DUMP_IBX_WPIPE, @OFF2ROOT, $XHC, @SORT, $GITS_NR);
+        local $BATCH_BYTES = $self->{-opt}->{batch_size} //
+                                $PublicInbox::SearchIdx::BATCH_BYTES;
+        local $MAX_SIZE = $self->{-opt}->{max_size};
+        local $self->{PENDING} = {}; # used by PublicInbox::CidxXapHelperAux
+        my $cfg = $self->{-opt}->{-pi_cfg} // die 'BUG: -pi_cfg unset';
+        $self->{-cfg_f} = $cfg->{-f} = rel2abs_collapsed($cfg->{-f});
+        local $GIT_VER = PublicInbox::Git::git_version();
+        @NO_ABBREV = ('-c', 'core.abbrev='.($GIT_VER lt v2.31.0 ? 40 : 'no'));
+        if (grep { $_ } @{$self->{-opt}}{qw(prune join)}) {
+                require File::Temp;
+                $TMPDIR = File::Temp->newdir('cidx-all-git-XXXX', TMPDIR => 1);
+                $CMD_ENV = { TMPDIR => "$TMPDIR", LC_ALL => 'C', LANG => 'C' };
+                require_progs('(prune|join)', sort => \@SORT);
+                for (qw(parallel compress-program buffer-size)) { # GNU sort
+                        my $v = $self->{-opt}->{"sort-$_"};
+                        push @SORT, "--$_=$v" if defined $v;
+                }
+                ($self->{-opt}->{prune} && $GIT_VER le v2.6) and
+                        die "W: --prune requires git v2.6+\n";
+                init_join_prefork($self)
+        }
+        local @IDX_SHARDS = cidx_init($self); # forks workers
+        local $ANY_SHARD = -1;
+        local $self->{current_info} = '';
+        local $MY_SIG = {
+                CHLD => \&PublicInbox::DS::enqueue_reap,
+                USR1 => \&kill_shards,
+        };
+        local @PRUNE_BATCH = @PRUNE_BATCH;
+        $MY_SIG->{$_} = \&parent_quit for qw(TERM QUIT INT);
+        my $cb = $SIG{__WARN__} || \&CORE::warn;
+        local $SIG{__WARN__} = sub {
+                my $m = shift @_;
+                $self->{current_info} eq '' or
+                        $m =~ s/\A(#?\s*)/$1$self->{current_info}: /;
+                $cb->($m, @_);
+        };
+        load_existing($self) unless $self->{-cidx_internal};
+        if ($self->{-opt}->{reindex}) {
+                require PublicInbox::SharedKV;
+                $REINDEX = PublicInbox::SharedKV->new;
+                delete $REINDEX->{lock_path};
+                $REINDEX->dbh;
+        }
+        my @nc = grep { File::Spec->canonpath($_) ne $_ } @{$self->{git_dirs}};
+        if (@nc) {
+                warn "E: BUG? paths in $self->{cidx_dir} not canonicalized:\n";
+                for my $d (@{$self->{git_dirs}}) {
+                        my $c = File::Spec->canonpath($_);
+                        warn "E: $d => $c\n";
+                        $d = $c;
+                }
+                warn "E: canonicalized and attempting to continue\n";
+        }
+        if (defined(my $excl = $self->{-opt}->{exclude})) {
+                my $re = '(?:'.join('\\z|', map {
+                                glob2re($_) // qr/\A\Q$_\E/
+                        } @$excl).'\\z)';
+                my @excl;
+                @{$self->{git_dirs}} = grep {
+                        $_ =~ /$re/ ? (push(@excl, $_), 0) : 1;
+                } @{$self->{git_dirs}};
+                warn("# excluding $_\n") for @excl;
+                @GIT_DIR_GONE = uniqstr @GIT_DIR_GONE, @excl;
+        }
+        local $NCHANGE = 0;
+        local $NPROC = PublicInbox::IPC::detect_nproc();
+        local $LIVE_JOBS = $self->{-opt}->{jobs} || $NPROC || 2;
+        local @RDONLY_XDB = $self->xdb_shards_flat;
+        PublicInbox::DS::add_timer(0, \&do_inits, $self);
+
+        # FreeBSD ignores/discards SIGCHLD while signals are blocked and
+        # EVFILT_SIGNAL is inactive, so we pretend we have a SIGCHLD pending
+        PublicInbox::DS::enqueue_reap();
+
+        local @PublicInbox::DS::post_loop_do = (\&shards_active);
+        PublicInbox::DS::event_loop($MY_SIG, $SIGSET);
+        $self->lock_release(!!$NCHANGE);
+        show_json($self);
+}
+
+sub ipc_atfork_child { # @IDX_SHARDS
+        my ($self) = @_;
+        $self->SUPER::ipc_atfork_child;
+        $SIG{USR1} = \&shard_usr1;
+        $SIG{$_} = \&shard_quit for qw(INT TERM QUIT);
+        my $x = delete $self->{siblings} // die 'BUG: no {siblings}';
+        $_->wq_close for @$x;
+        undef;
+}
+
+sub shard_done_wait { # awaitpid cb via ipc_worker_reap
+        my ($pid, $shard, $self) = @_;
+        my $quit_req = delete($shard->{-cidx_quit});
+        return if $DO_QUIT;
+        if ($? == 0) { # success
+                $quit_req // warn 'BUG: {-cidx_quit} unset';
+        } else {
+                warn "PID:$pid $shard->{shard} exited with \$?=$?\n";
+                ++$self->{shard_err} if defined($self->{shard_err});
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Compat.pm b/lib/PublicInbox/Compat.pm
new file mode 100644
index 00000000..78cba90e
--- /dev/null
+++ b/lib/PublicInbox/Compat.pm
@@ -0,0 +1,24 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# compatibility code for old Perl and standard modules, mainly
+# List::Util but maybe other stuff
+package PublicInbox::Compat;
+use v5.12;
+use parent qw(Exporter);
+require List::Util;
+
+our @EXPORT_OK = qw(uniqstr);
+
+# uniqstr is in List::Util 1.45+, which means Perl 5.26+;
+# so maybe 2030 for us since we need to support enterprise distros.
+# We can use uniqstr everywhere in our codebase and don't need
+# to account for special cases of `uniqnum' nor `uniq' in List::Util
+# even if they make more sense in some contexts
+no warnings 'once';
+*uniqstr = List::Util->can('uniqstr') // sub (@) {
+        my %seen;
+        grep { !$seen{$_}++ } @_;
+};
+
+1;
diff --git a/lib/PublicInbox/CompressNoop.pm b/lib/PublicInbox/CompressNoop.pm
index fe73c2d1..5135299f 100644
--- a/lib/PublicInbox/CompressNoop.pm
+++ b/lib/PublicInbox/CompressNoop.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Provide the same methods as Compress::Raw::Zlib::Deflate but
@@ -10,7 +10,7 @@ use Compress::Raw::Zlib qw(Z_OK);
 sub new { bless \(my $self), __PACKAGE__ }
 
 sub deflate { # ($self, $input, $output)
-        $_[2] .= $_[1];
+        $_[2] .= ref($_[1]) ? ${$_[1]} : $_[1];
         Z_OK;
 }
 
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index d57c361a..49659a2e 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used throughout the project for reading configuration
@@ -10,33 +10,38 @@
 package PublicInbox::Config;
 use strict;
 use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(glob2re rel2abs_collapsed);
 use PublicInbox::Inbox;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Git qw(git_exe);
+use PublicInbox::Spawn qw(popen_rd run_qx);
+our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup
+our $DEDUPE; # set to {} to dedupe or clear cache
 
 sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
 
 # returns key-value pairs of config directives in a hash
 # if keys may be multi-value, the value is an array ref containing all values
 sub new {
-        my ($class, $file) = @_;
+        my ($class, $file, $lei) = @_;
         $file //= default_file();
-        my $self;
-        if (ref($file) eq 'SCALAR') { # used by some tests
-                open my $fh, '<', $file or die;  # PerlIO::scalar
-                $self = config_fh_parse($fh, "\n", '=');
-        } else {
-                $self = git_config_dump($file);
+        my ($self, $set_dedupe);
+        if (-f $file && $DEDUPE) {
+                $file = rel2abs_collapsed($file);
+                $self = $DEDUPE->{$file} and return $self;
+                $set_dedupe = 1;
         }
-        bless $self, $class;
+        $self = git_config_dump($class, $file, $lei);
+        $self->{-f} = $file;
         # caches
         $self->{-by_addr} = {};
         $self->{-by_list_id} = {};
         $self->{-by_name} = {};
         $self->{-by_newsgroup} = {};
+        $self->{-by_eidx_key} = {};
         $self->{-no_obfuscate} = {};
         $self->{-limiters} = {};
-        $self->{-code_repos} = {}; # nick => PublicInbox::Git object
-        $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'};
+        $self->{-coderepos} = {}; # nick => PublicInbox::Git object
 
         if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
                 $no = _array($no);
@@ -59,7 +64,7 @@ sub new {
         if (my $css = delete $self->{'publicinbox.css'}) {
                 $self->{css} = _array($css);
         }
-
+        $DEDUPE->{$file} = $self if $set_dedupe;
         $self;
 }
 
@@ -86,9 +91,22 @@ sub lookup_list_id {
 
 sub lookup_name ($$) {
         my ($self, $name) = @_;
-        $self->{-by_name}->{$name} // _fill($self, "publicinbox.$name");
+        $self->{-by_name}->{$name} // _fill_ibx($self, $name);
+}
+
+sub lookup_ei {
+        my ($self, $name) = @_;
+        $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
+}
+
+sub lookup_eidx_key {
+        my ($self, $eidx_key) = @_;
+        _lookup_fill($self, '-by_eidx_key', $eidx_key);
 }
 
+# special case for [extindex "all"]
+sub ALL { lookup_ei($_[0], 'all') }
+
 sub each_inbox {
         my ($self, $cb, @arg) = @_;
         # may auto-vivify if config file is non-existent:
@@ -107,9 +125,9 @@ sub lookup_newsgroup {
 sub limiter {
         my ($self, $name) = @_;
         $self->{-limiters}->{$name} //= do {
-                require PublicInbox::Qspawn;
+                require PublicInbox::Limiter;
                 my $max = $self->{"publicinboxlimiter.$name.max"} || 1;
-                my $limiter = PublicInbox::Qspawn::Limiter->new($max);
+                my $limiter = PublicInbox::Limiter->new($max);
                 $limiter->setup_rlimit($name, $self);
                 $limiter;
         };
@@ -123,20 +141,19 @@ sub default_file {
 
 sub config_fh_parse ($$$) {
         my ($fh, $rs, $fs) = @_;
-        my %rv;
-        my (%section_seen, @section_order);
+        my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i);
         local $/ = $rs;
-        while (defined(my $line = <$fh>)) {
-                chomp $line;
-                my ($k, $v) = split($fs, $line, 2);
-                my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
-                unless (defined $section_seen{$section}) {
-                        $section_seen{$section} = 1;
-                        push @section_order, $section;
-                }
-
-                my $cur = $rv{$k};
-                if (defined $cur) {
+        while (defined($line = <$fh>)) { # perf critical with giant configs
+                $i = index($line, $fs);
+                # $i may be -1 if $fs not found and it's a key-only entry
+                # (meaning boolean true).  Either way the -1 will drop the
+                # $rs either from $k or $v.
+                $k = substr($line, 0, $i);
+                $v = $i >= 0 ? substr($line, $i + 1, -1) : 1;
+                $section = substr($k, 0, rindex($k, '.'));
+                $seen{$section} //= push(@section_order, $section);
+
+                if (defined($cur = $rv{$k})) {
                         if (ref($cur) eq "ARRAY") {
                                 push @$cur, $v;
                         } else {
@@ -151,19 +168,39 @@ sub config_fh_parse ($$$) {
         \%rv;
 }
 
+sub tmp_cmd_opt ($$) {
+        my ($env, $opt) = @_;
+        # quiet global and system gitconfig if supported by installed git,
+        # but normally harmless if too noisy (NOGLOBAL no longer exists)
+        $env->{GIT_CONFIG_NOSYSTEM} = 1;
+        $env->{GIT_CONFIG_GLOBAL} = '/dev/null'; # git v2.32+
+        $opt->{-C} = '/'; # avoid $worktree/.git/config on MOST systems :P
+}
+
 sub git_config_dump {
-        my ($file) = @_;
-        return {} unless -e $file;
-        my @cmd = (qw/git config -z -l --includes/, "--file=$file");
-        my $cmd = join(' ', @cmd);
-        my $fh = popen_rd(\@cmd);
+        my ($class, $file, $lei) = @_;
+        my @opt_c = map { ('-c', $_) } @{$lei->{opt}->{c} // []};
+        $file = undef if !-e $file;
+        # XXX should we set {-f} if !-e $file?
+        return bless {}, $class if (!@opt_c && !defined($file));
+        my %env;
+        my $opt = { 2 => $lei->{2} // 2 };
+        if (@opt_c) {
+                unshift(@opt_c, '-c', "include.path=$file") if defined($file);
+                tmp_cmd_opt(\%env, $opt);
+        }
+        my @cmd = (git_exe, @opt_c, qw(config -z -l --includes));
+        push(@cmd, '-f', $file) if !@opt_c && defined($file);
+        my $fh = popen_rd(\@cmd, \%env, $opt);
         my $rv = config_fh_parse($fh, "\0", "\n");
-        close $fh or die "failed to close ($cmd) pipe: $?";
-        $rv;
+        $fh->close or die "@cmd failed: \$?=$?\n";
+        $rv->{-opt_c} = \@opt_c if @opt_c; # for ->urlmatch
+        $rv->{-f} = $file;
+        bless $rv, $class;
 }
 
-sub valid_inbox_name ($) {
-        my ($name) = @_;
+sub valid_foo_name ($;$) {
+        my ($name, $pfx) = @_;
 
         # Similar rules found in git.git/remote.c::valid_remote_nick
         # and git.git/refs.c::check_refname_component
@@ -171,6 +208,7 @@ sub valid_inbox_name ($) {
         if ($name eq '' || $name =~ /\@\{/ ||
             $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! ||
             $name =~ /\A\./ || $name =~ /\.\z/) {
+                warn "invalid $pfx name: `$name'\n" if $pfx;
                 return 0;
         }
 
@@ -208,7 +246,6 @@ sub cgit_repo_merge ($$$) {
                         $rel =~ s!/?\.git\z!!;
         }
         $self->{"coderepo.$rel.dir"} //= $path;
-        $self->{"coderepo.$rel.cgiturl"} //= _array($rel);
 }
 
 sub is_git_dir ($) {
@@ -244,10 +281,11 @@ sub scan_tree_coderepo ($$) {
         scan_path_coderepo($self, $path, $path);
 }
 
-sub scan_projects_coderepo ($$$) {
-        my ($self, $list, $path) = @_;
-        open my $fh, '<', $list or do {
-                warn "failed to open cgit projectlist=$list: $!\n";
+sub scan_projects_coderepo ($$) {
+        my ($self, $path) = @_;
+        my $l = $self->{-cgit_project_list} // die 'BUG: no cgit_project_list';
+        open my $fh, '<', $l or do {
+                warn "failed to open cgit project-list=$l: $!\n";
                 return;
         };
         while (<$fh>) {
@@ -256,8 +294,20 @@ sub scan_projects_coderepo ($$$) {
         }
 }
 
+sub apply_cgit_scan_path {
+        my ($self, @paths) = @_;
+        @paths or @paths = @{$self->{-cgit_scan_path}};
+        if (defined($self->{-cgit_project_list})) {
+                for my $p (@paths) { scan_projects_coderepo($self, $p) }
+        } else {
+                for my $p (@paths) { scan_tree_coderepo($self, $p) }
+        }
+}
+
 sub parse_cgitrc {
         my ($self, $cgitrc, $nesting) = @_;
+        $cgitrc //= $self->{'publicinbox.cgitrc'} //
+                        $ENV{CGIT_CONFIG} // return;
         if ($nesting == 0) {
                 # defaults:
                 my %s = map { $_ => 1 } qw(/cgit.css /cgit.png
@@ -297,58 +347,50 @@ sub parse_cgitrc {
                         my ($k, $v) = ($1, $2);
                         $k =~ tr/-/_/;
                         $self->{"-cgit_$k"} = $v;
+                        delete $self->{-cgit_scan_path} if $k eq 'project_list';
                 } elsif (m!\Ascan-path=(.+)\z!) {
-                        if (defined(my $list = $self->{-cgit_project_list})) {
-                                scan_projects_coderepo($self, $list, $1);
-                        } else {
-                                scan_tree_coderepo($self, $1);
-                        }
+                        # this depends on being after project-list in the
+                        # config file, just like cgit.c
+                        push @{$self->{-cgit_scan_path}}, $1;
+                        apply_cgit_scan_path($self, $1);
                 } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) {
                         # absolute paths for static files via PublicInbox::Cgit
                         $self->{-cgit_static}->{$1} = 1;
+                } elsif (s!\Asnapshots=\s*!!) {
+                        $self->{'coderepo.snapshots'} = $_;
                 }
         }
         cgit_repo_merge($self, $repo->{dir}, $repo) if $repo;
 }
 
-# parse a code repo
-# Only git is supported at the moment, but SVN and Hg are possibilities
-sub _fill_code_repo {
-        my ($self, $nick) = @_;
-        my $pfx = "coderepo.$nick";
-
-        # TODO: support gitweb and other repository viewers?
-        if (defined(my $cgitrc = delete $self->{-cgitrc_unparsed})) {
-                parse_cgitrc($self, $cgitrc, 0);
-        }
-        my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR"
-        unless (defined $dir) {
-                warn "$pfx.dir unset\n";
-                return;
-        }
-
-        my $git = PublicInbox::Git->new($dir);
-        foreach my $t (qw(blob commit tree tag)) {
-                $git->{$t.'_url_format'} =
-                                _array($self->{lc("$pfx.${t}UrlFormat")});
+sub valid_dir ($$) {
+        my $dir = get_1($_[0], $_[1]) // return;
+        index($dir, "\n") < 0 ? $dir : do {
+                warn "E: `$_[1]=$dir' must not contain `\\n'\n";
+                undef;
         }
+}
 
+# parse a code repo, only git is supported at the moment
+sub fill_coderepo {
+        my ($self, $nick) = @_;
+        my $pfx = "coderepo.$nick";
+        my $git = PublicInbox::Git->new(valid_dir($self, "$pfx.dir") // return);
         if (defined(my $cgits = $self->{"$pfx.cgiturl"})) {
                 $git->{cgit_url} = $cgits = _array($cgits);
                 $self->{"$pfx.cgiturl"} = $cgits;
-
-                # cgit supports "/blob/?id=%s", but it's only a plain-text
-                # display and requires an unabbreviated id=
-                foreach my $t (qw(blob commit tag)) {
-                        $git->{$t.'_url_format'} //= map {
-                                "$_/$t/?id=%s"
-                        } @$cgits;
-                }
         }
-
+        my %dedupe = ($nick => undef);
+        ($git->{nick}) = keys %dedupe;
         $git;
 }
 
+sub get_all {
+        my ($self, $key) = @_;
+        my $v = $self->{$key} // return;
+        _array($v);
+}
+
 sub git_bool {
         my ($val) = $_[-1]; # $_[0] may be $self, or $val
         if ($val =~ /\A(?:false|no|off|[\-\+]?(?:0x)?0+)\z/i) {
@@ -360,37 +402,75 @@ sub git_bool {
         }
 }
 
-sub _fill {
-        my ($self, $pfx) = @_;
-        my $ibx = {};
+# abs_path resolves symlinks, so we want to avoid it if rel2abs
+# is sufficient and doesn't leave "/.." or "/../"
+sub rel2abs_collapsed {
+        require File::Spec;
+        my $p = File::Spec->rel2abs(@_);
+        return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0;
+        require Cwd;
+        Cwd::abs_path($p);
+}
 
-        for my $k (qw(watch nntpserver)) {
-                my $v = $self->{"$pfx.$k"};
-                $ibx->{$k} = $v if defined $v;
-        }
-        for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax
-                        indexlevel indexsequentialshard)) {
-                if (defined(my $v = $self->{"$pfx.$k"})) {
-                        if (ref($v) eq 'ARRAY') {
-                                warn <<EOF;
-W: $pfx.$k has multiple values, only using `$v->[-1]'
-EOF
-                                $ibx->{$k} = $v->[-1];
-                        } else {
-                                $ibx->{$k} = $v;
+sub get_1 {
+        my ($self, $key) = @_;
+        my $v = $self->{$key};
+        return $v if !ref($v);
+        warn "W: $key has multiple values, only using `$v->[-1]'\n";
+        $v->[-1];
+}
+
+sub repo_objs {
+        my ($self, $ibxish) = @_;
+        $ibxish->{-repo_objs} // do {
+                my $ibx_coderepos = $ibxish->{coderepo} // return;
+                parse_cgitrc($self, undef, 0);
+                my $coderepos = $self->{-coderepos};
+                my @repo_objs;
+                for my $nick (@$ibx_coderepos) {
+                        my @parts = split(m!/!, $nick);
+                        for (@parts) {
+                                @parts = () unless valid_foo_name($_);
+                        }
+                        unless (@parts) {
+                                warn "invalid coderepo name: `$nick'\n";
+                                next;
                         }
+                        my $repo = $coderepos->{$nick} //=
+                                                fill_coderepo($self, $nick);
+                        $repo ? push(@repo_objs, $repo) :
+                                warn("coderepo.$nick.dir unset\n");
+                }
+                if (scalar @repo_objs) {
+                        for (@repo_objs) {
+                                push @{$_->{ibx_names}}, $ibxish->{name};
+                        }
+                        $ibxish->{-repo_objs} = \@repo_objs;
+                } else {
+                        delete $ibxish->{coderepo};
                 }
         }
+}
 
-        # backwards compatibility:
-        $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"};
-        if (($ibx->{inboxdir} // '') =~ /\n/s) {
-                warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n";
-                return;
-        }
-        foreach my $k (qw(obfuscate)) {
+sub _fill_ibx {
+        my ($self, $name) = @_;
+        my $pfx = "publicinbox.$name";
+        my $ibx = {};
+        for my $k (qw(watch)) {
                 my $v = $self->{"$pfx.$k"};
-                defined $v or next;
+                $ibx->{$k} = $v if defined $v;
+        }
+        for my $k (qw(filter newsgroup replyto httpbackendmax feedmax
+                        indexlevel indexsequentialshard boost)) {
+                my $v = get_1($self, "$pfx.$k") // next;
+                $ibx->{$k} = $v;
+        }
+
+        # "mainrepo" is backwards compatibility:
+        my $dir = $ibx->{inboxdir} = valid_dir($self, "$pfx.inboxdir") //
+                                valid_dir($self, "$pfx.mainrepo") // return;
+        for my $k (qw(obfuscate)) {
+                my $v = $self->{"$pfx.$k"} // next;
                 if (defined(my $bval = git_bool($v))) {
                         $ibx->{$k} = $bval;
                 } else {
@@ -399,24 +479,18 @@ EOF
         }
         # TODO: more arrays, we should support multi-value for
         # more things to encourage decentralization
-        foreach my $k (qw(address altid nntpmirror coderepo hide listid url
-                        infourl watchheader)) {
-                if (defined(my $v = $self->{"$pfx.$k"})) {
-                        $ibx->{$k} = _array($v);
-                }
+        for my $k (qw(address altid nntpmirror imapmirror
+                        coderepo hide listid url
+                        infourl watchheader
+                        nntpserver imapserver pop3server)) {
+                my $v = $self->{"$pfx.$k"} // next;
+                $ibx->{$k} = _array($v);
         }
 
-        return unless defined($ibx->{inboxdir});
-        my $name = $pfx;
-        $name =~ s/\Apublicinbox\.//;
-
-        if (!valid_inbox_name($name)) {
-                warn "invalid inbox name: '$name'\n";
-                return;
-        }
-
-        $ibx->{name} = $name;
-        $ibx->{-pi_config} = $self;
+        return unless valid_foo_name($name, 'publicinbox');
+        my %dedupe = ($name => undef);
+        ($ibx->{name}) = keys %dedupe; # used as a key everywhere
+        $ibx->{-pi_cfg} = $self;
         $ibx = PublicInbox::Inbox->new($ibx);
         foreach (@{$ibx->{address}}) {
                 my $lc_addr = lc($_);
@@ -429,8 +503,38 @@ EOF
                         $self->{-by_list_id}->{lc($list_id)} = $ibx;
                 }
         }
-        if (my $ng = $ibx->{newsgroup}) {
-                $self->{-by_newsgroup}->{$ng} = $ibx;
+        if (defined(my $ngname = $ibx->{newsgroup})) {
+                if (ref($ngname)) {
+                        delete $ibx->{newsgroup};
+                        warn 'multiple newsgroups not supported: '.
+                                join(', ', @$ngname). "\n";
+                # Newsgroup name needs to be compatible with RFC 3977
+                # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
+                # Leave out a few chars likely to cause problems or conflicts:
+                # '|', '<', '>', ';', '#', '$', '&',
+                } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! ||
+                                $ngname eq '') {
+                        delete $ibx->{newsgroup};
+                        warn "newsgroup name invalid: `$ngname'\n";
+                } else {
+                        my $lc = $ibx->{newsgroup} = lc $ngname;
+                        warn <<EOM if $lc ne $ngname;
+W: newsgroup=`$ngname' lowercased to `$lc'
+EOM
+                        # PublicInbox::NNTPD does stricter ->nntp_usable
+                        # checks, keep this lean for startup speed
+                        my $cur = $self->{-by_newsgroup}->{$lc} //= $ibx;
+                        warn <<EOM if $cur != $ibx;
+W: newsgroup=`$lc' is used by both `$cur->{name}' and `$ibx->{name}'
+EOM
+                }
+        }
+        unless (defined $ibx->{newsgroup}) { # for ->eidx_key
+                my $abs = rel2abs_collapsed($dir);
+                if ($abs ne $dir) {
+                        warn "W: `$dir' canonicalized to `$abs'\n";
+                        $ibx->{inboxdir} = $abs;
+                }
         }
         $self->{-by_name}->{$name} = $ibx;
         if ($ibx->{obfuscate}) {
@@ -438,42 +542,171 @@ EOF
                 $ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
                 fill_all($self); # noop to populate -no_obfuscate
         }
+        if (my $es = ALL($self)) {
+                require PublicInbox::Isearch;
+                $ibx->{isrch} = PublicInbox::Isearch->new($ibx, $es);
+        }
+        my $cur = $self->{-by_eidx_key}->{my $ekey = $ibx->eidx_key} //= $ibx;
+        $cur == $ibx or warn
+                "W: `$ekey' used by both `$cur->{name}' and `$ibx->{name}'\n";
+        $ibx;
+}
 
-        if (my $ibx_code_repos = $ibx->{coderepo}) {
-                my $code_repos = $self->{-code_repos};
-                my $repo_objs = $ibx->{-repo_objs} = [];
-                foreach my $nick (@$ibx_code_repos) {
-                        my @parts = split(m!/!, $nick);
-                        my $valid = 0;
-                        $valid += valid_inbox_name($_) foreach (@parts);
-                        $valid == scalar(@parts) or next;
+sub _fill_ei ($$) {
+        my ($self, $name) = @_;
+        eval { require PublicInbox::ExtSearch } or return;
+        my $pfx = "extindex.$name";
+        my $d = valid_dir($self, "$pfx.topdir") // return;
+        -d $d or return;
+        my $es = PublicInbox::ExtSearch->new($d);
+        for my $k (qw(indexlevel indexsequentialshard)) {
+                my $v = get_1($self, "$pfx.$k") // next;
+                $es->{$k} = $v;
+        }
+        for my $k (qw(coderepo hide url infourl)) {
+                my $v = $self->{"$pfx.$k"} // next;
+                $es->{$k} = _array($v);
+        }
+        return unless valid_foo_name($name, 'extindex');
+        $es->{name} = $name;
+        $es;
+}
 
-                        my $repo = $code_repos->{$nick} //=
-                                                _fill_code_repo($self, $nick);
-                        push @$repo_objs, $repo if $repo;
-                }
+sub _fill_csrch ($$) {
+        my ($self, $name) = @_; # "" is a valid name for cindex
+        return if $name ne '' && !valid_foo_name($name, 'cindex');
+        eval { require PublicInbox::CodeSearch } or return;
+        my $pfx = "cindex.$name";
+        my $d = valid_dir($self, "$pfx.topdir") // return;
+        -d $d or return;
+        my $csrch = PublicInbox::CodeSearch->new($d, $self);
+        for my $k (qw(localprefix)) {
+                my $v = $self->{"$pfx.$k"} // next;
+                $csrch->{$k} = _array($v);
         }
+        $csrch->{name} = $name;
+        $csrch;
+}
 
-        $ibx
+sub lookup_cindex ($$) {
+        my ($self, $name) = @_;
+        $self->{-csrch_by_name}->{$name} //= _fill_csrch($self, $name);
+}
+
+sub each_cindex {
+        my ($self, $cb, @arg) = @_;
+        my @csrch = map {
+                lookup_cindex($self, substr($_, length('cindex.'))) // ()
+        } grep(m!\Acindex\.[^\./]*\z!, @{$self->{-section_order}});
+        if (ref($cb) eq 'CODE') {
+                $cb->($_, @arg) for @csrch;
+        } else { # string function
+                $_->$cb(@arg) for @csrch;
+        }
+}
+
+sub config_cmd {
+        my ($self, $env, $opt) = @_;
+        my $f = $self->{-f} // default_file();
+        my @opt_c = @{$self->{-opt_c} // []};
+        my @cmd = (git_exe, @opt_c, 'config');
+        @opt_c ? tmp_cmd_opt($env, $opt) : push(@cmd, '-f', $f);
+        \@cmd;
 }
 
 sub urlmatch {
-        my ($self, $key, $url) = @_;
+        my $self = shift;
+        my @bool = $_[0] eq '--bool' ? (shift) : ();
+        my ($key, $url, $try_git) = @_;
         state $urlmatch_broken; # requires git 1.8.5
         return if $urlmatch_broken;
-        my $file = default_file();
-        my $cmd = [qw/git config -z --includes --get-urlmatch/,
-                "--file=$file", $key, $url ];
-        my $fh = popen_rd($cmd);
-        local $/ = "\0";
-        my $val = <$fh>;
-        if (close($fh)) {
-                chomp($val);
-                $val;
-        } else {
-                $urlmatch_broken = 1 if (($? >> 8) != 1);
-                undef;
+        my (%env, %opt);
+        my $cmd = $self->config_cmd(\%env, \%opt);
+        push @$cmd, @bool, qw(--includes -z --get-urlmatch), $key, $url;
+        my $val = run_qx($cmd, \%env, \%opt);
+        if ($?) {
+                undef $val;
+                if (@bool && ($? >> 8) == 128) { # not boolean
+                } elsif (($? >> 8) != 1) {
+                        $urlmatch_broken = 1;
+                } elsif ($try_git) { # n.b. this takes cwd into account
+                        $val = run_qx([qw(git config), @bool,
+                                        qw(-z --get-urlmatch), $key, $url]);
+                        undef $val if $?;
+                }
+        }
+        $? = 0; # don't influence lei exit status
+        if (defined($val)) {
+                local $/ = "\0";
+                chomp $val;
+                $val = git_bool($val) if @bool;
         }
+        $val;
+}
+
+sub json {
+        state $json;
+        $json //= do {
+                for my $mod (qw(Cpanel::JSON::XS JSON::MaybeXS JSON JSON::PP)) {
+                        eval "require $mod" or next;
+                        # ->ascii encodes non-ASCII to "\uXXXX"
+                        $json = $mod->new->ascii(1) and last;
+                }
+                $json;
+        };
+}
+
+sub squote_maybe ($) {
+        my ($val) = @_;
+        if ($val =~ m{([^\w@\./,\%\+\-])}) {
+                $val =~ s/(['!])/'\\$1'/g; # '!' for csh
+                return "'$val'";
+        }
+        $val;
+}
+
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+                '/**' => '/.*', '**/' => '.*/', '/**/' => '(?:/.*?/|/)',
+                '[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re ($) {
+        my ($re) = @_;
+        my $p = '';
+        my $in_bracket = 0;
+        my $qm = 0;
+        my $schema_host_port = '';
+
+        # don't glob URL-looking things that look like IPv6
+        if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+                $schema_host_port = quotemeta $1; # "http://[::1]:1234"
+        }
+        my $changes = ($re =~ s!(/\*\*/|\A\*\*/|/\*\*\z|.)!
+                $re_map{$p eq '\\' ? '' : do {
+                        if ($1 eq '[') { ++$in_bracket }
+                        elsif ($1 eq ']') { --$in_bracket }
+                        elsif ($1 eq ',') { ++$qm } # no change
+                        $p = $1;
+                }} // do {
+                        $p = $1;
+                        ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+                }!sge);
+        # bashism (also supported by curl): {a,b,c} => (a|b|c)
+        $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+                        (my $in_braces = $2) =~ tr!,!|!;
+                        $1."($in_braces)";
+                        /sge);
+        ($changes - $qm) ? $schema_host_port.$re : undef;
+}
+
+sub get_coderepo {
+        my ($self, $nick) = @_;
+        $self->{-coderepos}->{$nick} // do {
+                defined($self->{-cgit_scan_path}) ? do {
+                        apply_cgit_scan_path($self);
+                        my $cr = fill_coderepo($self, $nick);
+                        $cr ? ($self->{-coderepos}->{$nick} = $cr) : undef;
+                } : undef;
+        };
 }
 
 1;
diff --git a/lib/PublicInbox/ConfigIter.pm b/lib/PublicInbox/ConfigIter.pm
index e6fa8172..f9e3451a 100644
--- a/lib/PublicInbox/ConfigIter.pm
+++ b/lib/PublicInbox/ConfigIter.pm
@@ -1,12 +1,11 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Intended for PublicInbox::DS->EventLoop in read-only daemons
+# Intended for PublicInbox::DS::event_loop in read-only daemons
 # to avoid each_inbox() monopolizing the event loop when hundreds/thousands
 # of inboxes are in play.
 package PublicInbox::ConfigIter;
-use strict;
-use v5.10.1;
+use v5.12;
 
 sub new {
         my ($class, $pi_cfg, $cb, @args) = @_;
@@ -25,7 +24,7 @@ sub event_step {
         PublicInbox::DS::requeue($self) if defined($section);
 }
 
-# for generic PSGI servers
+# for generic PSGI servers, but also ManifestJsGz w/ ALL extindex
 sub each_section {
         my $self = shift;
         my ($pi_cfg, $i, $cb, @arg) = @$self;
diff --git a/lib/PublicInbox/ContentDigestDbg.pm b/lib/PublicInbox/ContentDigestDbg.pm
new file mode 100644
index 00000000..853624f1
--- /dev/null
+++ b/lib/PublicInbox/ContentDigestDbg.pm
@@ -0,0 +1,22 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest
+use v5.12;
+use Data::Dumper;
+use PublicInbox::SHA;
+$Data::Dumper::Useqq = $Data::Dumper::Terse = 1;
+
+sub new { bless [ PublicInbox::SHA->new(256), $_[1] ], __PACKAGE__ }
+
+sub add {
+        $_[0]->[0]->add($_[1]);
+        my @dbg = split(/^/sm, $_[1]);
+        if (@dbg && $dbg[0] =~ /\A(To|Cc)\0/) { # fold excessively long lines
+                @dbg = map { split(/,/s, $_) } @dbg;
+        }
+        print { $_[0]->[1] } Dumper(\@dbg) or die "print $!";
+}
+
+sub hexdigest { $_[0]->[0]->hexdigest }
+
+1;
diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index 1fe22955..95ca2929 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Unstable internal API.
@@ -8,18 +8,20 @@
 # See L<public-inbox-v2-format(5)> manpage for more details.
 package PublicInbox::ContentHash;
 use strict;
-use warnings;
-use base qw/Exporter/;
-our @EXPORT_OK = qw/content_hash content_digest/;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(content_hash content_digest git_sha);
 use PublicInbox::MID qw(mids references);
 use PublicInbox::MsgIter;
 
 # not sure if less-widely supported hash families are worth bothering with
-use Digest::SHA;
+use PublicInbox::SHA; # faster, but no ->clone
+use Digest::SHA; # we still need this for ->clone
 
 sub digest_addr ($$$) {
         my ($dig, $h, $v) = @_;
         $v =~ tr/"//d;
+        $v =~ tr/\r\n\t / /s;
         $v =~ s/@([a-z0-9\_\.\-\(\)]*([A-Z])\S*)/'@'.lc($1)/ge;
         utf8::encode($v);
         $dig->add("$h\0$v\0");
@@ -43,7 +45,7 @@ sub content_dig_i {
         my $ct = $part->content_type || 'text/plain';
         my ($s, undef) = msg_part_text($part, $ct);
         if (defined $s) {
-                $s =~ s/\r\n/\n/gs;
+                $s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
                 $s =~ s/\s*\z//s;
                 utf8::encode($s);
         } else {
@@ -52,30 +54,36 @@ sub content_dig_i {
         $dig->add($s);
 }
 
-sub content_digest ($) {
-        my ($eml) = @_;
-        my $dig = Digest::SHA->new(256);
+sub content_digest ($;$$) {
+        my ($eml, $dig, $hash_mids) = @_;
+        $dig //= Digest::SHA->new(256);
 
         # References: and In-Reply-To: get used interchangeably
         # in some "duplicates" in LKML.  We treat them the same
         # in SearchIdx, so treat them the same for this:
         # do NOT consider the Message-ID as part of the content_hash
-        # if we got here, we've already got Message-ID reuse
-        my %seen = map { $_ => 1 } @{mids($eml)};
-        foreach my $mid (@{references($eml)}) {
-                $dig->add("ref\0$mid\0") unless $seen{$mid}++;
+        # if we got here, we've already got Message-ID reuse for v2.
+        #
+        # However, `lei q --dedupe=content' does use $hash_mids since
+        # it doesn't have any other dedupe
+        my $mids = mids($eml);
+        if ($hash_mids) {
+                $dig->add("mid\0$_\0") for @$mids;
+        }
+        my %seen = map { $_ => 1 } @$mids;
+        for (grep { !$seen{$_}++ } @{references($eml)}) {
+                utf8::encode($_);
+                $dig->add("ref\0$_\0");
         }
 
         # Only use Sender: if From is not present
         foreach my $h (qw(From Sender)) {
-                my @v = $eml->header($h);
-                if (@v) {
-                        digest_addr($dig, $h, $_) foreach @v;
-                }
+                my @v = $eml->header($h) or next;
+                digest_addr($dig, $h, $_) foreach @v;
+                last;
         }
         foreach my $h (qw(Subject Date)) {
-                my @v = $eml->header($h);
-                foreach my $v (@v) {
+                for my $v ($eml->header($h)) {
                         utf8::encode($v);
                         $dig->add("$h\0$v\0");
                 }
@@ -84,15 +92,23 @@ sub content_digest ($) {
         # not in the original message.  For the purposes of deduplication,
         # do not take it into account:
         foreach my $h (qw(To Cc)) {
-                my @v = $eml->header($h);
-                digest_addr($dig, $h, $_) foreach @v;
+                digest_addr($dig, $h, $_) for ($eml->header($h));
         }
         msg_iter($eml, \&content_dig_i, $dig);
         $dig;
 }
 
 sub content_hash ($) {
-        content_digest($_[0])->digest;
+        content_digest($_[0], PublicInbox::SHA->new(256))->digest;
+}
+
+# don't clone the result of this
+sub git_sha ($$) {
+        my ($n, $eml) = @_;
+        my $dig = PublicInbox::SHA->new($n);
+        my $bref = ref($eml) eq 'SCALAR' ? $eml : \($eml->as_string);
+        $dig->add('blob '.length($$bref)."\0", $$bref);
+        $dig;
 }
 
 1;
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 9c278307..a6fec954 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -21,42 +21,43 @@
 #        (tmpio = [ GLOB, offset, [ length ] ])
 package PublicInbox::DS;
 use strict;
-use bytes;
-use POSIX qw(WNOHANG);
-use IO::Handle qw();
+use v5.10.1;
+use parent qw(Exporter);
+use bytes qw(length substr); # FIXME(?): needed for PublicInbox::NNTP
+use POSIX qw(WNOHANG sigprocmask SIG_SETMASK SIG_UNBLOCK);
 use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
 use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
-use parent qw(Exporter);
-our @EXPORT_OK = qw(now msg_more);
-use 5.010_001;
 use Scalar::Util qw(blessed);
-use PublicInbox::Syscall qw(:epoll);
+use PublicInbox::Syscall qw(%SIGNUM
+        EPOLLIN EPOLLOUT EPOLLONESHOT EPOLLEXCLUSIVE);
 use PublicInbox::Tmpfile;
-use Errno qw(EAGAIN EINVAL);
-use Carp qw(confess carp);
+use PublicInbox::Select;
+use PublicInbox::OnDestroy;
+use Errno qw(EAGAIN EINVAL ECHILD);
+use Carp qw(carp croak);
+our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
 
 my $nextq; # queue for next_tick
-my $wait_pids; # list of [ pid, callback, callback_arg ]
-my $later_queue; # list of callbacks to run at some later interval
-my $EXPMAP; # fd -> idle_time
-our $EXPTIME = 180; # 3 minutes
-my ($later_timer, $reap_armed, $exp_timer);
-my $ToClose; # sockets to close when event loop is done
-our (
-     %DescriptorMap,             # fd (num) -> PublicInbox::DS object
-     $Epoll,                     # Global epoll fd (or DSKQXS ref)
-     $_io,                       # IO::Handle for Epoll
-
-     $PostLoopCallback,          # subref to call at the end of each loop, if defined (global)
-
-     $LoopTimeout,               # timeout of event loop in milliseconds
-     $DoneInit,                  # if we've done the one-time module init yet
+my $reap_armed;
+my @active; # FDs (or objs) returned by epoll/kqueue
+our (%AWAIT_PIDS, # pid => [ $callback, @args ]
+        $cur_runq, # only set inside next_tick
+     @FD_MAP, # fd (num) -> PublicInbox::DS object
+     $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
+
+     @post_loop_do,              # subref + args to call at the end of each loop
+
+     $loop_timeout,               # timeout of event loop in milliseconds
      @Timers,                    # timers
+     %UniqTimer,
      $in_loop,
      );
 
 Reset();
 
+# clobber everything explicitly to avoid DESTROY ordering problems w/ DBI
+END { Reset() }
+
 #####################################################################
 ### C L A S S   M E T H O D S
 #####################################################################
@@ -67,180 +68,156 @@ Reset all state
 
 =cut
 sub Reset {
-    %DescriptorMap = ();
-    $in_loop = $wait_pids = $later_queue = $reap_armed = undef;
-    $EXPMAP = {};
-    $nextq = $ToClose = $later_timer = $exp_timer = undef;
-    $LoopTimeout = -1;  # no timeout by default
-    @Timers = ();
-
-    $PostLoopCallback = undef;
-    $DoneInit = 0;
-
-    $_io = undef; # closes real $Epoll FD
-    $Epoll = undef; # may call DSKQXS::DESTROY
+        $Poller = bless [], 'PublicInbox::DummyPoller';
+        do {
+                $in_loop = undef; # first in case DESTROY callbacks use this
+                # clobbering $Poller may call DSKQXS::DESTROY,
+                # we must always have this set to something to avoid
+                # needing branches before ep_del/ep_mod calls (via ->close).
+                @FD_MAP = ();
+                @Timers = ();
+                %UniqTimer = ();
+                @post_loop_do = ();
+
+                # we may be called from an *atfork_child inside next_tick:
+                @$cur_runq = () if $cur_runq;
+                @active = ();
+                $nextq = undef; # may call ep_del
+                %AWAIT_PIDS = ();
+        } while (@Timers || $nextq || keys(%AWAIT_PIDS) ||
+                @active || @FD_MAP ||
+                @post_loop_do || keys(%UniqTimer) ||
+                scalar(@{$cur_runq // []})); # do not vivify cur_runq
 
-    *EventLoop = *FirstTimeEventLoop;
-}
-
-=head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
-
-Set the loop timeout for the event loop to some value in milliseconds.
-
-A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return
-immediately.
-
-=cut
-sub SetLoopTimeout {
-    return $LoopTimeout = $_[1] + 0;
+        $reap_armed = undef;
+        $loop_timeout = -1;  # no timeout by default
+        $Poller = PublicInbox::Select->new;
 }
 
-=head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >>
-
-Add a timer to occur $seconds from now. $seconds may be fractional, but timers
-are not guaranteed to fire at the exact time you ask for.
-
-=cut
-sub add_timer ($$;$) {
-    my ($secs, $coderef, $arg) = @_;
-
-    my $fire_time = now() + $secs;
-
-    my $timer = [$fire_time, $coderef, $arg];
-
-    if (!@Timers || $fire_time >= $Timers[-1][0]) {
-        push @Timers, $timer;
-        return $timer;
-    }
-
-    # Now, where do we insert?  (NOTE: this appears slow, algorithm-wise,
-    # but it was compared against calendar queues, heaps, naive push/sort,
-    # and a bunch of other versions, and found to be fastest with a large
-    # variety of datasets.)
-    for (my $i = 0; $i < @Timers; $i++) {
-        if ($Timers[$i][0] > $fire_time) {
-            splice(@Timers, $i, 0, $timer);
-            return $timer;
-        }
-    }
-
-    die "Shouldn't get here.";
-}
+sub _add_named_timer {
+        my ($name, $secs, $coderef, @args) = @_;
+        my $fire_time = now() + $secs;
+        my $timer = [$fire_time, $name, $coderef, @args];
 
-# keeping this around in case we support other FD types for now,
-# epoll_create1(EPOLL_CLOEXEC) requires Linux 2.6.27+...
-sub set_cloexec ($) {
-    my ($fd) = @_;
+        if (!@Timers || $fire_time >= $Timers[-1][0]) {
+                push @Timers, $timer;
+                return $timer;
+        }
 
-    $_io = IO::Handle->new_from_fd($fd, 'r+') or return;
-    defined(my $fl = fcntl($_io, F_GETFD, 0)) or return;
-    fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
+        # Now, where do we insert?  (NOTE: this appears slow, algorithm-wise,
+        # but it was compared against calendar queues, heaps, naive push/sort,
+        # and a bunch of other versions, and found to be fastest with a large
+        # variety of datasets.)
+        for (my $i = 0; $i < @Timers; $i++) {
+                if ($Timers[$i][0] > $fire_time) {
+                        splice(@Timers, $i, 0, $timer);
+                        return $timer;
+                }
+        }
+        die "Shouldn't get here.";
 }
 
-sub _InitPoller
-{
-    return if $DoneInit;
-    $DoneInit = 1;
+sub add_timer { _add_named_timer(undef, @_) }
 
-    if (PublicInbox::Syscall::epoll_defined())  {
-        $Epoll = epoll_create();
-        set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
-    } else {
-        my $cls;
-        for (qw(DSKQXS DSPoll)) {
-            $cls = "PublicInbox::$_";
-            last if eval "require $cls";
-        }
-        $cls->import(qw(epoll_ctl epoll_wait));
-        $Epoll = $cls->new;
-    }
-    *EventLoop = *EpollEventLoop;
+sub add_uniq_timer { # ($name, $secs, $coderef, @args) = @_;
+        $UniqTimer{$_[0]} //= _add_named_timer(@_);
 }
 
-=head2 C<< CLASS->EventLoop() >>
-
-Start processing IO events. In most daemon programs this never exits. See
-C<PostLoopCallback> below for how to exit the loop.
-
-=cut
-sub FirstTimeEventLoop {
-    my $class = shift;
-
-    _InitPoller();
-
-    EventLoop($class);
+# caller sets return value to $Poller
+sub _InitPoller () {
+        my @try = ($^O eq 'linux' ? 'Epoll' : 'DSKQXS');
+        my $cls;
+        for (@try, 'DSPoll') {
+                $cls = "PublicInbox::$_";
+                last if eval "require $cls";
+        }
+        $cls->new;
 }
 
 sub now () { clock_gettime(CLOCK_MONOTONIC) }
 
 sub next_tick () {
-    my $q = $nextq or return;
-    $nextq = undef;
-    for (@$q) {
-        # we avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak:
-        # https://rt.perl.org/Public/Bug/Display.html?id=114340
-        if (blessed($_)) {
-            $_->event_step;
-        } else {
-            $_->();
-        }
-    }
+        $cur_runq = $nextq or return;
+        $nextq = undef;
+        while (my $obj = shift @$cur_runq) {
+                # avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak:
+                # https://rt.perl.org/Public/Bug/Display.html?id=114340
+                blessed($obj) ? $obj->event_step : $obj->();
+        }
+        1;
 }
 
 # runs timers and returns milliseconds for next one, or next event loop
 sub RunTimers {
-    next_tick();
+        my $ran = next_tick();
 
-    return (($nextq || $ToClose) ? 0 : $LoopTimeout) unless @Timers;
+        return ($nextq || $ran ? 0 : $loop_timeout) unless @Timers;
 
-    my $now = now();
+        my $now = now();
 
-    # Run expired timers
-    while (@Timers && $Timers[0][0] <= $now) {
-        my $to_run = shift(@Timers);
-        $to_run->[1]->($to_run->[2]);
-    }
+        # Run expired timers
+        while (@Timers && $Timers[0][0] <= $now) {
+                my $to_run = shift(@Timers);
+                delete $UniqTimer{$to_run->[1] // ''};
+                $to_run->[2]->(@$to_run[3..$#$to_run]);
+                $ran = 1;
+        }
 
-    # timers may enqueue into nextq:
-    return 0 if ($nextq || $ToClose);
+        # timers may enqueue into nextq:
+        return 0 if $nextq || $ran;
 
-    return $LoopTimeout unless @Timers;
+        return $loop_timeout unless @Timers;
 
-    # convert time to an even number of milliseconds, adding 1
-    # extra, otherwise floating point fun can occur and we'll
-    # call RunTimers like 20-30 times, each returning a timeout
-    # of 0.0000212 seconds
-    my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
+        # convert time to an even number of milliseconds, adding 1
+        # extra, otherwise floating point fun can occur and we'll
+        # call RunTimers like 20-30 times, each returning a timeout
+        # of 0.0000212 seconds
+        my $t = int(($Timers[0][0] - $now) * 1000) + 1;
 
-    # -1 is an infinite timeout, so prefer a real timeout
-    return $timeout     if $LoopTimeout == -1;
+        # -1 is an infinite timeout, so prefer a real timeout
+        ($loop_timeout < 0 || $loop_timeout >= $t) ? $t : $loop_timeout
+}
 
-    # otherwise pick the lower of our regular timeout and time until
-    # the next timer
-    return $LoopTimeout if $LoopTimeout < $timeout;
-    return $timeout;
+sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
+
+# ensure we detect bugs, HW problems and user rlimits
+our @UNBLOCKABLE = (POSIX::SIGABRT, POSIX::SIGBUS, POSIX::SIGFPE,
+        POSIX::SIGILL, POSIX::SIGSEGV, POSIX::SIGXCPU, POSIX::SIGXFSZ);
+
+sub block_signals { # anything in @_ stays unblocked
+        my $newset = POSIX::SigSet->new;
+        $newset->fillset or die "fillset: $!";
+        for (@_, @UNBLOCKABLE) { $newset->delset($_) or die "delset($_): $!" }
+        my $oldset = POSIX::SigSet->new;
+        sig_setmask($newset, $oldset);
+        $oldset;
 }
 
-# We can't use waitpid(-1) safely here since it can hit ``, system(),
-# and other things.  So we scan the $wait_pids list, which is hopefully
-# not too big.  We keep $wait_pids small by not calling dwaitpid()
-# until we've hit EOF when reading the stdout of the child.
+sub await_cb ($;@) {
+        my ($pid, @cb_args) = @_;
+        my $cb = shift @cb_args or return;
+        eval { $cb->($pid, @cb_args) };
+        warn "E: awaitpid($pid): $@" if $@;
+}
 
+# This relies on our Perl process being single-threaded, or at least
+# no threads spawning and waiting on processes (``, system(), etc...)
+# Threads are officially discouraged by the Perl5 team, and I expect
+# that to remain the case.
 sub reap_pids {
         $reap_armed = undef;
-        my $tmp = $wait_pids or return;
-        $wait_pids = undef;
-        foreach my $ary (@$tmp) {
-                my ($pid, $cb, $arg) = @$ary;
-                my $ret = waitpid($pid, WNOHANG);
-                if ($ret == 0) {
-                        push @$wait_pids, $ary; # autovivifies @$wait_pids
-                } elsif ($cb) {
-                        eval { $cb->($arg, $pid) };
+        while (1) {
+                my $pid = waitpid(-1, WNOHANG) or return;
+                if (defined(my $cb_args = delete $AWAIT_PIDS{$pid})) {
+                        await_cb($pid, @$cb_args) if $cb_args;
+                } elsif ($pid == -1 && $! == ECHILD) {
+                        return requeue(\&dflush); # force @post_loop_do to run
+                } elsif ($pid > 0) {
+                        warn "W: reaped unknown PID=$pid: \$?=$?\n";
+                } else { # does this happen?
+                        return warn("W: waitpid(-1, WNOHANG) => $pid ($!)");
                 }
         }
-        # we may not be done, yet, and could've missed/masked a SIGCHLD:
-        $reap_armed //= requeue(\&reap_pids) if $wait_pids;
 }
 
 # reentrant SIGCHLD handler (since reap_pids is not reentrant)
@@ -248,65 +225,80 @@ sub enqueue_reap () { $reap_armed //= requeue(\&reap_pids) }
 
 sub in_loop () { $in_loop }
 
+# use inside @post_loop_do, returns number of busy clients
+sub close_non_busy () {
+        my $n = 0;
+        for my $s (grep defined, @FD_MAP) {
+                # close as much as possible, early as possible
+                ($s->busy ? ++$n : $s->close) if $s->can('busy');
+        }
+        $n;
+}
+
 # Internal function: run the post-event callback, send read events
 # for pushed-back data, and close pending connections.  returns 1
 # if event loop should continue, or 0 to shut it all down.
 sub PostEventLoop () {
-        # now we can close sockets that wanted to close during our event
-        # processing.  (we didn't want to close them during the loop, as we
-        # didn't want fd numbers being reused and confused during the event
-        # loop)
-        if (my $close_now = $ToClose) {
-                $ToClose = undef; # will be autovivified on push
-                @$close_now = map { fileno($_) } @$close_now;
-
-                # order matters, destroy expiry times, first:
-                delete @$EXPMAP{@$close_now};
-
-                # ->DESTROY methods may populate ToClose
-                delete @DescriptorMap{@$close_now};
-        }
-
         # by default we keep running, unless a postloop callback cancels it
-        $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1;
-}
-
-sub EpollEventLoop {
-    local $in_loop = 1;
-    do {
-        my @events;
-        my $i;
-        my $timeout = RunTimers();
-
-        # get up to 1000 events
-        my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
-        for ($i=0; $i<$evcount; $i++) {
-            # it's possible epoll_wait returned many events, including some at the end
-            # that ones in the front triggered unregister-interest actions.  if we
-            # can't find the %sock entry, it's because we're no longer interested
-            # in that event.
-            $DescriptorMap{$events[$i]->[0]}->event_step;
-        }
-    } while (PostEventLoop());
-    _run_later();
+        @post_loop_do ? $post_loop_do[0]->(@post_loop_do[1..$#post_loop_do]) : 1
 }
 
-=head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >>
-
-Sets post loop callback function.  Pass a subref and it will be
-called every time the event loop finishes.
-
-Return 1 (or any true value) from the sub to make the loop continue, 0 or false
-and it will exit.
+sub sigset_prep ($$$) {
+        my ($sig, $init, $each) = @_; # $sig: { signame => whatever }
+        my $ret = POSIX::SigSet->new;
+        $ret->$init or die "$init: $!";
+        for my $s (keys %$sig) {
+                my $num = $SIGNUM{$s} // POSIX->can("SIG$s")->();
+                $ret->$each($num) or die "$each ($s => $num): $!";
+        }
+        for (@UNBLOCKABLE) { $ret->$each($_) or die "$each ($_): $!" }
+        $ret;
+}
+
+sub allowset ($) { sigset_prep $_[0], 'fillset', 'delset' }
+sub unblockset ($) { sigset_prep $_[0], 'emptyset', 'addset' }
+
+# Start processing IO events. In most daemon programs this never exits. See
+# C<post_loop_do> for how to exit the loop.
+sub event_loop (;$$) {
+        my ($sig, $oldset) = @_;
+        $Poller //= _InitPoller();
+        require PublicInbox::Sigfd if $sig;
+        my $sigfd = $sig ? PublicInbox::Sigfd->new($sig) : undef;
+        if ($sigfd && $sigfd->{is_kq}) {
+                my $tmp = allowset($sig);
+                local @SIG{keys %$sig} = values(%$sig);
+                sig_setmask($tmp, my $old = POSIX::SigSet->new);
+                # Unlike Linux signalfd, EVFILT_SIGNAL can't handle
+                # signals received before the filter is created,
+                # so we peek at signals here.
+                sig_setmask($old);
+        }
+        local @SIG{keys %$sig} = values(%$sig) if $sig && !$sigfd;
+        local $SIG{PIPE} = 'IGNORE';
+        if (!$sigfd && $sig) {
+                # wake up every second to accept signals if we don't
+                # have signalfd or IO::KQueue:
+                sig_setmask($oldset) if $oldset;
+                sigprocmask(SIG_UNBLOCK, unblockset($sig)) or
+                        die "SIG_UNBLOCK: $!";
+                $loop_timeout = 1000;
+        }
+        $_[0] = $sigfd = $sig = undef; # $_[0] == sig
+        local $in_loop = 1;
+        do {
+                my $timeout = RunTimers();
 
-The callback function will be passed two parameters: \%DescriptorMap
+                # grab whatever FDs are ready
+                $Poller->ep_wait($timeout, \@active);
 
-=cut
-sub SetPostLoopCallback {
-    my ($class, $ref) = @_;
+                # map all FDs to their associated Perl object
+                @active = @FD_MAP[@active];
 
-    # global callback
-    $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef;
+                while (my $obj = shift @active) {
+                        $obj->event_step;
+                }
+        } while (PostEventLoop());
 }
 
 #####################################################################
@@ -318,7 +310,7 @@ sub SetPostLoopCallback {
 =head2 C<< CLASS->new( $socket ) >>
 
 Create a new PublicInbox::DS subclass object for the given I<socket> which will
-react to events on it during the C<EventLoop>.
+react to events on it during the C<event_loop>.
 
 This is normally (always?) called from your subclass via:
 
@@ -330,62 +322,54 @@ sub new {
     $self->{sock} = $sock;
     my $fd = fileno($sock);
 
-    _InitPoller();
-
-    if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
+    $Poller //= _InitPoller();
+retry:
+    if ($Poller->ep_add($sock, $ev)) {
         if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
             $ev &= ~EPOLLEXCLUSIVE;
             goto retry;
         }
-        die "couldn't add epoll watch for $fd: $!\n";
+        die "EPOLL_CTL_ADD $self/$sock/$fd: $!";
     }
-    confess("DescriptorMap{$fd} defined ($DescriptorMap{$fd})")
-        if defined($DescriptorMap{$fd});
+    defined($FD_MAP[$fd]) and
+                croak("BUG: FD:$fd in use by $FD_MAP[$fd] (for $self/$sock)");
 
-    $DescriptorMap{$fd} = $self;
+    $FD_MAP[$fd] = $self;
 }
 
-
-#####################################################################
-### I N S T A N C E   M E T H O D S
-#####################################################################
+# for IMAP, NNTP, and POP3 which greet clients upon connect
+sub greet {
+        my ($self, $sock) = @_;
+        my $ev = EPOLLIN;
+        my $wbuf;
+        if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
+                return if $! != EAGAIN || !($ev = PublicInbox::TLS::epollbit());
+                $wbuf = [ \&accept_tls_step, $self->can('do_greet')];
+        }
+        new($self, $sock, $ev | EPOLLONESHOT);
+        if ($wbuf) {
+                $self->{wbuf} = $wbuf;
+        } else {
+                $self->do_greet;
+        }
+        $self;
+}
 
 sub requeue ($) { push @$nextq, $_[0] } # autovivifies
 
-=head2 C<< $obj->close >>
-
-Close the socket.
-
-=cut
+# drop the IO::Handle ref, true if successful, false if not (or already dropped)
+# (this is closer to CORE::close than Danga::Socket::close)
 sub close {
-    my ($self) = @_;
-    my $sock = delete $self->{sock} or return;
-
-    # we need to flush our write buffer, as there may
-    # be self-referential closures (sub { $client->close })
-    # preventing the object from being destroyed
-    delete $self->{wbuf};
-
-    # if we're using epoll, we have to remove this from our epoll fd so we stop getting
-    # notifications about it
-    my $fd = fileno($sock);
-    epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and
-        confess("EPOLL_CTL_DEL: $!");
-
-    # we explicitly don't delete from DescriptorMap here until we
-    # actually close the socket, as we might be in the middle of
-    # processing an epoll_wait/etc that returned hundreds of fds, one
-    # of which is not yet processed and is what we're closing.  if we
-    # keep it in DescriptorMap, then the event harnesses can just
-    # looked at $pob->{sock} == undef and ignore it.  but if it's an
-    # un-accounted for fd, then it (understandably) freak out a bit
-    # and emit warnings, thinking their state got off.
+        my ($self) = @_;
+        my $sock = delete $self->{sock} or return;
 
-    # defer closing the actual socket until the event loop is done
-    # processing this round of events.  (otherwise we might reuse fds)
-    push @$ToClose, $sock; # autovivifies $ToClose
+        # we need to clear our write buffer, as there may
+        # be self-referential closures (sub { $client->close })
+        # preventing the object from being destroyed
+        delete $self->{wbuf};
+        $FD_MAP[fileno($sock)] = undef;
 
-    return 0;
+        !$Poller->ep_del($sock); # stop getting notifications
 }
 
 # portable, non-thread-safe sendfile emulation (no pread, yet)
@@ -431,8 +415,8 @@ next_buf:
                         shift @$wbuf;
                         goto next_buf;
                     }
-                } elsif ($! == EAGAIN) {
-                    epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
+                } elsif ($! == EAGAIN && (my $ev = epbit($sock, EPOLLOUT))) {
+                    epwait($sock, $ev | EPOLLONESHOT);
                     return 0;
                 } else {
                     return $self->close;
@@ -461,39 +445,40 @@ sub rbuf_idle ($$) {
     }
 }
 
+# returns true if bytes are read, false otherwise
 sub do_read ($$$;$) {
-    my ($self, $rbuf, $len, $off) = @_;
-    my $r = sysread(my $sock = $self->{sock}, $$rbuf, $len, $off // 0);
-    return ($r == 0 ? $self->close : $r) if defined $r;
-    # common for clients to break connections without warning,
-    # would be too noisy to log here:
-    if ($! == EAGAIN) {
-        epwait($sock, epbit($sock, EPOLLIN) | EPOLLONESHOT);
-        rbuf_idle($self, $rbuf);
-        0;
-    } else {
-        $self->close;
-    }
+        my ($self, $rbuf, $len, $off) = @_;
+        my ($ev, $r, $s);
+        $r = sysread($s = $self->{sock}, $$rbuf, $len, $off // 0) and return $r;
+
+        if (!defined($r) && $! == EAGAIN && ($ev = epbit($s, EPOLLIN))) {
+                epwait($s, $ev | EPOLLONESHOT);
+                rbuf_idle($self, $rbuf);
+        } else {
+                $self->close;
+        }
+        undef;
 }
 
 # drop the socket if we hit unrecoverable errors on our system which
 # require BOFH attention: ENOSPC, EFBIG, EIO, EMFILE, ENFILE...
 sub drop {
-    my $self = shift;
-    carp(@_);
-    $self->close;
+        my $self = shift;
+        carp(@_);
+        $self->close;
+        undef;
 }
 
-# n.b.: use ->write/->read for this buffer to allow compatibility with
-# PerlIO::mmap or PerlIO::scalar if needed
 sub tmpio ($$$) {
-    my ($self, $bref, $off) = @_;
-    my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
-        return drop($self, "tmpfile $!");
-    $fh->autoflush(1);
-    my $len = bytes::length($$bref) - $off;
-    $fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
-    [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
+        my ($self, $bref, $off) = @_;
+        my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
+                return drop($self, "tmpfile $!");
+        $fh->autoflush(1);
+        my $len = length($$bref) - $off;
+        my $n = syswrite($fh, $$bref, $len, $off) //
+                return drop($self, "write ($len): $!");
+        $n == $len or return drop($self, "wrote $n < $len bytes");
+        [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
 }
 
 =head2 C<< $obj->write( $data ) >>
@@ -523,7 +508,8 @@ sub write {
             push @$wbuf, $bref;
         } else {
             my $tmpio = $wbuf->[-1];
-            if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer
+            if (ref($tmpio) eq 'ARRAY' && !defined($tmpio->[2])) {
+                # append to tmp file buffer
                 $tmpio->[0]->print($$bref) or return drop($self, "print: $!");
             } else {
                 my $tmpio = tmpio($self, $bref, 0) or return 0;
@@ -535,14 +521,15 @@ sub write {
         $bref->($self);
         return 1;
     } else {
-        my $to_write = bytes::length($$bref);
+        my $to_write = length($$bref);
         my $written = syswrite($sock, $$bref, $to_write);
 
         if (defined $written) {
             return 1 if $written == $to_write;
             requeue($self); # runs: event_step -> flush_write
         } elsif ($! == EAGAIN) {
-            epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
+            my $ev = epbit($sock, EPOLLOUT) or return $self->close;
+            epwait($sock, $ev | EPOLLONESHOT);
             $written = 0;
         } else {
             return $self->close;
@@ -569,7 +556,7 @@ sub msg_more ($$) {
                 !$sock->can('stop_SSL')) {
         my $n = send($sock, $_[1], MSG_MORE);
         if (defined $n) {
-            my $nlen = bytes::length($_[1]) - $n;
+            my $nlen = length($_[1]) - $n;
             return 1 if $nlen == 0; # all done!
             # queue up the unwritten substring:
             my $tmpio = tmpio($self, \($_[1]), $n) or return 0;
@@ -584,9 +571,8 @@ sub msg_more ($$) {
 }
 
 sub epwait ($$) {
-    my ($sock, $ev) = @_;
-    epoll_ctl($Epoll, EPOLL_CTL_MOD, fileno($sock), $ev) and
-        confess("EPOLL_CTL_MOD $!");
+        my ($io, $ev) = @_;
+        $Poller->ep_mod($io, $ev) and croak("EPOLL_CTL_MOD($io): $!");
 }
 
 # return true if complete, false if incomplete (or failure)
@@ -595,84 +581,123 @@ sub accept_tls_step ($) {
     my $sock = $self->{sock} or return;
     return 1 if $sock->accept_SSL;
     return $self->close if $! != EAGAIN;
-    epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+    epwait($sock, $ev | EPOLLONESHOT);
     unshift(@{$self->{wbuf}}, \&accept_tls_step); # autovivifies
     0;
 }
 
-# return true if complete, false if incomplete (or failure)
+# return value irrelevant
 sub shutdn_tls_step ($) {
     my ($self) = @_;
     my $sock = $self->{sock} or return;
     return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1);
     return $self->close if $! != EAGAIN;
-    epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+    epwait($sock, $ev | EPOLLONESHOT);
     unshift(@{$self->{wbuf}}, \&shutdn_tls_step); # autovivifies
-    0;
 }
 
 # don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC
 # or fork w/o exec, so no inadvertent socket sharing
 sub shutdn ($) {
-    my ($self) = @_;
-    my $sock = $self->{sock} or return;
-    if ($sock->can('stop_SSL')) {
-        shutdn_tls_step($self);
-    } else {
-        $self->close;
-    }
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        $sock->can('stop_SSL') ? shutdn_tls_step($self) : $self->close;
 }
 
-# must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t)
-sub dwaitpid ($$$) {
-        die "Not in EventLoop\n" unless $in_loop;
-        push @$wait_pids, [ @_ ]; # [ $pid, $cb, $arg ]
+sub dflush {} # overridden by DSdeflate
+sub compressed {} # overridden by DSdeflate
+sub long_response_done {} # overridden by Net::NNTP
 
-        # We could've just missed our SIGCHLD, cover it, here:
-        enqueue_reap();
+sub long_step {
+        my ($self) = @_;
+        # wbuf is unset or empty, here; {long} may add to it
+        my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
+        my $more = eval { $cb->($self, @args) };
+        if ($@ || !$self->{sock}) { # something bad happened...
+                delete $self->{long_cb};
+                my $elapsed = now() - $t0;
+                $@ and warn("$@ during long response[$fd] - ",
+                                sprintf('%0.6f', $elapsed),"\n");
+                $self->out(" deferred[$fd] aborted - %0.6f", $elapsed);
+                $self->close;
+        } elsif ($more) { # $self->{wbuf}:
+                # control passed to ibx_async_cat if $more == \undef
+                requeue_once($self) if !ref($more);
+        } else { # all done!
+                delete $self->{long_cb};
+                $self->long_response_done;
+                my $elapsed = now() - $t0;
+                $self->out(" deferred[$fd] done - %0.6f", $elapsed);
+                my $wbuf = $self->{wbuf}; # do NOT autovivify
+                requeue($self) unless $wbuf && @$wbuf;
+        }
 }
 
-sub _run_later () {
-        my $run = $later_queue or return;
-        $later_timer = $later_queue = undef;
-        $_->() for @$run;
-}
+sub requeue_once {
+        my ($self) = @_;
+        # COMPRESS users all share the same DEFLATE context.
+        # Flush it here to ensure clients don't see each other's data
+        $self->dflush;
 
-sub later ($) {
-        push @$later_queue, $_[0]; # autovivifies @$later_queue
-        $later_timer //= add_timer(60, \&_run_later);
+        # no recursion, schedule another call ASAP,
+        # but only after all pending writes are done.
+        # autovivify wbuf.  wbuf may be populated by $cb,
+        # no need to rearm if so: (push returns new size of array)
+        $self->requeue if push(@{$self->{wbuf}}, \&long_step) == 1;
 }
 
-sub expire_old () {
-        my $now = now();
-        my $exp = $EXPTIME;
-        my $old = $now - $exp;
-        my %new;
-        while (my ($fd, $idle_at) = each %$EXPMAP) {
-                if ($idle_at < $old) {
-                        my $ds_obj = $DescriptorMap{$fd};
-                        $new{$fd} = $idle_at if !$ds_obj->shutdn;
+sub long_response ($$;@) {
+        my ($self, $cb, @args) = @_; # cb returns true if more, false if done
+        my $sock = $self->{sock} or return;
+        # make sure we disable reading during a long response,
+        # clients should not be sending us stuff and making us do more
+        # work while we are stream a response to them
+        $self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
+        long_step($self); # kick off!
+        undef;
+}
+
+sub awaitpid {
+        my ($pid, @cb_args) = @_; # @cb_args = ($cb, @args), $cb may be undef
+        $AWAIT_PIDS{$pid} = \@cb_args if @cb_args;
+        # provide synchronous API
+        if (defined(wantarray) || (!$in_loop && !@cb_args)) {
+                my $ret = waitpid($pid, 0);
+                if ($ret == $pid) {
+                        my $cb_args = delete $AWAIT_PIDS{$pid};
+                        @cb_args = @$cb_args if !@cb_args && $cb_args;
+                        await_cb($pid, @cb_args);
                 } else {
-                        $new{$fd} = $idle_at;
+                        carp "waitpid($pid) => $ret ($!)";
+                        delete $AWAIT_PIDS{$pid};
                 }
+                return $ret;
+        } elsif ($in_loop) { # We could've just missed our SIGCHLD, cover it, here:
+                enqueue_reap();
         }
-        $EXPMAP = \%new;
-        $exp_timer = scalar(keys %new) ? later(\&expire_old) : undef;
 }
 
-sub update_idle_time {
-        my ($self) = @_;
-        my $sock = $self->{sock} or return;
-        $EXPMAP->{fileno($sock)} = now();
-        $exp_timer //= later(\&expire_old);
+# for persistent child process
+sub fork_persist () {
+        my $seed = rand(0xffffffff);
+        my $pid = PublicInbox::OnDestroy::fork_tmp;
+        if ($pid == 0) {
+                srand($seed);
+                eval { Net::SSLeay::randomize() }; # may not be loaded
+                Reset();
+        }
+        $pid;
 }
 
-sub not_idle_long {
-        my ($self, $now) = @_;
-        my $sock = $self->{sock} or return;
-        my $idle_at = $EXPMAP->{fileno($sock)} or return;
-        ($idle_at + $EXPTIME) > $now;
-}
+package PublicInbox::DummyPoller; # only used during Reset
+use v5.12;
+
+sub ep_del {}
+no warnings 'once';
+*ep_add = \&ep_del;
+*ep_mod = \&ep_del;
 
 1;
 
diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm
index d1d3fe60..dc6621e4 100644
--- a/lib/PublicInbox/DSKQXS.pm
+++ b/lib/PublicInbox/DSKQXS.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # Licensed the same as Danga::Socket (and Perl5)
 # License: GPL-1.0+ or Artistic-1.0-Perl
 #  <https://www.gnu.org/licenses/gpl-1.0.txt>
@@ -11,15 +11,12 @@
 #
 # It also implements signalfd(2) emulation via "tie".
 package PublicInbox::DSKQXS;
-use strict;
-use warnings;
-use parent qw(Exporter);
+use v5.12;
 use Symbol qw(gensym);
 use IO::KQueue;
 use Errno qw(EAGAIN);
-use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET
-        EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL $SFD_NONBLOCK);
-our @EXPORT_OK = qw(epoll_ctl epoll_wait);
+use PublicInbox::OnDestroy;
+use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET);
 
 sub EV_DISPATCH () { 0x0080 }
 
@@ -41,23 +38,23 @@ sub kq_flag ($$) {
 
 sub new {
         my ($class) = @_;
-        bless { kq => IO::KQueue->new, owner_pid => $$ }, $class;
+        my $fgen = $PublicInbox::OnDestroy::fork_gen;
+        bless { kq => IO::KQueue->new, fgen => $fgen }, $class;
 }
 
 # returns a new instance which behaves like signalfd on Linux.
 # It's wasteful in that it uses another FD, but it simplifies
 # our epoll-oriented code.
 sub signalfd {
-        my ($class, $signo, $flags) = @_;
+        my ($class, $signo) = @_;
         my $sym = gensym;
-        tie *$sym, $class, $signo, $flags; # calls TIEHANDLE
+        tie *$sym, $class, $signo; # calls TIEHANDLE
         $sym
 }
 
 sub TIEHANDLE { # similar to signalfd()
-        my ($class, $signo, $flags) = @_;
+        my ($class, $signo) = @_;
         my $self = $class->new;
-        $self->{timeout} = ($flags & $SFD_NONBLOCK) ? 0 : -1;
         my $kq = $self->{kq};
         $kq->EV_SET($_, EVFILT_SIGNAL, EV_ADD) for @$signo;
         $self;
@@ -66,12 +63,11 @@ sub TIEHANDLE { # similar to signalfd()
 sub READ { # called by sysread() for signalfd compatibility
         my ($self, undef, $len, $off) = @_; # $_[1] = buf
         die "bad args for signalfd read" if ($len % 128) // defined($off);
-        my $timeout = $self->{timeout};
         my $sigbuf = $self->{sigbuf} //= [];
         my $nr = $len / 128;
         my $r = 0;
         $_[1] = '';
-        do {
+        while (1) {
                 while ($nr--) {
                         my $signo = shift(@$sigbuf) or last;
                         # caller only cares about signalfd_siginfo.ssi_signo:
@@ -79,13 +75,13 @@ sub READ { # called by sysread() for signalfd compatibility
                         $r += 128;
                 }
                 return $r if $r;
-                my @events = eval { $self->{kq}->kevent($timeout) };
+                my @events = eval { $self->{kq}->kevent(0) };
                 # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
                 if ($@) {
                         next if $@ =~ /Interrupted system call/;
                         die;
                 }
-                if (!scalar(@events) && $timeout == 0) {
+                if (!scalar(@events)) {
                         $! = EAGAIN;
                         return;
                 }
@@ -94,36 +90,37 @@ sub READ { # called by sysread() for signalfd compatibility
                 # field shows coalesced signals, and maybe we'll use it
                 # in the future...
                 @$sigbuf = map { $_->[0] } @events;
-        } while (1);
+        }
 }
 
 # for fileno() calls in PublicInbox::DS
 sub FILENO { ${$_[0]->{kq}} }
 
-sub epoll_ctl {
-        my ($self, $op, $fd, $ev) = @_;
-        my $kq = $self->{kq};
-        if ($op == EPOLL_CTL_MOD) {
-                $kq->EV_SET($fd, EVFILT_READ, kq_flag(EPOLLIN, $ev));
-                eval { $kq->EV_SET($fd, EVFILT_WRITE, kq_flag(EPOLLOUT, $ev)) };
-        } elsif ($op == EPOLL_CTL_DEL) {
-                $kq->EV_SET($fd, EVFILT_READ, EV_DISABLE);
-                eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_DISABLE) };
-        } else { # EPOLL_CTL_ADD
-                $kq->EV_SET($fd, EVFILT_READ, EV_ADD|kq_flag(EPOLLIN, $ev));
-
-                # we call this blindly for read-only FDs such as tied
-                # DSKQXS (signalfd emulation) and Listeners
-                eval {
-                        $kq->EV_SET($fd, EVFILT_WRITE, EV_ADD |
-                                                        kq_flag(EPOLLOUT, $ev));
-                };
-        }
+sub _ep_mod_add ($$$$) {
+        my ($kq, $fd, $ev, $add) = @_;
+        $kq->EV_SET($fd, EVFILT_READ, $add|kq_flag(EPOLLIN, $ev));
+
+        # we call this blindly for read-only FDs such as tied
+        # DSKQXS (signalfd emulation) and Listeners
+        eval { $kq->EV_SET($fd, EVFILT_WRITE, $add|kq_flag(EPOLLOUT, $ev)) };
         0;
 }
 
-sub epoll_wait {
-        my ($self, $maxevents, $timeout_msec, $events) = @_;
+sub ep_add { _ep_mod_add($_[0]->{kq}, fileno($_[1]), $_[2], EV_ADD) };
+sub ep_mod { _ep_mod_add($_[0]->{kq}, fileno($_[1]), $_[2], 0) };
+
+sub ep_del {
+        my ($self, $io, $ev) = @_;
+        my $kq = $_[0]->{kq} // return; # called in cleanup
+        my $fd = fileno($io);
+        $kq->EV_SET($fd, EVFILT_READ, EV_DISABLE);
+        eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_DISABLE) };
+        0;
+}
+
+sub ep_wait {
+        my ($self, $timeout_msec, $events) = @_;
+        # n.b.: IO::KQueue is hard-coded to return up to 1000 events
         @$events = eval { $self->{kq}->kevent($timeout_msec) };
         if (my $err = $@) {
                 # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
@@ -134,7 +131,7 @@ sub epoll_wait {
                 }
         }
         # caller only cares for $events[$i]->[0]
-        scalar(@$events);
+        $_ = $_->[0] for @$events;
 }
 
 # kqueue is close-on-fork (not exec), so we must not close it
@@ -142,9 +139,8 @@ sub epoll_wait {
 sub DESTROY {
         my ($self) = @_;
         my $kq = delete $self->{kq} or return;
-        if (delete($self->{owner_pid}) == $$) {
+        delete($self->{fgen}) == $PublicInbox::OnDestroy::fork_gen and
                 POSIX::close($$kq);
-        }
 }
 
 1;
diff --git a/lib/PublicInbox/DSPoll.pm b/lib/PublicInbox/DSPoll.pm
index 1d9b51d9..a7055ec9 100644
--- a/lib/PublicInbox/DSPoll.pm
+++ b/lib/PublicInbox/DSPoll.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # Licensed the same as Danga::Socket (and Perl5)
 # License: GPL-1.0+ or Artistic-1.0-Perl
 #  <https://www.gnu.org/licenses/gpl-1.0.txt>
@@ -9,50 +9,47 @@
 # an all encompassing emulation of epoll via IO::Poll, but just to
 # support cases public-inbox-nntpd/httpd care about.
 package PublicInbox::DSPoll;
-use strict;
-use warnings;
-use parent qw(Exporter);
+use v5.12;
 use IO::Poll;
-use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLL_CTL_DEL);
-our @EXPORT_OK = qw(epoll_ctl epoll_wait);
+use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT);
+use Carp qw(carp);
+use Errno ();
 
-sub new { bless {}, $_[0] } # fd => events
+sub new { bless {}, __PACKAGE__ } # fd => events
 
-sub epoll_ctl {
-        my ($self, $op, $fd, $ev) = @_;
-
-        # not wasting time on error checking
-        if ($op != EPOLL_CTL_DEL) {
-                $self->{$fd} = $ev;
-        } else {
-                delete $self->{$fd};
-        }
-        0;
-}
-
-sub epoll_wait {
-        my ($self, $maxevents, $timeout_msec, $events) = @_;
-        my @pset;
+sub ep_wait {
+        my ($self, $timeout_msec, $events) = @_;
+        my (@pset, $n, $fd, $revents, $nval);
         while (my ($fd, $events) = each %$self) {
                 my $pevents = $events & EPOLLIN ? POLLIN : 0;
                 $pevents |= $events & EPOLLOUT ? POLLOUT : 0;
                 push(@pset, $fd, $pevents);
         }
         @$events = ();
-        my $n = IO::Poll::_poll($timeout_msec, @pset);
-        if ($n >= 0) {
-                for (my $i = 0; $i < @pset; ) {
-                        my $fd = $pset[$i++];
-                        my $revents = $pset[$i++] or next;
-                        delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT;
-                        push @$events, [ $fd ];
-                }
-                my $nevents = scalar @$events;
-                if ($n != $nevents) {
-                        warn "BUG? poll() returned $n, but got $nevents";
+        $n = IO::Poll::_poll($timeout_msec, @pset) or return; # timeout expired
+        return if $n < 0 && $! == Errno::EINTR; # caller recalculates timeout
+        die "poll: $!" if $n < 0;
+        while (defined($fd = shift @pset)) {
+                $revents = shift @pset or next; # no event
+                if ($revents & POLLNVAL) {
+                        carp "E: FD=$fd invalid in poll";
+                        delete $self->{$fd};
+                        $nval = 1;
+                } else {
+                        delete $self->{$fd} if $self->{$fd} & EPOLLONESHOT;
+                        push @$events, $fd;
                 }
         }
-        $n;
+        if ($nval && !@$events) {
+                $! = Errno::EBADF;
+                die "poll: $!";
+        }
 }
 
+sub ep_del { delete($_[0]->{fileno($_[1])}); 0 }
+sub ep_add { $_[0]->{fileno($_[1])} = $_[2]; 0 }
+
+no warnings 'once';
+*ep_mod = \&ep_add;
+
 1;
diff --git a/lib/PublicInbox/NNTPdeflate.pm b/lib/PublicInbox/DSdeflate.pm
index 02af935f..539adf0f 100644
--- a/lib/PublicInbox/NNTPdeflate.pm
+++ b/lib/PublicInbox/DSdeflate.pm
@@ -1,7 +1,8 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # RFC 8054 NNTP COMPRESS DEFLATE implementation
+# RFC 4978 IMAP COMPRESS=DEFLATE extension
 #
 # RSS usage for 10K idle-but-did-something NNTP clients on 64-bit:
 #   TLS + DEFLATE[a] :  1.8 GB  (MemLevel=9, 1.2 GB with MemLevel=8)
@@ -14,23 +15,22 @@
 # [b] - memory-optimized implementation using a global deflate context.
 #       It's less efficient in terms of compression, but way more
 #       efficient in terms of server memory usage.
-package PublicInbox::NNTPdeflate;
+package PublicInbox::DSdeflate;
 use strict;
-use 5.010_001;
-use parent qw(PublicInbox::NNTP);
+use v5.10.1;
 use Compress::Raw::Zlib;
 
 my %IN_OPT = (
-        -Bufsize => PublicInbox::NNTP::LINE_MAX,
+        -Bufsize => 1024,
         -WindowBits => -15, # RFC 1951
         -AppendOutput => 1,
 );
 
 # global deflate context and buffer
-my $zbuf = \(my $buf = '');
-my $zout;
+my ($zout, $zbuf);
 {
         my $err;
+        $zbuf = \(my $initial = ''); # replaced by $next in dflush/write
         ($zout, $err) = Compress::Raw::Zlib::Deflate->new(
                 # nnrpd (INN) and Compress::Raw::Zlib favor MemLevel=9,
                 # the zlib C library and git use MemLevel=8 as the default
@@ -42,21 +42,18 @@ my $zout;
         $err == Z_OK or die "Failed to initialize zlib deflate stream: $err";
 }
 
-
 sub enable {
         my ($class, $self) = @_;
         my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%IN_OPT);
         if ($err != Z_OK) {
-                $self->err("Inflate->new failed: $err");
-                $self->res('403 Unable to activate compression');
+                warn("Inflate->new failed: $err\n");
                 return;
         }
-        $self->res('206 Compression active');
         bless $self, $class;
         $self->{zin} = $in;
 }
 
-# overrides PublicInbox::NNTP::compressed
+# overrides PublicInbox::DS::compressed
 sub compressed { 1 }
 
 sub do_read ($$$$) {
@@ -103,7 +100,7 @@ sub msg_more ($$) {
         1;
 }
 
-sub zflush ($) {
+sub dflush ($) {
         my ($self) = @_;
 
         my $deflated = $zbuf;
diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm
index 1520f8f2..28458b19 100644
--- a/lib/PublicInbox/Daemon.pm
+++ b/lib/PublicInbox/Daemon.pm
@@ -1,93 +1,178 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# contains common daemon code for the httpd, imapd, and nntpd servers.
-# This may be used for read-only IMAP server if we decide to implement it.
+#
+# Contains common daemon code for the httpd, imapd, and nntpd servers
+# and designed for handling thousands of untrusted clients over slow
+# and/or lossy connections.
 package PublicInbox::Daemon;
-use strict;
-use warnings;
-use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
+use v5.12;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 use IO::Handle; # ->autoflush
 use IO::Socket;
-use POSIX qw(WNOHANG :signal_h);
+use File::Spec;
+use POSIX qw(WNOHANG :signal_h F_SETFD);
 use Socket qw(IPPROTO_TCP SOL_SOCKET);
-sub SO_ACCEPTFILTER () { 0x1000 }
-use Cwd qw/abs_path/;
 STDOUT->autoflush(1);
 STDERR->autoflush(1);
-use PublicInbox::DS qw(now);
-use PublicInbox::Syscall qw($SFD_NONBLOCK);
-require PublicInbox::Listener;
+use PublicInbox::DS qw(now awaitpid);
+use PublicInbox::Listener;
 use PublicInbox::EOFpipe;
-use PublicInbox::Sigfd;
+use PublicInbox::Git;
 use PublicInbox::GitAsyncCat;
+use PublicInbox::Eml;
+use PublicInbox::Config;
+use PublicInbox::OnDestroy;
+use PublicInbox::Search;
+use PublicInbox::XapClient;
+our $SO_ACCEPTFILTER = 0x1000;
 my @CMD;
-my ($set_user, $oldset);
+my ($set_user, $oldset, $xh_workers);
 my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize);
-my $worker_processes = 1;
-my @listeners;
-my %pids;
-my %tls_opt; # scheme://sockname => args for IO::Socket::SSL->start_SSL
+my ($nworker, @listeners, %WORKERS, %logs);
+my %tls_opt; # scheme://sockname => args for IO::Socket::SSL::SSL_Context->new
 my $reexec_pid;
 my ($uid, $gid);
 my ($default_cert, $default_key);
-my %KNOWN_TLS = ( 443 => 'https', 563 => 'nntps', 993 => 'imaps' );
-my %KNOWN_STARTTLS = ( 119 => 'nntp', 143 => 'imap' );
-
-sub accept_tls_opt ($) {
-        my ($opt_str) = @_;
-        # opt_str: opt1=val1,opt2=val2 (opt may repeat for multi-value)
-        require PublicInbox::TLS;
+my %KNOWN_TLS = (443 => 'https', 563 => 'nntps', 993 => 'imaps', 995 =>'pop3s');
+my %KNOWN_STARTTLS = (110 => 'pop3', 119 => 'nntp', 143 => 'imap');
+my %SCHEME2PORT = map { $KNOWN_TLS{$_} => $_ + 0 } keys %KNOWN_TLS;
+for (keys %KNOWN_STARTTLS) { $SCHEME2PORT{$KNOWN_STARTTLS{$_}} = $_ + 0 }
+$SCHEME2PORT{http} = 80;
+
+our ($parent_pipe, %POST_ACCEPT, %XNETD);
+our %WORKER_SIG = (
+        INT => \&worker_quit,
+        QUIT => \&worker_quit,
+        TERM => \&worker_quit,
+        TTIN => 'IGNORE',
+        TTOU => 'IGNORE',
+        USR1 => \&reopen_logs,
+        USR2 => 'IGNORE',
+        WINCH => 'IGNORE',
+        CHLD => \&PublicInbox::DS::enqueue_reap,
+);
+
+sub listener_opt ($) {
+        my ($str) = @_; # opt1=val1,opt2=val2 (opt may repeat for multi-value)
         my $o = {};
         # allow ',' as delimiter since '&' is shell-unfriendly
-        foreach (split(/[,&]/, $opt_str)) {
+        for (split(/[,&]/, $str)) {
                 my ($k, $v) = split(/=/, $_, 2);
-                push @{$o->{$k} ||= []}, $v;
+                push @{$o->{$k}}, $v;
         }
 
         # key may be a part of cert.  At least
         # p5-io-socket-ssl/example/ssl_server.pl has this fallback:
-        $o->{cert} //= [ $default_cert ];
+        $o->{cert} //= [ $default_cert ] if defined($default_cert);
         $o->{key} //= defined($default_key) ? [ $default_key ] : $o->{cert};
-        my %ctx_opt = (SSL_server => 1);
+        $o;
+}
+
+sub check_absolute ($$) {
+        my ($var, $val) = @_;
+        die <<EOM if index($val // '/', '/') != 0;
+$var must be an absolute path when using --daemonize: $val
+EOM
+}
+
+sub accept_tls_opt ($) {
+        my ($opt) = @_;
+        my $o = ref($opt) eq 'HASH' ? $opt : listener_opt($opt);
+        return if !defined($o->{cert});
+        require PublicInbox::TLS;
+        my @ctx_opt;
         # parse out hostname:/path/to/ mappings:
-        foreach my $k (qw(cert key)) {
-                my $x = $ctx_opt{'SSL_'.$k.'_file'} = {};
+        for my $k (qw(cert key)) {
+                $o->{$k} // next;
+                push(@ctx_opt, "SSL_${k}_file", {});
                 foreach my $path (@{$o->{$k}}) {
                         my $host = '';
                         $path =~ s/\A([^:]+):// and $host = $1;
-                        $x->{$host} = $path;
+                        $ctx_opt[-1]->{$host} = $path;
+                        check_absolute($k, $path) if $daemonize;
                 }
         }
-        my $ctx = IO::Socket::SSL::SSL_Context->new(%ctx_opt) or
-                die 'SSL_Context->new: '.PublicInbox::TLS::err();
-
-        # save ~34K per idle connection (cf. SSL_CTX_set_mode(3ssl))
-        # RSS goes from 346MB to 171MB with 10K idle NNTPS clients on amd64
-        # cf. https://rt.cpan.org/Ticket/Display.html?id=129463
-        my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() };
-        if ($mode && $ctx->{context}) {
-                eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) };
-                warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@;
-        }
+        \@ctx_opt;
+}
 
-        { SSL_server => 1, SSL_startHandshake => 0, SSL_reuse_ctx => $ctx };
+sub do_chown ($) {
+        $uid // return;
+        my ($path) = @_;
+        chown($uid, $gid, $path) or warn "chown $path: $!\n";
+}
+
+sub open_log_path ($$) { # my ($fh, $path) = @_; # $_[0] is modified
+        open $_[0], '>>', $_[1] or die "open(>> $_[1]): $!";
+        $_[0]->autoflush(1);
+        do_chown($_[1]);
+        $_[0];
+}
+
+sub load_mod ($;$$) {
+        my ($scheme, $opt, $addr) = @_;
+        my $modc = "PublicInbox::\U$scheme";
+        $modc =~ s/S\z//;
+        my $mod = $modc.'D';
+        eval "require $mod"; # IMAPD|HTTPD|NNTPD|POP3D
+        die $@ if $@;
+        my %xn;
+        my $tlsd = $xn{tlsd} = $mod->new;
+        my %env = map {
+                substr($_, length('env.')) => $opt->{$_}->[-1];
+        } grep(/\Aenv\./, keys %$opt);
+        $xn{refresh} = sub {
+                my ($sig) = @_;
+                local @ENV{keys %env} = values %env;
+                $tlsd->refresh_groups($sig);
+        };
+        $xn{post_accept} = $tlsd->can('post_accept_cb') ?
+                        $tlsd->post_accept_cb : sub { $modc->new($_[0], $tlsd) };
+        my @paths = qw(out err);
+        if ($modc eq 'PublicInbox::HTTP') {
+                @paths = qw(err);
+                $xn{af_default} = 'httpready';
+                if (my $p = $opt->{psgi}) {
+                        die "multiple psgi= options specified\n" if @$p > 1;
+                        check_absolute('psgi=', $p->[0]) if $daemonize;
+                        $tlsd->{psgi} = $p->[0];
+                        warn "# $scheme://$addr psgi=$p->[0]\n";
+                }
+        }
+        for my $f (@paths) {
+                my $p = $opt->{$f} or next;
+                die "multiple $f= options specified\n" if @$p > 1;
+                check_absolute("$f=", $p->[0]) if $daemonize;
+                $p = File::Spec->canonpath($p->[0]);
+                $tlsd->{$f} = $logs{$p} //= open_log_path(my $fh, $p);
+                warn "# $scheme://$addr $f=$p\n";
+        }
+        # for per-listener $SIG{__WARN__}:
+        my $err = $tlsd->{err};
+        $tlsd->{warn_cb} = sub {
+                print $err @_ unless PublicInbox::Eml::warn_ignore(@_)
+        };
+        $opt->{'multi-accept'} and
+                $xn{'multi-accept'} = $opt->{'multi-accept'}->[-1];
+        \%xn;
 }
 
 sub daemon_prepare ($) {
         my ($default_listen) = @_;
         my $listener_names = {}; # sockname => IO::Handle
-        $oldset = PublicInbox::Sigfd::block_signals();
+        $oldset = PublicInbox::DS::block_signals();
         @CMD = ($0, @ARGV);
         my ($prog) = ($CMD[0] =~ m!([^/]+)\z!g);
+        my $dh = defined($default_listen) ? " (default: $default_listen)" : '';
         my $help = <<EOF;
 usage: $prog [-l ADDRESS] [--cert=FILE] [--key=FILE]
 
 options:
 
-  -l ADDRESS    address to listen on (default: $default_listen)
+  -l ADDRESS    address to listen on$dh
   --cert=FILE   default SSL/TLS certificate
-  --key=FILE    default SSL/TLS certificate
+  --key=FILE    default SSL/TLS certificate key
   -W WORKERS    number of worker processes to spawn (default: 1)
+  -X XWORKERS   number of Xapian helper processes (default: undefined)
 
 See public-inbox-daemon(8) and $prog(1) man pages for more.
 EOF
@@ -95,51 +180,64 @@ EOF
                 'l|listen=s' => \@cfg_listen,
                 '1|stdout=s' => \$stdout,
                 '2|stderr=s' => \$stderr,
-                'W|worker-processes=i' => \$worker_processes,
+                'W|worker-processes=i' => \$nworker,
                 'P|pid-file=s' => \$pid_file,
                 'u|user=s' => \$user,
                 'g|group=s' => \$group,
                 'D|daemonize' => \$daemonize,
+                'multi-accept=i' => \$PublicInbox::Listener::MULTI_ACCEPT,
                 'cert=s' => \$default_cert,
                 'key=s' => \$default_key,
+                'X|xapian-helpers=i' => \$xh_workers,
                 'help|h' => \(my $show_help),
         );
         GetOptions(%opt) or die $help;
         if ($show_help) { print $help; exit 0 };
 
+        $_ = File::Spec->canonpath($_ // next) for ($stdout, $stderr);
         if (defined $pid_file && $pid_file =~ /\.oldbin\z/) {
                 die "--pid-file cannot end with '.oldbin'\n";
         }
         @listeners = inherit($listener_names);
-
-        # allow socket-activation users to set certs once and not
-        # have to configure each socket:
-        my @inherited_names = keys(%$listener_names) if defined($default_cert);
+        my @inherited_names = keys(%$listener_names);
 
         # ignore daemonize when inheriting
         $daemonize = undef if scalar @listeners;
 
-        push @cfg_listen, $default_listen unless (@listeners || @cfg_listen);
-
+        unless (@listeners || @cfg_listen) {
+                $default_listen // die "no listeners specified\n";
+                push @cfg_listen, $default_listen
+        }
+        my ($default_scheme) = (($default_listen // '') =~ m!\A([^:]+)://!);
         foreach my $l (@cfg_listen) {
                 my $orig = $l;
-                my $scheme = '';
-                if ($l =~ s!\A([^:]+)://!!) {
-                        $scheme = $1;
-                } elsif ($l =~ /\A(?:\[[^\]]+\]|[^:]+):([0-9])+/) {
-                        my $s = $KNOWN_TLS{$1} // $KNOWN_STARTTLS{$1};
-                        $scheme = $s if defined $s;
+                my ($scheme, $port, $opt);
+                $l =~ s!\A([a-z0-9]+)://!! and $scheme = $1;
+                $scheme //= $default_scheme;
+                if ($l =~ /\A(?:\[[^\]]+\]|[^:]+):([0-9]+)/) {
+                        $port = $1 + 0;
+                        $scheme //= $KNOWN_TLS{$port} // $KNOWN_STARTTLS{$port};
+                }
+                $scheme // die "unable to determine URL scheme of $orig\n";
+                if (!defined($port) && index($l, '/') != 0) { # AF_UNIX socket
+                        $port = $SCHEME2PORT{$scheme} //
+                                die "no port in listen=$orig\n";
+                        $l =~ s!\A([^/]+)!$1:$port! or
+                                die "unable to add port=$port to $l\n";
                 }
+                $l =~ s!/\z!!; # chop one trailing slash
                 if ($l =~ s!/?\?(.+)\z!!) {
-                        $tls_opt{"$scheme://$l"} = accept_tls_opt($1);
+                        $opt = listener_opt($1);
+                        $tls_opt{"$scheme://$l"} = accept_tls_opt($opt);
                 } elsif (defined($default_cert)) {
                         $tls_opt{"$scheme://$l"} = accept_tls_opt('');
-                } elsif ($scheme =~ /\A(?:https|imaps|imaps)\z/) {
+                } elsif ($scheme =~ /\A(?:https|imaps|nntps|pop3s)\z/) {
                         die "$orig specified w/o cert=\n";
                 }
-                # TODO: use scheme to load either NNTP.pm or HTTP.pm
-
-                next if $listener_names->{$l}; # already inherited
+                if ($listener_names->{$l}) { # already inherited
+                        $XNETD{$l} = load_mod($scheme, $opt, $l);
+                        next;
+                }
                 my (%o, $sock_pkg);
                 if (index($l, '/') == 0) {
                         $sock_pkg = 'IO::Socket::UNIX';
@@ -166,63 +264,67 @@ EOF
                 }
                 $o{Listen} = 1024;
                 my $prev = umask 0000;
-                my $s = eval { $sock_pkg->new(%o) };
-                warn "error binding $l: $! ($@)\n" unless $s;
+                my $s = eval { $sock_pkg->new(%o) } or
+                        warn "error binding $l: $! ($@)\n";
                 umask $prev;
-                if ($s) {
-                        $listener_names->{sockname($s)} = $s;
-                        $s->blocking(0);
-                        push @listeners, $s;
-                }
+                $s // next;
+                $s->blocking(0);
+                my $sockname = sockname($s);
+                warn "# bound $scheme://$sockname\n";
+                $XNETD{$sockname} //= load_mod($scheme, $opt);
+                $listener_names->{$sockname} = $s;
+                push @listeners, $s;
         }
 
         # cert/key options in @cfg_listen takes precedence when inheriting,
         # but map well-known inherited ports if --listen isn't specified
-        # at all
-        for my $sockname (@inherited_names) {
-                $sockname =~ /:([0-9]+)\z/ or next;
-                if (my $scheme = $KNOWN_TLS{$1}) {
-                        $tls_opt{"$scheme://$sockname"} ||= accept_tls_opt('');
-                } elsif (($scheme = $KNOWN_STARTTLS{$1})) {
-                        next if $tls_opt{"$scheme://$sockname"};
-                        $tls_opt{''} ||= accept_tls_opt('');
+        # at all.  This allows socket-activation users to set certs once
+        # and not have to configure each socket:
+        if (defined $default_cert) {
+                my ($stls) = (($default_scheme // '') =~ /\A(pop3|nntp|imap)/);
+                for my $x (@inherited_names) {
+                        $x =~ /:([0-9]+)\z/ or next; # no TLS for AF_UNIX
+                        if (my $scheme = $KNOWN_TLS{$1}) {
+                                $XNETD{$x} //= load_mod($scheme);
+                                $tls_opt{"$scheme://$x"} ||= accept_tls_opt('');
+                        } elsif (($scheme = $KNOWN_STARTTLS{$1})) {
+                                $XNETD{$x} //= load_mod($scheme);
+                                $tls_opt{"$scheme://$x"} ||= accept_tls_opt('');
+                        } elsif (defined $stls) {
+                                $tls_opt{"$stls://$x"} ||= accept_tls_opt('');
+                        }
                 }
         }
-
-        die "No listeners bound\n" unless @listeners;
-}
-
-sub check_absolute ($$) {
-        my ($var, $val) = @_;
-        if (defined $val && index($val, '/') != 0) {
-                die
-"--$var must be an absolute path when using --daemonize: $val\n";
+        if (defined $default_scheme) {
+                for my $x (@inherited_names) {
+                        $XNETD{$x} //= load_mod($default_scheme);
+                }
         }
+        die "No listeners bound\n" unless @listeners;
 }
 
 sub daemonize () {
         if ($daemonize) {
+                require Cwd;
                 foreach my $i (0..$#ARGV) {
                         my $arg = $ARGV[$i];
                         next unless -e $arg;
-                        $ARGV[$i] = abs_path($arg);
+                        $ARGV[$i] = Cwd::abs_path($arg);
                 }
-                check_absolute('stdout', $stdout);
-                check_absolute('stderr', $stderr);
-                check_absolute('pid-file', $pid_file);
+                check_absolute('--stdout', $stdout);
+                check_absolute('--stderr', $stderr);
+                check_absolute('--pid-file', $pid_file);
+                check_absolute('--cert', $default_cert);
+                check_absolute('--key', $default_key);
 
                 chdir '/' or die "chdir failed: $!";
         }
-
-        return unless (defined $pid_file || defined $group || defined $user
-                        || $daemonize);
-
-        eval { require Net::Server::Daemonize };
-        if ($@) {
-                die
-"Net::Server required for --pid-file, --group, --user, and --daemonize\n$@\n";
+        if (defined($pid_file) || defined($group) || defined($user)) {
+                eval { require Net::Server::Daemonize; 1 } // die <<EOF;
+Net::Server required for --pid-file, --group, --user
+$@
+EOF
         }
-
         Net::Server::Daemonize::check_pid_file($pid_file) if defined $pid_file;
         $uid = Net::Server::Daemonize::get_uid($user) if defined $user;
         if (defined $group) {
@@ -241,24 +343,38 @@ sub daemonize () {
         };
 
         if ($daemonize) {
-                my $pid = fork;
-                die "could not fork: $!\n" unless defined $pid;
+                my $pid = PublicInbox::OnDestroy::fork_tmp;
                 exit if $pid;
-
                 open(STDIN, '+<', '/dev/null') or
                                         die "redirect stdin failed: $!\n";
                 open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n";
                 open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
                 POSIX::setsid();
-                $pid = fork;
-                die "could not fork: $!\n" unless defined $pid;
+                $pid = PublicInbox::OnDestroy::fork_tmp;
                 exit if $pid;
         }
         return unless defined $pid_file;
 
         write_pid($pid_file);
-        # for ->DESTROY:
-        bless { pid => $$, pid_file => \$pid_file }, __PACKAGE__;
+        on_destroy \&unlink_pid_file_safe_ish, \$pid_file;
+}
+
+sub has_busy_clients { # post_loop_do CB
+        my ($state) = @_;
+        my $now = now();
+        my $n = PublicInbox::DS::close_non_busy();
+        if ($n) {
+                if ($state->{-w} < now()) {
+                        warn "$$ quitting, $n client(s) left\n";
+                        $state->{-w} = now() + 5;
+                }
+                unless (defined $state->{0}) {
+                        $state->{0} = (split(/\s+/, $0))[0];
+                        $state->{0} =~ s!\A.*?([^/]+)\z!$1!;
+                }
+                $0 = "$state->{0} quitting, $n client(s) left";
+        }
+        $n; # true: loop continues, false: loop breaks
 }
 
 sub worker_quit { # $_[0] = signal name or number (unused)
@@ -267,51 +383,35 @@ sub worker_quit { # $_[0] = signal name or number (unused)
 
         $_->close foreach @listeners; # call PublicInbox::DS::close
         @listeners = ();
-        my $proc_name;
-        my $warn = 0;
+
         # drop idle connections and try to quit gracefully
-        PublicInbox::DS->SetPostLoopCallback(sub {
-                my ($dmap, undef) = @_;
-                my $n = 0;
-                my $now = now();
-
-                foreach my $s (values %$dmap) {
-                        $s->can('busy') or next;
-                        if ($s->busy($now)) {
-                                ++$n;
-                        } else {
-                                # close as much as possible, early as possible
-                                $s->close;
-                        }
-                }
-                if ($n) {
-                        if (($warn + 5) < now()) {
-                                warn "$$ quitting, $n client(s) left\n";
-                                $warn = now();
-                        }
-                        unless (defined $proc_name) {
-                                $proc_name = (split(/\s+/, $0))[0];
-                                $proc_name =~ s!\A.*?([^/]+)\z!$1!;
-                        }
-                        $0 = "$proc_name quitting, $n client(s) left";
-                }
-                $n; # true: loop continues, false: loop breaks
-        });
+        @PublicInbox::DS::post_loop_do = (\&has_busy_clients, { -w => 0 })
+}
+
+sub spawn_xh () {
+        $xh_workers // return;
+        require PublicInbox::XhcMset;
+        local $) = $gid if defined $gid;
+        local $( = $gid if defined $gid;
+        local $> = $uid if defined $uid;
+        local $< = $uid if defined $uid;
+        $PublicInbox::Search::XHC = eval {
+                local $ENV{STDERR_PATH} = $stderr;
+                local $ENV{STDOUT_PATH} = $stdout;
+                PublicInbox::XapClient::start_helper('-j', $xh_workers)
+        };
+        warn "E: $@" if $@;
+        awaitpid($PublicInbox::Search::XHC->{io}->attached_pid, \&respawn_xh)
+                if $PublicInbox::Search::XHC;
 }
 
 sub reopen_logs {
-        if ($stdout) {
-                open STDOUT, '>>', $stdout or
-                        warn "failed to redirect stdout to $stdout: $!\n";
-                STDOUT->autoflush(1);
-                do_chown($stdout);
-        }
-        if ($stderr) {
-                open STDERR, '>>', $stderr or
-                        warn "failed to redirect stderr to $stderr: $!\n";
-                STDERR->autoflush(1);
-                do_chown($stderr);
-        }
+        my ($sig) = @_;
+        $logs{$stdout} //= \*STDOUT if defined $stdout;
+        $logs{$stderr} //= \*STDERR if defined $stderr;
+        while (my ($p, $fh) = each %logs) { open_log_path($fh, $p) }
+        ($sig && defined($xh_workers) && $PublicInbox::Search::XHC) and
+                kill('USR1', $PublicInbox::Search::XHC->{io}->attached_pid);
 }
 
 sub sockname ($) {
@@ -371,17 +471,16 @@ sub inherit ($) {
         my $end = $fds + 2; # LISTEN_FDS_START - 1
         my @rv = ();
         foreach my $fd (3..$end) {
-                my $s = IO::Handle->new_from_fd($fd, 'r');
+                open(my $s, '<&=', $fd) or warn "fdopen fd=$fd: $!";
                 if (my $k = sockname($s)) {
-                        if ($s->blocking) {
-                                $s->blocking(0);
-                                warn <<"";
-Inherited socket (fd=$fd) is blocking, making it non-blocking.
+                        my $prev_was_blocking = $s->blocking(0);
+                        warn <<"" if $prev_was_blocking;
+Inherited socket ($k fd=$fd) is blocking, making it non-blocking.
 Set 'NonBlocking = true' in the systemd.service unit to avoid stalled
 processes when multiple service instances start.
 
-                        }
                         $listener_names->{$k} = $s;
+                        warn "# inherited $k fd=$fd\n";
                         push @rv, $s;
                 } else {
                         warn "failed to inherit fd=$fd (LISTEN_FDS=$fds)";
@@ -400,177 +499,147 @@ sub upgrade { # $_[0] = signal name or number (unused)
                         warn "BUG: .oldbin suffix exists: $pid_file\n";
                         return;
                 }
-                unlink_pid_file_safe_ish($$, $pid_file);
+                unlink_pid_file_safe_ish(\$pid_file);
                 $pid_file .= '.oldbin';
                 write_pid($pid_file);
         }
-        my $pid = fork;
-        unless (defined $pid) {
-                warn "fork failed: $!\n";
-                return;
-        }
-        if ($pid == 0) {
-                use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD);
+        my $pid = eval { PublicInbox::OnDestroy::fork_tmp };
+        if (!defined($pid)) {
+                warn "fork failed: $! $@\n";
+        } elsif ($pid == 0) {
                 $ENV{LISTEN_FDS} = scalar @listeners;
                 $ENV{LISTEN_PID} = $$;
                 foreach my $s (@listeners) {
                         # @listeners are globs with workers, PI::L w/o workers
                         $s = $s->{sock} if ref($s) eq 'PublicInbox::Listener';
-
-                        my $fl = fcntl($s, F_GETFD, 0);
-                        fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC);
+                        fcntl($s, F_SETFD, 0) // die "F_SETFD: $!";
                 }
                 exec @CMD;
                 die "Failed to exec: $!\n";
+        } else {
+                awaitpid($pid, \&upgrade_aborted);
+                $reexec_pid = $pid;
         }
-        $reexec_pid = $pid;
 }
 
-sub kill_workers ($) {
-        my ($s) = @_;
-
-        while (my ($pid, $id) = each %pids) {
-                kill $s, $pid;
-        }
-}
+sub kill_workers ($) { kill $_[0], values(%WORKERS) }
 
-sub upgrade_aborted ($) {
-        my ($p) = @_;
-        warn "reexec PID($p) died with: $?\n";
+sub upgrade_aborted {
+        my ($pid) = @_;
+        warn "reexec PID($pid) died with: $?\n";
         $reexec_pid = undef;
         return unless $pid_file;
 
         my $file = $pid_file;
         $file =~ s/\.oldbin\z// or die "BUG: no '.oldbin' suffix in $file";
-        unlink_pid_file_safe_ish($$, $pid_file);
+        unlink_pid_file_safe_ish(\$pid_file);
         $pid_file = $file;
         eval { write_pid($pid_file) };
         warn $@, "\n" if $@;
 }
 
-sub reap_children { # $_[0] = 'CHLD' or POSIX::SIGCHLD()
-        while (1) {
-                my $p = waitpid(-1, WNOHANG) or return;
-                if (defined $reexec_pid && $p == $reexec_pid) {
-                        upgrade_aborted($p);
-                } elsif (defined(my $id = delete $pids{$p})) {
-                        warn "worker[$id] PID($p) died with: $?\n";
-                } elsif ($p > 0) {
-                        warn "unknown PID($p) reaped: $?\n";
-                } else {
-                        return;
-                }
-        }
-}
-
-sub unlink_pid_file_safe_ish ($$) {
-        my ($unlink_pid, $file) = @_;
-        return unless defined $unlink_pid && $unlink_pid == $$;
+sub unlink_pid_file_safe_ish ($) {
+        my ($fref) = @_;
 
-        open my $fh, '<', $file or return;
+        open my $fh, '<', $$fref or return;
         local $/ = "\n";
         defined(my $read_pid = <$fh>) or return;
         chomp $read_pid;
-        if ($read_pid == $unlink_pid) {
-                Net::Server::Daemonize::unlink_pid_file($file);
-        }
+        Net::Server::Daemonize::unlink_pid_file($$fref) if $read_pid == $$;
 }
 
 sub master_quit ($) {
         exit unless @listeners;
         @listeners = ();
-        kill_workers($_[0]);
+        exit unless kill_workers($_[0]);
+}
+
+sub reap_worker { # awaitpid CB
+        my ($pid, $nr) = @_;
+        warn "worker[$nr] died \$?=$?\n" if $?;
+        delete $WORKERS{$nr};
+        exit if !@listeners && !keys(%WORKERS);
+        PublicInbox::DS::requeue(\&start_workers);
+}
+
+sub start_worker ($) {
+        my ($nr) = @_;
+        return unless @listeners;
+        my $pid = PublicInbox::DS::fork_persist;
+        if ($pid == 0) {
+                undef %WORKERS;
+                undef $xh_workers;
+                local $PublicInbox::DS::Poller; # allow epoll/kqueue
+                $set_user->() if $set_user;
+                PublicInbox::EOFpipe->new($parent_pipe, \&worker_quit);
+                worker_loop();
+                exit 0;
+        } else {
+                $WORKERS{$nr} = $pid;
+                awaitpid($pid, \&reap_worker, $nr);
+        }
+}
+
+sub start_workers {
+        my @idx = grep { !defined($WORKERS{$_}) } (0..($nworker - 1)) or return;
+        eval { start_worker($_) for @idx };
+        warn "E: $@\n" if $@;
+}
+
+sub trim_workers {
+        my @nr = grep { $_ >= $nworker } keys %WORKERS;
+        kill('TERM', @WORKERS{@nr});
 }
 
 sub master_loop {
-        pipe(my ($p0, $p1)) or die "failed to create parent-pipe: $!";
-        my $set_workers = $worker_processes;
+        local $parent_pipe;
+        pipe($parent_pipe, my $p1) or die "failed to create parent-pipe: $!";
+        my $set_workers = $nworker; # for SIGWINCH
         reopen_logs();
-        my $ignore_winch;
-        my $sig = {
-                USR1 => sub { reopen_logs(); kill_workers($_[0]); },
+        spawn_xh;
+        my $msig = {
+                USR1 => sub { reopen_logs($_[0]); kill_workers($_[0]); },
                 USR2 => \&upgrade,
                 QUIT => \&master_quit,
                 INT => \&master_quit,
                 TERM => \&master_quit,
                 WINCH => sub {
-                        return if $ignore_winch || !@listeners;
-                        if (-t STDIN || -t STDOUT || -t STDERR) {
-                                $ignore_winch = 1;
-                                warn <<EOF;
-ignoring SIGWINCH since we are not daemonized
-EOF
-                        } else {
-                                $worker_processes = 0;
-                        }
+                        $nworker = 0;
+                        trim_workers();
                 },
                 HUP => sub {
-                        return unless @listeners;
-                        $worker_processes = $set_workers;
+                        $nworker = $set_workers; # undo WINCH
                         kill_workers($_[0]);
+                        PublicInbox::DS::requeue(\&start_workers)
                 },
                 TTIN => sub {
-                        return unless @listeners;
-                        if ($set_workers > $worker_processes) {
-                                ++$worker_processes;
+                        if ($set_workers > $nworker) {
+                                ++$nworker;
                         } else {
-                                $worker_processes = ++$set_workers;
+                                $nworker = ++$set_workers;
                         }
+                        PublicInbox::DS::requeue(\&start_workers);
                 },
                 TTOU => sub {
-                        $worker_processes = --$set_workers if $set_workers > 0;
+                        return if $nworker <= 0;
+                        --$nworker;
+                        trim_workers();
                 },
-                CHLD => \&reap_children,
+                CHLD => \&PublicInbox::DS::enqueue_reap,
         };
-        my $sigfd = PublicInbox::Sigfd->new($sig, 0);
-        local %SIG = (%SIG, %$sig) if !$sigfd;
-        PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd;
-        while (1) { # main loop
-                my $n = scalar keys %pids;
-                unless (@listeners) {
-                        exit if $n == 0;
-                        $set_workers = $worker_processes = $n = 0;
-                }
-
-                if ($n > $worker_processes) {
-                        while (my ($k, $v) = each %pids) {
-                                kill('TERM', $k) if $v >= $worker_processes;
-                        }
-                        $n = $worker_processes;
-                }
-                my $want = $worker_processes - 1;
-                if ($n <= $want) {
-                        PublicInbox::Sigfd::block_signals() if !$sigfd;
-                        for my $i ($n..$want) {
-                                my $pid = fork;
-                                if (!defined $pid) {
-                                        warn "failed to fork worker[$i]: $!\n";
-                                } elsif ($pid == 0) {
-                                        $set_user->() if $set_user;
-                                        return $p0; # run normal work code
-                                } else {
-                                        warn "PID=$pid is worker[$i]\n";
-                                        $pids{$pid} = $i;
-                                }
-                        }
-                        PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd;
-                }
-
-                if ($sigfd) { # Linux and IO::KQueue users:
-                        $sigfd->wait_once;
-                } else { # wake up every second
-                        sleep(1);
-                }
-        }
+        $msig->{WINCH} = sub {
+                warn "ignoring SIGWINCH since we are not daemonized\n";
+        } if -t STDIN || -t STDOUT || -t STDERR;
+        start_workers();
+        PublicInbox::DS::event_loop($msig, $oldset);
         exit # never gets here, just for documentation
 }
 
-sub tls_start_cb ($$) {
-        my ($opt, $orig_post_accept) = @_;
+sub tls_cb {
+        my ($post_accept, $tlsd) = @_;
         sub {
                 my ($io, $addr, $srv) = @_;
-                my $ssl = IO::Socket::SSL->start_SSL($io, %$opt);
-                $orig_post_accept->($ssl, $addr, $srv);
+                $post_accept->(PublicInbox::TLS::start($io, $tlsd), $addr, $srv)
         }
 }
 
@@ -584,95 +653,91 @@ sub defer_accept ($$) {
                 my $sec = unpack('i', $x);
                 return if $sec > 0; # systemd users may set a higher value
                 setsockopt($s, IPPROTO_TCP, $TCP_DEFER_ACCEPT, 1);
-        } elsif ($^O eq 'freebsd') {
-                my $x = getsockopt($s, SOL_SOCKET, SO_ACCEPTFILTER);
-                return if defined $x; # don't change if set
+        } elsif ($^O =~ /\A(?:freebsd|netbsd|dragonfly)\z/) {
+                my $x = getsockopt($s, SOL_SOCKET, $SO_ACCEPTFILTER);
+                return if ($x // "\0") =~ /[^\0]/s; # don't change if set
                 my $accf_arg = pack('a16a240', $af_name, '');
-                setsockopt($s, SOL_SOCKET, SO_ACCEPTFILTER, $accf_arg);
+                setsockopt($s, SOL_SOCKET, $SO_ACCEPTFILTER, $accf_arg);
         }
 }
 
-sub daemon_loop ($$$$) {
-        my ($refresh, $post_accept, $tlsd, $af_default) = @_;
-        my %post_accept;
-        while (my ($k, $v) = each %tls_opt) {
-                if ($k =~ s!\A(?:https|imaps|nntps)://!!) {
-                        $post_accept{$k} = tls_start_cb($v, $post_accept);
-                } elsif ($tlsd) { # STARTTLS, $k eq '' is OK
-                        $tlsd->{accept_tls} = $v;
+sub daemon_loop () {
+        local $PublicInbox::Config::DEDUPE = {}; # enable dedupe cache
+        my $refresh = $WORKER_SIG{HUP} = sub {
+                my ($sig) = @_;
+                %$PublicInbox::Config::DEDUPE = (); # clear cache
+                for my $xn (values %XNETD) {
+                        delete $xn->{tlsd}->{ssl_ctx}; # PublicInbox::TLS::start
+                        eval { $xn->{refresh}->($sig) };
+                        warn "refresh $@\n" if $@;
                 }
-        }
-        my $sig = {
-                HUP => $refresh,
-                INT => \&worker_quit,
-                QUIT => \&worker_quit,
-                TERM => \&worker_quit,
-                TTIN => 'IGNORE',
-                TTOU => 'IGNORE',
-                USR1 => \&reopen_logs,
-                USR2 => 'IGNORE',
-                WINCH => 'IGNORE',
-                CHLD => \&PublicInbox::DS::enqueue_reap,
         };
-        if ($worker_processes > 0) {
+        while (my ($k, $ctx_opt) = each %tls_opt) {
+                $ctx_opt // next;
+                my ($scheme, $l) = split(m!://!, $k, 2);
+                my $xn = $XNETD{$l} // die "BUG: no xnetd for $k";
+                $xn->{tlsd}->{ssl_ctx_opt} //= $ctx_opt;
+                $scheme =~ m!\A(?:https|imaps|nntps|pop3s)! and
+                        $POST_ACCEPT{$l} = tls_cb(@$xn{qw(post_accept tlsd)});
+        }
+        undef %tls_opt;
+        if ($nworker > 0) {
                 $refresh->(); # preload by default
-                my $fh = master_loop(); # returns if in child process
-                PublicInbox::EOFpipe->new($fh, \&worker_quit, undef);
+                return master_loop();
         } else {
                 reopen_logs();
                 $set_user->() if $set_user;
-                $sig->{USR2} = sub { worker_quit() if upgrade() };
+                $WORKER_SIG{USR2} = sub { worker_quit() if upgrade() };
                 $refresh->();
         }
+        local $PublicInbox::DS::Poller; # allow epoll/kqueue
+        worker_loop();
+}
+
+sub worker_loop {
         $uid = $gid = undef;
         reopen_logs();
+        spawn_xh; # only for -W0
         @listeners = map {;
-                my $tls_cb = $post_accept{sockname($_)};
+                my $l = sockname($_);
+                my $tls_cb = $POST_ACCEPT{$l};
+                my $xn = $XNETD{$l} // die "BUG: no xnetd for $l";
 
                 # NNTPS, HTTPS, HTTP, IMAPS and POP3S are client-first traffic
                 # IMAP, NNTP and POP3 are server-first
-                defer_accept($_, $tls_cb ? 'dataready' : $af_default);
+                defer_accept($_, $tls_cb ? 'dataready' : $xn->{af_default});
 
                 # this calls epoll_create:
-                PublicInbox::Listener->new($_, $tls_cb || $post_accept)
+                PublicInbox::Listener->new($_, $tls_cb || $xn->{post_accept},
+                                                $xn->{'multi-accept'})
         } @listeners;
-        my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK);
-        local %SIG = (%SIG, %$sig) if !$sigfd;
-        if (!$sigfd) {
-                # wake up every second to accept signals if we don't
-                # have signalfd or IO::KQueue:
-                PublicInbox::Sigfd::sig_setmask($oldset);
-                PublicInbox::DS->SetLoopTimeout(1000);
-        }
-        PublicInbox::DS->EventLoop;
+        PublicInbox::DS::event_loop(\%WORKER_SIG, $oldset);
 }
 
-sub run ($$$;$) {
-        my ($default, $refresh, $post_accept, $tlsd) = @_;
-        local $SIG{PIPE} = 'IGNORE';
-        daemon_prepare($default);
-        my $af_default = $default =~ /:8080\z/ ? 'httpready' : undef;
-        my $for_destroy = daemonize();
-
-        # this wastes a bit of memory for non-PublicInbox::WWW -httpd users
-        # oh well...
-        eval {
-                require PublicInbox::Gcf2;
-                require PublicInbox::Gcf2Client;
-        };
-        local $PublicInbox::GitAsyncCat::GCF2C =
-                                PublicInbox::Gcf2Client::new() if !$@;
-
-        daemon_loop($refresh, $post_accept, $tlsd, $af_default);
-        PublicInbox::DS->Reset;
-        # ->DESTROY runs when $for_destroy goes out-of-scope
+sub respawn_xh { # awaitpid cb
+        my ($pid) = @_;
+        return unless @listeners;
+        warn "W: xap_helper PID:$pid died: \$?=$?, respawning...\n";
+        spawn_xh;
 }
 
-sub do_chown ($) {
-        my ($path) = @_;
-        if (defined $uid and !chown($uid, $gid, $path)) {
-                warn "could not chown $path: $!\n";
-        }
+sub run {
+        my ($default_listen) = @_;
+        $nworker = 1;
+        local (%XNETD, %POST_ACCEPT);
+        daemon_prepare($default_listen);
+        my $unlink_on_leave = daemonize();
+
+        # localize GCF2C for tests:
+        local $PublicInbox::GitAsyncCat::GCF2C;
+        local $PublicInbox::Git::async_warn = 1;
+        local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
+        local %WORKER_SIG = %WORKER_SIG;
+        local $PublicInbox::XapClient::tries = 0;
+        local $PublicInbox::Search::XHC if defined($xh_workers);
+
+        daemon_loop();
+        # $unlink_on_leave runs
 }
 
 sub write_pid ($) {
@@ -681,8 +746,4 @@ sub write_pid ($) {
         do_chown($path);
 }
 
-sub DESTROY {
-        unlink_pid_file_safe_ish($_[0]->{pid}, ${$_[0]->{pid_file}});
-}
-
 1;
diff --git a/lib/PublicInbox/DirIdle.pm b/lib/PublicInbox/DirIdle.pm
index 458285e2..230df166 100644
--- a/lib/PublicInbox/DirIdle.pm
+++ b/lib/PublicInbox/DirIdle.pm
@@ -1,56 +1,99 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Used by public-inbox-watch for Maildir (and possibly MH in the future)
 package PublicInbox::DirIdle;
-use strict;
+use v5.12;
 use parent 'PublicInbox::DS';
-use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+use PublicInbox::Syscall qw(EPOLLIN);
 use PublicInbox::In2Tie;
 
-my ($MAIL_IN, $ino_cls);
-if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
-        $MAIL_IN = Linux::Inotify2::IN_MOVED_TO() |
-                Linux::Inotify2::IN_CREATE();
-        $ino_cls = 'Linux::Inotify2';
+my ($MAIL_IN, $MAIL_GONE, $ino_cls);
+if ($^O eq 'linux' && eval { require PublicInbox::Inotify; 1 }) {
+        $MAIL_IN = PublicInbox::Inotify::IN_MOVED_TO() |
+                PublicInbox::Inotify::IN_CREATE();
+        $MAIL_GONE = PublicInbox::Inotify::IN_DELETE() |
+                        PublicInbox::Inotify::IN_DELETE_SELF() |
+                        PublicInbox::Inotify::IN_MOVE_SELF() |
+                        PublicInbox::Inotify::IN_MOVED_FROM();
+        $ino_cls = 'PublicInbox::Inotify';
 # Perl 5.22+ is needed for fileno(DIRHANDLE) support:
 } elsif ($^V ge v5.22 && eval { require PublicInbox::KQNotify }) {
         $MAIL_IN = PublicInbox::KQNotify::MOVED_TO_OR_CREATE();
+        $MAIL_GONE = PublicInbox::KQNotify::NOTE_DELETE() |
+                PublicInbox::KQNotify::NOTE_REVOKE() |
+                PublicInbox::KQNotify::NOTE_RENAME();
         $ino_cls = 'PublicInbox::KQNotify';
 } else {
         require PublicInbox::FakeInotify;
         $MAIL_IN = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE();
+        $MAIL_GONE = PublicInbox::FakeInotify::IN_DELETE() |
+                        PublicInbox::FakeInotify::IN_DELETE_SELF() |
+                        PublicInbox::FakeInotify::IN_MOVE_SELF();
 }
 
 sub new {
-        my ($class, $dirs, $cb) = @_;
+        my ($class, $cb) = @_;
         my $self = bless { cb => $cb }, $class;
         my $inot;
         if ($ino_cls) {
                 $inot = $ino_cls->new or die "E: $ino_cls->new: $!";
                 my $io = PublicInbox::In2Tie::io($inot);
-                $self->SUPER::new($io, EPOLLIN | EPOLLET);
+                $self->SUPER::new($io, EPOLLIN);
         } else {
                 require PublicInbox::FakeInotify;
                 $inot = PublicInbox::FakeInotify->new; # starts timer
         }
-
-        # Linux::Inotify2->watch or similar
-        $inot->watch($_, $MAIL_IN) for @$dirs;
         $self->{inot} = $inot;
-        PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
         $self;
 }
 
+sub add_watches {
+        my ($self, $dirs, $gone) = @_;
+        my $fl = $MAIL_IN | ($gone ? $MAIL_GONE : 0);
+        my @ret;
+        for my $d (@$dirs) {
+                my $w = $self->{inot}->watch($d, $fl) or next;
+                push @ret, $w;
+        }
+        PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
+        @ret
+}
+
+sub rm_watches {
+        my ($self, $dir) = @_;
+        my $inot = $self->{inot};
+        if (my $cb = $inot->can('rm_watches')) { # TODO for fake watchers
+                $cb->($inot, $dir);
+        }
+}
+
+sub close {
+        my ($self) = @_;
+        delete $self->{cb};
+        $self->SUPER::close; # if using real kevent/inotify
+}
+
 sub event_step {
         my ($self) = @_;
-        my $cb = $self->{cb};
-        local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously
+        my $cb = $self->{cb} or return;
+        local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously (FIXME)
         eval {
-                my @events = $self->{inot}->read; # Linux::Inotify2->read
+                my @events = $self->{inot}->read; # Inotify3->read
                 $cb->($_) for @events;
         };
         warn "$self->{inot}->read err: $@\n" if $@;
 }
 
+sub force_close {
+        my ($self) = @_;
+        my $inot = delete $self->{inot} // return;
+        if ($inot->can('fh')) { # Inotify3 or Linux::Inotify2 2.3+
+                $inot->fh->close or warn "CLOSE ERROR: $!";
+        } elsif ($inot->isa('Linux::Inotify2')) {
+                require PublicInbox::LI2Wrap;
+                PublicInbox::LI2Wrap::wrapclose($inot);
+        }
+}
+
 1;
diff --git a/lib/PublicInbox/DummyInbox.pm b/lib/PublicInbox/DummyInbox.pm
index 69b0b683..c516eec4 100644
--- a/lib/PublicInbox/DummyInbox.pm
+++ b/lib/PublicInbox/DummyInbox.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # An EXAMINE-able, PublicInbox::Inbox-like object for IMAP.  Some
@@ -7,16 +7,16 @@
 package PublicInbox::DummyInbox;
 use strict;
 
-sub created_at { 0 } # Msgmap::created_at
+sub uidvalidity { 0 } # Msgmap::created_at
 sub mm { shift }
 sub uid_range { [] } # Over::uid_range
 sub subscribe_unlock { undef };
 
 no warnings 'once';
-*max = \&created_at;
+*max = \&uidvalidity;
 *query_xover = \&uid_range;
 *over = \&mm;
-*search = *unsubscribe_unlock =
+*isrch = *search = *unsubscribe_unlock =
         *get_art = *description = *base_url = \&subscribe_unlock;
 
 1;
diff --git a/lib/PublicInbox/EOFpipe.pm b/lib/PublicInbox/EOFpipe.pm
index 489caf82..77b699a2 100644
--- a/lib/PublicInbox/EOFpipe.pm
+++ b/lib/PublicInbox/EOFpipe.pm
@@ -1,23 +1,24 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 package PublicInbox::EOFpipe;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::DS);
-use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT $F_SETPIPE_SZ);
 
 sub new {
-        my (undef, $rd, $cb, $arg) = @_;
-        my $self = bless {  cb => $cb, arg => $arg }, __PACKAGE__;
-        # 1031: F_SETPIPE_SZ, 4096: page size
-        fcntl($rd, 1031, 4096) if $^O eq 'linux';
+        my (undef, $rd, @cb_args) = @_;
+        my $self = bless { cb_args => \@cb_args }, __PACKAGE__;
+        # 4096: page size
+        fcntl($rd, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ;
         $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
 }
 
 sub event_step {
         my ($self) = @_;
         if ($self->do_read(my $buf, 1) == 0) { # auto-closed
-                $self->{cb}->($self->{arg});
+                my ($cb, @args) = @{delete $self->{cb_args}};
+                $cb->(@args);
         }
 }
 
diff --git a/lib/PublicInbox/Emergency.pm b/lib/PublicInbox/Emergency.pm
index b705e776..968d7d6f 100644
--- a/lib/PublicInbox/Emergency.pm
+++ b/lib/PublicInbox/Emergency.pm
@@ -1,97 +1,78 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Emergency Maildir delivery for MDA
 package PublicInbox::Emergency;
-use strict;
-use warnings;
+use v5.12;
 use Fcntl qw(:DEFAULT SEEK_SET);
 use Sys::Hostname qw(hostname);
-use IO::Handle; # ->flush, ->autoflush
+use IO::Handle; # ->flush
+use Errno qw(EEXIST);
+use File::Path ();
 
 sub new {
         my ($class, $dir) = @_;
-
-        foreach (qw(new tmp cur)) {
-                my $d = "$dir/$_";
-                next if -d $d;
-                require File::Path;
-                if (!File::Path::mkpath($d) && !-d $d) {
-                        die "failed to mkpath($d): $!\n";
-                }
-        }
-        bless { dir => $dir, files => {}, t => 0, cnt => 0, pid => $$ }, $class;
+        File::Path::make_path(map { $dir.$_ } qw(/tmp /new /cur));
+        bless { dir => $dir, t => 0 }, $class;
 }
 
 sub _fn_in {
-        my ($self, $dir) = @_;
-        my @host = split(/\./, hostname);
+        my ($self, $pid, $dir) = @_;
+        my $host = $self->{-host} //= (split(/\./, hostname))[0] // 'localhost';
         my $now = time;
+        my $n;
         if ($self->{t} != $now) {
                 $self->{t} = $now;
-                $self->{cnt} = 0;
+                $n = $self->{cnt} = 0;
         } else {
-                $self->{cnt}++;
+                $n = ++$self->{cnt};
         }
-
-        my $f;
-        do {
-                $f = "$self->{dir}/$dir/$self->{t}.$$"."_$self->{cnt}.$host[0]";
-                $self->{cnt}++;
-        } while (-e $f);
-        $f;
+        "$self->{dir}/$dir/$self->{t}.$pid"."_$n.$host";
 }
 
 sub prepare {
         my ($self, $strref) = @_;
-
-        die "already in transaction: $self->{tmp}" if $self->{tmp};
+        my $pid = $$;
+        my $tmp_key = "tmp.$pid";
+        die "BUG: in transaction: $self->{$tmp_key}" if $self->{$tmp_key};
         my ($tmp, $fh);
         do {
-                $tmp = _fn_in($self, 'tmp');
+                $tmp = _fn_in($self, $pid, 'tmp');
                 $! = undef;
-        } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) && $!{EEXIST});
-        print $fh $$strref or die "write failed: $!";
-        $fh->flush or die "flush failed: $!";
-        $fh->autoflush(1);
+        } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) and $! == EEXIST);
+        print $fh $$strref or die "print: $!";
+        $fh->flush or die "flush: $!";
         $self->{fh} = $fh;
-        $self->{tmp} = $tmp;
+        $self->{$tmp_key} = $tmp;
 }
 
 sub abort {
         my ($self) = @_;
         delete $self->{fh};
-        my $tmp = delete $self->{tmp} or return;
-
-        unlink($tmp) or warn "Failed to unlink $tmp: $!";
+        my $tmp = delete $self->{"tmp.$$"} or return;
+        unlink($tmp) or warn "W: unlink($tmp): $!";
         undef;
 }
 
 sub fh {
         my ($self) = @_;
-        my $fh = $self->{fh} or die "{fh} not open!\n";
-        seek($fh, 0, SEEK_SET) or die "seek(fh) failed: $!";
-        sysseek($fh, 0, SEEK_SET) or die "sysseek(fh) failed: $!";
+        my $fh = $self->{fh} or die "BUG: {fh} not open";
+        seek($fh, 0, SEEK_SET) or die "seek: $!";
+        sysseek($fh, 0, SEEK_SET) or die "sysseek: $!";
         $fh;
 }
 
 sub commit {
         my ($self) = @_;
-        $$ == $self->{pid} or return; # no-op in forked child
-
+        my $pid = $$;
+        my $tmp = delete $self->{"tmp.$pid"} or return;
         delete $self->{fh};
-        my $tmp = delete $self->{tmp} or return;
-        my $new;
+        my ($new, $ok);
         do {
-                $new = _fn_in($self, 'new');
-        } while (!link($tmp, $new) && $!{EEXIST});
-        my @sn = stat($new) or die "stat $new failed: $!";
-        my @st = stat($tmp) or die "stat $tmp failed: $!";
-        if ($st[0] == $sn[0] && $st[1] == $sn[1]) {
-                unlink($tmp) or warn "Failed to unlink $tmp: $!";
-        } else {
-                warn "stat($new) and stat($tmp) differ";
-        }
+                $new = _fn_in($self, $pid, 'new');
+        } while (!($ok = link($tmp, $new)) && $! == EEXIST);
+        die "link($tmp, $new): $!" unless $ok;
+        unlink($tmp) or warn "W: unlink($tmp): $!";
 }
 
 sub DESTROY { commit($_[0]) }
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 571edc5c..d59d7c3f 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Lazy MIME parser, it still slurps the full message but keeps short
@@ -28,7 +28,7 @@ package PublicInbox::Eml;
 use strict;
 use v5.10.1;
 use Carp qw(croak);
-use Encode qw(find_encoding decode encode); # stdlib
+use Encode qw(find_encoding); # stdlib
 use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge
 use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2
 use MIME::QuotedPrint 3.05; # ditto
@@ -51,7 +51,9 @@ $MIME_ENC{quotedprint} = $MIME_ENC{'quoted-printable'} = $MIME_ENC{qp};
 $MIME_DEC{quotedprint} = $MIME_DEC{'quoted-printable'} = $MIME_DEC{qp};
 $MIME_ENC{$_} = \&identity_codec for qw(7bit 8bit binary);
 
-my %DECODE_ADDRESS = map { $_ => 1 } qw(From To Cc Sender Reply-To);
+my %DECODE_ADDRESS = map {
+        ($_ => 1, "Resent-$_" => 1)
+} qw(From To Cc Sender Reply-To Bcc);
 my %DECODE_FULL = (
         Subject => 1,
         'Content-Description' => 1,
@@ -120,9 +122,10 @@ sub new {
                 my $hdr = substr($$ref, 0, $header_size_limit + 1);
                 hdr_truncate($hdr) if length($hdr) > $header_size_limit;
                 bless { hdr => \$hdr, crlf => $1 }, __PACKAGE__;
-        } else { # nothing useful
-                my $hdr = $$ref = '';
-                bless { hdr => \$hdr, crlf => "\n" }, __PACKAGE__;
+        } else { # just a body w/o header?
+                my $hdr = '';
+                my $eol = ($$ref =~ /(\r?\n)/) ? $1 : "\n";
+                bless { hdr => \$hdr, crlf => $eol, bdy => $ref }, __PACKAGE__;
         }
 }
 
@@ -141,6 +144,7 @@ sub header_raw {
         my $re = re_memo($_[1]);
         my @v = (${ $_[0]->{hdr} } =~ /$re/g);
         for (@v) {
+                utf8::decode($_); # SMTPUTF8
                 # for compatibility w/ Email::Simple::Header,
                 s/\s+\z//s;
                 s/\A\s+//s;
@@ -217,7 +221,7 @@ sub mp_descend ($$) {
                 # There's also a case where quoted text showed up in the
                 # preamble
                 # <20060515162817.65F0F1BBAE@citi.umich.edu>
-                unshift(@parts, new_sub(undef, \$pre)) if $pre =~ /:/s;
+                unshift(@parts, new_sub(undef, \$pre)) if index($pre, ':') >= 0;
                 return \@parts;
         }
         # "multipart", but no boundary found, treat as single part
@@ -234,6 +238,7 @@ sub mp_descend ($$) {
 # $cb - user-supplied callback sub
 # $arg - user-supplied arg (think pthread_create)
 # $once - unref body scalar during iteration
+# $all - used by IMAP server, only
 sub each_part {
         my ($self, $cb, $arg, $once, $all) = @_;
         my $p = mp_descend($self, $once // 0) or
@@ -329,11 +334,24 @@ sub body_set {
         undef;
 }
 
+# workaround https://rt.cpan.org/Public/Bug/Display.html?id=139622
+# Encode 2.87..3.12 leaks on croak, so we defer and croak ourselves
+our @enc_warn;
+my $enc_warn = sub { push @enc_warn, @_ };
+
 sub body_str_set {
-        my ($self, $body_str) = @_;
-        my $charset = ct($self)->{attributes}->{charset} or
-                Carp::confess('body_str was given, but no charset is defined');
-        body_set($self, \(encode($charset, $body_str, Encode::FB_CROAK)));
+        my ($self, $str) = @_;
+        my $cs = ct($self)->{attributes}->{charset} //
+                croak('body_str was given, but no charset is defined');
+        my $enc = find_encoding($cs) // croak "unknown encoding `$cs'";
+        my $tmp;
+        {
+                local @enc_warn;
+                local $SIG{__WARN__} = $enc_warn;
+                $tmp = $enc->encode($str, Encode::FB_WARN);
+                croak(@enc_warn) if @enc_warn;
+        };
+        body_set($self, \$tmp);
 }
 
 sub content_type { scalar header($_[0], 'Content-Type') }
@@ -347,14 +365,15 @@ sub header_set {
         $pfx .= ': ';
         my $len = 78 - length($pfx);
         @vals = map {;
+                utf8::encode(my $v = $_); # to bytes, support SMTPUTF8
                 # folding differs from Email::Simple::Header,
                 # we favor tabs for visibility (and space savings :P)
                 if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) {
                         local $Text::Wrap::columns = $len;
                         local $Text::Wrap::huge = 'overflow';
-                        $pfx . wrap('', "\t", $_) . $self->{crlf};
+                        $pfx . wrap('', "\t", $v) . $self->{crlf};
                 } else {
-                        $pfx . $_ . $self->{crlf};
+                        $pfx . $v . $self->{crlf};
                 }
         } @vals;
         $$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first
@@ -378,7 +397,9 @@ sub header_str_set {
         header_set($self, $name, @vals);
 }
 
-sub mhdr_decode ($) { eval { $MIME_Header->decode($_[0]) } // $_[0] }
+sub mhdr_decode ($) {
+        eval { $MIME_Header->decode($_[0], Encode::FB_DEFAULT) } // $_[0];
+}
 
 sub filename {
         my $dis = header_raw($_[0], 'Content-Disposition');
@@ -447,15 +468,19 @@ sub body {
 sub body_str {
         my ($self) = @_;
         my $ct = ct($self);
-        my $charset = $ct->{attributes}->{charset};
-        if (!$charset) {
-                if ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) {
+        my $cs = $ct->{attributes}->{charset} // do {
+                ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) and
                         return body($self);
-                }
-                Carp::confess("can't get body as a string for ",
+                croak("can't get body as a string for ",
                         join("\n\t", header_raw($self, 'Content-Type')));
-        }
-        decode($charset, body($self), Encode::FB_CROAK);
+        };
+        my $enc = find_encoding($cs) or croak "unknown encoding `$cs'";
+        my $ret = body($self);
+        local @enc_warn;
+        local $SIG{__WARN__} = $enc_warn;
+        $ret = $enc->decode($ret, Encode::FB_WARN);
+        croak(@enc_warn) if @enc_warn;
+        $ret;
 }
 
 sub as_string {
@@ -475,9 +500,42 @@ sub charset_set {
 
 sub crlf { $_[0]->{crlf} // "\n" }
 
+sub raw_size {
+        my ($self) = @_;
+        my $len = length(${$self->{hdr}});
+        defined($self->{bdy}) and
+                $len += length(${$self->{bdy}}) + length($self->{crlf});
+        $len;
+}
+
+# warnings to ignore when handling spam mailboxes and maybe other places
+sub warn_ignore {
+        my $s = "@_";
+        # Email::Address::XS warnings
+        $s =~ /^Argument contains empty /
+        || $s =~ /^Element at index [0-9]+.*? contains /
+        # PublicInbox::MsgTime
+        || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
+        || $s =~ /^bad Date: .+? in /
+        # Encode::Unicode::UTF7
+        || $s =~ /^Bad UTF7 data escape at /
+}
+
+# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
+sub warn_ignore_cb {
+        my $cb = $SIG{__WARN__} // \&CORE::warn;
+        sub { $cb->(@_) unless warn_ignore(@_) }
+}
+
 sub willneed { re_memo($_) for @_ }
 
 willneed(qw(From To Cc Date Subject Content-Type In-Reply-To References
                 Message-ID X-Alt-Message-ID));
 
+# This fixes an old bug from import (pre-a0c07cba0e5d8b6a)
+# mutt also pipes single RFC822 messages with a "From " line,
+# but no Content-Length or "From " escaping.
+# "git format-patch" also generates such files by default.
+sub strip_from { $_[0] =~ s/\A[\r\n]*From [^\n]*\n//s }
+
 1;
diff --git a/lib/PublicInbox/EmlContentFoo.pm b/lib/PublicInbox/EmlContentFoo.pm
index c163eaf5..80fc7364 100644
--- a/lib/PublicInbox/EmlContentFoo.pm
+++ b/lib/PublicInbox/EmlContentFoo.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
 # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES
 # This library is free software; you can redistribute it and/or modify
 # it under the same terms as Perl itself.
diff --git a/lib/PublicInbox/Epoll.pm b/lib/PublicInbox/Epoll.pm
new file mode 100644
index 00000000..7e0aa6e7
--- /dev/null
+++ b/lib/PublicInbox/Epoll.pm
@@ -0,0 +1,26 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# OO API for epoll
+package PublicInbox::Epoll;
+use v5.12;
+use PublicInbox::Syscall qw(epoll_create epoll_ctl epoll_wait
+        EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL);
+use Fcntl qw(F_SETFD FD_CLOEXEC);
+use autodie qw(open fcntl);
+
+sub new {
+        open(my $fh, '+<&=', epoll_create());
+        fcntl($fh, F_SETFD, FD_CLOEXEC);
+        bless \$fh, __PACKAGE__;
+}
+
+sub ep_add { epoll_ctl(fileno(${$_[0]}), EPOLL_CTL_ADD, fileno($_[1]), $_[2]) }
+sub ep_mod { epoll_ctl(fileno(${$_[0]}), EPOLL_CTL_MOD, fileno($_[1]), $_[2]) }
+sub ep_del { epoll_ctl(fileno(${$_[0]}), EPOLL_CTL_DEL, fileno($_[1]), 0) }
+
+# n.b. maxevents=1000 is the historical default.  maxevents=1 (yes, one)
+# is more fair under load with multiple worker processes sharing one listener
+sub ep_wait { epoll_wait(fileno(${$_[0]}), 1000, @_[1, 2]) }
+
+1;
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 03faf3a1..95feb885 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used by the web interface to link to messages outside of the our
@@ -11,7 +11,7 @@ use warnings;
 use PublicInbox::Hval qw(ascii_html prurl mid_href);
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Smsg;
-our $MIN_PARTIAL_LEN = 16;
+our $MIN_PARTIAL_LEN = 14; # for 'XXXXXXXXXX.fsf' msgids gnus generates
 
 # TODO: user-configurable
 our @EXT_URL = map { ascii_html($_) } (
@@ -32,8 +32,8 @@ sub PARTIAL_MAX () { 100 }
 sub search_partial ($$) {
         my ($ibx, $mid) = @_;
         return if length($mid) < $MIN_PARTIAL_LEN;
-        my $srch = $ibx->search or return;
-        my $opt = { limit => PARTIAL_MAX, mset => 2 };
+        my $srch = $ibx->isrch or return;
+        my $opt = { limit => PARTIAL_MAX, relevance => -1 };
         my @try = ("m:$mid*");
         my $chop = $mid;
         if ($chop =~ s/(\W+)(\w*)\z//) {
@@ -76,7 +76,7 @@ sub search_partial ($$) {
 sub ext_msg_i {
         my ($other, $ctx) = @_;
 
-        return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url;
+        return if $other->{name} eq $ctx->{ibx}->{name} || !$other->base_url;
 
         my $mm = $other->mm or return;
 
@@ -103,25 +103,54 @@ sub ext_msg_step {
         }
 }
 
+sub ext_msg_ALL ($) {
+        my ($ctx) = @_;
+        my $ALL = $ctx->{www}->{pi_cfg}->ALL or return;
+        my $by_eidx_key = $ctx->{www}->{pi_cfg}->{-by_eidx_key};
+        my $cur_key = eval { $ctx->{ibx}->eidx_key } //
+                        return partial_response($ctx); # $cur->{ibx} == $ALL
+        my %seen = ($cur_key => 1);
+        my ($id, $prev);
+        while (my $x = $ALL->over->next_by_mid($ctx->{mid}, \$id, \$prev)) {
+                my $xr3 = $ALL->over->get_xref3($x->{num});
+                for my $k (@$xr3) {
+                        $k =~ s/:[0-9]+:$x->{blob}\z// or next;
+                        next if $k eq $cur_key;
+                        my $ibx = $by_eidx_key->{$k} // next;
+                        $ibx->base_url or next;
+                        push(@{$ctx->{found}}, $ibx) unless $seen{$k}++;
+                }
+        }
+        return exact($ctx) if $ctx->{found};
+
+        # fall back to partial MID matching
+        for my $ibxish ($ctx->{ibx}, $ALL) {
+                my $mids = search_partial($ibxish, $ctx->{mid}) or next;
+                push @{$ctx->{partial}}, [ $ibxish, $mids ];
+                last if ($ctx->{n_partial} += scalar(@$mids)) >= PARTIAL_MAX;
+        }
+        partial_response($ctx);
+}
+
 sub ext_msg {
         my ($ctx) = @_;
-        sub {
+        ext_msg_ALL($ctx) // sub {
                 $ctx->{-wcb} = $_[0]; # HTTP server write callback
 
                 if ($ctx->{env}->{'pi-httpd.async'}) {
                         require PublicInbox::ConfigIter;
                         my $iter = PublicInbox::ConfigIter->new(
-                                                $ctx->{www}->{pi_config},
+                                                $ctx->{www}->{pi_cfg},
                                                 \&ext_msg_step, $ctx);
                         $iter->event_step;
                 } else {
-                        $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx);
+                        $ctx->{www}->{pi_cfg}->each_inbox(\&ext_msg_i, $ctx);
                         finalize_exact($ctx);
                 }
         };
 }
 
-# called via PublicInbox::DS->EventLoop
+# called via PublicInbox::DS::event_loop
 sub event_step {
         my ($ctx, $sync) = @_;
         # can't find a partial match in current inbox, try the others:
@@ -141,7 +170,7 @@ sub finalize_exact {
 
         # fall back to partial MID matching
         my $mid = $ctx->{mid};
-        my $cur = $ctx->{-inbox};
+        my $cur = $ctx->{ibx};
         my $mids = search_partial($cur, $mid);
         if ($mids) {
                 $ctx->{n_partial} = scalar(@$mids);
@@ -159,7 +188,13 @@ sub finalize_exact {
         finalize_partial($ctx);
 }
 
-sub finalize_partial {
+sub _url_pfx ($$) {
+        my ($ctx, $u) = @_;
+        (index($u, '://') < 0 && index($u, '/') != 0) ?
+                "$ctx->{-upfx}../$u" : $u;
+}
+
+sub partial_response ($) {
         my ($ctx) = @_;
         my $mid = $ctx->{mid};
         my $code = 404;
@@ -167,16 +202,17 @@ sub finalize_partial {
         my $html = ascii_html($mid);
         my $title = "&lt;$html&gt; not found";
         my $s = "<pre>Message-ID &lt;$html&gt;\nnot found\n";
+        $ctx->{-upfx} //= '../';
         if (my $n_partial = $ctx->{n_partial}) {
                 $code = 300;
                 my $es = $n_partial == 1 ? '' : 'es';
                 $n_partial .= '+' if ($n_partial == PARTIAL_MAX);
                 $s .= "\n$n_partial partial match$es found:\n\n";
-                my $cur_name = $ctx->{-inbox}->{name};
+                my $cur_name = $ctx->{ibx}->{name};
                 foreach my $pair (@{$ctx->{partial}}) {
                         my ($ibx, $res) = @$pair;
-                        my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
-                        my $u = $ibx->base_url($env) or next;
+                        my $e = $ibx->{name} eq $cur_name ? $ctx->{env} : undef;
+                        my $u = _url_pfx($ctx, $ibx->base_url($e) // next);
                         foreach my $m (@$res) {
                                 my $href = mid_href($m);
                                 my $html = ascii_html($m);
@@ -191,10 +227,11 @@ sub finalize_partial {
         }
         $ctx->{-html_tip} = $s .= '</pre>';
         $ctx->{-title_html} = $title;
-        $ctx->{-upfx} = '../';
-        $ctx->{-wcb}->(html_oneshot($ctx, $code));
+        html_oneshot($ctx, $code);
 }
 
+sub finalize_partial ($) { $_[0]->{-wcb}->(partial_response($_[0])) }
+
 sub ext_urls {
         my ($ctx, $mid, $href, $html) = @_;
 
@@ -222,13 +259,13 @@ sub exact {
         my $title = "&lt;$html&gt; found in ";
         my $end = @$found == 1 ? 'another inbox' : 'other inboxes';
         $ctx->{-title_html} = $title . $end;
-        $ctx->{-upfx} = '../';
+        $ctx->{-upfx} //= '../';
         my $ext_urls = ext_urls($ctx, $mid, $href, $html);
         my $code = (@$found == 1 && $ext_urls eq '') ? 200 : 300;
         $ctx->{-html_tip} = join('',
                         "<pre>Message-ID: &lt;$html&gt;\nfound in $end:\n\n",
                                 (map {
-                                        my $u = $_->base_url;
+                                        my $u = _url_pfx($ctx, $_->base_url);
                                         qq(<a\nhref="$u$href/">$u$html/</a>\n)
                                 } @$found),
                         $ext_urls, '</pre>');
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
new file mode 100644
index 00000000..d43c23e6
--- /dev/null
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -0,0 +1,134 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Read-only external (detached) index for cross inbox search.
+# This is a read-only counterpart to PublicInbox::ExtSearchIdx
+# and behaves like PublicInbox::Inbox AND PublicInbox::Search
+package PublicInbox::ExtSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Over;
+use PublicInbox::Inbox;
+use PublicInbox::MiscSearch;
+use DBI qw(:sql_types); # SQL_BLOB
+
+# for ->reopen, ->mset, ->mset_to_artnums
+use parent qw(PublicInbox::Search);
+
+sub new {
+        my ($class, $topdir) = @_;
+        bless {
+                topdir => $topdir,
+                -primary_address => 'unknown@example.com',
+                # xpfx => 'ei15'
+                xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION
+        }, $class;
+}
+
+sub misc {
+        my ($self) = @_;
+        $self->{misc} //= PublicInbox::MiscSearch->new("$self->{xpfx}/misc");
+}
+
+# same as per-inbox ->over, for now...
+sub over {
+        my ($self) = @_;
+        $self->{over} // eval {
+                PublicInbox::Inbox::_cleanup_later($self);
+                my $over = PublicInbox::Over->new("$self->{xpfx}/over.sqlite3");
+                $over->dbh; # may die
+                $self->{over} = $over;
+        };
+}
+
+sub git {
+        my ($self) = @_;
+        $self->{git} //= do {
+                PublicInbox::Inbox::_cleanup_later($self);
+                PublicInbox::Git->new("$self->{topdir}/ALL.git");
+        };
+}
+
+# returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table
+sub nntp_xref_for { # NNTP only
+        my ($self, $xibx, $xsmsg) = @_;
+        my $dbh = over($self)->dbh;
+
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+        $sth->execute($xibx->{newsgroup});
+        my $xibx_id = $sth->fetchrow_array // do {
+                warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n";
+                return;
+        };
+
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1
+
+        $sth->bind_param(1, $xsmsg->oidbin, SQL_BLOB);
+
+        # NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2
+        $sth->bind_param(2, $xsmsg->{num} || $xsmsg->{-orig_num});
+        $sth->bind_param(3, $xibx_id);
+        $sth->execute;
+        my $docid = $sth->fetchrow_array // do {
+                warn <<EOF;
+W: `$xibx->{newsgroup}:$xsmsg->{num}' not found in $self->{topdir}"
+EOF
+                return;
+        };
+
+        # LIMIT is number of newsgroups on server:
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?
+
+        $sth->execute($docid, $xibx_id);
+        my $rows = $sth->fetchall_arrayref;
+
+        my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1
+
+        my %xref = map {
+                my ($ibx_id, $xnum) = @$_;
+
+                $eidx_key_sth->execute($ibx_id);
+                my $eidx_key = $eidx_key_sth->fetchrow_array;
+
+                # only include if there's a newsgroup name
+                $eidx_key && index($eidx_key, '/') >= 0 ?
+                        () : ($eidx_key => $xnum)
+        } @$rows;
+        $xref{$xibx->{newsgroup}} = $xsmsg->{num};
+        \%xref;
+}
+
+sub mm { undef }
+
+sub altid_map { {} }
+
+sub description {
+        my ($self) = @_;
+        ($self->{description} //=
+                PublicInbox::Git::cat_desc("$self->{topdir}/description")) //
+                '$EXTINDEX_DIR/description missing';
+}
+
+sub search {
+        PublicInbox::Inbox::_cleanup_later($_[0]);
+        $_[0];
+}
+
+sub thing_type { 'external index' }
+
+no warnings 'once';
+*base_url = \&PublicInbox::Inbox::base_url;
+*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
+*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
+*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
+*modified = \&PublicInbox::Inbox::modified;
+
+*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
+*isrch = \&search;
+
+1;
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
new file mode 100644
index 00000000..883dbea3
--- /dev/null
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -0,0 +1,1429 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Detached/external index cross inbox search indexing support
+# read-write counterpart to PublicInbox::ExtSearch
+#
+# It's based on the same ideas as public-inbox-v2-format(5) using
+# over.sqlite3 for dedupe and sharded Xapian.  msgmap.sqlite3 is
+# missing, so there is no Message-ID conflict resolution, meaning
+# no NNTP support for now.
+#
+# v2 has a 1:1 mapping of index:inbox or msgmap for NNTP support.
+# This is intended to be an M:N index:inbox mapping, but it'll likely
+# be 1:N in common practice (M==1)
+
+package PublicInbox::ExtSearchIdx;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch PublicInbox::Umask PublicInbox::Lock);
+use Carp qw(croak carp);
+use Scalar::Util qw(blessed);
+use Sys::Hostname qw(hostname);
+use File::Glob qw(bsd_glob GLOB_NOSORT);
+use PublicInbox::MultiGit;
+use PublicInbox::Spawn ();
+use PublicInbox::Search;
+use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob);
+use PublicInbox::OverIdx;
+use PublicInbox::MiscIdx;
+use PublicInbox::MID qw(mids);
+use PublicInbox::V2Writable;
+use PublicInbox::InboxWritable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::Eml;
+use PublicInbox::DS qw(now add_timer);
+use DBI qw(:sql_types); # SQL_BLOB
+use PublicInbox::Admin qw(fmt_localtime);
+
+sub new {
+        my (undef, $dir, $opt) = @_;
+        my $l = $opt->{indexlevel} // 'full';
+        $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
+                die "invalid indexlevel=$l\n";
+        $l eq 'basic' and die "E: indexlevel=basic not yet supported\n";
+        my $self = bless {
+                xpfx => "$dir/ei".PublicInbox::Search::SCHEMA_VERSION,
+                topdir => $dir,
+                creat => $opt->{creat},
+                ibx_map => {}, # (newsgroup//inboxdir) => $ibx
+                ibx_active => [], # by config section order
+                ibx_known => [], # by config section order
+                indexlevel => $l,
+                transact_bytes => 0,
+                total_bytes => 0,
+                current_info => '',
+                parallel => 1,
+                lock_path => "$dir/ei.lock",
+        }, __PACKAGE__;
+        $self->{shards} = $self->count_shards ||
+                nproc_shards({ nproc => $opt->{jobs} });
+        my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
+        $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
+        $self->{-dangerous} = 1 if $opt->{dangerous};
+        $self->{oidx} = $oidx;
+        $self
+}
+
+sub attach_inbox {
+        my ($self, $ibx, $types) = @_;
+        $self->{ibx_map}->{$ibx->eidx_key} //= do {
+                delete $self->{-ibx_ary_known}; # invalidate cache
+                delete $self->{-ibx_ary_active}; # invalidate cache
+                $types //= [ qw(active known) ];
+                for my $t (@$types) {
+                        push @{$self->{"ibx_$t"}}, $ibx;
+                }
+                $ibx;
+        }
+}
+
+sub _ibx_attach { # each_inbox callback
+        my ($ibx, $self, $types) = @_;
+        attach_inbox($self, $ibx, $types);
+}
+
+sub attach_config {
+        my ($self, $cfg, $ibxs) = @_;
+        $self->{cfg} = $cfg;
+        my $types;
+        if ($ibxs) {
+                for my $ibx (@$ibxs) {
+                        $self->{ibx_map}->{$ibx->eidx_key} //= do {
+                                push @{$self->{ibx_active}}, $ibx;
+                                push @{$self->{ibx_known}}, $ibx;
+                                $ibx;
+                        }
+                }
+                # invalidate cache
+                delete $self->{-ibx_ary_known};
+                delete $self->{-ibx_ary_active};
+                $types = [ 'known' ];
+        }
+        $types //= [ qw(known active) ];
+        $cfg->each_inbox(\&_ibx_attach, $self, $types);
+}
+
+sub check_batch_limit ($) {
+        my ($req) = @_;
+        my $self = $req->{self};
+        my $new_smsg = $req->{new_smsg};
+        my $n = $self->{transact_bytes} += $new_smsg->{bytes};
+
+        # set flag for PublicInbox::V2Writable::index_todo:
+        ${$req->{need_checkpoint}} = 1 if $n >= $self->{batch_bytes};
+}
+
+sub bad_ibx_id ($$;$) {
+        my ($self, $ibx_id, $cb) = @_;
+        my $msg = "E: bad/stale ibx_id=#$ibx_id encountered";
+        my $ekey = $self->{oidx}->dbh->selectrow_array(<<EOM, undef, $ibx_id);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1
+EOM
+        $msg .= " (formerly `$ekey')" if defined $ekey;
+        $cb //= \&carp;
+        $cb->($msg, "\nE: running $0 --gc may be required");
+}
+
+sub check_xr3 ($$$) {
+        my ($self, $id2pos, $xr3) = @_;
+        @$xr3 = grep {
+                defined($id2pos->{$_->[0]}) ? 1 : bad_ibx_id($self, $_->[0])
+        } @$xr3;
+}
+
+sub apply_boost ($$) {
+        my ($req, $smsg) = @_;
+        my $id2pos = $req->{id2pos}; # index in ibx_sorted
+        my $xr3 = $req->{self}->{oidx}->get_xref3($smsg->{num}, 1);
+        check_xr3($req->{self}, $id2pos, $xr3);
+        @$xr3 = sort { # sort ascending
+                $id2pos->{$a->[0]} <=> $id2pos->{$b->[0]}
+                                ||
+                $a->[1] <=> $b->[1] # break ties with {xnum}
+        } @$xr3;
+        my $new_smsg = $req->{new_smsg};
+        return if $xr3->[0]->[2] ne $new_smsg->oidbin; # loser
+
+        # replace the old smsg with the more boosted one
+        $new_smsg->{num} = $smsg->{num};
+        $new_smsg->populate($req->{eml}, $req);
+        $req->{self}->{oidx}->add_overview($req->{eml}, $new_smsg);
+}
+
+sub remove_doc ($$) {
+        my ($self, $docid) = @_;
+        $self->{oidx}->delete_by_num($docid);
+        $self->{oidx}->eidxq_del($docid);
+        $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+}
+
+sub _unref_doc ($$$$$;$) {
+        my ($sync, $docid, $ibx, $xnum, $oidbin, $eml) = @_;
+        my $smsg;
+        if (ref($docid)) {
+                $smsg = $docid;
+                $docid = $smsg->{num};
+        }
+        if (defined($oidbin) && defined($xnum) && blessed($ibx) && $ibx->over) {
+                my $smsg = $ibx->over->get_art($xnum);
+                if ($smsg && $smsg->oidbin eq $oidbin) {
+                        carp("BUG: (non-fatal) ".$ibx->eidx_key.
+                                " #$xnum $smsg->{blob} still valid");
+                        return;
+                }
+        }
+        my $s = 'DELETE FROM xref3 WHERE oidbin = ?';
+        $s .= ' AND ibx_id = ?' if defined($ibx);
+        $s .= ' AND xnum = ?' if defined($xnum);
+        my $del = $sync->{self}->{oidx}->dbh->prepare_cached($s);
+        my $col = 0;
+        $del->bind_param(++$col, $oidbin, SQL_BLOB);
+        $del->bind_param(++$col, $ibx->{-ibx_id}) if $ibx;
+        $del->bind_param(++$col, $xnum) if defined($xnum);
+        $del->execute;
+        my $xr3 = $sync->{self}->{oidx}->get_xref3($docid);
+        if (scalar(@$xr3) == 0) { # all gone
+                remove_doc($sync->{self}, $docid);
+        } else { # enqueue for reindex of remaining messages
+                if ($ibx) {
+                        my $ekey = $ibx->{-gc_eidx_key} // $ibx->eidx_key;
+                        my $idx = $sync->{self}->idx_shard($docid);
+                        $idx->ipc_do('remove_eidx_info', $docid, $ekey, $eml);
+                } # else: we can't remove_eidx_info in reindex-only path
+
+                # replace invalidated blob ASAP with something which should be
+                # readable since we may commit the transaction on checkpoint.
+                # eidxq processing will re-apply boost
+                $smsg //= $sync->{self}->{oidx}->get_art($docid);
+                my $hex = unpack('H*', $oidbin);
+                if ($smsg && $smsg->{blob} eq $hex) {
+                        $xr3->[0] =~ /:([a-f0-9]{40,}+)\z/ or
+                                die "BUG: xref $xr3->[0] has no OID";
+                        $sync->{self}->{oidx}->update_blob($smsg, $1);
+                }
+                # yes, add, we'll need to re-apply boost
+                $sync->{self}->{oidx}->eidxq_add($docid);
+        }
+        @$xr3
+}
+
+sub do_xpost ($$) {
+        my ($req, $smsg) = @_;
+        my $self = $req->{self};
+        my $docid = $smsg->{num};
+        my $oid = $req->{oid};
+        my $xibx = $req->{ibx};
+        my $eml = $req->{eml};
+        if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
+                my $eidx_key = $xibx->eidx_key;
+                my $xnum = $req->{xnum};
+                $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
+                my $idx = $self->idx_shard($docid);
+                $idx->ipc_do('add_eidx_info', $docid, $eidx_key, $eml);
+                apply_boost($req, $smsg) if $req->{boost_in_use};
+        } else { # 'd' no {xnum}
+                $self->git->async_wait_all;
+                $oid = pack('H*', $oid);
+                _unref_doc($req, $docid, $xibx, undef, $oid, $eml);
+        }
+}
+
+# called by V2Writable::sync_prepare
+sub artnum_max { $_[0]->{oidx}->eidx_max }
+
+sub index_unseen ($) {
+        my ($req) = @_;
+        my $new_smsg = $req->{new_smsg} or die 'BUG: {new_smsg} unset';
+        my $eml = delete $req->{eml};
+        $new_smsg->populate($eml, $req);
+        my $self = $req->{self};
+        my $docid = $self->{oidx}->adj_counter('eidx_docid', '+');
+        $new_smsg->{num} = $docid;
+        my $idx = $self->idx_shard($docid);
+        $self->{oidx}->add_overview($eml, $new_smsg);
+        my $oid = $new_smsg->{blob};
+        my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
+        $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
+        $idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
+        check_batch_limit($req);
+}
+
+sub do_finalize ($) {
+        my ($req) = @_;
+        if (my $indexed = $req->{indexed}) { # duplicated messages
+                do_xpost($req, $_) for @$indexed;
+        } elsif (exists $req->{new_smsg}) { # totally unseen messsage
+                index_unseen($req);
+        } else {
+                # `d' message was already unindexed in the v1/v2 inboxes,
+                # so it's too noisy to warn, here.
+        }
+        # cur_cmt may be undef for unindex_oid, set by V2Writable::index_todo
+        if (defined(my $cur_cmt = $req->{cur_cmt})) {
+                ${$req->{latest_cmt}} = $cur_cmt;
+        }
+}
+
+sub do_step ($) { # main iterator for adding messages to the index
+        my ($req) = @_;
+        my $self = $req->{self} // die 'BUG: {self} missing';
+        while (1) {
+                if (my $next_arg = $req->{next_arg}) {
+                        if (my $smsg = $self->{oidx}->next_by_mid(@$next_arg)) {
+                                $req->{cur_smsg} = $smsg;
+                                $self->git->cat_async($smsg->{blob},
+                                                        \&ck_existing, $req);
+                                return; # ck_existing calls do_step
+                        }
+                        delete $req->{next_arg};
+                }
+                die "BUG: {cur_smsg} still set" if $req->{cur_smsg};
+                my $mid = shift(@{$req->{mids}}) // last;
+                my ($id, $prev);
+                $req->{next_arg} = [ $mid, \$id, \$prev ];
+                # loop again
+        }
+        do_finalize($req);
+}
+
+sub _blob_missing ($$) { # called when a known $smsg->{blob} is gone
+        my ($req, $smsg) = @_;
+        # xnum and ibx are unknown, we only call this when an entry from
+        # /ei*/over.sqlite3 is bad, not on entries from xap*/over.sqlite3
+        $req->{self}->git->async_wait_all;
+        _unref_doc($req, $smsg, undef, undef, $smsg->oidbin);
+}
+
+sub ck_existing { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $smsg = delete $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+        if ($type eq 'missing') {
+                _blob_missing($req, $smsg);
+        } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
+                my $self = $req->{self} // die 'BUG: {self} missing';
+                local $self->{current_info} = "$self->{current_info} $oid";
+                my $cur = PublicInbox::Eml->new($bref);
+                if (content_hash($cur) eq $req->{chash}) {
+                        push @{$req->{indexed}}, $smsg; # for do_xpost
+                } # else { index_unseen later }
+        }
+        do_step($req);
+}
+
+# is the messages visible in the inbox currently being indexed?
+# return the number if so
+sub cur_ibx_xnum ($$;$) {
+        my ($req, $bref, $mismatch) = @_;
+        my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing';
+
+        $req->{eml} = PublicInbox::Eml->new($bref);
+        $req->{chash} = content_hash($req->{eml});
+        $req->{mids} = mids($req->{eml});
+        for my $mid (@{$req->{mids}}) {
+                my ($id, $prev);
+                while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
+                        return $x->{num} if $x->{blob} eq $req->{oid};
+                        push @$mismatch, $x if $mismatch;
+                }
+        }
+        undef;
+}
+
+sub index_oid { # git->cat_async callback for 'm'
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $self = $req->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        return if is_bad_blob($oid, $type, $size, $req->{oid});
+        my $new_smsg = $req->{new_smsg} = bless {
+                blob => $oid,
+        }, 'PublicInbox::Smsg';
+        $new_smsg->set_bytes($$bref, $size);
+        ++${$req->{nr}};
+        my $mismatch = [];
+        $req->{xnum} = cur_ibx_xnum($req, $bref, $mismatch) // do {
+                warn "# deleted\n";
+                warn "# mismatch $_->{blob}\n" for @$mismatch;
+                ${$req->{latest_cmt}} = $req->{cur_cmt} //
+                        die "BUG: {cur_cmt} unset ($oid)\n";
+                return;
+        };
+        do_step($req);
+}
+
+sub unindex_oid { # git->cat_async callback for 'd'
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $self = $req->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        return if is_bad_blob($oid, $type, $size, $req->{oid});
+        return if defined(cur_ibx_xnum($req, $bref)); # was re-added
+        do_step($req);
+}
+
+# overrides V2Writable::last_commits, called by sync_ranges via sync_prepare
+sub last_commits {
+        my ($self, $sync) = @_;
+        my $heads = [];
+        my $ekey = $sync->{ibx}->eidx_key;
+        my $uv = $sync->{ibx}->uidvalidity;
+        for my $i (0..$sync->{epoch_max}) {
+                $heads->[$i] = $self->{oidx}->eidx_meta("lc-v2:$ekey//$uv;$i");
+        }
+        $heads;
+}
+
+sub _ibx_index_reject ($) {
+        my ($ibx) = @_;
+        $ibx->mm // return 'unindexed, no msgmap.sqlite3';
+        $ibx->uidvalidity // return 'no UIDVALIDITY';
+        $ibx->over // return 'unindexed, no over.sqlite3';
+        undef;
+}
+
+sub _sync_inbox ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $ekey = $ibx->eidx_key;
+        if (defined(my $err = _ibx_index_reject($ibx))) {
+                return "W: skipping $ekey ($err)";
+        }
+        $sync->{ibx} = $ibx;
+        $sync->{nr} = \(my $nr = 0);
+        my $v = $ibx->version;
+        if ($v == 2) {
+                $sync->{epoch_max} = $ibx->max_git_epoch // return;
+                sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
+        } elsif ($v == 1) {
+                my $uv = $ibx->uidvalidity;
+                my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv");
+                my $head = $ibx->mm->last_commit //
+                        return "E: $ibx->{inboxdir} is not indexed";
+                my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head);
+                my $unit = { stack => $stk, git => $ibx->git };
+                push @{$sync->{todo}}, $unit;
+        } else {
+                return "E: $ekey unsupported inbox version (v$v)";
+        }
+        for my $unit (@{delete($sync->{todo}) // []}) {
+                last if $sync->{quit};
+                index_todo($self, $sync, $unit);
+        }
+        $self->{midx}->index_ibx($ibx) unless $sync->{quit};
+        $ibx->git->cleanup; # done with this inbox, now
+        undef;
+}
+
+sub eidx_gc_scan_inboxes ($$) {
+        my ($self, $sync) = @_;
+        my ($x3_doc, $ibx_ck);
+restart:
+        $x3_doc = $self->{oidx}->dbh->prepare(<<EOM);
+SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ?
+EOM
+        $ibx_ck = $self->{oidx}->dbh->prepare(<<EOM);
+SELECT ibx_id,eidx_key FROM inboxes
+EOM
+        $ibx_ck->execute;
+        while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) {
+                next if $self->{ibx_map}->{$eidx_key};
+                $self->{midx}->remove_eidx_key($eidx_key);
+                warn "# deleting messages for $eidx_key...\n";
+                $x3_doc->execute($ibx_id);
+                my $ibx = { -ibx_id => $ibx_id, -gc_eidx_key => $eidx_key };
+                while (my ($docid, $xnum, $oid) = $x3_doc->fetchrow_array) {
+                        my $r = _unref_doc($sync, $docid, $ibx, $xnum, $oid);
+                        $oid = unpack('H*', $oid);
+                        $r = $r ? 'unref' : 'remove';
+                        warn "# $r #$docid $eidx_key $oid\n";
+                        if (checkpoint_due($sync)) {
+                                $x3_doc = $ibx_ck = undef;
+                                reindex_checkpoint($self, $sync);
+                                goto restart;
+                        }
+                }
+                $self->{oidx}->dbh->do(<<'', undef, $ibx_id);
+DELETE FROM inboxes WHERE ibx_id = ?
+
+                # drop last_commit info
+                my $pat = $eidx_key;
+                $pat =~ s/([_%\\])/\\$1/g;
+                $self->{oidx}->dbh->do('PRAGMA case_sensitive_like = ON');
+                my $lc_i = $self->{oidx}->dbh->prepare(<<'');
+SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ?
+
+                $lc_i->execute("lc-%:$pat//%", '\\');
+                while (my ($key) = $lc_i->fetchrow_array) {
+                        next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!;
+                        warn "# removing $key\n";
+                        $self->{oidx}->dbh->do(<<'', undef, $key);
+DELETE FROM eidx_meta WHERE key = ?
+
+                }
+                warn "# $eidx_key removed\n";
+        }
+}
+
+sub eidx_gc_scan_shards ($$) { # TODO: use for lei/store
+        my ($self, $sync) = @_;
+        my $nr = $self->{oidx}->dbh->do(<<'');
+DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over)
+
+        warn "# eliminated $nr stale xref3 entries\n" if $nr != 0;
+        reindex_checkpoint($self, $sync) if checkpoint_due($sync);
+
+        # fixup from old bugs:
+        $nr = $self->{oidx}->dbh->do(<<'');
+DELETE FROM over WHERE num > 0 AND num NOT IN (SELECT docid FROM xref3)
+
+        warn "# eliminated $nr stale over entries\n" if $nr != 0;
+        reindex_checkpoint($self, $sync) if checkpoint_due($sync);
+
+        $nr = $self->{oidx}->dbh->do(<<'');
+DELETE FROM eidxq WHERE docid NOT IN (SELECT num FROM over)
+
+        warn "# eliminated $nr stale reindex queue entries\n" if $nr != 0;
+        reindex_checkpoint($self, $sync) if checkpoint_due($sync);
+
+        my ($cur) = $self->{oidx}->dbh->selectrow_array(<<EOM);
+SELECT MIN(num) FROM over WHERE num > 0
+EOM
+        $cur // return; # empty
+        my ($r, $n, %active_shards);
+        $nr = 0;
+        while (1) {
+                $r = $self->{oidx}->dbh->selectcol_arrayref(<<"", undef, $cur);
+SELECT num FROM over WHERE num >= ? ORDER BY num ASC LIMIT 10000
+
+                last unless scalar(@$r);
+                while (defined($n = shift @$r)) {
+                        for my $i ($cur..($n - 1)) {
+                                my $idx = idx_shard($self, $i);
+                                $idx->ipc_do('xdb_remove_quiet', $i);
+                                $active_shards{$idx} = $idx;
+                        }
+                        $cur = $n + 1;
+                }
+                if (checkpoint_due($sync)) {
+                        for my $idx (values %active_shards) {
+                                $nr += $idx->ipc_do('nr_quiet_rm')
+                        }
+                        %active_shards = ();
+                        reindex_checkpoint($self, $sync);
+                }
+        }
+        warn "# eliminated $nr stale Xapian documents\n" if $nr != 0;
+}
+
+sub eidx_gc {
+        my ($self, $opt) = @_;
+        $self->{cfg} or die "E: GC requires ->attach_config\n";
+        $opt->{-idx_gc} = 1;
+        my $sync = {
+                need_checkpoint => \(my $need_checkpoint = 0),
+                check_intvl => 10,
+                next_check => now() + 10,
+                checkpoint_unlocks => 1,
+                -opt => $opt,
+                self => $self,
+        };
+        $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+        eidx_gc_scan_inboxes($self, $sync);
+        eidx_gc_scan_shards($self, $sync);
+        done($self);
+}
+
+sub _ibx_for ($$$) {
+        my ($self, $sync, $smsg) = @_;
+        my $ibx_id = delete($smsg->{ibx_id}) // die 'BUG: {ibx_id} unset';
+        my $pos = $sync->{id2pos}->{$ibx_id} //
+                bad_ibx_id($self, $ibx_id, \&croak);
+        $self->{-ibx_ary_known}->[$pos] //
+                die "BUG: ibx for $smsg->{blob} not mapped"
+}
+
+sub _fd_constrained ($) {
+        my ($self) = @_;
+        $self->{-fd_constrained} //= do {
+                my $soft;
+                if (eval { require BSD::Resource; 1 }) {
+                        my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+                        ($soft, undef) = BSD::Resource::getrlimit($NOFILE);
+                } else {
+                        chomp($soft = `sh -c 'ulimit -n'`);
+                }
+                if (defined($soft)) {
+                        # $want is an estimate
+                        my $want = scalar(@{$self->{ibx_active}}) + 64;
+                        my $ret = $want > $soft;
+                        if ($ret) {
+                                warn <<EOF;
+RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early
+EOF
+                        }
+                        $ret;
+                } else {
+                        warn "Unable to determine RLIMIT_NOFILE: $@\n";
+                        1;
+                }
+        };
+}
+
+sub _reindex_finalize ($$$) {
+        my ($req, $smsg, $eml) = @_;
+        my $sync = $req->{sync};
+        my $self = $sync->{self};
+        my $by_chash = delete $req->{by_chash} or die 'BUG: no {by_chash}';
+        my $nr = scalar(keys(%$by_chash)) or die 'BUG: no content hashes';
+        my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+        my $docid = $smsg->{num} = $orig_smsg->{num};
+        $self->{oidx}->add_overview($eml, $smsg); # may rethread
+        check_batch_limit({ %$sync, new_smsg => $smsg });
+        my $chash0 = $smsg->{chash} // die "BUG: $smsg->{blob} no {chash}";
+        my $stable = delete($by_chash->{$chash0}) //
+                                die "BUG: $smsg->{blob} chash missing";
+        my $idx = $self->idx_shard($docid);
+        my $top_smsg = pop @$stable;
+        $top_smsg == $smsg or die 'BUG: top_smsg != smsg';
+        my $ibx = _ibx_for($self, $sync, $smsg);
+        $idx->index_eml($eml, $smsg, $ibx->eidx_key);
+        for my $x (reverse @$stable) {
+                $ibx = _ibx_for($self, $sync, $x);
+                my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
+                $idx->ipc_do('add_eidx_info', $docid, $ibx->eidx_key, $hdr);
+        }
+        return if $nr == 1; # likely, all good
+
+        $self->git->async_wait_all;
+        warn "W: #$docid split into $nr due to deduplication change\n";
+        my @todo;
+        for my $ary (values %$by_chash) {
+                for my $x (reverse @$ary) {
+                        warn "removing #$docid xref3 $x->{blob}\n";
+                        my $bin = $x->oidbin;
+                        my $n = _unref_doc($sync, $docid, undef, undef, $bin);
+                        die "BUG: $x->{blob} invalidated #$docid" if $n == 0;
+                }
+                my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty";
+                $x->{num} = delete($x->{xnum}) // die '{xnum} unset';
+                $ibx = _ibx_for($self, $sync, $x);
+                if (my $over = $ibx->over) {
+                        my $e = $over->get_art($x->{num});
+                        $e->{blob} eq $x->{blob} or die <<EOF;
+$x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
+EOF
+                        push @todo, $ibx, $e;
+                        $over->dbh_close if _fd_constrained($self);
+                } else {
+                        die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n";
+                }
+        }
+        undef $by_chash;
+        while (my ($ibx, $e) = splice(@todo, 0, 2)) {
+                reindex_unseen($self, $sync, $ibx, $e);
+        }
+}
+
+sub _reindex_oid { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $sync = $req->{sync};
+        my $self = $sync->{self};
+        my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+        my $expect_oid = $req->{xr3r}->[$req->{ix}]->[2];
+        my $docid = $orig_smsg->{num};
+        if (is_bad_blob($oid, $type, $size, $expect_oid)) {
+                my $oidbin = pack('H*', $expect_oid);
+                my $remain = _unref_doc($sync, $docid, undef, undef, $oidbin);
+                if ($remain == 0) {
+                        warn "W: #$docid ($oid) gone or corrupt\n";
+                } elsif (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
+                        $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+                } else {
+                        warn "BUG: #$docid ($oid) gone (UNEXPECTED)\n";
+                }
+                return;
+        }
+        my $ci = $self->{current_info};
+        local $self->{current_info} = "$ci #$docid $oid";
+        my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+        $re_smsg->set_bytes($$bref, $size);
+        my $eml = PublicInbox::Eml->new($bref);
+        $re_smsg->populate($eml, { autime => $orig_smsg->{ds},
+                                cotime => $orig_smsg->{ts} });
+        my $chash = content_hash($eml);
+        $re_smsg->{chash} = $chash;
+        $re_smsg->{xnum} = $req->{xr3r}->[$req->{ix}]->[1];
+        $re_smsg->{ibx_id} = $req->{xr3r}->[$req->{ix}]->[0];
+        $re_smsg->{hdr} = $eml->header_obj;
+        push @{$req->{by_chash}->{$chash}}, $re_smsg;
+        if (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
+                $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+        } else { # last $re_smsg is the highest priority xref3
+                local $self->{current_info} = "$ci #$docid";
+                _reindex_finalize($req, $re_smsg, $eml);
+        }
+}
+
+sub _reindex_smsg ($$$) {
+        my ($self, $sync, $smsg) = @_;
+        my $docid = $smsg->{num};
+        my $xr3 = $self->{oidx}->get_xref3($docid, 1);
+        if (scalar(@$xr3) == 0) { # _reindex_check_stale should've covered this
+                warn <<"";
+BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex
+
+                remove_doc($self, $docid);
+                return;
+        }
+
+        # we sort {xr3r} in the reverse order of ibx_sorted so we can
+        # hit the common case in _reindex_finalize without rereading
+        # from git (or holding multiple messages in memory).
+        my $id2pos = $sync->{id2pos}; # index in ibx_sorted
+        check_xr3($self, $id2pos, $xr3);
+        @$xr3 = sort { # sort descending
+                $id2pos->{$b->[0]} <=> $id2pos->{$a->[0]}
+                                ||
+                $b->[1] <=> $a->[1] # break ties with {xnum}
+        } @$xr3;
+        @$xr3 = map { [ $_->[0], $_->[1], unpack('H*', $_->[2]) ] } @$xr3;
+        my $req = { orig_smsg => $smsg, sync => $sync, xr3r => $xr3, ix => 0 };
+        $self->git->cat_async($xr3->[$req->{ix}]->[2], \&_reindex_oid, $req);
+}
+
+sub checkpoint_due ($) {
+        my ($sync) = @_;
+        ${$sync->{need_checkpoint}} || (now() > $sync->{next_check});
+}
+
+sub host_ident () {
+        # I've copied FS images and only changed the hostname before,
+        # so prepend hostname.  Use `state' since these a BOFH can change
+        # these while this process is running and we always want to be
+        # able to release locks taken by this process.
+        state $retval = hostname . '-' . do {
+                my $m; # machine-id(5) is systemd
+                if (open(my $fh, '<', '/etc/machine-id')) { $m = <$fh> }
+                # (g)hostid(1) is in GNU coreutils, kern.hostid is most BSDs
+                chomp($m ||= `{ sysctl -n kern.hostid ||
+                                hostid || ghostid; } 2>/dev/null`
+                        || "no-machine-id-or-hostid-on-$^O");
+                $m;
+        };
+}
+
+sub eidxq_release {
+        my ($self) = @_;
+        my $expect = delete($self->{-eidxq_locked}) or return;
+        my ($owner_pid, undef) = split(/-/, $expect);
+        return if $owner_pid != $$; # shards may fork
+        my $oidx = $self->{oidx};
+        $oidx->begin_lazy;
+        my $cur = $oidx->eidx_meta('eidxq_lock') // '';
+        if ($cur eq $expect) {
+                $oidx->eidx_meta('eidxq_lock', '');
+                return 1;
+        } elsif ($cur ne '') {
+                warn "E: eidxq_lock($expect) stolen by $cur\n";
+        } else {
+                warn "E: eidxq_lock($expect) released by another process\n";
+        }
+        undef;
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        eidxq_release($self) and $self->{oidx}->commit_lazy;
+}
+
+sub _eidxq_take ($) {
+        my ($self) = @_;
+        my $val = "$$-${\time}-$>-".host_ident;
+        $self->{oidx}->eidx_meta('eidxq_lock', $val);
+        $self->{-eidxq_locked} = $val;
+}
+
+sub eidxq_lock_acquire ($) {
+        my ($self) = @_;
+        my $oidx = $self->{oidx};
+        $oidx->begin_lazy;
+        my $cur = $oidx->eidx_meta('eidxq_lock') || return _eidxq_take($self);
+        if (my $locked = $self->{-eidxq_locked}) { # be lazy
+                return $locked if $locked eq $cur;
+        }
+        my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4);
+        my $t = fmt_localtime($time);
+        local $self->{current_info} = 'eidxq';
+        if ($euid == $> && $ident eq host_ident) {
+                kill(0, $pid) and warn <<EOM and return;
+# PID:$pid (re)indexing since $t, it will continue our work
+EOM
+                if ($!{ESRCH}) {
+                        warn "# eidxq_lock is stale ($cur), clobbering\n";
+                        return _eidxq_take($self);
+                }
+                warn "E: kill(0, $pid) failed: $!\n"; # fall-through:
+        }
+        my $fn = $oidx->dbh->sqlite_db_filename;
+        warn <<EOF;
+W: PID:$pid, UID:$euid on $ident is indexing Xapian since $t
+W: If this is unexpected, delete `eidxq_lock' from the `eidx_meta' table:
+W:        sqlite3 $fn 'DELETE FROM eidx_meta WHERE key = "eidxq_lock"'
+EOF
+        undef;
+}
+
+sub ibx_sorted ($$) {
+        my ($self, $type) = @_;
+        $self->{"-ibx_ary_$type"} //= do {
+                # highest boost first, stable for config-ordering tiebreaker
+                use sort 'stable';
+                [ sort {
+                        ($b->{boost} // 0) <=> ($a->{boost} // 0)
+                  } @{$self->{'ibx_'.$type} // die "BUG: $type unknown"} ];
+        }
+}
+
+sub prep_id2pos ($) {
+        my ($self) = @_;
+        my %id2pos;
+        my $pos = 0;
+        $id2pos{$_->{-ibx_id}} = $pos++ for (@{ibx_sorted($self, 'known')});
+        \%id2pos;
+}
+
+sub eidxq_process ($$) { # for reindexing
+        my ($self, $sync) = @_;
+        local $self->{current_info} = 'eidxq process';
+        return unless ($self->{cfg} && eidxq_lock_acquire($self));
+        my $dbh = $self->{oidx}->dbh;
+        my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return;
+        ${$sync->{nr}} = 0;
+        local $sync->{-regen_fmt} = "%u/$tot\n";
+        my $pr = $sync->{-opt}->{-progress};
+        if ($pr) {
+                my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq');
+                my $max = $dbh->selectrow_array('SELECT MAX(docid) FROM eidxq');
+                $pr->("Xapian indexing $min..$max (total=$tot)\n");
+        }
+        $sync->{id2pos} //= prep_id2pos($self);
+        my ($del, $iter);
+restart:
+        $del = $dbh->prepare('DELETE FROM eidxq WHERE docid = ?');
+        $iter = $dbh->prepare('SELECT docid FROM eidxq ORDER BY docid ASC');
+        $iter->execute;
+        while (defined(my $docid = $iter->fetchrow_array)) {
+                last if $sync->{quit};
+                if (my $smsg = $self->{oidx}->get_art($docid)) {
+                        _reindex_smsg($self, $sync, $smsg);
+                } else {
+                        warn "E: #$docid does not exist in over\n";
+                }
+                $del->execute($docid);
+                ++${$sync->{nr}};
+
+                if (checkpoint_due($sync)) {
+                        $dbh = $del = $iter = undef;
+                        reindex_checkpoint($self, $sync); # release lock
+                        $dbh = $self->{oidx}->dbh;
+                        goto restart;
+                }
+        }
+        $self->git->async_wait_all;
+        $pr->("reindexed ${$sync->{nr}}/$tot\n") if $pr;
+}
+
+sub _reindex_unseen { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        return if is_bad_blob($oid, $type, $size, $req->{oid});
+        my $self = $req->{self} // die 'BUG: {self} unset';
+        local $self->{current_info} = "$self->{current_info} $oid";
+        my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg';
+        $new_smsg->set_bytes($$bref, $size);
+        my $eml = $req->{eml} = PublicInbox::Eml->new($bref);
+        $req->{new_smsg} = $new_smsg;
+        $req->{chash} = content_hash($eml);
+        $req->{mids} = mids($eml); # do_step iterates through this
+        do_step($req); # enter the normal indexing flow
+}
+
+# --reindex may catch totally unseen messages, this handles them
+sub reindex_unseen ($$$$) {
+        my ($self, $sync, $ibx, $xsmsg) = @_;
+        my $req = {
+                %$sync, # has {self}
+                autime => $xsmsg->{ds},
+                cotime => $xsmsg->{ts},
+                oid => $xsmsg->{blob},
+                ibx => $ibx,
+                xnum => $xsmsg->{num},
+                # {mids} and {chash} will be filled in at _reindex_unseen
+        };
+        warn "# reindex_unseen ${\$ibx->eidx_key}:$req->{xnum}:$req->{oid}\n";
+        $self->git->cat_async($xsmsg->{blob}, \&_reindex_unseen, $req);
+}
+
+sub _unref_stale_range ($$$) {
+        my ($sync, $ibx, $lt_or_gt) = @_;
+        my $r;
+        my $lim = 10000;
+        do {
+                $r = $sync->{self}->{oidx}->dbh->selectall_arrayref(
+                        <<EOS, undef, $ibx->{-ibx_id});
+SELECT docid,xnum,oidbin FROM xref3
+WHERE ibx_id = ? AND $lt_or_gt LIMIT $lim
+EOS
+                return if $sync->{quit};
+                for (@$r) { # hopefully rare, not worth optimizing:
+                        my ($docid, $xnum, $oidbin) = @$_;
+                        my $hex = unpack('H*', $oidbin);
+                        warn("# $xnum:$hex (#$docid): stale\n");
+                        _unref_doc($sync, $docid, $ibx, $xnum, $oidbin);
+                }
+        } while (scalar(@$r) == $lim);
+        1;
+}
+
+sub _reindex_check_ibx ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $ibx_id = $ibx->{-ibx_id};
+        my $slice = 10000;
+        my $opt = { limit => $slice };
+        my ($beg, $end) = (1, $slice);
+        my $ekey = $ibx->eidx_key;
+        my ($max, $max0);
+        do {
+                $max0 = $ibx->mm->num_highwater;
+                sync_inbox($self, $sync, $ibx) and return; # warned
+                $max = $ibx->mm->num_highwater;
+                return if $sync->{quit};
+        } while ($max > $max0 &&
+                warn("# $ekey moved $max0..$max, resyncing..\n"));
+        $end = $max if $end > $max;
+
+        # first, check if we missed any messages in target $ibx
+        my $msgs;
+        my $pr = $sync->{-opt}->{-progress};
+        local $sync->{-regen_fmt} = "$ekey checking %u/$max\n";
+        ${$sync->{nr}} = 0;
+        my $fast = $sync->{-opt}->{fast};
+        my $usr; # _unref_stale_range (< $lo) called
+        my ($lo, $hi);
+        while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end, $opt)})) {
+                ${$sync->{nr}} = $beg;
+                $beg = $msgs->[-1]->{num} + 1;
+                $end = $beg + $slice;
+                $end = $max if $end > $max;
+                if (checkpoint_due($sync)) {
+                        reindex_checkpoint($self, $sync); # release lock
+                }
+                ($lo, $hi) = ($msgs->[0]->{num}, $msgs->[-1]->{num});
+                $usr //= _unref_stale_range($sync, $ibx, "xnum < $lo");
+                my $x3a = $self->{oidx}->dbh->selectall_arrayref(
+                        <<"", undef, $ibx_id, $lo, $hi);
+SELECT xnum,oidbin,docid FROM xref3 WHERE
+ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+                my %x3m;
+                for (@$x3a) {
+                        my $k = pack('J', $_->[0]) . $_->[1];
+                        push @{$x3m{$k}}, $_->[2];
+                }
+                undef $x3a;
+                for my $xsmsg (@$msgs) {
+                        my $k = pack('JH*', $xsmsg->{num}, $xsmsg->{blob});
+                        my $docids = delete($x3m{$k});
+                        if (!defined($docids)) {
+                                reindex_unseen($self, $sync, $ibx, $xsmsg);
+                        } elsif (!$fast) {
+                                for my $num (@$docids) {
+                                        $self->{oidx}->eidxq_add($num);
+                                }
+                        }
+                        return if $sync->{quit};
+                }
+                next unless scalar keys %x3m;
+                $self->git->async_wait_all; # wait for reindex_unseen
+
+                # eliminate stale/mismatched entries
+                my %mismatch = map { $_->{num} => $_->{blob} } @$msgs;
+                while (my ($k, $docids) = each %x3m) {
+                        my ($xnum, $hex) = unpack('JH*', $k);
+                        my $bin = pack('H*', $hex);
+                        my $exp = $mismatch{$xnum};
+                        if (defined $exp) {
+                                my $smsg = $ibx->over->get_art($xnum) // next;
+                                # $xnum may be expired by another process
+                                if ($smsg->{blob} eq $hex) {
+                                        warn <<"";
+BUG: (non-fatal) $ekey #$xnum $smsg->{blob} still matches (old exp: $exp)
+
+                                        next;
+                                } # else: continue to unref
+                        }
+                        my $m = defined($exp) ? "mismatch (!= $exp)" : 'stale';
+                        warn("# $xnum:$hex (#@$docids): $m\n");
+                        for my $i (@$docids) {
+                                _unref_doc($sync, $i, $ibx, $xnum, $bin);
+                        }
+                        return if $sync->{quit};
+                }
+        }
+        defined($hi) and ($hi < $max) and
+                _unref_stale_range($sync, $ibx, "xnum > $hi AND xnum <= $max");
+}
+
+sub _reindex_inbox ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $ekey = $ibx->eidx_key;
+        local $self->{current_info} = $ekey;
+        if (defined(my $err = _ibx_index_reject($ibx))) {
+                warn "W: cannot reindex $ekey ($err)\n";
+        } else {
+                _reindex_check_ibx($self, $sync, $ibx);
+        }
+        delete @$ibx{qw(over mm search git)}; # won't need these for a bit
+}
+
+sub eidx_reindex {
+        my ($self, $sync) = @_;
+        return unless $self->{cfg};
+
+        # acquire eidxq_lock early because full reindex takes forever
+        # and incremental -extindex processes can run during our checkpoints
+        if (!eidxq_lock_acquire($self)) {
+                warn "E: aborting --reindex\n";
+                return;
+        }
+        for my $ibx (@{ibx_sorted($self, 'active')}) {
+                _reindex_inbox($self, $sync, $ibx);
+                last if $sync->{quit};
+        }
+        $self->git->async_wait_all; # ensure eidxq gets filled completely
+        eidxq_process($self, $sync) unless $sync->{quit};
+}
+
+sub sync_inbox {
+        my ($self, $sync, $ibx) = @_;
+        my $err = _sync_inbox($self, $sync, $ibx);
+        delete @$ibx{qw(mm over)};
+        warn $err, "\n" if defined($err);
+        $err;
+}
+
+sub dd_smsg { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $dd) = @_;
+        my $smsg = $dd->{smsg} // die 'BUG: dd->{smsg} missing';
+        my $self = $dd->{self} // die 'BUG: {self} missing';
+        my $per_mid = $dd->{per_mid} // die 'BUG: {per_mid} missing';
+        if ($type eq 'missing') {
+                _blob_missing($dd, $smsg);
+        } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
+                local $self->{current_info} = "$self->{current_info} $oid";
+                my $chash = content_hash(PublicInbox::Eml->new($bref));
+                push(@{$per_mid->{dd_chash}->{$chash}}, $smsg);
+        }
+        return if $per_mid->{last_smsg} != $smsg;
+        while (my ($chash, $ary) = each %{$per_mid->{dd_chash}}) {
+                my $keep = shift @$ary;
+                next if !scalar(@$ary);
+                $per_mid->{sync}->{dedupe_cull} += scalar(@$ary);
+                print STDERR
+                        "# <$keep->{mid}> keeping #$keep->{num}, dropping ",
+                        join(', ', map { "#$_->{num}" } @$ary),"\n";
+                next if $per_mid->{sync}->{-opt}->{'dry-run'};
+                my $oidx = $self->{oidx};
+                for my $smsg (@$ary) {
+                        my $gone = $smsg->{num};
+                        $oidx->merge_xref3($keep->{num}, $gone, $smsg->oidbin);
+                        remove_doc($self, $gone);
+                }
+        }
+}
+
+sub eidx_dedupe ($$$) {
+        my ($self, $sync, $msgids) = @_;
+        $sync->{dedupe_cull} = 0;
+        my $candidates = 0;
+        my $nr_mid = 0;
+        return unless eidxq_lock_acquire($self);
+        my ($iter, $cur_mid);
+        my $min_id = 0;
+        my $idx = 0;
+        my ($max_id) = $self->{oidx}->dbh->selectrow_array(<<EOS);
+SELECT MAX(id) FROM msgid
+EOS
+        local $sync->{-regen_fmt} = "dedupe %u/$max_id\n";
+
+        # note: we could write this query more intelligently,
+        # but that causes lock contention with read-only processes
+dedupe_restart:
+        $cur_mid = $msgids->[$idx];
+        if ($cur_mid eq '') { # all Message-IDs
+                $iter = $self->{oidx}->dbh->prepare(<<EOS);
+SELECT mid,id FROM msgid WHERE id > ? ORDER BY id ASC
+EOS
+                $iter->execute($min_id);
+        } else {
+                $iter = $self->{oidx}->dbh->prepare(<<EOS);
+SELECT mid,id FROM msgid WHERE mid = ? AND id > ? ORDER BY id ASC
+EOS
+                $iter->execute($cur_mid, $min_id);
+        }
+        while (my ($mid, $id) = $iter->fetchrow_array) {
+                last if $sync->{quit};
+                $self->{current_info} = "dedupe $mid";
+                ${$sync->{nr}} = $min_id = $id;
+                my ($prv, @smsg);
+                while (my $x = $self->{oidx}->next_by_mid($mid, \$id, \$prv)) {
+                        push @smsg, $x;
+                }
+                next if scalar(@smsg) < 2;
+                my $per_mid = {
+                        dd_chash => {}, # chash => [ary of smsgs]
+                        last_smsg => $smsg[-1],
+                        sync => $sync
+                };
+                $nr_mid++;
+                $candidates += scalar(@smsg) - 1;
+                for my $smsg (@smsg) {
+                        my $dd = {
+                                per_mid => $per_mid,
+                                smsg => $smsg,
+                                self => $self,
+                        };
+                        $self->git->cat_async($smsg->{blob}, \&dd_smsg, $dd);
+                }
+                # need to wait on every single one @smsg contents can get
+                # invalidated inside dd_smsg for messages with multiple
+                # Message-IDs.
+                $self->git->async_wait_all;
+
+                if (checkpoint_due($sync)) {
+                        undef $iter;
+                        reindex_checkpoint($self, $sync);
+                        goto dedupe_restart;
+                }
+        }
+        goto dedupe_restart if defined($msgids->[++$idx]);
+
+        my $n = delete $sync->{dedupe_cull};
+        if (my $pr = $sync->{-opt}->{-progress}) {
+                $pr->("culled $n/$candidates candidates ($nr_mid msgids)\n");
+        }
+        ${$sync->{nr}} = 0;
+}
+
+sub eidx_sync { # main entry point
+        my ($self, $opt) = @_;
+
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
+        local $self->{current_info} = '';
+        local $SIG{__WARN__} = sub {
+                return if PublicInbox::Eml::warn_ignore(@_);
+                $warn_cb->($self->{current_info}, ': ', @_);
+        };
+        $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+        $self->{oidx}->rethread_prepare($opt);
+        my $sync = {
+                need_checkpoint => \(my $need_checkpoint = 0),
+                check_intvl => 10,
+                next_check => now() + 10,
+                -opt => $opt,
+                # DO NOT SET {reindex} here, it's incompatible with reused
+                # V2Writable code, reindex is totally different here
+                # compared to v1/v2 inboxes because we have multiple histories
+                self => $self,
+                -regen_fmt => "%u/?\n",
+        };
+        local $SIG{USR1} = sub { $need_checkpoint = 1 };
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
+        for my $ibx (@{ibx_sorted($self, 'known')}) {
+                $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
+        }
+
+        if (scalar(grep { defined($_->{boost}) } @{$self->{ibx_known}})) {
+                $sync->{id2pos} //= prep_id2pos($self);
+                $sync->{boost_in_use} = 1;
+        }
+
+        if (my $msgids = delete($opt->{dedupe})) {
+                local $sync->{checkpoint_unlocks} = 1;
+                eidx_dedupe($self, $sync, $msgids);
+        }
+        if (delete($opt->{reindex})) {
+                local $sync->{checkpoint_unlocks} = 1;
+                eidx_reindex($self, $sync);
+        }
+
+        # don't use $_ here, it'll get clobbered by reindex_checkpoint
+        if ($opt->{scan} // 1) {
+                for my $ibx (@{ibx_sorted($self, 'active')}) {
+                        last if $sync->{quit};
+                        sync_inbox($self, $sync, $ibx);
+                }
+        }
+        $self->{oidx}->rethread_done($opt) unless $sync->{quit};
+        eidxq_process($self, $sync) unless $sync->{quit};
+
+        eidxq_release($self);
+        done($self);
+        $sync; # for eidx_watch
+}
+
+sub update_last_commit { # overrides V2Writable
+        my ($self, $sync, $stk) = @_;
+        my $unit = $sync->{unit} // return;
+        my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        defined($latest_cmt) or return;
+        my $ibx = $sync->{ibx} or die 'BUG: {ibx} missing';
+        my $ekey = $ibx->eidx_key;
+        my $uv = $ibx->uidvalidity;
+        my $epoch = $unit->{epoch};
+        my $meta_key;
+        my $v = $ibx->version;
+        if ($v == 2) {
+                die 'No {epoch} for v2 unit' unless defined $epoch;
+                $meta_key = "lc-v2:$ekey//$uv;$epoch";
+        } elsif ($v == 1) {
+                die 'Unexpected {epoch} for v1 unit' if defined $epoch;
+                $meta_key = "lc-v1:$ekey//$uv";
+        } else {
+                die "Unsupported inbox version: $v";
+        }
+        my $last = $self->{oidx}->eidx_meta($meta_key);
+        if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) {
+                my @cmd = (qw(rev-list --count), "$last..$latest_cmt");
+                chomp(my $n = $unit->{git}->qx(@cmd));
+                return if $n ne '' && $n == 0;
+        }
+        $self->{oidx}->eidx_meta($meta_key, $latest_cmt);
+}
+
+sub symlink_packs ($$) {
+        my ($ibx, $pd) = @_;
+        my $ret = 0;
+        my $glob = "$ibx->{inboxdir}/git/*.git/objects/pack/*.idx";
+        for my $idx (bsd_glob($glob, GLOB_NOSORT)) {
+                my $src = substr($idx, 0, -length('.idx'));
+                my $dst = $pd . substr($src, rindex($src, '/'));
+                if (-f "$src.pack" and
+                                symlink("$src.pack", "$dst.pack") and
+                                symlink($idx, "$dst.idx") and
+                                -f $idx) {
+                        ++$ret;
+                        # .promisor, .bitmap, .rev and .keep are optional
+                        # XXX should we symlink .keep here?
+                        for my $s (qw(promisor bitmap rev)) {
+                                symlink("$src.$s", "$dst.$s") if -f "$src.$s";
+                        }
+                } elsif (!$!{EEXIST}) {
+                        warn "W: ln -s $src.{pack,idx} => $dst.*: $!\n";
+                        unlink "$dst.pack", "$dst.idx";
+                }
+        }
+        $ret;
+}
+
+sub idx_init { # similar to V2Writable
+        my ($self, $opt) = @_;
+        return if $self->{idx_shards};
+
+        $self->git->cleanup;
+        my $mode = 0644;
+        my $ALL = $self->git->{git_dir}; # topdir/ALL.git
+        my ($has_new, $alt, $seen, $prune, $prune_nr);
+        if ($opt->{-private}) { # LeiStore
+                my $local = "$self->{topdir}/local"; # lei/store
+                $self->{mg} //= PublicInbox::MultiGit->new($self->{topdir},
+                                                        'ALL.git', 'local');
+                $mode = 0600;
+                unless (-d $ALL) {
+                        umask 077; # don't bother restoring for lei
+                        PublicInbox::Import::init_bare($ALL);
+                        $self->git->qx(qw(config core.sharedRepository 0600));
+                }
+                ($alt, $seen) = $self->{mg}->read_alternates(\$mode);
+                $has_new = $self->{mg}->merge_epochs($alt, $seen);
+        } else { # extindex has no epochs
+                $self->{mg} //= PublicInbox::MultiGit->new($self->{topdir},
+                                                        'ALL.git');
+                $prune = $opt->{-idx_gc} ? \$prune_nr : undef;
+                ($alt, $seen) = $self->{mg}->read_alternates(\$mode, $prune);
+                PublicInbox::Import::init_bare($ALL);
+        }
+
+        # git-multi-pack-index(1) can speed up "git cat-file" startup slightly
+        my $git_midx = 0;
+        my $pd = "$ALL/objects/pack";
+        if (opendir(my $dh, $pd)) { # drop stale symlinks
+                while (defined(my $dn = readdir($dh))) {
+                        if ($dn =~ /\.(?:idx|pack|promisor|bitmap|rev)\z/) {
+                                my $f = "$pd/$dn";
+                                unlink($f) if -l $f && !-e $f;
+                        }
+                }
+        } elsif ($!{ENOENT}) {
+                mkdir($pd) or die "mkdir($pd): $!";
+        } else {
+                die "opendir($pd): $!";
+        }
+        my $new = '';
+        for my $ibx (@{ibx_sorted($self, 'active')}) {
+                # create symlinks for multi-pack-index
+                $git_midx += symlink_packs($ibx, $pd);
+                # add new lines to our alternates file
+                my $d = $ibx->git->{git_dir} . '/objects';
+                next if exists $alt->{$d};
+                if (my @st = stat($d)) {
+                        next if $seen->{"$st[0]\0$st[1]"}++;
+                } else {
+                        warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n";
+                        next if $opt->{-idx_gc};
+                }
+                $new .= "$d\n";
+        }
+        ($has_new || $prune_nr || $new ne '') and
+                $self->{mg}->write_alternates($mode, $alt, $new);
+        my $restore = $self->with_umask;
+        if ($git_midx && ($opt->{'multi-pack-index'} // 1)) {
+                my $cmd = $self->git->cmd('multi-pack-index');
+                push @$cmd, '--no-progress' if ($opt->{quiet}//0) > 1;
+                my $lk = $self->lock_for_scope;
+                system(@$cmd, 'write');
+                # ignore errors, fairly new command, may not exist
+        }
+        $self->parallel_init($self->{indexlevel});
+        PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock
+        $self->{midx} = PublicInbox::MiscIdx->new($self);
+        $self->{oidx}->begin_lazy;
+        $self->{oidx}->eidx_prep;
+        $self->{midx}->create_xdb if $new ne '';
+}
+
+sub _watch_commit { # PublicInbox::DS::add_timer callback
+        my ($self) = @_;
+        delete $self->{-commit_timer};
+        eidxq_process($self, $self->{-watch_sync});
+        eidxq_release($self);
+        my $fmt = delete $self->{-watch_sync}->{-regen_fmt};
+        reindex_checkpoint($self, $self->{-watch_sync});
+        $self->{-watch_sync}->{-regen_fmt} = $fmt;
+
+        # call event_step => done unless commit_timer is armed
+        PublicInbox::DS::requeue($self);
+}
+
+sub on_inbox_unlock { # called by PublicInbox::InboxIdle
+        my ($self, $ibx) = @_;
+        my $opt = $self->{-watch_sync}->{-opt};
+        my $pr = $opt->{-progress};
+        my $ekey = $ibx->eidx_key;
+        local $0 = "sync $ekey";
+        $pr->("indexing $ekey\n") if $pr;
+        $self->idx_init($opt);
+        sync_inbox($self, $self->{-watch_sync}, $ibx);
+        $self->{-commit_timer} //= add_timer($opt->{'commit-interval'} // 10,
+                                        \&_watch_commit, $self);
+}
+
+sub eidx_reload { # -extindex --watch SIGHUP handler
+        my ($self, $idler) = @_;
+        if ($self->{cfg}) {
+                my $pr = $self->{-watch_sync}->{-opt}->{-progress};
+                $pr->('reloading ...') if $pr;
+                delete $self->{-resync_queue};
+                delete $self->{-ibx_ary_known};
+                delete $self->{-ibx_ary_active};
+                $self->{ibx_known} = [];
+                $self->{ibx_active} = [];
+                %{$self->{ibx_map}} = ();
+                delete $self->{-watch_sync}->{id2pos};
+                my $cfg = PublicInbox::Config->new;
+                attach_config($self, $cfg);
+                $idler->refresh($cfg);
+                $pr->(" done\n") if $pr;
+        } else {
+                warn "reload not supported without --all\n";
+        }
+}
+
+sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
+        my ($self) = @_;
+        $self->{-resync_queue} //= [ @{ibx_sorted($self, 'active')} ];
+        PublicInbox::DS::requeue($self); # trigger our ->event_step
+}
+
+sub event_step { # PublicInbox::DS::requeue callback
+        my ($self) = @_;
+        if (my $resync_queue = $self->{-resync_queue}) {
+                if (my $ibx = shift(@$resync_queue)) {
+                        on_inbox_unlock($self, $ibx);
+                        PublicInbox::DS::requeue($self);
+                } else {
+                        delete $self->{-resync_queue};
+                        _watch_commit($self);
+                }
+        } else {
+                done($self) unless $self->{-commit_timer};
+        }
+}
+
+sub eidx_watch { # public-inbox-extindex --watch main loop
+        my ($self, $opt) = @_;
+        local @SIG{keys %SIG} = values %SIG;
+        for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) {
+                $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" };
+        }
+        require PublicInbox::InboxIdle;
+        require PublicInbox::DS;
+        require PublicInbox::Syscall;
+        require PublicInbox::Sigfd;
+        my $idler = PublicInbox::InboxIdle->new($self->{cfg});
+        if (!$self->{cfg}) {
+                $idler->watch_inbox($_) for (@{ibx_sorted($self, 'active')});
+        }
+        for my $ibx (@{ibx_sorted($self, 'active')}) {
+                $ibx->subscribe_unlock(__PACKAGE__, $self)
+        }
+        my $pr = $opt->{-progress};
+        $pr->("performing initial scan ...\n") if $pr;
+        my $sync = eidx_sync($self, $opt); # initial sync
+        return if $sync->{quit};
+        my $oldset = PublicInbox::DS::block_signals();
+        local $self->{current_info} = '';
+        my $cb = $SIG{__WARN__} || \&CORE::warn;
+        local $SIG{__WARN__} = sub {
+                return if PublicInbox::Eml::warn_ignore(@_);
+                $cb->($self->{current_info}, ': ', @_);
+        };
+        my $sig = {
+                HUP => sub { eidx_reload($self, $idler) },
+                USR1 => sub { eidx_resync_start($self) },
+                TSTP => sub { kill('STOP', $$) },
+        };
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit;
+        local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock
+        local @PublicInbox::DS::post_loop_do = (sub { !$sync->{quit} });
+        $pr->("initial scan complete, entering event loop\n") if $pr;
+        # calls InboxIdle->event_step:
+        PublicInbox::DS::event_loop($sig, $oldset);
+        done($self);
+}
+
+no warnings 'once';
+*done = \&PublicInbox::V2Writable::done;
+*parallel_init = \&PublicInbox::V2Writable::parallel_init;
+*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
+*sync_prepare = \&PublicInbox::V2Writable::sync_prepare;
+*index_todo = \&PublicInbox::V2Writable::index_todo;
+*count_shards = \&PublicInbox::V2Writable::count_shards;
+*atfork_child = \&PublicInbox::V2Writable::atfork_child;
+*idx_shard = \&PublicInbox::V2Writable::idx_shard;
+*reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint;
+*checkpoint = \&PublicInbox::V2Writable::checkpoint;
+*barrier = \&PublicInbox::V2Writable::barrier;
+
+1;
diff --git a/lib/PublicInbox/FakeImport.pm b/lib/PublicInbox/FakeImport.pm
new file mode 100644
index 00000000..bccc3321
--- /dev/null
+++ b/lib/PublicInbox/FakeImport.pm
@@ -0,0 +1,26 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# pretend to do PublicInbox::Import::add for "lei index"
+package PublicInbox::FakeImport;
+use strict;
+use v5.10.1;
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::Import;
+
+sub new { bless { bytes_added => 0 }, __PACKAGE__ }
+
+sub add {
+        my ($self, $eml, $check_cb, $smsg) = @_;
+        PublicInbox::Import::drop_unwanted_headers($eml);
+        $smsg->populate($eml);
+        my $raw = $eml->as_string;
+        $smsg->{blob} = git_sha(1, \$raw)->hexdigest;
+        $smsg->set_bytes($raw, length($raw));
+        if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
+                $oidx->vivify_xvmd($smsg) or return;
+        }
+        1;
+}
+
+1;
diff --git a/lib/PublicInbox/FakeInotify.pm b/lib/PublicInbox/FakeInotify.pm
index 92758613..8be07135 100644
--- a/lib/PublicInbox/FakeInotify.pm
+++ b/lib/PublicInbox/FakeInotify.pm
@@ -1,89 +1,176 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # for systems lacking Linux::Inotify2 or IO::KQueue, just emulates
-# enough of Linux::Inotify2
+# enough of Linux::Inotify2 we use.
 package PublicInbox::FakeInotify;
-use strict;
+use v5.12;
 use Time::HiRes qw(stat);
-use PublicInbox::DS;
+use PublicInbox::DS qw(add_timer);
+use Errno qw(ENOTDIR ENOENT);
 sub IN_MODIFY () { 0x02 } # match Linux inotify
+# my $IN_MOVED_FROM         0x00000040        /* File was moved from X.  */
 # my $IN_MOVED_TO = 0x80;
 # my $IN_CREATE = 0x100;
 sub MOVED_TO_OR_CREATE () { 0x80 | 0x100 }
+sub IN_DELETE () { 0x200 }
+sub IN_DELETE_SELF () { 0x400 }
+sub IN_MOVE_SELF () { 0x800 }
 
 my $poll_intvl = 2; # same as Filesys::Notify::Simple
 
-sub new { bless { watch => {} }, __PACKAGE__ }
+sub new { bless {}, __PACKAGE__ }
 
-# behaves like Linux::Inotify2->watch
-sub watch {
-        my ($self, $path, $mask) = @_;
-        my @st = stat($path) or return;
-        my $k = "$path\0$mask";
-        $self->{watch}->{$k} = $st[10]; # 10 - ctime
-        bless [ $self->{watch}, $k ], 'PublicInbox::FakeInotify::Watch';
+sub on_dir_change ($$$$$) { # used by KQNotify subclass
+        my ($self, $events, $dh, $path, $dir_delete) = @_;
+        my $old = $self->{dirlist}->{$path};
+        my @cur = grep(!/\A\.\.?\z/, readdir($dh));
+        $self->{dirlist}->{$path} = \@cur;
+
+        # new files:
+        my %tmp = map { $_ => undef } @cur;
+        delete @tmp{@$old};
+        push(@$events, map {
+                bless \"$path/$_", 'PublicInbox::FakeInotify::Event'
+        } keys %tmp);
+
+        if ($dir_delete) {
+                %tmp = map { $_ => undef } @$old;
+                delete @tmp{@cur};
+                push(@$events, map {
+                        bless \"$path/$_", 'PublicInbox::FakeInotify::GoneEvent'
+                } keys %tmp);
+        }
 }
 
-sub on_new_files ($$$$) {
-        my ($events, $dh, $path, $old_ctime) = @_;
-        while (defined(my $base = readdir($dh))) {
-                next if $base =~ /\A\.\.?\z/;
-                my $full = "$path/$base";
-                my @st = stat($full);
-                if (@st && $st[10] > $old_ctime) {
-                        push @$events,
-                                bless(\$full, 'PublicInbox::FakeInotify::Event')
+sub watch_open ($$$) { # used by KQNotify subclass
+        my ($self, $path, $dir_delete) = @_;
+        my ($fh, @st, @st0, $tries);
+        do {
+again:
+                unless (@st0 = stat($path)) {
+                        warn "W: stat($path): $!" if $! != ENOENT;
+                        return;
+                }
+                if (!(-d _ ? opendir($fh, $path) : open($fh, '<', $path))) {
+                        goto again if $! == ENOTDIR && ++$tries < 10;
+                        warn "W: open($path): $!" if $! != ENOENT;
+                        return;
                 }
+                @st = stat($fh) or die "fstat($path): $!";
+        } while ("@st[0,1]" ne "@st0[0,1]" &&
+                ((++$tries < 10) || (warn(<<EOM) && return)));
+E: $path switching inodes too frequently to watch
+EOM
+        if (-d _) {
+                $self->{dirlist}->{$path} = [];
+                on_dir_change($self, [], $fh, $path, $$dir_delete);
+        } else {
+                $$dir_delete = 0;
         }
+        bless [ @st[0, 1, 10], $path, $fh ], 'PublicInbox::FakeInotify::Watch'
+}
+
+# behaves like Linux::Inotify2->watch
+sub watch {
+        my ($self, $path, $mask) = @_; # mask is ignored
+        my $dir_delete = $mask & IN_DELETE ? 1 : 0;
+        my $w = watch_open($self, $path, \$dir_delete) or return;
+        pop @$w; # no need to keep $fh open for non-kqueue
+        $self->{watch}->{"$path\0$dir_delete"} = $w;
+}
+
+sub gone ($$$) { # used by KQNotify subclass
+        my ($self, $ident, $path) = @_;
+        delete $self->{watch}->{$ident};
+        delete $self->{dirlist}->{$path};
+        bless(\$path, 'PublicInbox::FakeInotify::SelfGoneEvent');
+}
+
+# fuzz the time for freshly modified directories for low-res VFS
+sub dir_adj ($) {
+        my ($old_ctime) = @_;
+        my $now = Time::HiRes::time;
+        my $diff = $now - $old_ctime;
+        my $adj = $poll_intvl + 1;
+        ($diff > -$adj && $diff < $adj) ? 1 : 0;
 }
 
 # behaves like non-blocking Linux::Inotify2->read
 sub read {
         my ($self) = @_;
-        my $watch = $self->{watch} or return ();
-        my $events = [];
-        for my $x (keys %$watch) {
-                my ($path, $mask) = split(/\0/, $x, 2);
-                my @now = stat($path) or next;
-                my $old_ctime = $watch->{$x};
-                $watch->{$x} = $now[10];
-                next if $old_ctime == $now[10];
-                if ($mask & IN_MODIFY) {
-                        push @$events,
-                                bless(\$path, 'PublicInbox::FakeInotify::Event')
-                } elsif ($mask & MOVED_TO_OR_CREATE) {
-                        opendir(my $dh, $path) or do {
-                                warn "W: opendir $path: $!\n";
+        my $ret = [];
+        while (my ($ident, $w) = each(%{$self->{watch}})) {
+                if (!@$w) { # cancelled
+                        delete($self->{watch}->{$ident});
+                        next;
+                }
+                my $dir_delete = (split(/\0/, $ident, 2))[1];
+                my ($old_dev, $old_ino, $old_ctime, $path) = @$w;
+                my @new_st = stat($path);
+                warn "W: stat($path): $!\n" if !@new_st && $! != ENOENT;
+                if (!@new_st || "$old_dev $old_ino" ne "@new_st[0,1]") {
+                        push @$ret, gone($self, $ident, $path);
+                        next;
+                }
+                if (-d _ && $new_st[10] > ($old_ctime - dir_adj($old_ctime))) {
+                        opendir(my $fh, $path) or do {
+                                if ($! == ENOENT || $! == ENOTDIR) {
+                                        push @$ret, gone($self, $ident, $path);
+                                } else {
+                                        warn "W: opendir($path): $!";
+                                }
                                 next;
                         };
-                        on_new_files($events, $dh, $path, $old_ctime);
+                        @new_st = stat($fh) or die "fstat($path): $!";
+                        if ("$old_dev $old_ino" ne "@new_st[0,1]") {
+                                push @$ret, gone($self, $ident, $path);
+                                next;
+                        }
+                        $w->[2] = $new_st[10];
+                        on_dir_change($self, $ret, $fh, $path, $dir_delete);
+                } elsif ($new_st[10] > $old_ctime) { # regular files, etc
+                        $w->[2] = $new_st[10];
+                        push @$ret, bless(\$path,
+                                        'PublicInbox::FakeInotify::Event');
                 }
         }
-        @$events;
+        @$ret;
 }
 
 sub poll_once {
         my ($obj) = @_;
         $obj->event_step; # PublicInbox::InboxIdle::event_step
-        PublicInbox::DS::add_timer($poll_intvl, \&poll_once, $obj);
+        add_timer($poll_intvl, \&poll_once, $obj);
 }
 
 package PublicInbox::FakeInotify::Watch;
-use strict;
+use v5.12;
 
-sub cancel {
-        my ($self) = @_;
-        delete $self->[0]->{$self->[1]};
-}
+sub cancel { @{$_[0]} = () }
 
-sub name {
-        my ($self) = @_;
-        (split(/\0/, $self->[1], 2))[0];
-}
+sub name { $_[0]->[3] }
 
 package PublicInbox::FakeInotify::Event;
-use strict;
+use v5.12;
 
 sub fullname { ${$_[0]} }
+
+sub IN_DELETE { 0 }
+sub IN_MOVED_FROM { 0 }
+sub IN_DELETE_SELF { 0 }
+
+package PublicInbox::FakeInotify::GoneEvent;
+use v5.12;
+our @ISA = qw(PublicInbox::FakeInotify::Event);
+
+sub IN_DELETE { 1 }
+sub IN_MOVED_FROM { 0 }
+
+package PublicInbox::FakeInotify::SelfGoneEvent;
+use v5.12;
+our @ISA = qw(PublicInbox::FakeInotify::GoneEvent);
+
+sub IN_DELETE_SELF { 1 }
+
 1;
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 805076f0..225565f4 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -1,13 +1,13 @@
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used for generating Atom feeds for web-accessible mailing list archives.
 package PublicInbox::Feed;
 use strict;
-use warnings;
+use v5.10.1;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
-use PublicInbox::Smsg; # this loads w/o Search::Xapian
+use PublicInbox::Smsg; # this loads w/o Xapian
 
 sub generate_i {
         my ($ctx) = @_;
@@ -19,29 +19,25 @@ sub generate {
         my ($ctx) = @_;
         my $msgs = $ctx->{msgs} = recent_msgs($ctx);
         return _no_thread() unless @$msgs;
-        PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
+        PublicInbox::WwwAtomStream->response($ctx, \&generate_i);
 }
 
 sub generate_thread_atom {
         my ($ctx) = @_;
-        my $msgs = $ctx->{msgs} = $ctx->{-inbox}->over->get_thread($ctx->{mid});
+        my $msgs = $ctx->{msgs} = $ctx->{ibx}->over->get_thread($ctx->{mid});
         return _no_thread() unless @$msgs;
-        PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
+        PublicInbox::WwwAtomStream->response($ctx, \&generate_i);
 }
 
 sub generate_html_index {
         my ($ctx) = @_;
         # if the 'r' query parameter is given, it is a legacy permalink
         # which we must continue supporting:
-        my $qp = $ctx->{qp};
-        my $ibx = $ctx->{-inbox};
-        if ($qp && !$qp->{r} && $ibx->over) {
+        !$ctx->{qp}->{r} && $ctx->{ibx}->over and
                 return PublicInbox::View::index_topics($ctx);
-        }
 
-        my $env = $ctx->{env};
-        my $url = $ibx->base_url($env) . 'new.html';
-        my $qs = $env->{QUERY_STRING};
+        my $url = $ctx->{ibx}->base_url($ctx->{env}) . 'new.html';
+        my $qs = $ctx->{env}->{QUERY_STRING};
         $url .= "?$qs" if $qs ne '';
         [302, [ 'Location', $url, 'Content-Type', 'text/plain'],
                 [ "Redirecting to $url\n" ] ];
@@ -49,12 +45,15 @@ sub generate_html_index {
 
 sub new_html_i {
         my ($ctx, $eml) = @_;
-        $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+        print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
 
-        $eml and return PublicInbox::View::eml_entry($ctx, $eml);
+        if ($eml) {
+                $ctx->{smsg}->populate($eml) if !$ctx->{ibx}->{over};
+                return PublicInbox::View::eml_entry($ctx, $eml);
+        }
         my $smsg = shift @{$ctx->{msgs}} or
-                $ctx->zmore(PublicInbox::View::pagination_footer(
-                                                $ctx, './new.html'));
+                print { $ctx->zfh } PublicInbox::View::pagination_footer(
+                                                $ctx, './new.html');
         $smsg;
 }
 
@@ -67,8 +66,9 @@ sub new_html {
         }
         $ctx->{-html_tip} = '<pre>';
         $ctx->{-upfx} = '';
+        $ctx->{-spfx} = '' if $ctx->{ibx}->{coderepo};
         $ctx->{-hr} = 1;
-        PublicInbox::WwwStream::aresponse($ctx, 200, \&new_html_i);
+        PublicInbox::WwwStream::aresponse($ctx, \&new_html_i);
 }
 
 # private subs
@@ -79,12 +79,11 @@ sub _no_thread () {
 
 sub recent_msgs {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
-        my $max = $ibx->{feedmax};
+        my $ibx = $ctx->{ibx};
+        my $max = $ibx->{feedmax} // 25;
         return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over;
 
         # only for rare v1 inboxes which aren't indexed at all
-        my $qp = $ctx->{qp};
         my $hex = '[a-f0-9]';
         my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
         my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
@@ -92,7 +91,7 @@ sub recent_msgs {
 
         # revision ranges may be specified
         my $range = 'HEAD';
-        my $r = $qp->{r} if $qp;
+        my $r = $ctx->{qp}->{r};
         if ($r && ($r =~ /\A(?:$refhex\.\.)?$refhex\z/o)) {
                 $range = $r;
         }
@@ -108,13 +107,13 @@ sub recent_msgs {
         my $last;
         my $last_commit;
         local $/ = "\n";
-        my @oids;
+        my @ret;
         while (defined(my $line = <$log>)) {
                 if ($line =~ /$addmsg/o) {
                         my $add = $1;
                         next if $deleted{$add}; # optimization-only
-                        push @oids, $add;
-                        if (scalar(@oids) >= $max) {
+                        push(@ret, bless { blob => $add }, 'PublicInbox::Smsg');
+                        if (scalar(@ret) >= $max) {
                                 $last = 1;
                                 last;
                         }
@@ -136,8 +135,7 @@ sub recent_msgs {
         $last_commit and
                 $ctx->{next_page} = qq[<a\nhref="?r=$last_commit"\nrel=next>] .
                                         'next (older)</a>';
-
-        [ map { bless {blob => $_ }, 'PublicInbox::Smsg' } @oids ];
+        \@ret;
 }
 
 1;
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
new file mode 100644
index 00000000..814d6e8e
--- /dev/null
+++ b/lib/PublicInbox/Fetch.pm
@@ -0,0 +1,225 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Wrapper to "git fetch" remote public-inboxes
+package PublicInbox::Fetch;
+use v5.12;
+use parent qw(PublicInbox::IPC);
+use URI ();
+use PublicInbox::Spawn qw(popen_rd run_qx run_wait);
+use PublicInbox::Admin;
+use PublicInbox::LEI;
+use PublicInbox::LeiCurl;
+use PublicInbox::LeiMirror;
+use PublicInbox::SHA qw(sha_all);
+use File::Temp ();
+use PublicInbox::Git qw(git_exe);
+
+sub new { bless {}, __PACKAGE__ }
+
+sub remote_url ($$) {
+        my ($lei, $dir) = @_;
+        my $rn = $lei->{opt}->{'try-remote'} // [ 'origin', '_grokmirror' ];
+        for my $r (@$rn) {
+                my $cmd = [ git_exe, 'config', "remote.$r.url" ];
+                my $url = run_qx($cmd, undef, { -C => $dir, 2 => $lei->{2} });
+                next if $?;
+                $url =~ s!/*\n!!s;
+                return $url;
+        }
+        undef
+}
+
+# PSGI mount prefixes and manifest.js.gz prefixes don't always align...
+# TODO: remove, handle multi-inbox fetch
+sub deduce_epochs ($$) {
+        my ($m, $path) = @_;
+        my ($v1_ent, @v2_epochs);
+        my $path_pfx = '';
+        $path =~ s!/+\z!!;
+        do {
+                $v1_ent = $m->{$path};
+                @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
+        } while (!defined($v1_ent) && !@v2_epochs &&
+                $path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1);
+        ($path_pfx, $v1_ent ? $path : undef, @v2_epochs);
+}
+
+sub do_manifest ($$$) {
+        my ($lei, $dir, $ibx_uri) = @_;
+        my $muri = URI->new("$ibx_uri/manifest.js.gz");
+        my $ft = File::Temp->new(TEMPLATE => 'm-XXXX',
+                                UNLINK => 1, DIR => $dir, SUFFIX => '.tmp');
+        my $mf = "$dir/manifest.js.gz";
+        my $m0; # current manifest.js.gz contents
+        if (open my $fh, '<', $mf) {
+                $m0 = eval {
+                        PublicInbox::LeiMirror::decode_manifest($fh, $mf, $mf)
+                };
+                warn($@) if $@;
+        }
+        my ($bn) = ($ft->filename =~ m!/([^/]+)\z!);
+        my $curl_cmd = $lei->{curl}->for_uri($lei, $muri, qw(-R -o), $bn);
+        my $opt = { -C => $dir };
+        $opt->{$_} = $lei->{$_} for (0..2);
+        my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt);
+        if ($cerr) {
+                return [ 404, $muri ] if ($cerr >> 8) == 22; # 404 Missing
+                $lei->child_error($cerr, "@$curl_cmd failed");
+                return;
+        }
+        my $m1 = eval {
+                PublicInbox::LeiMirror::decode_manifest($ft, $ft, $muri);
+        } or return [ 404, $muri ];
+        my $mdiff = { %$m1 };
+
+        # filter out unchanged entries.  We check modified, too, since
+        # fingerprints are SHA-1, so there's a teeny chance they'll collide
+        while (my ($k, $v0) = each %{$m0 // {}}) {
+                my $cur = $m1->{$k} // next;
+                my $f0 = $v0->{fingerprint} // next;
+                my $f1 = $cur->{fingerprint} // next;
+                my $t0 = $v0->{modified} // next;
+                my $t1 = $cur->{modified} // next;
+                delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1;
+        }
+        unless (keys %$mdiff) {
+                $lei->child_error(127 << 8) if $lei->{opt}->{'exit-code'};
+                return;
+        }
+        my (undef, $v1_path, @v2_epochs) =
+                deduce_epochs($mdiff, $ibx_uri->path);
+        [ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ];
+}
+
+sub get_fingerprint2 {
+        my ($git_dir) = @_;
+        my $rd = popen_rd([git_exe, 'show-ref'], undef, { -C => $git_dir });
+        sha_all(256, $rd)->digest; # ignore show-ref errors
+}
+
+sub writable_dir ($) {
+        my ($dir) = @_;
+        return unless -d $dir && -w _;
+        my @st = stat($dir);
+        $st[2] & 0222; # any writable bits set? (in case of root)
+}
+
+sub do_fetch { # main entry point
+        my ($cls, $lei, $cd) = @_;
+        my $ibx_ver;
+        $lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+        my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver);
+        my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip);
+        if ($ibx_ver == 1) {
+                my $url = remote_url($lei, $dir) //
+                        die "E: $dir missing remote.*.url\n";
+                $ibx_uri = URI->new($url);
+        } else { # v2:
+                require PublicInbox::MultiGit;
+                $mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git');
+                @epochs = $mg->git_epochs;
+                my ($git_url, $epoch);
+                for my $nr (@epochs) { # try newest epoch, first
+                        my $edir = "$dir/git/$nr.git";
+                        if (!writable_dir($edir)) {
+                                $skip->{$nr} = 1;
+                                next;
+                        }
+                        next if defined $git_url;
+                        if (defined(my $url = remote_url($lei, $edir))) {
+                                $git_url = $url;
+                                $epoch = $nr;
+                        } else {
+                                warn "W: $edir missing remote.*.url\n";
+                                my $o = { -C => $edir };
+                                $o->{1} = $o->{2} = $lei->{2};
+                                run_wait([git_exe, qw(config -l)], undef, $o)
+                                        and $lei->child_error($?);
+                        }
+                }
+                @epochs = grep { !$skip->{$_} } @epochs if $skip;
+                $skip //= {}; # makes code below easier
+                $git_url or die "Unable to determine git URL\n";
+                my $inbox_url = $git_url;
+                $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or
+                        $inbox_url =~ s!/$epoch(?:\.git)?/?\z!! or die <<EOM;
+Unable to infer inbox URL from <$git_url>
+EOM
+                $ibx_uri = URI->new($inbox_url);
+        }
+        PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver);
+        $lei->qerr("# inbox URL: $ibx_uri/");
+        my $res = do_manifest($lei, $dir, $ibx_uri) or return;
+        my ($code, $muri, $v1_path, $v2_epochs, $ft, $mf, $m1) = @$res;
+        if ($code == 404) {
+                # any pre-manifest.js.gz instances running? Just fetch all
+                # existing ones and unconditionally try cloning the next
+                $v2_epochs = [ map { "$dir/git/$_.git" } @epochs ];
+                if (@epochs) {
+                        my $n = $epochs[-1] + 1;
+                        push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n};
+                }
+        } else {
+                $code == 200 or die "BUG unexpected code $code\n";
+        }
+        my $mculled;
+        if ($ibx_ver == 2) {
+                defined($v1_path) and warn <<EOM;
+E: got v1 `$v1_path' when expecting v2 epoch(s) in <$muri>, WTF?
+EOM
+                @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map {
+                                my ($nr) = (m!/([0-9]+)\.git\z!g);
+                                $skip->{$nr} ? () : $nr;
+                        } @$v2_epochs;
+                if ($m1 && scalar keys %$skip) {
+                        my $re = join('|', keys %$skip);
+                        my @del = grep(m!/git/$re\.git\z!, keys %$m1);
+                        delete @$m1{@del};
+                        $mculled = 1;
+                }
+        } else {
+                $git_dir[0] = $dir;
+        }
+        # n.b. this expects all epochs are from the same host
+        my $torsocks = $lei->{curl}->torsocks($lei, $muri);
+        my $fp2 = $lei->{opt}->{'exit-code'} ? [] : undef;
+        my $xit = 127;
+        for my $d (@git_dir) {
+                my $cmd;
+                my $opt = {}; # for spawn
+                if (-d $d) {
+                        $fp2->[0] = get_fingerprint2($d) if $fp2;
+                        $cmd = [ @$torsocks, git_exe, "--git-dir=$d",
+                               PublicInbox::LeiMirror::fetch_args($lei, $opt)];
+                } else {
+                        my $e_uri = $ibx_uri->clone;
+                        my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!);
+                        defined($epath) or
+                                die "BUG: $d is not an epoch to clone\n";
+                        $e_uri->path($ibx_uri->path.$epath);
+                        $cmd = [ @$torsocks,
+                                PublicInbox::LeiMirror::clone_cmd($lei, $opt),
+                                $$e_uri, $d];
+                        push @new_epoch, substr($epath, 5, -4) + 0;
+                        $xit = 0;
+                }
+                my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt);
+                # do not bail on clone failure if we didn't have a manifest
+                if ($cerr && ($code == 200 || -d $d)) {
+                        $lei->child_error($cerr, "@$cmd failed");
+                        return;
+                }
+                if ($fp2 && $xit) {
+                        $fp2->[1] = get_fingerprint2($d);
+                        $xit = 0 if $fp2->[0] ne $fp2->[1];
+                }
+        }
+        for my $i (@new_epoch) { $mg->epoch_cfg_set($i) }
+        if ($ft) {
+                PublicInbox::LeiMirror::dump_manifest($m1 => $ft) if $mculled;
+                PublicInbox::LeiMirror::ft_rename($ft, $mf, 0666);
+        }
+        $lei->child_error($xit << 8) if $fp2 && $xit;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
index d54570fd..f6355e1b 100644
--- a/lib/PublicInbox/Filter/Base.pm
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # base class for creating per-list or per-project filters
diff --git a/lib/PublicInbox/Filter/Gmane.pm b/lib/PublicInbox/Filter/Gmane.pm
index c326faca..a18b77d2 100644
--- a/lib/PublicInbox/Filter/Gmane.pm
+++ b/lib/PublicInbox/Filter/Gmane.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Filter for importing some archives from gmane
diff --git a/lib/PublicInbox/Filter/Mirror.pm b/lib/PublicInbox/Filter/Mirror.pm
index 9f6dd342..fe915fc3 100644
--- a/lib/PublicInbox/Filter/Mirror.pm
+++ b/lib/PublicInbox/Filter/Mirror.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Dumb filter for blindly accepting everything
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
index 06e4ea75..57ebbe78 100644
--- a/lib/PublicInbox/Filter/RubyLang.pm
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -1,11 +1,10 @@
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Filter for lists.ruby-lang.org trailers
 package PublicInbox::Filter::RubyLang;
-use base qw(PublicInbox::Filter::Base);
-use strict;
-use warnings;
+use v5.10.1;
+use parent qw(PublicInbox::Filter::Base);
 use PublicInbox::MID qw(mids);
 
 my $l1 = qr/Unsubscribe:\s
@@ -16,7 +15,7 @@ sub new {
         my ($class, %opts) = @_;
         my $altid = delete $opts{-altid};
         my $self = $class->SUPER::new(%opts);
-        my $ibx = $self->{-inbox};
+        my $ibx = $self->{ibx};
         # altid = serial:ruby-core:file=msgmap.sqlite3
         if (!$altid && $ibx && $ibx->{altid}) {
                 $altid ||= $ibx->{altid}->[0];
@@ -56,16 +55,22 @@ sub scrub {
                 my $hdr = $mime->header_obj;
                 my $mids = mids($hdr);
                 return $self->REJECT('Message-ID missing') unless (@$mids);
-                my @v = $hdr->header_raw('X-Mail-Count');
                 my $n;
-                foreach (@v) {
-                        /\A\s*([0-9]+)\s*\z/ or next;
-                        $n = $1;
-                        last;
-                }
-                unless (defined $n) {
-                        return $self->REJECT('X-Mail-Count not numeric');
+                my @v = $hdr->header_raw('X-Mail-Count'); # old host only
+                if (@v) {
+                        for (@v) {
+                                /\A\s*([0-9]+)\s*\z/ or next;
+                                $n = $1;
+                                last;
+                        }
+                } else { # new host: nue.mailmanlists.eu
+                        for ($hdr->header_str('Subject')) {
+                                /\A\[ruby-[^:]+:([0-9]+)\]/ or next;
+                                $n = $1;
+                                last;
+                        }
                 }
+                $n // return $self->REJECT('could not get count not numeric');
                 foreach my $mid (@$mids) {
                         my $r = $altid->mm_alt->mid_set($n, $mid);
                         next if $r == 0;
diff --git a/lib/PublicInbox/Filter/SubjectTag.pm b/lib/PublicInbox/Filter/SubjectTag.pm
index aca6688b..ecedf666 100644
--- a/lib/PublicInbox/Filter/SubjectTag.pm
+++ b/lib/PublicInbox/Filter/SubjectTag.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Filter for various [tags] in subjects
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 2c73738d..5b3c0277 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Filter for vger.kernel.org list trailer
@@ -24,7 +24,7 @@ sub scrub {
         # the vger appender seems to only work on the raw string,
         # so in multipart (e.g. GPG-signed) messages, the list trailer
         # becomes invisible to MIME-aware email clients.
-        if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
+        if ($s =~ s/$l0\n$l1\n$l2\n$l3\n(?:$l4\n)?\n*\z//os) {
                 $mime = PublicInbox::Eml->new(\$s);
         }
         $self->ACCEPT($mime);
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 7983c841..78392990 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -1,87 +1,132 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # backend for a git-cat-file-workalike based on libgit2,
 # other libgit2 stuff may go here, too.
 package PublicInbox::Gcf2;
-use strict;
-use PublicInbox::Spawn qw(which popen_rd);
-use Fcntl qw(LOCK_EX);
+use v5.12;
+use PublicInbox::Spawn qw(which run_qx); # may set PERL_INLINE_DIRECTORY
+use Fcntl qw(SEEK_SET);
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
 use IO::Handle; # autoflush
-my (%CFG, $c_src, $lockfh);
+use PublicInbox::Git qw($ck_unlinked_packs);
+use PublicInbox::Lock;
+use autodie qw(close open seek truncate);
+
 BEGIN {
+        my (%CFG, $c_src);
         # PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
-        # to ~/.cache/public-inbox/inline-c if it exists
+        # to ~/.cache/public-inbox/inline-c if it exists and Inline::C works
         my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
                 die 'PERL_INLINE_DIRECTORY not defined';
-        my $f = "$inline_dir/.public-inbox.lock";
-        open $lockfh, '>', $f or die "failed to open $f: $!\n";
-        my $pc = which($ENV{PKG_CONFIG} // 'pkg-config');
-        my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
-        my $rdr = {};
-        open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
-        for my $x (qw(libgit2)) {
-                my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
-                $l = do { local $/; <$l> };
-                next if $?;
-                my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
-                $c = do { local $/; <$c> };
-                next if $?;
 
-                # note: we name C source files .h to prevent
-                # ExtUtils::MakeMaker from automatically trying to
-                # build them.
-                my $f = "$dir/gcf2_$x.h";
-                if (open(my $fh, '<', $f)) {
-                        chomp($l, $c);
-                        local $/;
-                        $c_src = <$fh>;
-                        $CFG{LIBS} = $l;
-                        $CFG{CCFLAGSEX} = $c;
-                        last;
-                } else {
-                        die "E: $f: $!\n";
-                }
+        # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+        my $lk = PublicInbox::Lock->new("$inline_dir/.public-inbox.lock");
+        my $fh = $lk->lock_acquire;
+
+        my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
+                die "pkg-config missing for libgit2";
+        my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
+        my $vals = {};
+        my $rdr = { 2 => \(my $err) };
+        my @switches = qw(modversion cflags libs);
+        for my $k (@switches) {
+                chomp(my $val = run_qx([$pc, "--$k", 'libgit2'], undef, $rdr));
+                die "E: libgit2 not installed: $err\n" if $?;
+                $vals->{$k} = $val;
         }
-        die "E: libgit2 not installed\n" unless $c_src;
+        my $f = "$dir/gcf2_libgit2.h";
+        $c_src = PublicInbox::IO::try_cat $f or die "cat $f: $!";
+        # append pkg-config results to the source to ensure Inline::C
+        # can rebuild if there's changes (it doesn't seem to detect
+        # $CFG{CCFLAGSEX} nor $CFG{CPPFLAGS} changes)
+        $c_src .= "/* $pc --$_ libgit2 => $vals->{$_} */\n" for @switches;
+        open my $oldout, '>&', \*STDOUT;
+        open my $olderr, '>&', \*STDERR;
+        open STDOUT, '>&', $fh;
+        open STDERR, '>&', $fh;
+        STDERR->autoflush(1);
+        STDOUT->autoflush(1);
+        $CFG{CCFLAGSEX} = $vals->{cflags};
+        $CFG{LIBS} = $vals->{libs};
 
-        # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
-        flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+        # we use Capitalized and ALLCAPS for compatibility with old Inline::C
+        CORE::eval <<'EOM';
+use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();];
+use Inline C => $c_src, BUILD_NOISY => 1;
+EOM
+        $err = $@;
+        open(STDERR, '>&', $olderr);
+        open(STDOUT, '>&', $oldout);
+        if ($err) {
+                seek($fh, 0, SEEK_SET);
+                my @msg = <$fh>;
+                truncate($fh, 0);
+                die "Inline::C Gcf2 build failed:\n", $err, "\n", @msg;
+        }
 }
 
-# we use Capitalized and ALLCAPS for compatibility with old Inline::C
-use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
-use Inline C => $c_src;
-undef $c_src;
-undef %CFG;
-undef $lockfh;
+sub add_alt ($$) {
+        my ($gcf2, $git_dir) = @_;
+        my $objdir = PublicInbox::Git->new($git_dir)->git_path('objects');
 
-# Usage: $^X -MPublicInbox::Gcf2 -e 'PublicInbox::Gcf2::loop()'
+        # libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
+        # in Debian) doesn't handle relative epochs properly when nested
+        # multiple levels.  Add all the absolute paths to workaround it,
+        # since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
+        # paths to reference $V2INBOX_DIR/all.git/objects and
+        # $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
+        # to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
+        #
+        # See https://bugs.debian.org/975607
+        if (my $s = PublicInbox::IO::try_cat("$objdir/info/alternates")) {
+                $gcf2->add_alternate($_) for ($s =~ m!^(/[^\n]+)\n!gms);
+        }
+        $gcf2->add_alternate($objdir);
+        1;
+}
+
+# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop [EXPIRE-TIMEOUT]
 # (see lib/PublicInbox/Gcf2Client.pm)
-sub loop {
+sub loop (;$) {
+        my $exp = $_[0] || $ARGV[0] || 60; # seconds
         my $gcf2 = new();
+        my (%seen, $check_at);
         STDERR->autoflush(1);
         STDOUT->autoflush(1);
+        my $pid = $$;
 
         while (<STDIN>) {
                 chomp;
                 my ($oid, $git_dir) = split(/ /, $_, 2);
-                $gcf2->add_alternate("$git_dir/objects");
+                $seen{$git_dir} //= add_alt($gcf2, $git_dir);
                 if (!$gcf2->cat_oid(1, $oid)) {
                         # retry once if missing.  We only get unabbreviated OIDs
                         # from SQLite or Xapian DBs, here, so malicious clients
                         # can't trigger excessive retries:
-                        warn "I: $$ $oid missing, retrying in $git_dir\n";
+                        warn "# $$ $oid missing, retrying in $git_dir\n";
 
                         $gcf2 = new();
-                        $gcf2->add_alternate("$git_dir/objects");
+                        %seen = ($git_dir => add_alt($gcf2, $git_dir));
+                        $check_at = clock_gettime(CLOCK_MONOTONIC) + $exp;
 
                         if ($gcf2->cat_oid(1, $oid)) {
-                                warn "I: $$ $oid found after retry\n";
+                                warn "# $$ $oid found after retry\n";
                         } else {
                                 warn "W: $$ $oid missing after retry\n";
                                 print "$oid missing\n"; # mimic git-cat-file
                         }
+                } else { # check expiry to deal with deleted pack files
+                        my $now = clock_gettime(CLOCK_MONOTONIC);
+                        $check_at //= $now + $exp;
+                        if ($now > $check_at) {
+                                undef $check_at;
+                                if (!$ck_unlinked_packs ||
+                                                $ck_unlinked_packs->($pid)) {
+                                        $gcf2 = new();
+                                        %seen = ();
+                                }
+                        }
                 }
         }
 }
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index 42ff1bf3..07ff7dcb 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -1,51 +1,48 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # connects public-inbox processes to PublicInbox::Gcf2::loop()
 package PublicInbox::Gcf2Client;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::DS);
 use PublicInbox::Git;
-use PublicInbox::Spawn qw(popen_rd);
-use IO::Handle ();
-use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLOUT);
-# fields:
-#        async_cat => GitAsyncCat ref (read-only pipe)
-#        sock => writable pipe to Gcf2::loop
-
-sub new { bless($_[0] // {}, __PACKAGE__) }
+use PublicInbox::Gcf2; # fails if Inline::C or libgit2-dev isn't available
+use PublicInbox::Spawn qw(spawn);
+use Socket qw(AF_UNIX SOCK_STREAM);
+use PublicInbox::Syscall qw(EPOLLIN);
+use PublicInbox::IO;
+use autodie qw(socketpair);
 
-sub gcf2c_begin ($) {
-        my ($self) = @_;
+# fields:
+#        sock => socket to Gcf2::loop
+# The rest of these fields are compatible with what PublicInbox::Git
+# uses code-sharing
+#        pid => PID of Gcf2::loop process
+#        pid.owner => process which spawned {pid}
+#        in => same as {sock}, for compatibility with PublicInbox::Git
+#        inflight => array (see PublicInbox::Git)
+sub new  {
+        my ($opt) = @_;
+        my $self = bless {}, __PACKAGE__;
         # ensure the child process has the same @INC we do:
         my $env = { PERL5LIB => join(':', @INC) };
-        my ($out_r, $out_w);
-        pipe($out_r, $out_w) or die "pipe failed: $!";
-        my $rdr = { 0 => $out_r, 2 => $self->{2} };
-        my $cmd = [$^X, qw[-MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop()]];
-        @$self{qw(in pid)} = popen_rd($cmd, $env, $rdr);
-        fcntl($out_w, 1031, 4096) if $^O eq 'linux'; # 1031: F_SETPIPE_SZ
-        $out_w->autoflush(1);
-        $out_w->blocking(0);
-        $self->SUPER::new($out_w, 0); # EPOLL_CTL_ADD (a bit wasteful :x)
+        socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, 0);
+        $s1->blocking(0);
+        $opt->{0} = $opt->{1} = $s2;
+        my $cmd = [$^X, $^W ? ('-w') : (),
+                        qw[-MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop]];
         $self->{inflight} = [];
+        PublicInbox::IO::attach_pid($s1, spawn($cmd, $env, $opt),
+                        \&PublicInbox::Git::gcf_drain, $self->{inflight});
+        $self->{epwatch} = \undef; # for Git->cleanup
+        $self->SUPER::new($s1, EPOLLIN);
 }
 
-sub fail {
-        my $self = shift;
-        $self->close; # PublicInbox::DS::close
-        PublicInbox::Git::fail($self, @_);
-}
-
-sub cat_async ($$$;$) {
+sub gcf2_async ($$$;$) {
         my ($self, $req, $cb, $arg) = @_;
-        my $inflight = $self->{inflight} // gcf2c_begin($self);
-
-        # rare, I hope:
-        cat_async_step($self, $inflight) if $self->{wbuf};
-
-        $self->write(\"$req\n") or $self->fail("gcf2c write: $!");
-        push @$inflight, $req, $cb, $arg;
+        my $inflight = $self->gcf_inflight or return;
+        PublicInbox::Git::write_all($self, $req, \&cat_async_step, $inflight);
+        push @$inflight, \$req, $cb, $arg; # ref prevents Git.pm retries
 }
 
 # ensure PublicInbox::Git::cat_async_step never calls cat_async_retry
@@ -53,10 +50,10 @@ sub alternates_changed {}
 
 no warnings 'once';
 
-# this is the write-only end of a pipe, DS->EventLoop will call this
-*event_step = \&PublicInbox::DS::flush_write;
-
-# used by GitAsyncCat
-*cat_async_step = \&PublicInbox::Git::cat_async_step;
+*gcf_inflight = \&PublicInbox::Git::gcf_inflight; # for event_step
+*cat_async_step = \&PublicInbox::Git::cat_async_step; # for event_step
+*event_step = \&PublicInbox::Git::event_step;
+*fail = \&PublicInbox::Git::fail;
+*DESTROY = \&PublicInbox::Git::DESTROY;
 
 1;
diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm
deleted file mode 100644
index 988bc63f..00000000
--- a/lib/PublicInbox/GetlineBody.pm
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# Wrap a pipe or file for PSGI streaming response bodies and calls the
-# end callback when the object goes out-of-scope.
-# This depends on rpipe being _blocking_ on getline.
-#
-# This is only used by generic PSGI servers and not public-inbox-httpd
-package PublicInbox::GetlineBody;
-use strict;
-use warnings;
-
-sub new {
-        my ($class, $rpipe, $end, $end_arg, $buf, $filter) = @_;
-        bless {
-                rpipe => $rpipe,
-                end => $end,
-                end_arg => $end_arg,
-                initial_buf => $buf,
-                filter => $filter,
-        }, $class;
-}
-
-# close should always be called after getline returns undef,
-# but a client aborting a connection can ruin our day; so lets
-# hope our underlying PSGI server does not leak references, here.
-sub DESTROY { $_[0]->close }
-
-sub getline {
-        my ($self) = @_;
-        my $rpipe = $self->{rpipe} or return; # EOF was set on previous call
-        my $buf = delete($self->{initial_buf}) // $rpipe->getline;
-        delete($self->{rpipe}) unless defined $buf; # set EOF for next call
-        if (my $filter = $self->{filter}) {
-                $buf = $filter->translate($buf);
-        }
-        $buf;
-}
-
-sub close {
-        my ($self) = @_;
-        my ($end, $end_arg) = delete @$self{qw(end end_arg)};
-        $end->($end_arg) if $end;
-}
-
-1;
diff --git a/lib/PublicInbox/GetlineResponse.pm b/lib/PublicInbox/GetlineResponse.pm
new file mode 100644
index 00000000..290cce74
--- /dev/null
+++ b/lib/PublicInbox/GetlineResponse.pm
@@ -0,0 +1,40 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# For generic PSGI servers (not public-inbox-httpd/netd) which assumes their
+# getline response bodies can be backpressure-aware for slow clients
+# This depends on rpipe being _blocking_ on getline.
+package PublicInbox::GetlineResponse;
+use v5.12;
+
+sub response {
+        my ($qsp) = @_;
+        my ($res, $rbuf);
+        do { # read header synchronously
+                sysread($qsp->{rpipe}, $rbuf, 65536);
+                $res = $qsp->parse_hdr_done($rbuf); # fills $bref
+        } until defined($res);
+        my ($wcb, $filter) = $qsp->yield_pass(undef, $res) or return;
+        my $self = $res->[2] = bless {
+                qsp => $qsp,
+                filter => $filter,
+        }, __PACKAGE__;
+        my ($bref) = @{delete $qsp->{yield_parse_hdr}};
+        $self->{rbuf} = $$bref if $$bref ne '';
+        $wcb->($res);
+}
+
+sub getline {
+        my ($self) = @_;
+        my $rpipe = $self->{qsp}->{rpipe} // do {
+                delete($self->{qsp})->finish;
+                return; # EOF was set on previous call
+        };
+        my $buf = delete($self->{rbuf}) // $rpipe->getline;
+        $buf // delete($self->{qsp}->{rpipe}); # set EOF for next call
+        $self->{filter} ? $self->{filter}->translate($buf) : $buf;
+}
+
+sub close {}
+
+1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 2323cecc..a9a821ad 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: GPLv2 or later <https://www.gnu.org/licenses/gpl-2.0.txt>
 #
 # Used to read files from a git repository without excessive forking.
@@ -9,23 +9,35 @@
 package PublicInbox::Git;
 use strict;
 use v5.10.1;
-use parent qw(Exporter);
+use parent qw(Exporter PublicInbox::DS);
+use PublicInbox::DS qw(now);
+use autodie qw(socketpair read);
 use POSIX ();
-use IO::Handle; # ->autoflush
-use Errno qw(EINTR);
+use Socket qw(AF_UNIX SOCK_STREAM);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+use Errno qw(EAGAIN);
 use File::Glob qw(bsd_glob GLOB_NOSORT);
-use Time::HiRes qw(stat);
-use PublicInbox::Spawn qw(popen_rd);
+use File::Spec ();
+use PublicInbox::Spawn qw(spawn popen_rd run_qx which);
+use PublicInbox::IO qw(read_all try_cat);
 use PublicInbox::Tmpfile;
-use Carp qw(croak);
-our @EXPORT_OK = qw(git_unquote git_quote);
-our $PIPE_BUFSIZ = 65536; # Linux default
+use Carp qw(croak carp);
+use PublicInbox::SHA qw(sha_all);
+our %HEXLEN2SHA = (40 => 1, 64 => 256);
+our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64);
+our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN
+                        $ck_unlinked_packs git_exe);
 our $in_cleanup;
+our $async_warn; # true in read-only daemons
 
-use constant MAX_INFLIGHT =>
-        (($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3)
-        /
-        65; # SHA-256 hex size + "\n" in preparation for git using non-SHA1
+# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
+my @MODIFIED_DATE = qw[for-each-ref --sort=-committerdate
+                        --format=%(committerdate:raw) --count=1];
+
+use constant {
+        MAX_INFLIGHT => 18, # arbitrary, formerly based on PIPE_BUF
+        BATCH_CMD_VER => v2.36.0, # git 2.36+
+};
 
 my %GIT_ESC = (
         a => "\a",
@@ -40,19 +52,44 @@ my %GIT_ESC = (
 );
 my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
 
+my $EXE_ST = ''; # pack('dd', st_dev, st_ino); # no `q' in some 32-bit builds
+my ($GIT_EXE, $GIT_VER);
+
+sub git_exe () {
+        my $now = now;
+        state $next_check = $now - 10;
+        return $GIT_EXE if $now < $next_check;
+        $next_check = $now + 10;
+        $GIT_EXE = which('git') // die "git not found in $ENV{PATH}";
+        my @st = stat(_) or die "stat($GIT_EXE): $!"; # can't do HiRes w/ _
+        my $st = pack('dd', $st[0], $st[1]);
+        if ($st ne $EXE_ST) {
+                my $v = run_qx([ $GIT_EXE, '--version' ]);
+                die "$GIT_EXE --version: \$?=$?" if $?;
+                $v =~ /\b([0-9]+(?:\.[0-9]+){2})/ or die
+                        "$GIT_EXE --version output: $v # unparseable";
+                $GIT_VER = eval("v$1") // die "BUG: bad vstring: $1 ($v)";
+                $EXE_ST = $st;
+        }
+        $GIT_EXE;
+}
+
+sub git_version () {
+        git_exe;
+        $GIT_VER;
+}
 
 # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
 sub git_unquote ($) {
         return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
         $_[0] = $1;
-        $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
-        $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+        $_[0] =~ s!\\([\\"abfnrtv]|[0-3][0-7]{2})!$GIT_ESC{$1}//chr(oct($1))!ge;
         $_[0];
 }
 
 sub git_quote ($) {
         if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
-                      '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+                      '\\'.($ESC_GIT{$1}||sprintf("%03o",ord($1)))/egs) {
                 return qq{"$_[0]"};
         }
         $_[0];
@@ -60,230 +97,302 @@ sub git_quote ($) {
 
 sub new {
         my ($class, $git_dir) = @_;
+        $git_dir .= '/';
+        $git_dir =~ tr!/!/!s;
+        chop $git_dir;
         # may contain {-tmp} field for File::Temp::Dir
-        bless { git_dir => $git_dir, alt_st => '', -git_path => {} }, $class
+        my %dedupe = ($git_dir => undef);
+        bless { git_dir => (keys %dedupe)[0] }, $class
 }
 
 sub git_path ($$) {
         my ($self, $path) = @_;
-        $self->{-git_path}->{$path} ||= do {
-                local $/ = "\n";
-                chomp(my $str = $self->qx(qw(rev-parse --git-path), $path));
-
-                # git prior to 2.5.0 did not understand --git-path
-                if ($str eq "--git-path\n$path") {
-                        $str = "$self->{git_dir}/$path";
+        $self->{-git_path}->{$path} //= do {
+                my $d = "$self->{git_dir}/$path";
+                if (-e $d) {
+                        $d;
+                } else {
+                        local $/ = "\n";
+                        my $rdr = { 2 => \my $err };
+                        my $s = $self->qx([qw(rev-parse --git-path), $path],
+                                        undef, $rdr);
+                        chomp $s;
+
+                        # git prior to 2.5.0 did not understand --git-path
+                        $s eq "--git-path\n$path" ? $d : $s;
                 }
-                $str;
         };
 }
 
 sub alternates_changed {
         my ($self) = @_;
         my $alt = git_path($self, 'objects/info/alternates');
+        use Time::HiRes qw(stat);
         my @st = stat($alt) or return 0;
 
         # can't rely on 'q' on some 32-bit builds, but `d' works
         my $st = pack('dd', $st[10], $st[7]); # 10: ctime, 7: size
-        return 0 if $self->{alt_st} eq $st;
+        return 0 if ($self->{alt_st} // '') eq $st;
         $self->{alt_st} = $st; # always a true value
 }
 
+sub object_format {
+        $_[0]->{object_format} //= do {
+                my $fmt = $_[0]->qx(qw(config extensions.objectformat));
+                $fmt eq "sha256\n" ? \'sha256' : \undef;
+        }
+}
+
 sub last_check_err {
         my ($self) = @_;
-        my $fh = $self->{err_c} or return;
-        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
-        defined(sysread($fh, my $buf, -s $fh)) or
-                        fail($self, "sysread failed: $!");
+        my $fh = $self->{err_c} or return '';
+        sysseek($fh, 0, 0) or $self->fail("sysseek: $!");
+        my $size = -s $fh or return '';
+        sysread($fh, my $buf, $size) // $self->fail("sysread: $!");
+        truncate($fh, 0) or $self->fail("truncate: $!");
         $buf;
 }
 
-sub _bidi_pipe {
-        my ($self, $batch, $in, $out, $pid, $err) = @_;
-        if ($self->{$pid}) {
-                if (defined $err) { # "err_c"
-                        my $fh = $self->{$err};
-                        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
-                        truncate($fh, 0) or fail($self, "truncate failed: $!");
-                }
-                return;
+sub gcf_drain { # awaitpid cb
+        my ($pid, $inflight, $bc) = @_;
+        while (@$inflight) {
+                my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
+                $req = $$req if ref($req);
+                $bc and $req =~ s/\A(?:contents|info) //;
+                $req =~ s/ .*//; # drop git_dir for Gcf2Client
+                eval { $cb->(undef, $req, undef, undef, $arg) };
+                warn "E: (in abort) $req: $@" if $@;
         }
-        my ($out_r, $out_w);
-        pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
-        my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
-                        qw(-c core.abbrev=40 cat-file), $batch);
-        my $redir = { 0 => $out_r };
-        if ($err) {
-                my $id = "git.$self->{git_dir}$batch.err";
-                my $fh = tmpfile($id) or fail($self, "tmpfile($id): $!");
-                $self->{$err} = $fh;
-                $redir->{2} = $fh;
-        }
-        my ($in_r, $p) = popen_rd(\@cmd, undef, $redir);
-        $self->{$pid} = $p;
-        $out_w->autoflush(1);
-        if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ
-                fcntl($out_w, 1031, 4096);
-                fcntl($in_r, 1031, 4096) if $batch eq '--batch-check';
-        }
-        $self->{$out} = $out_w;
-        $self->{$in} = $in_r;
-}
-
-sub my_read ($$$) {
-        my ($fh, $rbuf, $len) = @_;
-        my $left = $len - length($$rbuf);
-        my $r;
-        while ($left > 0) {
-                $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
-                if ($r) {
-                        $left -= $r;
-                } else {
-                        next if (!defined($r) && $! == EINTR);
-                        return $r;
-                }
-        }
-        \substr($$rbuf, 0, $len, '');
 }
 
-sub my_readline ($$) {
-        my ($fh, $rbuf) = @_;
-        while (1) {
-                if ((my $n = index($$rbuf, "\n")) >= 0) {
-                        return substr($$rbuf, 0, $n + 1, '');
-                }
-                my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
-                next if $r || (!defined($r) && $! == EINTR);
-                return defined($r) ? '' : undef; # EOF or error
+sub _sock_cmd {
+        my ($self, $batch, $err_c) = @_;
+        $self->{sock} and Carp::confess('BUG: {sock} exists');
+        socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, 0);
+        $s1->blocking(0);
+        my $opt = { pgid => 0, 0 => $s2, 1 => $s2 };
+        my $gd = $self->{git_dir};
+        if ($gd =~ s!/([^/]+/[^/]+)\z!/!) {
+                $opt->{-C} = $gd;
+                $gd = $1;
         }
+
+        # git 2.31.0+ supports -c core.abbrev=no, don't bother with
+        # core.abbrev=64 since not many releases had SHA-256 prior to 2.31
+        my $abbr = git_version lt v2.31.0 ? 40 : 'no';
+        my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
+                        'cat-file', "--$batch");
+        if ($err_c) {
+                my $id = "git.$self->{git_dir}.$batch.err";
+                $self->{err_c} = $opt->{2} = tmpfile($id, undef, 1) or
+                                                $self->fail("tmpfile($id): $!");
+        }
+        my $inflight = []; # TODO consider moving this into the IO object
+        my $pid = spawn(\@cmd, undef, $opt);
+        $self->{sock} = PublicInbox::IO::attach_pid($s1, $pid,
+                                \&gcf_drain, $inflight, $self->{-bc});
+        $self->{inflight} = $inflight;
 }
 
-sub cat_async_retry ($$$$$) {
-        my ($self, $inflight, $req, $cb, $arg) = @_;
+sub cat_async_retry ($$) {
+        my ($self, $old_inflight) = @_;
 
         # {inflight} may be non-existent, but if it isn't we delete it
         # here to prevent cleanup() from waiting:
-        delete $self->{inflight};
-        cleanup($self);
+        my ($sock, $epwatch) = delete @$self{qw(sock epwatch inflight)};
+        $self->SUPER::close if $epwatch;
+        my $new_inflight = batch_prepare($self);
+
+        while (my ($oid, $cb, $arg) = splice(@$old_inflight, 0, 3)) {
+                write_all($self, $oid."\n", \&cat_async_step, $new_inflight);
+                $oid = \$oid if !@$new_inflight; # to indicate oid retried
+                push @$new_inflight, $oid, $cb, $arg;
+        }
+        $sock->close if $sock; # only safe once old_inflight is empty
+        cat_async_step($self, $new_inflight); # take one step
+}
 
-        $self->{inflight} = $inflight;
-        batch_prepare($self);
-        my $buf = "$req\n";
-        for (my $i = 0; $i < @$inflight; $i += 3) {
-                $buf .= "$inflight->[$i]\n";
+sub gcf_inflight ($) {
+        my ($self) = @_;
+        # FIXME: the first {sock} check can succeed but Perl can complain
+        # about an undefined value.  Not sure why or how this happens but
+        # t/imapd.t can complain about it, sometimes.
+        if ($self->{sock}) {
+                if (eval { $self->{sock}->can_reap }) {
+                        return $self->{inflight};
+                } elsif ($@) {
+                        no warnings 'uninitialized';
+                        warn "E: $self sock=$self->{sock}: can_reap failed: ".
+                                "$@ (continuing...)";
+                }
+                delete @$self{qw(sock inflight)};
+        } else {
+                $self->close;
         }
-        print { $self->{out} } $buf or fail($self, "write error: $!");
-        unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried
+        undef;
+}
 
-        cat_async_step($self, $inflight); # take one step
+# returns true if prefetch is successful
+sub async_prefetch {
+        my ($self, $oid, $cb, $arg) = @_;
+        my $inflight = gcf_inflight($self) or return;
+        return if @$inflight;
+        substr($oid, 0, 0) = 'contents ' if $self->{-bc};
+        write_all($self, "$oid\n", \&cat_async_step, $inflight);
+        push(@$inflight, $oid, $cb, $arg);
 }
 
 sub cat_async_step ($$) {
         my ($self, $inflight) = @_;
-        die 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
-        my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
-        my $rbuf = delete($self->{cat_rbuf}) // \(my $new = '');
+        croak 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
+        my ($req, $cb, $arg) = @$inflight[0, 1, 2];
         my ($bref, $oid, $type, $size);
-        my $head = my_readline($self->{in}, $rbuf);
+        my $head = $self->{sock}->my_readline;
+        my $cmd = ref($req) ? $$req : $req;
         # ->fail may be called via Gcf2Client.pm
+        my $info = $self->{-bc} && substr($cmd, 0, 5) eq 'info ';
         if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
                 ($oid, $type, $size) = ($1, $2, $3 + 0);
-                $bref = my_read($self->{in}, $rbuf, $size + 1) or
-                        $self->fail(defined($bref) ? 'read EOF' : "read: $!");
-                chop($$bref) eq "\n" or $self->fail('LF missing after blob');
+                unless ($info) { # --batch-command
+                        $bref = $self->{sock}->my_bufread($size + 1) or
+                                $self->fail(defined($bref) ?
+                                                'read EOF' : "read: $!");
+                        chop($$bref) eq "\n" or
+                                        $self->fail('LF missing after blob');
+                }
+        } elsif ($info && $head =~ / (missing|ambiguous)\n/) {
+                $type = $1;
+                $oid = substr($cmd, 5); # remove 'info '
         } elsif ($head =~ s/ missing\n//s) {
                 $oid = $head;
                 # ref($req) indicates it's already been retried
                 # -gcf2 retries internally, so it never hits this path:
                 if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
-                        return cat_async_retry($self, $inflight,
-                                                $req, $cb, $arg);
+                        return cat_async_retry($self, $inflight);
                 }
                 $type = 'missing';
-                $oid = ref($req) ? $$req : $req if $oid eq '';
+                if ($oid eq '') {
+                        $oid = $cmd;
+                        $oid =~ s/\A(?:contents|info) // if $self->{-bc};
+                }
         } else {
-                $self->fail("Unexpected result from async git cat-file: $head");
+                my $err = $! ? " ($!)" : '';
+                $self->fail("bad result from async cat-file: $head$err");
         }
+        splice(@$inflight, 0, 3); # don't retry $cb on ->fail
         eval { $cb->($bref, $oid, $type, $size, $arg) };
-        $self->{cat_rbuf} = $rbuf if $$rbuf ne '';
-        warn "E: $oid: $@\n" if $@;
+        async_err($self, $req, $oid, $@, $info ? 'check' : 'cat') if $@;
 }
 
 sub cat_async_wait ($) {
         my ($self) = @_;
-        my $inflight = delete $self->{inflight} or return;
-        while (scalar(@$inflight)) {
-                cat_async_step($self, $inflight);
-        }
+        my $inflight = gcf_inflight($self) or return;
+        cat_async_step($self, $inflight) while (scalar(@$inflight));
 }
 
 sub batch_prepare ($) {
-        _bidi_pipe($_[0], qw(--batch in out pid));
+        my ($self) = @_;
+        if (git_version ge BATCH_CMD_VER) {
+                $self->{-bc} = 1;
+                _sock_cmd($self, 'batch-command', 1);
+        } else {
+                _sock_cmd($self, 'batch');
+        }
 }
 
 sub _cat_file_cb {
-        my ($bref, undef, undef, $size, $result) = @_;
-        @$result = ($bref, $size);
+        my ($bref, $oid, $type, $size, $result) = @_;
+        @$result = ($bref, $oid, $type, $size);
 }
 
 sub cat_file {
-        my ($self, $oid, $sizeref) = @_;
+        my ($self, $oid) = @_;
         my $result = [];
         cat_async($self, $oid, \&_cat_file_cb, $result);
         cat_async_wait($self);
-        $$sizeref = $result->[1] if $sizeref;
-        $result->[0];
+        wantarray ? @$result : $result->[0];
 }
 
 sub check_async_step ($$) {
-        my ($self, $inflight_c) = @_;
-        die 'BUG: inflight empty or odd' if scalar(@$inflight_c) < 3;
-        my ($req, $cb, $arg) = splice(@$inflight_c, 0, 3);
-        my $rbuf = delete($self->{rbuf_c}) // \(my $new = '');
-        chomp(my $line = my_readline($self->{in_c}, $rbuf));
+        my ($ck, $inflight) = @_;
+        croak 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
+        my ($req, $cb, $arg) = @$inflight[0, 1, 2];
+        chomp(my $line = $ck->{sock}->my_readline);
         my ($hex, $type, $size) = split(/ /, $line);
 
-        # Future versions of git.git may have type=ambiguous, but for now,
-        # we must handle 'dangling' below (and maybe some other oddball
-        # stuff):
+        # git <2.21 would show `dangling' (2.21+ shows `ambiguous')
         # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
-        if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
-                my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
-                fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret;
+        if ($hex eq 'dangling') {
+                my $ret = $ck->{sock}->my_bufread($type + 1);
+                $ck->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret;
         }
-        eval { $cb->($hex, $type, $size, $arg, $self) };
-        warn "E: check($req) $@\n" if $@;
-        $self->{rbuf_c} = $rbuf if $$rbuf ne '';
+        splice(@$inflight, 0, 3); # don't retry $cb on ->fail
+        eval { $cb->(undef, $hex, $type, $size, $arg) };
+        async_err($ck, $req, $hex, $@, 'check') if $@;
 }
 
 sub check_async_wait ($) {
         my ($self) = @_;
-        my $inflight_c = delete $self->{inflight_c} or return;
-        while (scalar(@$inflight_c)) {
-                check_async_step($self, $inflight_c);
-        }
+        return cat_async_wait($self) if $self->{-bc};
+        my $ck = $self->{ck} or return;
+        my $inflight = gcf_inflight($ck) or return;
+        check_async_step($ck, $inflight) while (scalar(@$inflight));
+}
+
+# git <2.36
+sub ck {
+        $_[0]->{ck} //= bless { git_dir => $_[0]->{git_dir} },
+                                'PublicInbox::GitCheck';
 }
 
 sub check_async_begin ($) {
         my ($self) = @_;
         cleanup($self) if alternates_changed($self);
-        _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
-        die 'BUG: already in async check' if $self->{inflight_c};
-        $self->{inflight_c} = [];
+        if (git_version ge BATCH_CMD_VER) {
+                $self->{-bc} = 1;
+                _sock_cmd($self, 'batch-command', 1);
+        } else {
+                _sock_cmd($self = ck($self), 'batch-check', 1);
+        }
+}
+
+sub write_all {
+        my ($self, $buf, $read_step, $inflight) = @_;
+        $self->{sock} // Carp::confess 'BUG: no {sock}';
+        Carp::confess('BUG: not an array') if ref($inflight) ne 'ARRAY';
+        $read_step->($self, $inflight) while @$inflight >= MAX_INFLIGHT;
+        do {
+                my $w = syswrite($self->{sock}, $buf);
+                if (defined $w) {
+                        return if $w == length($buf);
+                        substr($buf, 0, $w, ''); # sv_chop
+                } elsif ($! != EAGAIN) {
+                        $self->fail("write: $!");
+                }
+                $read_step->($self, $inflight);
+        } while (1);
 }
 
 sub check_async ($$$$) {
         my ($self, $oid, $cb, $arg) = @_;
-        my $inflight_c = $self->{inflight_c} // check_async_begin($self);
-        if (scalar(@$inflight_c) >= MAX_INFLIGHT) {
-                check_async_step($self, $inflight_c);
+        my $inflight;
+        if ($self->{-bc}) { # likely as time goes on
+batch_command:
+                $inflight = gcf_inflight($self) // cat_async_begin($self);
+                substr($oid, 0, 0) = 'info ';
+                write_all($self, "$oid\n", \&cat_async_step, $inflight);
+        } else { # accounts for git upgrades while we're running:
+                my $ck = $self->{ck}; # undef OK, maybe set in check_async_begin
+                $inflight = ($ck ? gcf_inflight($ck) : undef)
+                                 // check_async_begin($self);
+                goto batch_command if $self->{-bc};
+                write_all($self->{ck}, "$oid\n", \&check_async_step, $inflight);
         }
-        print { $self->{out_c} } $oid, "\n" or fail($self, "write error: $!");
-        push(@$inflight_c, $oid, $cb, $arg);
+        push(@$inflight, $oid, $cb, $arg);
 }
 
 sub _check_cb { # check_async callback
-        my ($hex, $type, $size, $result) = @_;
+        my (undef, $hex, $type, $size, $result) = @_;
         @$result = ($hex, $type, $size);
 }
 
@@ -294,75 +403,87 @@ sub check {
         check_async_wait($self);
         my ($hex, $type, $size) = @$result;
 
-        # Future versions of git.git may show 'ambiguous', but for now,
-        # we must handle 'dangling' below (and maybe some other oddball
-        # stuff):
+        # git <2.21 would show `dangling' (2.21+ shows `ambiguous')
         # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
-        return if $type eq 'missing' || $type eq 'ambiguous';
-        return if $hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop';
+        return if $type =~ /\A(?:missing|ambiguous)\z/ || $hex eq 'dangling';
         ($hex, $type, $size);
 }
 
-sub _destroy {
-        my ($self, $rbuf, $in, $out, $pid, $err) = @_;
-        delete @$self{($rbuf, $in, $out)};
-        delete $self->{$err} if $err; # `err_c'
+sub fail {
+        my ($self, $msg) = @_;
+        $self->close;
+        croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
+}
 
-        # GitAsyncCat::event_step may delete {pid}
-        my $p = delete $self->{$pid} or return;
+sub async_err ($$$$$) {
+        my ($self, $req, $oid, $err, $action) = @_;
+        $req = $$req if ref($req); # retried
+        my $msg = "E: $action $req ($oid): $err";
+        $async_warn ? carp($msg) : $self->fail($msg);
+}
 
-        # PublicInbox::DS may not be loaded
-        eval { PublicInbox::DS::dwaitpid($p, undef, undef) };
-        waitpid($p, 0) if $@; # wait synchronously if not in event loop
+sub cmd {
+        my $self = shift;
+        [ git_exe(), "--git-dir=$self->{git_dir}", @_ ]
 }
 
-sub cat_async_abort ($) {
-        my ($self) = @_;
-        if (my $inflight = delete $self->{inflight}) {
-                while (@$inflight) {
-                        my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
-                        $req =~ s/ .*//; # drop git_dir for Gcf2Client
-                        eval { $cb->(undef, $req, undef, undef, $arg) };
-                        warn "E: $req: $@ (in abort)\n" if $@;
-                }
+# $git->popen(qw(show f00)); # or
+# $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... });
+sub popen {
+        my ($self, $cmd) = splice(@_, 0, 2);
+        $cmd = $self->cmd(ref($cmd) ? @$cmd :
+                        ($cmd, grep { defined && !ref } @_));
+        popen_rd($cmd, grep { !defined || ref } @_); # env and opt
+}
+
+# same args as popen above
+sub qx {
+        my $fh = popen(@_);
+        if (wantarray) {
+                my @ret = <$fh>;
+                $fh->close; # caller should check $?
+                @ret;
+        } else {
+                local $/;
+                my $ret = <$fh>;
+                $fh->close; # caller should check $?
+                $ret;
         }
-        cleanup($self);
 }
 
-sub fail {
-        my ($self, $msg) = @_;
-        cat_async_abort($self);
-        croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
+sub date_parse {
+        my $self = shift;
+        map {
+                substr($_, length('--max-age='), -1)
+        } $self->qx('rev-parse', map { "--since=$_" } @_);
 }
 
-sub popen {
-        my ($self, @cmd) = @_;
-        @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
-        popen_rd(\@cmd);
+sub _active ($) {
+        scalar(@{gcf_inflight($_[0]) // []}) ||
+                ($_[0]->{ck} && scalar(@{gcf_inflight($_[0]->{ck}) // []}))
 }
 
-sub qx {
-        my ($self, @cmd) = @_;
-        my $fh = $self->popen(@cmd);
-        local $/ = "\n";
-        return <$fh> if wantarray;
-        local $/;
-        <$fh>
+# check_async and cat_async may trigger the other, so ensure they're
+# both completely done by using this:
+sub async_wait_all ($) {
+        my ($self) = @_;
+        while (_active($self)) {
+                check_async_wait($self);
+                cat_async_wait($self);
+        }
 }
 
 # returns true if there are pending "git cat-file" processes
 sub cleanup {
-        my ($self) = @_;
+        my ($self, $lazy) = @_;
+        ($lazy && _active($self)) and
+                return $self->{epwatch} ? watch_async($self) : 1;
         local $in_cleanup = 1;
-        delete $self->{async_cat};
-        check_async_wait($self);
-        cat_async_wait($self);
-        _destroy($self, qw(cat_rbuf in out pid));
-        _destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
-        !!($self->{pid} || $self->{pid_c});
+        async_wait_all($self);
+        $_->close for ($self, (delete($self->{ck}) // ()));
+        undef;
 }
 
-
 # assuming a well-maintained repo, this should be a somewhat
 # accurate estimation of its size
 # TODO: show this in the WWW UI as a hint to potential cloners
@@ -370,98 +491,193 @@ sub packed_bytes {
         my ($self) = @_;
         my $n = 0;
         my $pack_dir = git_path($self, 'objects/pack');
-        foreach my $p (bsd_glob("$pack_dir/*.pack", GLOB_NOSORT)) {
-                $n += -s $p;
-        }
+        $n += (-s $_ // 0) for (bsd_glob("$pack_dir/*.pack", GLOB_NOSORT));
         $n
 }
 
-sub DESTROY { cleanup(@_) }
+sub DESTROY { cleanup($_[0]) }
 
 sub local_nick ($) {
-        my ($self) = @_;
-        my $ret = '???';
         # don't show full FS path, basename should be OK:
-        if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
-                $ret = "/path/to/$1";
-        }
-        wantarray ? ($ret) : $ret;
+        $_[0]->{nick} // ($_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ?
+                        "$1.git" : undef);
 }
 
 sub host_prefix_url ($$) {
         my ($env, $url) = @_;
         return $url if index($url, '//') >= 0;
-        my $scheme = $env->{'psgi.url_scheme'};
         my $host_port = $env->{HTTP_HOST} //
                 "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
-        "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/') . $url;
+        my $sn = $env->{SCRIPT_NAME} // '';
+        "$env->{'psgi.url_scheme'}://\L$host_port\E$sn/$url";
+}
+
+sub base_url { # for coderepos, PSGI-only
+        my ($self, $env) = @_; # env - PSGI env
+        my $nick = $self->{nick} // return undef;
+        my $url = host_prefix_url($env, '');
+        # for mount in Plack::Builder
+        $url .= '/' if substr($url, -1, 1) ne '/';
+        $url . $nick . '/';
 }
 
+sub isrch {} # TODO
+
 sub pub_urls {
         my ($self, $env) = @_;
         if (my $urls = $self->{cgit_url}) {
-                return map { host_prefix_url($env, $_) } @$urls;
+                map { host_prefix_url($env, $_) } @$urls;
+        } else {
+                (base_url($self, $env) // '???');
         }
-        local_nick($self);
 }
 
 sub cat_async_begin {
         my ($self) = @_;
         cleanup($self) if $self->alternates_changed;
-        $self->batch_prepare;
-        die 'BUG: already in async' if $self->{inflight};
-        $self->{inflight} = [];
+        die 'BUG: already in async' if gcf_inflight($self);
+        batch_prepare($self);
 }
 
 sub cat_async ($$$;$) {
         my ($self, $oid, $cb, $arg) = @_;
-        my $inflight = $self->{inflight} // cat_async_begin($self);
-        if (scalar(@$inflight) >= MAX_INFLIGHT) {
-                cat_async_step($self, $inflight);
-        }
-
-        print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
+        my $inflight = gcf_inflight($self) // cat_async_begin($self);
+        substr($oid, 0, 0) = 'contents ' if $self->{-bc};
+        write_all($self, $oid."\n", \&cat_async_step, $inflight);
         push(@$inflight, $oid, $cb, $arg);
 }
 
-sub async_prefetch {
-        my ($self, $oid, $cb, $arg) = @_;
-        if (my $inflight = $self->{inflight}) {
-                # we could use MAX_INFLIGHT here w/o the halving,
-                # but lets not allow one client to monopolize a git process
-                if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) {
-                        print { $self->{out} } $oid, "\n" or
-                                                fail($self, "write error: $!");
-                        return push(@$inflight, $oid, $cb, $arg);
+# returns the modified time of a git repo, same as the "modified" field
+# of a grokmirror manifest
+sub modified ($;$) {
+        my $fh = $_[1] // popen($_[0], @MODIFIED_DATE);
+        (split(/ /, <$fh> // time))[0] + 0; # integerize for JSON
+}
+
+sub cat_desc ($) {
+        my $desc = try_cat($_[0]);
+        chomp $desc;
+        utf8::decode($desc);
+        $desc =~ s/\s+/ /smg;
+        $desc eq '' ? undef : $desc;
+}
+
+sub description {
+        cat_desc("$_[0]->{git_dir}/description") // 'Unnamed repository';
+}
+
+sub cloneurl {
+        my ($self, $env) = @_;
+        $self->{cloneurl} // do {
+                my @urls = split(/\s+/s, try_cat("$self->{git_dir}/cloneurl"));
+                scalar(@urls) ? ($self->{cloneurl} = \@urls) : undef;
+        } // [ substr(base_url($self, $env), 0, -1) ];
+}
+
+# for grokmirror, which doesn't read gitweb.description
+# templates/hooks--update.sample and git-multimail in git.git
+# only match "Unnamed repository", not the full contents of
+# templates/this--description in git.git
+sub manifest_entry {
+        my ($self, $epoch, $default_desc) = @_;
+        my $gd = $self->{git_dir};
+        my @git = (git_exe, "--git-dir=$gd");
+        my $sr = popen_rd([@git, 'show-ref']);
+        my $own = popen_rd([@git, qw(config gitweb.owner)]);
+        my $mod = popen_rd([@git, @MODIFIED_DATE]);
+        my $buf = description($self);
+        if (defined $epoch && index($buf, 'Unnamed repository') == 0) {
+                $buf = "$default_desc [epoch $epoch]";
+        }
+        my $ent = { description => $buf, reference => undef };
+        if (open(my $alt, '<', "$gd/objects/info/alternates")) {
+                # n.b.: GitPython doesn't seem to handle comments or C-quoted
+                # strings like native git does; and we don't for now, either.
+                local $/ = "\n";
+                chomp(my @alt = <$alt>);
+
+                # grokmirror only supports 1 alternate for "reference",
+                if (scalar(@alt) == 1) {
+                        $buf = File::Spec->rel2abs($alt[0], "$gd/objects");
+                        $buf =~ s!/[^/]+/?\z!!; # basename
+                        $ent->{reference} = $buf;
                 }
         }
-        undef;
+        $ent->{fingerprint} = sha_all(1, $sr)->hexdigest;
+        $sr->close or return; # empty, uninitialized git repo
+        $ent->{modified} = modified(undef, $mod);
+        chomp($buf = <$own> // '');
+        utf8::decode($buf);
+        $ent->{owner} = $buf eq '' ? undef : $buf;
+        $ent;
+}
+
+our $ck_unlinked_packs = $^O eq 'linux' ? sub {
+        # FIXME: port gcf2-like over to git.git so we won't need to
+        # deal with libgit2
+        my $s = try_cat "/proc/$_[0]/maps";
+        $s =~ /\.(?:idx|pack) \(deleted\)/s ? 1 : undef;
+} : undef;
+
+# returns true if there are pending cat-file processes
+sub cleanup_if_unlinked {
+        my ($self) = @_;
+        $ck_unlinked_packs or return cleanup($self, 1);
+        # Linux-specific /proc/$PID/maps access
+        # TODO: support this inside git.git
+        my $nr_live = 0;
+        for my $obj ($self, ($self->{ck} // ())) {
+                my $sock = $obj->{sock} // next;
+                my $pid = $sock->attached_pid // next;
+                $ck_unlinked_packs->($pid) and return cleanup($self, 1);
+                ++$nr_live;
+        }
+        $nr_live;
 }
 
-sub extract_cmt_time {
-        my ($bref, undef, undef, undef, $modified) = @_;
-
-        if ($$bref =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm) {
-                my $cmt_time = $1 + 0;
-                $$modified = $cmt_time if $cmt_time > $$modified;
+sub event_step {
+        my ($self) = @_;
+        my $inflight = gcf_inflight($self);
+        if ($inflight && @$inflight) {
+                $self->cat_async_step($inflight);
+                return $self->close unless $self->{sock};
+                # don't loop here to keep things fair, but we must requeue
+                # if there's already-read data in pi_io_rbuf
+                $self->requeue if $self->{sock}->has_rbuf;
         }
 }
 
-# returns the modified time of a git repo, same as the "modified" field
-# of a grokmirror manifest
-sub modified ($) {
+sub schedule_cleanup {
+        my ($self) = @_;
+        PublicInbox::DS::add_uniq_timer($self+0, 30, \&cleanup, $self, 1);
+}
+
+# idempotently registers with DS epoll/kqueue/select/poll
+sub watch_async ($) {
         my ($self) = @_;
-        my $modified = 0;
-        my $fh = popen($self, qw(rev-parse --branches));
-        local $/ = "\n";
-        while (my $oid = <$fh>) {
-                chomp $oid;
-                cat_async($self, $oid, \&extract_cmt_time, \$modified);
+        schedule_cleanup($self);
+        $self->{epwatch} //= do {
+                $self->SUPER::new($self->{sock}, EPOLLIN);
+                \undef;
         }
-        cat_async_wait($self);
-        $modified || time;
 }
 
+sub close {
+        my ($self) = @_;
+        my $sock = $self->{sock};
+        delete @$self{qw(-bc err_c inflight)};
+        delete($self->{epwatch}) ? $self->SUPER::close : delete($self->{sock});
+        $sock->close if $sock; # calls gcf_drain via awaitpid
+}
+
+package PublicInbox::GitCheck; # only for git <2.36
+use v5.12;
+our @ISA = qw(PublicInbox::Git);
+no warnings 'once';
+
+# for event_step
+*cat_async_step = \&PublicInbox::Git::check_async_step;
+
 1;
 __END__
 =pod
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index b9dbe0cc..f57e0336 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -1,89 +1,51 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# internal class used by PublicInbox::Git + PublicInbox::DS
-# This parses the output pipe of "git cat-file --batch"
-#
-# Note: this does NOT set the non-blocking flag, we expect `git cat-file'
-# to be a local process, and git won't start writing a blob until it's
-# fully read.  So minimize context switching and read as much as possible
-# and avoid holding a buffer in our heap any longer than it has to live.
 package PublicInbox::GitAsyncCat;
-use strict;
-use parent qw(PublicInbox::DS Exporter);
-use POSIX qw(WNOHANG);
-use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat git_async_prefetch);
-use PublicInbox::Git ();
+use v5.12;
+use parent qw(Exporter);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch async_check);
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
 
-sub close {
-        my ($self) = @_;
-
-        if (my $gitish = delete $self->{gitish}) {
-                PublicInbox::Git::cat_async_abort($gitish);
-        }
-        $self->SUPER::close; # PublicInbox::DS::close
-}
-
-sub event_step {
-        my ($self) = @_;
-        my $gitish = $self->{gitish} or return;
-        return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1);
-        my $inflight = $gitish->{inflight};
-        if ($inflight && @$inflight) {
-                $gitish->cat_async_step($inflight);
-
-                # child death?
-                if (($gitish->{in} // 0) != ($self->{sock} // 1)) {
-                        $self->close;
-                } elsif (@$inflight || exists $gitish->{cat_rbuf}) {
-                        # ok, more to do, requeue for fairness
-                        $self->requeue;
-                }
-        } elsif ((my $pid = waitpid($gitish->{pid}, WNOHANG)) > 0) {
-                # May happen if the child process is killed by a BOFH
-                # (or segfaults)
-                delete $gitish->{pid};
-                warn "E: gitish $pid exited with \$?=$?\n";
-                $self->close;
+sub ibx_async_cat ($$$$) {
+        my ($ibx, $oid, $cb, $arg) = @_;
+        my $isrch = $ibx->{isrch};
+        my $git = $isrch ? $isrch->{es}->git : ($ibx->{git} // $ibx->git);
+        # {topdir} means ExtSearch (likely [extindex "all"]) with potentially
+        # 100K alternates.  git v2.33+ can handle 100k alternates fairly well.
+        if (!$isrch && !defined($ibx->{topdir}) && !defined($git->{-tmp}) &&
+                ($GCF2C //= eval {
+                require PublicInbox::Gcf2Client;
+                PublicInbox::Gcf2Client::new();
+        } // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
+                $GCF2C->gcf2_async("$oid $git->{git_dir}\n", $cb, $arg);
+                \undef;
+        } else { # read-only end of git-cat-file pipe
+                $git->cat_async($oid, $cb, $arg);
+                $git->watch_async;
         }
 }
 
-sub git_async_cat ($$$$) {
-        my ($git, $oid, $cb, $arg) = @_;
-        my $gitish = $GCF2C;
-        if ($gitish) {
-                $oid .= " $git->{git_dir}";
-        } else {
-                $gitish = $git;
-        }
-        $gitish->cat_async($oid, $cb, $arg);
-        $gitish->{async_cat} //= do {
-                my $self = bless { gitish => $gitish }, __PACKAGE__;
-                $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
-                \undef; # this is a true ref()
-        };
+sub async_check ($$$$) {
+        my ($ibx, $oidish, $cb, $arg) = @_; # $ibx may be $ctx
+        my $git = $ibx->{git} // $ibx->git;
+        $git->check_async($oidish, $cb, $arg);
+        ($git->{ck} // $git)->watch_async;
 }
 
 # this is safe to call inside $cb, but not guaranteed to enqueue
-# returns true if successful, undef if not.
-sub git_async_prefetch {
-        my ($git, $oid, $cb, $arg) = @_;
-        if ($GCF2C) {
-                if ($GCF2C->{async_cat} && !$GCF2C->{wbuf}) {
-                        $oid .= " $git->{git_dir}";
-                        return $GCF2C->cat_async($oid, $cb, $arg);
-                }
-        } elsif ($git->{async_cat} && (my $inflight = $git->{inflight})) {
-                # we could use MAX_INFLIGHT here w/o the halving,
-                # but lets not allow one client to monopolize a git process
-                if (@$inflight < int(PublicInbox::Git::MAX_INFLIGHT/2)) {
-                        print { $git->{out} } $oid, "\n" or
-                                                $git->fail("write error: $!");
-                        return push(@$inflight, $oid, $cb, $arg);
+# returns true if successful, undef if not.  For fairness, we only
+# prefetch if there's no in-flight requests.
+sub ibx_async_prefetch {
+        my ($ibx, $oid, $cb, $arg) = @_;
+        my $git = $ibx->git;
+        if (!defined($ibx->{topdir}) && $GCF2C) {
+                if (!@{$GCF2C->gcf_inflight // []}) {
+                        $oid .= " $git->{git_dir}\n";
+                        return $GCF2C->gcf2_async($oid, $cb, $arg); # true
                 }
+        } elsif ($git->{epwatch}) {
+                return $git->async_prefetch($oid, $cb, $arg);
         }
         undef;
 }
diff --git a/lib/PublicInbox/GitCredential.pm b/lib/PublicInbox/GitCredential.pm
index c6da6a09..bb225ff3 100644
--- a/lib/PublicInbox/GitCredential.pm
+++ b/lib/PublicInbox/GitCredential.pm
@@ -1,32 +1,44 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# git-credential wrapper with built-in .netrc fallback
 package PublicInbox::GitCredential;
-use strict;
+use v5.12;
 use PublicInbox::Spawn qw(popen_rd);
+use autodie qw(close pipe);
 
-sub run ($$) {
-        my ($self, $op) = @_;
-        my ($in_r, $in_w);
-        pipe($in_r, $in_w) or die "pipe: $!";
-        my $out_r = popen_rd([qw(git credential), $op], undef, { 0 => $in_r });
-        close $in_r or die "close in_r: $!";
+sub run ($$;$) {
+        my ($self, $op, $lei) = @_;
+        my ($in_r, $in_w, $out_r);
+        my $cmd = [ qw(git credential), $op ];
+        pipe($in_r, $in_w);
+        if ($lei) { # we'll die if disconnected:
+                pipe($out_r, my $out_w);
+                $lei->send_exec_cmd([ $in_r, $out_w ], $cmd, {});
+        } else {
+                $out_r = popen_rd($cmd, undef, { 0 => $in_r });
+        }
+        close $in_r;
 
         my $out = '';
         for my $k (qw(url protocol host username password)) {
-                defined(my $v = $self->{$k}) or next;
+                my $v = $self->{$k} // next;
                 die "`$k' contains `\\n' or `\\0'\n" if $v =~ /[\n\0]/;
                 $out .= "$k=$v\n";
         }
-        $out .= "\n";
-        print $in_w $out or die "print (git credential $op): $!";
-        close $in_w or die "close (git credential $op): $!";
+        say $in_w $out;
+        close $in_w;
         return $out_r if $op eq 'fill';
         <$out_r> and die "unexpected output from `git credential $op'\n";
-        close $out_r or die "`git credential $op' failed: \$!=$! \$?=$?\n";
+        $out_r->close or die "`git credential $op' failed: \$!=$! \$?=$?\n";
 }
 
-sub check_netrc ($) {
-        my ($self) = @_;
+sub check_netrc {
+        my ($self, $lei) = @_;
+
+        # n.b. lei doesn't load ~/.netrc by default, public-inbox-watch does,
+        # which may've been a mistake, but we have to live with it.
+        return if ($lei && !$lei->{opt}->{netrc});
 
         # part of the standard library, but distributions may split it out
         eval { require Net::Netrc };
@@ -41,15 +53,16 @@ sub check_netrc ($) {
 }
 
 sub fill {
-        my ($self) = @_;
-        my $out_r = run($self, 'fill');
+        my ($self, $lei) = @_;
+        my $out_r = run($self, 'fill', $lei);
         while (<$out_r>) {
                 chomp;
                 return if $_ eq '';
                 /\A([^=]+)=(.*)\z/ or die "bad line: $_\n";
                 $self->{$1} = $2;
         }
-        close $out_r or die "git credential fill failed: \$!=$! \$?=$?\n";
+        $out_r->close or die "git credential fill failed: \$!=$! \$?=$?\n";
+        $self->{filled} = 1;
 }
 
 1;
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index fd2e00dd..396aa783 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -1,20 +1,22 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # when no endpoints match, fallback to this and serve a static file
 # or smart HTTP.  This is our wrapper for git-http-backend(1)
 package PublicInbox::GitHTTPBackend;
 use strict;
-use warnings;
+use v5.10.1;
 use Fcntl qw(:seek);
 use IO::Handle; # ->flush
 use HTTP::Date qw(time2str);
+use PublicInbox::Limiter;
 use PublicInbox::Qspawn;
 use PublicInbox::Tmpfile;
 use PublicInbox::WwwStatic qw(r @NO_CACHE);
+use Carp ();
 
 # 32 is same as the git-daemon connection limit
-my $default_limiter = PublicInbox::Qspawn::Limiter->new(32);
+my $default_limiter = PublicInbox::Limiter->new(32);
 
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
 # not cause problems.  serving "config" might...
@@ -22,13 +24,10 @@ my @text = qw[HEAD info/refs info/attributes
         objects/info/(?:http-alternates|alternates|packs)
         cloneurl description];
 
-my @binary = qw!
-        objects/[a-f0-9]{2}/[a-f0-9]{38}
-        objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
-        !;
+my @binary = ('objects/[a-f0-9]{2}/[a-f0-9]{38,62}',
+        'objects/pack/pack-[a-f0-9]{40,64}\.(?:pack|idx)');
 
 our $ANY = join('|', @binary, @text, 'git-upload-pack');
-my $BIN = join('|', @binary);
 my $TEXT = join('|', @text);
 
 sub serve {
@@ -45,10 +44,7 @@ sub serve {
         serve_dumb($env, $git, $path);
 }
 
-sub err ($@) {
-        my ($env, @msg) = @_;
-        $env->{'psgi.errors'}->print(@msg, "\n");
-}
+sub ucarp { Carp::carp(@_); undef }
 
 my $prev = 0;
 my $exp;
@@ -64,13 +60,13 @@ sub serve_dumb {
 
         my $h = [];
         my $type;
-        if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38}\z!) {
+        if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38,62}\z!) {
                 $type = 'application/x-git-loose-object';
                 cache_one_year($h);
-        } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.pack\z!) {
+        } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40,64}\.pack\z!) {
                 $type = 'application/x-git-packed-objects';
                 cache_one_year($h);
-        } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.idx\z!) {
+        } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40,64}\.idx\z!) {
                 $type = 'application/x-git-packed-objects-toc';
                 cache_one_year($h);
         } elsif ($path =~ /\A(?:$TEXT)\z/o) {
@@ -83,10 +79,10 @@ sub serve_dumb {
         PublicInbox::WwwStatic::response($env, $h, $path, $type);
 }
 
-sub git_parse_hdr { # {parse_hdr} for Qspawn
-        my ($r, $bref, $dumb_args) = @_;
+sub ghb_parse_hdr { # header parser for Qspawn
+        my ($r, $bref, @dumb_args) = @_;
         my $res = parse_cgi_headers($r, $bref) or return; # incomplete
-        $res->[0] == 403 ? serve_dumb(@$dumb_args) : $res;
+        $res->[0] == 403 ? serve_dumb(@dumb_args) : $res;
 }
 
 # returns undef if 403 so it falls back to dumb HTTP
@@ -98,6 +94,7 @@ sub serve_smart {
         foreach my $name (qw(QUERY_STRING
                                 REMOTE_USER REMOTE_ADDR
                                 HTTP_CONTENT_ENCODING
+                                HTTP_GIT_PROTOCOL
                                 CONTENT_TYPE
                                 SERVER_PROTOCOL
                                 REQUEST_METHOD)) {
@@ -108,8 +105,9 @@ sub serve_smart {
         $env{GIT_HTTP_EXPORT_ALL} = '1';
         $env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
         my $rdr = input_prepare($env) or return r(500);
+        $rdr->{quiet} = 1;
         my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
-        $qsp->psgi_return($env, $limiter, \&git_parse_hdr, [$env, $git, $path]);
+        $qsp->psgi_yield($env, $limiter, \&ghb_parse_hdr, $env, $git, $path);
 }
 
 sub input_prepare {
@@ -117,42 +115,23 @@ sub input_prepare {
 
         my $input = $env->{'psgi.input'};
         my $fd = eval { fileno($input) };
-        if (defined $fd && $fd >= 0) {
-                return { 0 => $fd };
-        }
+        return { 0 => $fd } if (defined $fd && $fd >= 0);
         my $id = "git-http.input.$env->{REMOTE_ADDR}:$env->{REMOTE_PORT}";
-        my $in = tmpfile($id);
-        unless (defined $in) {
-                err($env, "could not open temporary file: $!");
-                return;
-        }
+        my $in = tmpfile($id) // return ucarp("tmpfile: $!");
         my $buf;
         while (1) {
-                my $r = $input->read($buf, 8192);
-                unless (defined $r) {
-                        err($env, "error reading input: $!");
-                        return;
-                }
+                my $r = $input->read($buf, 8192) // return ucarp("read $!");
                 last if $r == 0;
-                unless (print $in $buf) {
-                        err($env, "error writing temporary file: $!");
-                        return;
-                }
+                print $in $buf // return ucarp("print: $!");
         }
         # ensure it's visible to git-http-backend(1):
-        unless ($in->flush) {
-                err($env, "error writing temporary file: $!");
-                return;
-        }
-        unless (defined(sysseek($in, 0, SEEK_SET))) {
-                err($env, "error seeking temporary file: $!");
-                return;
-        }
+        $in->flush // return ucarp("flush: $!");
+        sysseek($in, 0, SEEK_SET) // return ucarp($env, "seek: $!");
         { 0 => $in };
 }
 
-sub parse_cgi_headers {
-        my ($r, $bref) = @_;
+sub parse_cgi_headers { # {parse_hdr} for Qspawn
+        my ($r, $bref, $ctx) = @_;
         return r(500) unless defined $r && $r >= 0;
         $$bref =~ s/\A(.*?)\r?\n\r?\n//s or return $r == 0 ? r(500) : undef;
         my $h = $1;
@@ -166,7 +145,18 @@ sub parse_cgi_headers {
                         push @h, $k, $v;
                 }
         }
-        [ $code, \@h ]
+
+        # fallback to WwwCoderepo if cgit 404s
+        if ($code == 404 && $ctx->{www} && !$ctx->{_coderepo_tried}++) {
+                my $wcb = delete $ctx->{env}->{'qspawn.wcb'};
+                $ctx->{env}->{'plack.skip-deflater'} = 1; # prevent 2x gzip
+                $ctx->{env}->{'qspawn.fallback'} = $code;
+                my $res = $ctx->{www}->coderepo->srv($ctx);
+                $ctx->{env}->{'qspawn.wcb'} = $wcb;
+                $res; # CODE or ARRAY ref
+        } else {
+                [ $code, \@h ]
+        }
 }
 
 1;
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 20030433..8b630f25 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
@@ -18,6 +18,7 @@ use Compress::Raw::Zlib qw(Z_OK);
 use PublicInbox::CompressNoop;
 use PublicInbox::Eml;
 use PublicInbox::GitAsyncCat;
+use Carp qw(carp);
 
 our @EXPORT_OK = qw(gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
@@ -46,18 +47,20 @@ sub gz_or_noop {
 sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }
 
 sub psgi_response {
+        # $code may be an HTTP response code (e.g. 200) or a CODE ref (mbox_hdr)
         my ($self, $code, $res_hdr) = @_;
-        my $env = $self->{env};
-        $self->{gz} //= gz_or_noop($res_hdr, $env);
-        if ($env->{'pi-httpd.async'}) {
-                my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
+        if ($self->{env}->{'pi-httpd.async'}) {
+                my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
                 $http->{forward} = $self;
                 sub {
                         my ($wcb) = @_; # -httpd provided write callback
-                        $self->{http_out} = $wcb->([$code, $res_hdr]);
+                        $self->{wcb_args} = [ $code, $res_hdr, $wcb ];
                         $self->can('async_next')->($http); # start stepping
                 };
         } else { # generic PSGI code path
+                ref($code) eq 'CODE' and
+                        ($code, $res_hdr) = @{$code->($self)};
+                $self->{gz} //= gz_or_noop($res_hdr, $self->{env});
                 [ $code, $res_hdr, $self ];
         }
 }
@@ -84,104 +87,132 @@ sub gzip_or_die () {
         $gz;
 }
 
-# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
+sub gone { # what: search/over/mm
+        my ($ctx, $what) = @_;
+        warn "W: `$ctx->{ibx}->{name}' $what went away unexpectedly\n";
+        undef;
+}
+
+# for GetlineResponse (via Qspawn) when NOT using $env->{'pi-httpd.async'}
 # Also used for ->getline callbacks
-sub translate ($$) {
-        my $self = $_[0]; # $_[1] => input
+sub translate {
+        my $self = shift; # $_[1] => input
 
         # allocate the zlib context lazily here, instead of in ->new.
         # Deflate contexts are memory-intensive and this object may
         # be sitting in the Qspawn limiter queue for a while.
-        my $gz = $self->{gz} //= gzip_or_die();
-        my $zbuf = delete($self->{zbuf});
-        if (defined $_[1]) { # my $buf = $_[1];
-                my $err = $gz->deflate($_[1], $zbuf);
-                die "gzip->deflate: $err" if $err != Z_OK;
-                return $zbuf if length($zbuf) >= 8192;
-
-                $self->{zbuf} = $zbuf;
-                '';
+        $self->{gz} //= gzip_or_die();
+        if (defined $_[0]) { # my $buf = $_[1];
+                zmore($self, @_);
+                length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : '';
         } else { # undef == EOF
-                my $err = $gz->flush($zbuf);
-                die "gzip->flush: $err" if $err != Z_OK;
-                $zbuf;
+                $self->zflush;
         }
 }
 
+# returns PublicInbox::HTTP::{Chunked,Identity}
+sub http_out ($) {
+        my ($self) = @_;
+        $self->{http_out} // do {
+                my $args = delete $self->{wcb_args} // return undef;
+                my $wcb = pop @$args; # from PublicInbox:HTTP async
+                # $args->[0] may be \&mbox_hdr or similar
+                $args = $args->[0]->($self) if ref($args->[0]) eq 'CODE';
+                $self->{gz} //= gz_or_noop($args->[1], $self->{env});
+                $self->{http_out} = $wcb->($args); # $wcb->([$code, $hdr_ary])
+        };
+}
+
+# returns undef if HTTP client disconnected, may return 0
+# because ->translate can return ''
 sub write {
-        # my $ret = bytes::length($_[1]); # XXX does anybody care?
-        $_[0]->{http_out}->write(translate($_[0], $_[1]));
+        my $self = shift;
+        http_out($self)->write($self->translate(@_));
+}
+
+sub zfh {
+        $_[0]->{zfh} // do {
+                open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or
+                        die "open: $!";
+                $_[0]->{zfh}
+        };
 }
 
 # similar to ->translate; use this when we're sure we know we have
 # more data to buffer after this
 sub zmore {
-        my $self = $_[0]; # $_[1] => input
-        my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
-        die "gzip->deflate: $err" if $err != Z_OK;
-        undef;
+        my $self = shift;
+        my $zfh = delete $self->{zfh};
+        if (@_ > 1 || $zfh) {
+                print { $zfh // zfh($self) } @_;
+                @_ = (delete $self->{pbuf});
+                delete $self->{zfh};
+        };
+        http_out($self);
+        my $err;
+        ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or
+                die "gzip->deflate: $err";
 }
 
 # flushes and returns the final bit of gzipped data
-sub zflush ($;$) {
-        my $self = $_[0]; # $_[1] => final input (optional)
-        my $zbuf = delete $self->{zbuf};
-        my $gz = delete $self->{gz};
+sub zflush ($;@) {
+        my $self = shift; # $_[1..Inf] => final input (optional)
+        zmore($self, @_) if scalar(@_) || $self->{zfh};
+        # not a bug, recursing on DS->write failure
+        my $gz = delete $self->{gz} // return '';
         my $err;
-        if (defined $_[1]) {
-                $err = $gz->deflate($_[1], $zbuf);
-                die "gzip->deflate: $err" if $err != Z_OK;
-        }
-        $err = $gz->flush($zbuf);
-        die "gzip->flush: $err" if $err != Z_OK;
+        my $zbuf = delete $self->{zbuf};
+        ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err";
         $zbuf;
 }
 
 sub close {
         my ($self) = @_;
-        if (my $http_out = delete $self->{http_out}) {
-                $http_out->write(zflush($self));
-                $http_out->close;
-        }
+        my $http_out = http_out($self) // return;
+        $http_out->write($self->zflush);
+        (delete($self->{http_out}) // return)->close;
 }
 
-sub bail  {
+sub bail {
         my $self = shift;
-        if (my $env = $self->{env}) {
-                eval { $env->{'psgi.errors'}->print(@_, "\n") };
-                warn("E: error printing to psgi.errors: $@", @_) if $@;
-                my $http = $env->{'psgix.io'} or return; # client abort
-                eval { $http->close }; # should hit our close
-                warn "E: error in http->close: $@" if $@;
-                eval { $self->close }; # just in case...
-                warn "E: error in self->close: $@" if $@;
-        } else {
-                warn @_, "\n";
-        }
+        carp @_;
+        my $env = $self->{env} or return;
+        my $http = $env->{'psgix.io'} or return; # client abort
+        eval { $http->close }; # should hit our close
+        carp "E: error in http->close: $@" if $@;
+        eval { $self->close }; # just in case...
+        carp "E: error in self->close: $@" if $@;
 }
 
 # this is public-inbox-httpd-specific
 sub async_blob_cb { # git->cat_async callback
         my ($bref, $oid, $type, $size, $self) = @_;
-        my $http = $self->{env}->{'psgix.io'};
+        my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
         $http->{forward} or return; # client aborted
-        my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg');
-        if (!defined($oid)) {
+        my $smsg = $self->{smsg} or return bail($self, 'BUG: no smsg');
+        $type // return
+                bail($self, "abort: $smsg->{blob} $self->{ibx}->{inboxdir}");
+        if ($type ne 'blob') {
                 # it's possible to have TOCTOU if an admin runs
                 # public-inbox-(edit|purge), just move onto the next message
-                warn "E: $smsg->{blob} missing in $self->{-inbox}->{inboxdir}\n";
+                warn "E: $smsg->{blob} $type in $self->{ibx}->{inboxdir}\n";
                 return $http->next_step($self->can('async_next'));
         }
-        $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
+        $smsg->{blob} eq $oid or return
+                bail($self, "BUG: $smsg->{blob} != $oid");
         eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
-        bail($self, "E: async_eml: $@") if $@;
-        $http->next_step($self->can('async_next'));
+        return bail($self, "E: async_eml: $@") if $@;
+        if ($self->{-low_prio}) { # run via PublicInbox::WWW::event_step
+                push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
+                                PublicInbox::DS::requeue($self->{www});
+        } else {
+                $http->next_step($self->can('async_next'));
+        }
 }
 
 sub smsg_blob {
         my ($self, $smsg) = @_;
-        git_async_cat($self->{-inbox}->git, $smsg->{blob},
-                        \&async_blob_cb, $self);
+        ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
 }
 
 1;
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 5844ef44..7162732e 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Generic PSGI server for convenience.  It aims to provide
@@ -21,13 +21,11 @@
 package PublicInbox::HTTP;
 use strict;
 use parent qw(PublicInbox::DS);
-use bytes (); # only for bytes::length
 use Fcntl qw(:seek);
 use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
 use Plack::Util;
 use HTTP::Status qw(status_message);
 use HTTP::Date qw(time2str);
-use IO::Handle; # ->write
 use PublicInbox::DS qw(msg_more);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
 use PublicInbox::Tmpfile;
@@ -39,23 +37,19 @@ use constant {
 };
 use Errno qw(EAGAIN);
 
-my $pipelineq = [];
-sub process_pipelineq () {
-        my $q = $pipelineq;
-        $pipelineq = [];
-        foreach (@$q) {
-                next unless $_->{sock};
-                rbuf_process($_);
-        }
-}
-
 # Use the same configuration parameter as git since this is primarily
 # a slow-client sponge for git-http-backend
 # TODO: support per-respository http.maxRequestBuffer somehow...
 our $MAX_REQUEST_BUFFER = $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} ||
                         (10 * 1024 * 1024);
 
-open(my $null_io, '<', '/dev/null') or die "failed to open /dev/null: $!";
+open(my $null_io, '<', '/dev/null') or die "open /dev/null: $!";
+{
+        my @n = stat($null_io) or die "stat(/dev/null): $!";
+        my @i = stat(STDIN) or die "stat(STDIN): $!";
+        $null_io = *STDIN{IO} if "@n[0, 1]" eq "@i[0, 1]";
+}
+
 my $http_date;
 my $prev = 0;
 sub http_date () {
@@ -64,13 +58,13 @@ sub http_date () {
 }
 
 sub new ($$$) {
-        my ($class, $sock, $addr, $httpd) = @_;
-        my $self = bless { httpd => $httpd }, $class;
+        my ($class, $sock, $addr, $srv_env) = @_;
+        my $self = bless { srv_env => $srv_env }, $class;
         my $ev = EPOLLIN;
         my $wbuf;
         if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
-                return CORE::close($sock) if $! != EAGAIN;
-                $ev = PublicInbox::TLS::epollbit();
+                return $sock->close if $! != EAGAIN;
+                $ev = PublicInbox::TLS::epollbit() or return $sock->close;
                 $wbuf = [ \&PublicInbox::DS::accept_tls_step ];
         }
         $self->{wbuf} = $wbuf if $wbuf;
@@ -81,52 +75,36 @@ sub new ($$$) {
 
 sub event_step { # called by PublicInbox::DS
         my ($self) = @_;
-
-        return unless $self->flush_write && $self->{sock};
+        local $SIG{__WARN__} = $self->{srv_env}->{'pi-httpd.warn_cb'};
+        return unless $self->flush_write && $self->{sock} && !$self->{forward};
 
         # only read more requests if we've drained the write buffer,
         # otherwise we can be buffering infinitely w/o backpressure
 
         return read_input($self) if ref($self->{env});
-        my $rbuf = $self->{rbuf} // (\(my $x = ''));
-        $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return;
-        rbuf_process($self, $rbuf);
-}
 
-sub rbuf_process {
-        my ($self, $rbuf) = @_;
-        $rbuf //= $self->{rbuf} // (\(my $x = ''));
-
-        my %env = %{$self->{httpd}->{env}}; # full hash copy
-        my $r = parse_http_request($$rbuf, \%env);
-
-        # We do not support Trailers in chunked requests, for now
-        # (they are rarely-used and git (as of 2.7.2) does not use them)
-        if ($r == -1 || $env{HTTP_TRAILER} ||
-                        # this length-check is necessary for PURE_PERL=1:
-                        ($r == -2 && bytes::length($$rbuf) > 0x4000)) {
-                return quit($self, 400);
-        }
-        if ($r < 0) { # incomplete
-                $self->rbuf_idle($rbuf);
-                return $self->requeue;
+        my $rbuf = $self->{rbuf} // (\(my $x = ''));
+        my %env = %{$self->{srv_env}}; # full hash copy
+        my $r;
+        while (($r = parse_http_request($$rbuf, \%env)) < 0) {
+                # We do not support Trailers in chunked requests, for
+                # now (they are rarely-used and git (as of 2.7.2) does
+                # not use them).
+                # this length-check is necessary for PURE_PERL=1:
+                if ($r == -1 || $env{HTTP_TRAILER} ||
+                                ($r == -2 && length($$rbuf) > 0x4000)) {
+                        return quit($self, 400);
+                }
+                $self->do_read($rbuf, 8192, length($$rbuf)) or return;
         }
+        return quit($self, 400) if grep(/\s/, keys %env); # stop smugglers
         $$rbuf = substr($$rbuf, $r);
-        my $len = input_prepare($self, \%env);
-        defined $len or return write_err($self, undef); # EMFILE/ENFILE
+        my $len = input_prepare($self, \%env) //
+                return write_err($self, undef); # EMFILE/ENFILE
 
         $len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf);
 }
 
-# IO::Handle::write returns boolean, this returns bytes written:
-sub xwrite ($$$) {
-        my ($fh, $rbuf, $max) = @_;
-        my $w = bytes::length($$rbuf);
-        $w = $max if $w > $max;
-        $fh->write($$rbuf, $w) or return;
-        $w;
-}
-
 sub read_input ($;$) {
         my ($self, $rbuf) = @_;
         $rbuf //= $self->{rbuf} // (\(my $x = ''));
@@ -139,7 +117,7 @@ sub read_input ($;$) {
 
         while ($len > 0) {
                 if ($$rbuf ne '') {
-                        my $w = xwrite($input, $rbuf, $len);
+                        my $w = syswrite($input, $$rbuf, $len);
                         return write_err($self, $len) unless $w;
                         $len -= $w;
                         die "BUG: $len < 0 (w=$w)" if $len < 0;
@@ -163,7 +141,7 @@ sub app_dispatch {
         $env->{REMOTE_ADDR} = $self->{remote_addr};
         $env->{REMOTE_PORT} = $self->{remote_port};
         if (defined(my $host = $env->{HTTP_HOST})) {
-                $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1;
+                $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1 + 0;
                 $env->{SERVER_NAME} = $host;
         }
         if (defined $input) {
@@ -173,7 +151,7 @@ sub app_dispatch {
         # note: NOT $self->{sock}, we want our close (+ PublicInbox::DS::close),
         # to do proper cleanup:
         $env->{'psgix.io'} = $self; # for ->close or async_pass
-        my $res = Plack::Util::run_app($self->{httpd}->{app}, $env);
+        my $res = Plack::Util::run_app($env->{'pi-httpd.app'}, $env);
         eval {
                 if (ref($res) eq 'CODE') {
                         $res->(sub { response_write($self, $env, $_[0]) });
@@ -182,7 +160,7 @@ sub app_dispatch {
                 }
         };
         if ($@) {
-                err($self, "response_write error: $@");
+                warn "response_write error: $@";
                 $self->close;
         }
 }
@@ -213,6 +191,7 @@ sub response_header_write {
         my $alive;
         if (!$term && $prot_persist) { # auto-chunk
                 $chunked = $alive = 2;
+                $alive = 3 if $env->{REQUEST_METHOD} eq 'HEAD';
                 $h .= "Transfer-Encoding: chunked\r\n";
                 # no need for "Connection: keep-alive" with HTTP/1.1
         } elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) {
@@ -236,7 +215,7 @@ sub response_header_write {
 sub chunked_write ($$) {
         my $self = $_[0];
         return if $_[1] eq '';
-        msg_more($self, sprintf("%x\r\n", bytes::length($_[1])));
+        msg_more($self, sprintf("%x\r\n", length($_[1])));
         msg_more($self, $_[1]);
 
         # use $self->write(\"\n\n") if you care about real-time
@@ -249,22 +228,19 @@ sub identity_write ($$) {
         $self->write(\($_[1])) if $_[1] ne '';
 }
 
-sub next_request ($) {
-        my ($self) = @_;
-        if ($self->{rbuf}) {
-                # avoid recursion for pipelined requests
-                PublicInbox::DS::requeue(\&process_pipelineq) if !@$pipelineq;
-                push @$pipelineq, $self;
-        } else { # wait for next request
-                $self->requeue;
-        }
-}
-
 sub response_done {
         my ($self, $alive) = @_;
+        if (my $forward = delete $self->{forward}) { # avoid recursion
+                eval { $forward->close };
+                if ($@) {
+                        warn "response forward->close error: $@";
+                        return $self->close; # idempotent
+                }
+        }
         delete $self->{env}; # we're no longer busy
+        # HEAD requests set $alive = 3 so we don't send "0\r\n\r\n";
         $self->write(\"0\r\n\r\n") if $alive == 2;
-        $self->write($alive ? \&next_request : \&close);
+        $self->write($alive ? $self->can('requeue') : \&close);
 }
 
 sub getline_pull {
@@ -274,7 +250,7 @@ sub getline_pull {
         # limit our own running time for fairness with other
         # clients and to avoid buffering too much:
         my $buf = eval {
-                local $/ = \8192;
+                local $/ = \65536;
                 $forward->getline;
         } if $forward;
 
@@ -296,17 +272,9 @@ sub getline_pull {
                         return; # likely
                 }
         } elsif ($@) {
-                err($self, "response ->getline error: $@");
+                warn "response ->getline error: $@";
                 $self->close;
         }
-        # avoid recursion
-        if (delete $self->{forward}) {
-                eval { $forward->close };
-                if ($@) {
-                        err($self, "response ->close error: $@");
-                        $self->close; # idempotent
-                }
-        }
         response_done($self, delete $self->{alive});
 }
 
@@ -327,19 +295,13 @@ sub response_write {
                         getline_pull($self); # kick-off!
                 }
         # these are returned to the calling application:
-        } elsif ($alive == 2) {
+        } elsif ($alive >= 2) {
                 bless [ $self, $alive ], 'PublicInbox::HTTP::Chunked';
         } else {
                 bless [ $self, $alive ], 'PublicInbox::HTTP::Identity';
         }
 }
 
-sub input_tmpfile ($) {
-        my $input = tmpfile('http.input', $_[0]->{sock}) or return;
-        $input->autoflush(1);
-        $input;
-}
-
 sub input_prepare {
         my ($self, $env) = @_;
         my ($input, $len);
@@ -355,39 +317,33 @@ sub input_prepare {
                 return quit($self, 400) if $hte !~ /\Achunked\z/i;
 
                 $len = CHUNK_START;
-                $input = input_tmpfile($self);
+                $input = tmpfile('http.input', $self->{sock});
         } else {
                 $len = $env->{CONTENT_LENGTH};
                 if (defined $len) {
                         # rfc7230 3.3.3.4
                         return quit($self, 400) if $len !~ /\A[0-9]+\z/;
-
                         return quit($self, 413) if $len > $MAX_REQUEST_BUFFER;
-                        $input = $len ? input_tmpfile($self) : $null_io;
+                        $input = $len ? tmpfile('http.input', $self->{sock})
+                                : $null_io;
                 } else {
                         $input = $null_io;
                 }
         }
 
         # TODO: expire idle clients on ENFILE / EMFILE
-        return unless $input;
-
-        $env->{'psgi.input'} = $input;
+        $env->{'psgi.input'} = $input // return;
         $self->{env} = $env;
         $self->{input_left} = $len || 0;
 }
 
 sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} // '') =~ /\Achunked\z/i }
 
-sub err ($$) {
-        eval { $_[0]->{httpd}->{env}->{'psgi.errors'}->print($_[1]."\n") };
-}
-
 sub write_err {
         my ($self, $len) = @_;
         my $msg = $! || '(zero write)';
         $msg .= " ($len bytes remaining)" if defined $len;
-        err($self, "error buffering to input: $msg");
+        warn "error buffering to input: $msg";
         quit($self, 500);
 }
 
@@ -396,7 +352,7 @@ sub recv_err {
         if ($! == EAGAIN) { # epoll/kevent watch already set by do_read
                 $self->{input_left} = $len;
         } else {
-                err($self, "error reading input: $! ($len bytes remaining)");
+                warn "error reading input: $! ($len bytes remaining)";
         }
 }
 
@@ -411,12 +367,12 @@ sub read_input_chunked { # unlikely...
                         $$rbuf =~ s/\A\r\n//s and
                                 return app_dispatch($self, $input, $rbuf);
 
-                        return quit($self, 400) if bytes::length($$rbuf) > 2;
+                        return quit($self, 400) if length($$rbuf) > 2;
                 }
                 if ($len == CHUNK_END) {
                         if ($$rbuf =~ s/\A\r\n//s) {
                                 $len = CHUNK_START;
-                        } elsif (bytes::length($$rbuf) > 2) {
+                        } elsif (length($$rbuf) > 2) {
                                 return quit($self, 400);
                         }
                 }
@@ -426,14 +382,14 @@ sub read_input_chunked { # unlikely...
                                 if (($len + -s $input) > $MAX_REQUEST_BUFFER) {
                                         return quit($self, 413);
                                 }
-                        } elsif (bytes::length($$rbuf) > CHUNK_MAX_HDR) {
+                        } elsif (length($$rbuf) > CHUNK_MAX_HDR) {
                                 return quit($self, 400);
                         }
                         # will break from loop since $len >= 0
                 }
 
                 if ($len < 0) { # chunk header is trickled, read more
-                        $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or
+                        $self->do_read($rbuf, 8192, length($$rbuf)) or
                                 return recv_err($self, $len);
                         # (implicit) goto chunk_start if $r > 0;
                 }
@@ -442,7 +398,7 @@ sub read_input_chunked { # unlikely...
                 # drain the current chunk
                 until ($len <= 0) {
                         if ($$rbuf ne '') {
-                                my $w = xwrite($input, $rbuf, $len);
+                                my $w = syswrite($input, $$rbuf, $len);
                                 return write_err($self, "$len chunk") if !$w;
                                 $len -= $w;
                                 if ($len == 0) {
@@ -477,15 +433,14 @@ sub close {
         my $self = $_[0];
         if (my $forward = delete $self->{forward}) {
                 eval { $forward->close };
-                err($self, "forward ->close error: $@") if $@;
+                warn "forward ->close error: $@" if $@;
         }
         $self->SUPER::close; # PublicInbox::DS::close
 }
 
-# for graceful shutdown in PublicInbox::Daemon:
-sub busy () {
+sub busy { # for graceful shutdown in PublicInbox::Daemon:
         my ($self) = @_;
-        ($self->{rbuf} || exists($self->{env}) || $self->{wbuf});
+        defined($self->{rbuf}) || exists($self->{env}) || defined($self->{wbuf})
 }
 
 # runs $cb on the next iteration of the event loop at earliest
@@ -499,11 +454,12 @@ sub next_step {
 # They may be exposed to the PSGI application when the PSGI app
 # returns a CODE ref for "push"-based responses
 package PublicInbox::HTTP::Chunked;
-use strict;
+use v5.12;
 
 sub write {
         # ([$http], $buf) = @_;
-        PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1])
+        PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1]);
+        $_[0]->[0]->{sock} ? length($_[1]) : undef;
 }
 
 sub close {
@@ -512,12 +468,13 @@ sub close {
 }
 
 package PublicInbox::HTTP::Identity;
-use strict;
+use v5.12;
 our @ISA = qw(PublicInbox::HTTP::Chunked);
 
 sub write {
         # ([$http], $buf) = @_;
         PublicInbox::HTTP::identity_write($_[0]->[0], $_[1]);
+        $_[0]->[0]->{sock} ? length($_[1]) : undef;
 }
 
 1;
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index a9f55ff6..6a6347d8 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -1,29 +1,33 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # wraps a listen socket for HTTP and links it to the PSGI app in
 # public-inbox-httpd
 package PublicInbox::HTTPD;
+use v5.10.1;
 use strict;
-use warnings;
-use Plack::Util;
-use PublicInbox::HTTPD::Async;
-use PublicInbox::Daemon;
+use Plack::Util ();
+use Plack::Builder;
+use PublicInbox::HTTP;
 
-sub pi_httpd_async { PublicInbox::HTTPD::Async->new(@_) }
+# we have a different env for ever listener socket for
+# SERVER_NAME, SERVER_PORT and psgi.url_scheme
+# envs: listener FD => PSGI env
+sub new { bless { envs => {}, err => \*STDERR }, __PACKAGE__ }
 
-sub new {
-        my ($class, $sock, $app) = @_;
-        my $n = getsockname($sock) or die "not a socket: $sock $!\n";
+# this becomes {srv_env} in PublicInbox::HTTP
+sub env_for ($$$) {
+        my ($self, $srv, $client) = @_;
+        my $n = getsockname($srv) or die "not a socket: $srv $!\n";
         my ($host, $port) = PublicInbox::Daemon::host_with_port($n);
-
-        my %env = (
+        {
                 SERVER_NAME => $host,
                 SERVER_PORT => $port,
                 SCRIPT_NAME => '',
                 'psgi.version' => [ 1, 1 ],
-                'psgi.errors' => \*STDERR,
-                'psgi.url_scheme' => 'http',
+                'psgi.errors' => $self->{err},
+                'psgi.url_scheme' => $client->can('accept_SSL') ?
+                                        'https' : 'http',
                 'psgi.nonblocking' => Plack::Util::TRUE,
                 'psgi.streaming' => Plack::Util::TRUE,
                 'psgi.run_once'         => Plack::Util::FALSE,
@@ -37,13 +41,48 @@ sub new {
                 # XXX unstable API!, only GitHTTPBackend needs
                 # this to limit git-http-backend(1) parallelism.
                 # We also check for the truthiness of this to
-                # detect when to use git_async_cat for slow blobs
-                'pi-httpd.async' => \&pi_httpd_async
-        );
-        bless {
-                app => $app,
-                env => \%env
-        }, $class;
+                # detect when to use async paths for slow blobs
+                'pi-httpd.async' => 1,
+                'pi-httpd.app' => $self->{app},
+                'pi-httpd.warn_cb' => $self->{warn_cb},
+        }
+}
+
+sub refresh_groups {
+        my ($self) = @_;
+        my $app;
+        $self->{psgi} //= $main::ARGV[0] if @main::ARGV;
+        if ($self->{psgi}) {
+                eval { $app = Plack::Util::load_psgi($self->{psgi}) };
+                die $@, <<EOM if $@;
+$0 runs in /, command-line paths must be absolute
+EOM
+        } else {
+                require PublicInbox::WWW;
+                my $www = PublicInbox::WWW->new;
+                $www->preload;
+                $app = builder {
+                        eval { enable 'ReverseProxy' };
+                        $@ and warn <<EOM;
+Plack::Middleware::ReverseProxy missing,
+URL generation for redirects may be wrong if behind a reverse proxy
+EOM
+                        enable 'Head';
+                        sub { $www->call(@_) };
+                };
+        }
+        $_->{'pi-httpd.app'} = $app for values %{$self->{envs}};
+        $self->{app} = $app;
+}
+
+sub post_accept_cb { # for Listener->{post_accept}
+        my ($self) = @_;
+        sub {
+                my ($client, $addr, $srv) = @_; # $_[4] - tls_wrap (unused)
+                PublicInbox::HTTP->new($client, $addr,
+                                $self->{envs}->{fileno($srv)} //=
+                                        env_for($self, $srv, $client));
+        }
 }
 
 1;
diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm
deleted file mode 100644
index 87a6a5f9..00000000
--- a/lib/PublicInbox/HTTPD/Async.pm
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# XXX This is a totally unstable API for public-inbox internal use only
-# This is exposed via the 'pi-httpd.async' key in the PSGI env hash.
-# The name of this key is not even stable!
-# Currently intended for use with read-only pipes with expensive
-# processes such as git-http-backend(1), cgit(1)
-#
-# fields:
-# http: PublicInbox::HTTP ref
-# fh: PublicInbox::HTTP::{Identity,Chunked} ref (can ->write + ->close)
-# cb: initial read callback
-# arg: arg for {cb}
-# end_obj: CODE or object which responds to ->event_step when ->close is called
-package PublicInbox::HTTPD::Async;
-use strict;
-use parent qw(PublicInbox::DS);
-use Errno qw(EAGAIN);
-use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-
-# This is called via: $env->{'pi-httpd.async'}->()
-# $io is a read-only pipe ($rpipe) for now, but may be a
-# bidirectional socket in the future.
-sub new {
-        my ($class, $io, $cb, $arg, $end_obj) = @_;
-
-        # no $io? call $cb at the top of the next event loop to
-        # avoid recursion:
-        unless (defined($io)) {
-                PublicInbox::DS::requeue($cb ? $cb : $arg);
-                die '$end_obj unsupported w/o $io' if $end_obj;
-                return;
-        }
-        my $self = bless {
-                cb => $cb, # initial read callback
-                arg => $arg, # arg for $cb
-                end_obj => $end_obj, # like END{}, can ->event_step
-        }, $class;
-        IO::Handle::blocking($io, 0);
-        $self->SUPER::new($io, EPOLLIN | EPOLLET);
-}
-
-sub event_step {
-        my ($self) = @_;
-        if (my $cb = delete $self->{cb}) {
-                # this may call async_pass when headers are done
-                $cb->(delete $self->{arg});
-        } elsif (my $sock = $self->{sock}) {
-                my $http = $self->{http};
-                # $self->{sock} is a read pipe for git-http-backend or cgit
-                # and 65536 is the default Linux pipe size
-                my $r = sysread($sock, my $buf, 65536);
-                if ($r) {
-                        $self->{fh}->write($buf); # may call $http->close
-                        if ($http->{sock}) { # !closed
-                                $self->requeue;
-                                # let other clients get some work done, too
-                                return;
-                        }
-
-                        # else: fall through to close below...
-                } elsif (!defined $r && $! == EAGAIN) {
-                        return; # EPOLLET means we'll be notified
-                }
-
-                # Done! Error handling will happen in $self->{fh}->close
-                # called by end_obj->event_step handler
-                delete $http->{forward};
-                $self->close; # queues end_obj->event_step to be called
-        } # else { # we may've been requeued but closed by $http
-}
-
-# once this is called, all data we read is passed to the
-# to the PublicInbox::HTTP instance ($http) via $fh->write
-sub async_pass {
-        my ($self, $http, $fh, $bref) = @_;
-        # In case the client HTTP connection ($http) dies, it
-        # will automatically close this ($self) object.
-        $http->{forward} = $self;
-
-        # write anything we overread when we were reading headers
-        $fh->write($$bref); # PublicInbox:HTTP::{chunked,identity}_wcb
-
-        # we're done with this, free this memory up ASAP since the
-        # calls after this may use much memory:
-        $$bref = undef;
-
-        $self->{http} = $http;
-        $self->{fh} = $fh;
-
-        # either hit EAGAIN or ->requeue to keep EPOLLET happy
-        event_step($self);
-}
-
-# may be called as $forward->close in PublicInbox::HTTP or EOF (event_step)
-sub close {
-        my $self = $_[0];
-        $self->SUPER::close; # DS::close
-
-        # we defer this to the next timer loop since close is deferred
-        if (my $end_obj = delete $self->{end_obj}) {
-                # this calls $end_obj->event_step
-                # (likely PublicInbox::Qspawn::event_step,
-                #  NOT PublicInbox::HTTPD::Async::event_step)
-                PublicInbox::DS::requeue($end_obj);
-        }
-}
-
-1;
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
index de285fc2..f42ece80 100644
--- a/lib/PublicInbox/HlMod.pm
+++ b/lib/PublicInbox/HlMod.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # I have no idea how stable or safe this is for handling untrusted
@@ -14,7 +14,7 @@
 # wrapper for SWIG-generated highlight.pm bindings
 package PublicInbox::HlMod;
 use strict;
-use warnings;
+use v5.10.1;
 use highlight; # SWIG-generated stuff
 use PublicInbox::Hval qw(src_escape ascii_html);
 my $hl;
@@ -54,8 +54,7 @@ sub _parse_filetypes ($) {
         (\%ext2lang, \@shebang);
 }
 
-# We only need one instance, so we don't need to do
-# highlight::CodeGenerator::deleteInstance
+# We only need one instance
 sub new {
         my ($class) = @_;
         $hl ||= do {
@@ -95,33 +94,26 @@ sub do_hl {
 sub do_hl_lang {
         my ($self, $str, $lang) = @_;
 
-        my $dir = $self->{-dir};
         my $langpath;
-
         if (defined $lang) {
-                $langpath = $dir->getLangPath("$lang.lang") or return;
-                $lang = undef unless -f $langpath
-        }
-        unless (defined $lang) {
-                $lang = _shebang2lang($self, $str) or return;
-                $langpath = $dir->getLangPath("$lang.lang") or return;
-                return unless -f $langpath
+                $langpath = $self->{-dir}->getLangPath("$lang.lang") or return;
+                undef $lang unless -f $langpath;
         }
-        my $gen = $self->{$langpath} ||= do {
-                my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
-                $g->setFragmentCode(1); # generate html fragment
+        $lang //= _shebang2lang($self, $str) // return;
+        $langpath = $self->{-dir}->getLangPath("$lang.lang") or return;
+        return unless -f $langpath;
 
-                # whatever theme works
-                my $themepath = $dir->getThemePath('print.theme');
-                $g->initTheme($themepath);
-                $g->loadLanguage($langpath);
-                $g->setEncoding('utf-8');
-                $g;
-        };
+        my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+        $g->setFragmentCode(1); # generate html fragment
 
+        # whatever theme works
+        $g->initTheme($self->{-dir}->getThemePath('print.theme'));
+        $g->loadLanguage($langpath);
+        $g->setEncoding('utf-8');
         # we assume $$str is valid UTF-8, but the SWIG binding doesn't
         # know that, so ensure it's marked as UTF-8 even if it isnt...
-        my $out = $gen->generateString($$str);
+        my $out = $g->generateString($$str);
+        highlight::CodeGenerator::deleteInstance($g);
         utf8::decode($out);
         src_escape($out);
         \$out;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index fb21041a..963dbb71 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -1,18 +1,19 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # represents a header value in various forms.  Used for HTML generation
 # in our web interface(s)
 package PublicInbox::Hval;
+use v5.10.1; # be careful about unicode_strings in v5.12;
 use strict;
-use warnings;
 use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
-                to_attr prurl mid_href fmt_ts ts2str/;
+                to_attr prurl mid_href fmt_ts ts2str utf8_maybe/;
 use POSIX qw(strftime);
 my $enc_ascii = find_encoding('us-ascii');
+use File::Spec;
 
 # safe-ish acceptable filename pattern for portability
 our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor
@@ -34,7 +35,7 @@ my %escape_sequence = (
         "\x7f" => '\\x7f', # DEL
 );
 
-my %xhtml_map = (
+our %xhtml_map = (
         '"' => '&#34;',
         '&' => '&#38;',
         "'" => '&#39;',
@@ -69,7 +70,16 @@ sub prurl ($$) {
                 $u = $host_match[0] // $u->[0];
                 # fall through to below:
         }
-        index($u, '//') == 0 ? "$env->{'psgi.url_scheme'}:$u" : $u;
+        my $dslash = index($u, '//');
+        if ($dslash == 0) {
+                "$env->{'psgi.url_scheme'}:$u"
+        } elsif ($dslash < 0 && substr($u, 0, 1) ne '/' &&
+                        substr(my $path = $env->{PATH_INFO}, 0, 1) eq '/') {
+                # this won't touch the FS at all:
+                File::Spec->abs2rel("/$u", $path);
+        } else {
+                $u;
+        }
 }
 
 # for misguided people who believe in this stuff, give them a
@@ -82,15 +92,22 @@ sub obfuscate_addrs ($$;$) {
         my $repl = $_[2] // '&#8226;';
         my $re = $ibx->{-no_obfuscate_re}; # regex of domains
         my $addrs = $ibx->{-no_obfuscate}; # { $address => 1 }
-        $_[1] =~ s/(([\w\.\+=\-]+)\@([\w\-]+\.[\w\.\-]+))/
-                my ($addr, $user, $domain) = ($1, $2, $3);
-                if ($addrs->{$addr} || ((defined $re && $domain =~ $re))) {
-                        $addr;
+        $_[1] =~ s#(\S+)\@([\w\-]+\.[\w\.\-]+)#
+                my ($pfx, $domain) = ($1, $2);
+                if (index($pfx, '://') > 0 || $pfx !~ s/([\w\.\+=\-]+)\z//) {
+                        "$pfx\@$domain";
                 } else {
-                        $domain =~ s!([^\.]+)\.!$1$repl!;
-                        $user . '@' . $domain
+                        my $user = $1;
+                        my $addr = "$user\@$domain";
+                        if ($addrs->{$addr} || ((defined($re) &&
+                                                $domain =~ $re))) {
+                                $pfx.$addr;
+                        } else {
+                                $domain =~ s!([^\.]+)\.!$1$repl!;
+                                $pfx . $user . '@' . $domain
+                        }
                 }
-                /sge;
+                #sge;
 }
 
 # like format_sanitized_subject in git.git pretty.c with '%f' format string
@@ -111,7 +128,7 @@ $ESCAPES{'/'} = ':'; # common
 sub to_attr ($) {
         my ($str) = @_;
 
-        # git would never do this to us:
+        # git would never do this to us, mail diff uses // to prevent anchors:
         return if index($str, '//') >= 0;
 
         my $first = '';
@@ -128,6 +145,15 @@ sub to_attr ($) {
 sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) };
 
 # human-friendly format
-sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
+sub fmt_ts ($) {
+        # strftime %k is not portable and leading zeros in %H slow me down
+        my (undef, $M, $H, $d, $m, $Y) = gmtime $_[0];
+        sprintf '%u-%02u-%02u % 2u:%02u', $Y + 1900, $m + 1, $d, $H, $M;
+}
+
+sub utf8_maybe ($) {
+        utf8::decode($_[0]);
+        utf8::valid($_[0]) or utf8::encode($_[0]); # non-UTF-8 data exists
+}
 
 1;
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index a861282f..b12533cb 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Each instance of this represents an IMAP client connected to
@@ -36,18 +36,10 @@ use parent qw(PublicInbox::DS);
 use PublicInbox::Eml;
 use PublicInbox::EmlContentFoo qw(parse_content_disposition);
 use PublicInbox::DS qw(now);
-use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
 use PublicInbox::GitAsyncCat;
 use Text::ParseWords qw(parse_line);
 use Errno qw(EAGAIN);
-use PublicInbox::IMAPsearchqp;
-
-my $Address;
-for my $mod (qw(Email::Address::XS Mail::Address)) {
-        eval "require $mod" or next;
-        $Address = $mod and last;
-}
-die "neither Email::Address::XS nor Mail::Address loaded: $@" if !$Address;
+use PublicInbox::Address;
 
 sub LINE_MAX () { 8000 } # RFC 2683 3.2.1.5
 
@@ -99,33 +91,15 @@ undef %FETCH_NEED;
 my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*';
 $valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/;
 
-# RFC 3501 5.4. Autologout Timer needs to be >= 30min
-$PublicInbox::DS::EXPTIME = 60 * 30;
-
-sub greet ($) {
+sub do_greet {
         my ($self) = @_;
         my $capa = capa($self);
         $self->write(\"* OK [$capa] public-inbox-imapd ready\r\n");
 }
 
-sub new ($$$) {
-        my ($class, $sock, $imapd) = @_;
-        my $self = bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth';
-        my $ev = EPOLLIN;
-        my $wbuf;
-        if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
-                return CORE::close($sock) if $! != EAGAIN;
-                $ev = PublicInbox::TLS::epollbit();
-                $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ];
-        }
-        $self->SUPER::new($sock, $ev | EPOLLONESHOT);
-        if ($wbuf) {
-                $self->{wbuf} = $wbuf;
-        } else {
-                greet($self);
-        }
-        $self->update_idle_time;
-        $self;
+sub new {
+        my (undef, $sock, $imapd) = @_;
+        (bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth')->greet($sock)
 }
 
 sub logged_in { 1 }
@@ -140,7 +114,7 @@ sub capa ($) {
                 $capa .= ' COMPRESS=DEFLATE';
         } else {
                 if (!($self->{sock} // $self)->can('accept_SSL') &&
-                        $self->{imapd}->{accept_tls}) {
+                        $self->{imapd}->{ssl_ctx_opt}) {
                         $capa .= ' STARTTLS';
                 }
                 $capa .= ' AUTH=ANONYMOUS';
@@ -157,6 +131,7 @@ sub login_success ($$) {
 sub auth_challenge_ok ($) {
         my ($self) = @_;
         my $tag = delete($self->{-login_tag}) or return;
+        $self->{anon} = 1;
         login_success($self, $tag);
 }
 
@@ -195,14 +170,14 @@ sub cmd_capability ($$) {
 # but uo2m_hibernate can compact and deduplicate it
 sub uo2m_ary_new ($;$) {
         my ($self, $exists) = @_;
-        my $base = $self->{uid_base};
-        my $uids = $self->{ibx}->over->uid_range($base + 1, $base + UID_SLICE);
+        my $ub = $self->{uid_base};
+        my $uids = $self->{ibx}->over(1)->uid_range($ub + 1, $ub + UID_SLICE);
 
         # convert UIDs to offsets from {base}
         my @tmp; # [$UID_OFFSET] => $MSN
         my $msn = 0;
-        ++$base;
-        $tmp[$_ - $base] = ++$msn for @$uids;
+        ++$ub;
+        $tmp[$_ - $ub] = ++$msn for @$uids;
         $$exists = $msn if $exists;
         \@tmp;
 }
@@ -243,7 +218,7 @@ sub uo2m_extend ($$;$) {
         # need to extend the current range:
         my $base = $self->{uid_base};
         ++$beg;
-        my $uids = $self->{ibx}->over->uid_range($beg, $base + UID_SLICE);
+        my $uids = $self->{ibx}->over(1)->uid_range($beg, $base + UID_SLICE);
         return $uo2m if !scalar(@$uids);
         my @tmp; # [$UID_OFFSET] => $MSN
         my $write_method = $_[2] // 'msg_more';
@@ -316,20 +291,18 @@ sub on_inbox_unlock {
         }
 }
 
-# called every X minute(s) or so by PublicInbox::DS::later
-my $IDLERS = {};
-my $idle_timer;
+# called every minute or so by PublicInbox::DS::later
+my $IDLERS; # fileno($obj->{sock}) => PublicInbox::IMAP
 sub idle_tick_all {
         my $old = $IDLERS;
-        $IDLERS = {};
+        $IDLERS = undef;
         for my $i (values %$old) {
                 next if ($i->{wbuf} || !exists($i->{-idle_tag}));
-                $i->update_idle_time or next;
                 $IDLERS->{fileno($i->{sock})} = $i;
                 $i->write(\"* OK Still here\r\n");
         }
-        $idle_timer = scalar keys %$IDLERS ?
-                        PublicInbox::DS::later(\&idle_tick_all) : undef;
+        $IDLERS and
+                PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all);
 }
 
 sub cmd_idle ($$) {
@@ -342,11 +315,11 @@ sub cmd_idle ($$) {
         my $fd = fileno($sock);
         $self->{-idle_tag} = $tag;
         # only do inotify on most recent slice
-        if ($ibx->over->max < $uid_end) {
+        if ($ibx->over(1)->max < $uid_end) {
                 $ibx->subscribe_unlock($fd, $self);
                 $self->{imapd}->idler_start;
         }
-        $idle_timer //= PublicInbox::DS::later(\&idle_tick_all);
+        PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all);
         $IDLERS->{$fd} = $self;
         \"+ idling\r\n"
 }
@@ -371,21 +344,18 @@ sub idle_done ($$) {
         "$idle_tag OK Idle done\r\n";
 }
 
-sub ensure_slices_exist ($$$) {
-        my ($imapd, $ibx, $max) = @_;
-        defined(my $mb_top = $ibx->{newsgroup}) or return;
+sub ensure_slices_exist ($$) {
+        my ($imapd, $ibx) = @_;
+        my $mb_top = $ibx->{newsgroup} // return;
         my $mailboxes = $imapd->{mailboxes};
-        my @created;
-        for (my $i = int($max/UID_SLICE); $i >= 0; --$i) {
+        my $list = $imapd->{mailboxlist}; # may be undef, just autoviv + noop
+        for (my $i = int($ibx->art_max/UID_SLICE); $i >= 0; --$i) {
                 my $sub_mailbox = "$mb_top.$i";
                 last if exists $mailboxes->{$sub_mailbox};
                 $mailboxes->{$sub_mailbox} = $ibx;
                 $sub_mailbox =~ s/\Ainbox\./INBOX./i; # more familiar to users
-                push @created, $sub_mailbox;
+                push @$list, qq[* LIST (\\HasNoChildren) "." $sub_mailbox\r\n]
         }
-        return unless @created;
-        my $l = $imapd->{inboxlist} or return;
-        push @$l, map { qq[* LIST (\\HasNoChildren) "." $_\r\n] } @created;
 }
 
 sub inbox_lookup ($$;$) {
@@ -393,22 +363,23 @@ sub inbox_lookup ($$;$) {
         my ($ibx, $exists, $uidmax, $uid_base) = (undef, 0, 0, 0);
         $mailbox = lc $mailbox;
         $ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return;
-        my $over = $ibx->over;
+        my $over = $ibx->over(1);
         if ($over != $ibx) { # not a dummy
                 $mailbox =~ /\.([0-9]+)\z/ or
                                 die "BUG: unexpected dummy mailbox: $mailbox\n";
                 $uid_base = $1 * UID_SLICE;
 
-                # ->num_highwater caches for writers, so use ->meta_accessor
-                $uidmax = $ibx->mm->meta_accessor('num_highwater') // 0;
+                $uidmax = $ibx->mm->num_highwater // 0;
                 if ($examine) {
                         $self->{uid_base} = $uid_base;
                         $self->{ibx} = $ibx;
                         $self->{uo2m} = uo2m_ary_new($self, \$exists);
                 } else {
-                        $exists = $over->imap_exists;
+                        my $uid_end = $uid_base + UID_SLICE;
+                        $exists = $over->imap_exists($uid_base, $uid_end);
                 }
-                ensure_slices_exist($self->{imapd}, $ibx, $over->max);
+                delete $ibx->{-art_max};
+                ensure_slices_exist($self->{imapd}, $ibx);
         } else {
                 if ($examine) {
                         $self->{uid_base} = $uid_base;
@@ -417,8 +388,9 @@ sub inbox_lookup ($$;$) {
                 }
                 # if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0",
                 # check for new UID ranges (e.g. "INBOX.foo.bar.1")
-                if (my $z = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) {
-                        ensure_slices_exist($self->{imapd}, $z, $z->over->max);
+                if (my $ibx = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) {
+                        delete $ibx->{-art_max};
+                        ensure_slices_exist($self->{imapd}, $ibx);
                 }
         }
         ($ibx, $exists, $uidmax + 1, $uid_base);
@@ -447,8 +419,10 @@ sub _esc ($) {
         if (!defined($v)) {
                 'NIL';
         } elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string
+                utf8::encode($v);
                 '{' . length($v) . "}\r\n" . $v;
         } else { # quoted string
+                utf8::encode($v);
                 qq{"$v"}
         }
 }
@@ -458,7 +432,7 @@ sub addr_envelope ($$;$) {
         my $v = $eml->header_raw($x) //
                 ($y ? $eml->header_raw($y) : undef) // return 'NIL';
 
-        my @x = $Address->parse($v) or return 'NIL';
+        my @x = PublicInbox::Address::objects($v) or return 'NIL';
         '(' . join('',
                 map { '(' . join(' ',
                                 _esc($_->name), 'NIL',
@@ -499,7 +473,7 @@ sub body_disposition ($) {
         my $cd = $eml->header_raw('Content-Disposition') or return 'NIL';
         $cd = parse_content_disposition($cd);
         my $buf = '('._esc($cd->{type});
-        $buf .= ' ' . _esc_hash(delete $cd->{attributes});
+        $buf .= ' ' . _esc_hash($cd->{attributes});
         $buf .= ')';
 }
 
@@ -511,7 +485,7 @@ sub body_leaf ($$;$) {
         my $ct = $eml->ct;
         $buf .= '('._esc($ct->{type}).' ';
         $buf .= _esc($ct->{subtype});
-        $buf .= ' ' . _esc_hash(delete $ct->{attributes});
+        $buf .= ' ' . _esc_hash($ct->{attributes});
         $buf .= ' ' . _esc($eml->header_raw('Content-ID'));
         $buf .= ' ' . _esc($eml->header_raw('Content-Description'));
         my $cte = $eml->header_raw('Content-Transfer-Encoding') // '7bit';
@@ -540,7 +514,7 @@ sub body_parent ($$$) {
                 $buf .= @$hold ? join('', @$hold) : 'NIL';
                 $buf .= ' '._esc($ct->{subtype});
                 if ($structure) {
-                        $buf .= ' '._esc_hash(delete $ct->{attributes});
+                        $buf .= ' '._esc_hash($ct->{attributes});
                         $buf .= ' '.body_disposition($eml);
                         $buf .= ' '._esc($eml->header_raw('Content-Language'));
                         $buf .= ' '._esc($eml->header_raw('Content-Location'));
@@ -583,22 +557,6 @@ sub fetch_body ($;$) {
         join('', @hold);
 }
 
-sub requeue_once ($) {
-        my ($self) = @_;
-        # COMPRESS users all share the same DEFLATE context.
-        # Flush it here to ensure clients don't see
-        # each other's data
-        $self->zflush;
-
-        # no recursion, schedule another call ASAP,
-        # but only after all pending writes are done.
-        # autovivify wbuf:
-        my $new_size = push(@{$self->{wbuf}}, \&long_step);
-
-        # wbuf may be populated by $cb, no need to rearm if so:
-        $self->requeue if $new_size == 1;
-}
-
 sub fetch_run_ops {
         my ($self, $smsg, $bref, $ops, $partial) = @_;
         my $uid = $smsg->{num};
@@ -612,25 +570,37 @@ sub fetch_run_ops {
         $self->msg_more(")\r\n");
 }
 
-sub fetch_blob_cb { # called by git->cat_async via git_async_cat
+sub requeue { # overrides PublicInbox::DS::requeue
+        my ($self) = @_;
+        if ($self->{anon}) { # AUTH=ANONYMOUS gets high priority
+                $self->SUPER::requeue;
+        } else { # low priority
+                push(@{$self->{imapd}->{-authed_q}}, $self) == 1 and
+                        PublicInbox::DS::requeue($self->{imapd});
+        }
+}
+
+sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
         my ($bref, $oid, $type, $size, $fetch_arg) = @_;
         my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
+        my $ibx = $self->{ibx} or return $self->close; # client disconnected
         my $smsg = shift @$msgs or die 'BUG: no smsg';
-        if (!defined($oid)) {
+        if (!defined($type)) {
+                warn "E: git aborted on $oid / $smsg->{blob} $ibx->{inboxdir}";
+                return $self->close;
+        } elsif ($type ne 'blob') {
                 # it's possible to have TOCTOU if an admin runs
                 # public-inbox-(edit|purge), just move onto the next message
-                warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
-                return requeue_once($self);
-        } else {
-                $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
+                warn "E: $smsg->{blob} $type in $ibx->{inboxdir}\n";
+                return $self->requeue_once;
         }
+        $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
         my $pre;
-        if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
-                $pre = git_async_prefetch($self->{ibx}->git, $nxt->{blob},
-                                                \&fetch_blob_cb, $fetch_arg);
-        }
+        ($self->{anon} && !$self->{wbuf} && $msgs->[0]) and
+                $pre = ibx_async_prefetch($ibx, $msgs->[0]->{blob},
+                                        \&fetch_blob_cb, $fetch_arg);
         fetch_run_ops($self, $smsg, $bref, $ops, $partial);
-        $pre ? $self->zflush : requeue_once($self);
+        $pre ? $self->dflush : $self->requeue_once;
 }
 
 sub emit_rfc822 {
@@ -688,7 +658,7 @@ sub op_eml_new { $_[4] = PublicInbox::Eml->new($_[3]) }
 # s/From / fixes old bug from import (pre-a0c07cba0e5d8b6a)
 sub to_crlf_full {
         ${$_[0]} =~ s/(?<!\r)\n/\r\n/sg;
-        ${$_[0]} =~ s/\A[\r\n]*From [^\r\n]*\r\n//s;
+        PublicInbox::Eml::strip_from(${$_[0]});
 }
 
 sub op_crlf_bref { to_crlf_full($_[3]) }
@@ -721,7 +691,7 @@ sub range_step ($$) {
                 uid_clamp($self, \$beg, \$end);
         } elsif ($range =~ /\A([0-9]+):\*\z/) {
                 $beg = $1 + 0;
-                $end = $self->{ibx}->over->max;
+                $end = $self->{ibx}->over(1)->max;
                 $end = $uid_end if $end > $uid_end;
                 $beg = $end if $beg > $end;
                 uid_clamp($self, \$beg, \$end);
@@ -739,7 +709,7 @@ sub range_step ($$) {
 sub refill_range ($$$) {
         my ($self, $msgs, $range_info) = @_;
         my ($beg, $end, $range_csv) = @$range_info;
-        if (scalar(@$msgs = @{$self->{ibx}->over->query_xover($beg, $end)})) {
+        if (scalar(@$msgs = @{$self->{ibx}->over(1)->query_xover($beg, $end)})){
                 $range_info->[0] = $msgs->[-1]->{num} + 1;
                 return;
         }
@@ -759,7 +729,7 @@ sub fetch_blob { # long_response
                 }
         }
         uo2m_extend($self, $msgs->[-1]->{num});
-        git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
+        ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
                         \&fetch_blob_cb, \@_);
 }
 
@@ -780,7 +750,7 @@ sub fetch_smsg { # long_response
 sub refill_uids ($$$;$) {
         my ($self, $uids, $range_info, $sql) = @_;
         my ($beg, $end, $range_csv) = @$range_info;
-        my $over = $self->{ibx}->over;
+        my $over = $self->{ibx}->over(1);
         while (1) {
                 if (scalar(@$uids = @{$over->uid_range($beg, $end, $sql)})) {
                         $range_info->[0] = $uids->[-1] + 1; # update $beg
@@ -848,7 +818,7 @@ sub cmd_status ($$$;@) {
 my %patmap = ('*' => '.*', '%' => '[^\.]*');
 sub cmd_list ($$$$) {
         my ($self, $tag, $refname, $wildcard) = @_;
-        my $l = $self->{imapd}->{inboxlist};
+        my $l = $self->{imapd}->{mailboxlist};
         if ($refname eq '' && $wildcard eq '') {
                 # request for hierarchy delimiter
                 $l = [ qq[* LIST (\\Noselect) "." ""\r\n] ];
@@ -876,12 +846,12 @@ sub eml_index_offs_i { # PublicInbox::Eml::each_part callback
 # prepares an index for BODY[$SECTION_IDX] fetches
 sub eml_body_idx ($$) {
         my ($eml, $section_idx) = @_;
-        my $idx = $eml->{imap_all_parts} //= do {
+        my $idx = $eml->{imap_all_parts} // do {
                 my $all = {};
                 $eml->each_part(\&eml_index_offs_i, $all, 0, 1);
                 # top-level of multipart, BODY[0] not allowed (nz-number)
                 delete $all->{0};
-                $all;
+                $eml->{imap_all_parts} = $all;
         };
         $idx->{$section_idx};
 }
@@ -1032,7 +1002,7 @@ sub fetch_compile ($) {
         # stabilize partial order for consistency and ease-of-debugging:
         if (scalar keys %partial) {
                 $need |= NEED_BLOB;
-                $r[2] = [ map { [ $_, @{$partial{$_}} ] } sort keys %partial ];
+                @{$r[2]} = map { [ $_, @{$partial{$_}} ] } sort keys %partial;
         }
 
         push @op, $OP_EML_NEW if ($need & (EML_HDR|EML_BDY));
@@ -1055,7 +1025,7 @@ sub fetch_compile ($) {
 
         # r[1] = [ $key1, $cb1, $key2, $cb2, ... ]
         use sort 'stable'; # makes output more consistent
-        $r[1] = [ map { ($_->[2], $_->[1]) } sort { $a->[0] <=> $b->[0] } @op ];
+        @{$r[1]} = map { ($_->[2], $_->[1]) } sort { $a->[0] <=> $b->[0] } @op;
         @r;
 }
 
@@ -1070,7 +1040,7 @@ sub cmd_uid_fetch ($$$$;@) {
         my $range_info = range_step($self, \$range_csv);
         return "$tag $range_info\r\n" if !ref($range_info);
         uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
-        long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
+        $self->long_response($cb, $tag, [], $range_info, $ops, $partial);
 }
 
 sub cmd_fetch ($$$$;@) {
@@ -1085,7 +1055,7 @@ sub cmd_fetch ($$$$;@) {
         my $range_info = range_step($self, \$range_csv);
         return "$tag $range_info\r\n" if !ref($range_info);
         uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
-        long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
+        $self->long_response($cb, $tag, [], $range_info, $ops, $partial);
 }
 
 sub msn_convert ($$) {
@@ -1109,11 +1079,12 @@ sub search_uid_range { # long_response
         1; # more
 }
 
-sub parse_query ($$) {
+sub parse_imap_query ($$) {
         my ($self, $query) = @_;
+        # IMAPsearchqp gets loaded in IMAPD->refresh_groups
         my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
         if (ref($q)) {
-                my $max = $self->{ibx}->over->max;
+                my $max = $self->{ibx}->over(1)->max;
                 my $beg = 1;
                 uid_clamp($self, \$beg, \$max);
                 $q->{range_info} = [ $beg, $max ];
@@ -1121,49 +1092,28 @@ sub parse_query ($$) {
         $q;
 }
 
-sub refill_xap ($$$$) {
-        my ($self, $uids, $range_info, $q) = @_;
-        my ($beg, $end) = @$range_info;
-        my $srch = $self->{ibx}->search;
-        my $opt = { mset => 2, limit => 1000 };
-        my $mset = $srch->mset("$q uid:$beg..$end", $opt);
-        @$uids = @{$srch->mset_to_artnums($mset)};
-        if (@$uids) {
-                $range_info->[0] = $uids->[-1] + 1; # update $beg
-                return; # possibly more
-        }
-        0; # all done
-}
-
-sub search_xap_range { # long_response
-        my ($self, $tag, $q, $range_info, $want_msn) = @_;
-        my $uids = [];
-        if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
-                $err ||= 'OK Search done';
-                $self->write("\r\n$tag $err\r\n");
-                return;
-        }
-        msn_convert($self, $uids) if $want_msn;
-        $self->msg_more(join(' ', '', @$uids));
-        1; # more
-}
-
 sub search_common {
         my ($self, $tag, $query, $want_msn) = @_;
         my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
-        my $q = parse_query($self, $query);
+        my $q = parse_imap_query($self, $query);
         return "$tag $q\r\n" if !ref($q);
         my ($sql, $range_info) = delete @$q{qw(sql range_info)};
         if (!scalar(keys %$q)) { # overview.sqlite3
                 $self->msg_more('* SEARCH');
-                long_response($self, \&search_uid_range,
+                $self->long_response(\&search_uid_range,
                                 $tag, $sql, $range_info, $want_msn);
         } elsif ($q = $q->{xap}) {
-                $self->{ibx}->search or
+                my $srch = $self->{ibx}->isrch or
                         return "$tag BAD search not available for mailbox\r\n";
-                $self->msg_more('* SEARCH');
-                long_response($self, \&search_xap_range,
-                                $tag, $q, $range_info, $want_msn);
+                my $opt = {
+                        relevance => -1,
+                        limit => UID_SLICE,
+                        uid_range => $range_info
+                };
+                my $mset = $srch->mset($q, $opt);
+                my $uids = $srch->mset_to_artnums($mset, $opt);
+                msn_convert($self, $uids) if scalar(@$uids) && $want_msn;
+                "* SEARCH @$uids\r\n$tag OK Search done\r\n";
         } else {
                 "$tag BAD Error\r\n";
         }
@@ -1179,15 +1129,6 @@ sub cmd_search ($$$;) {
         search_common($self, $tag, $query, 1);
 }
 
-sub args_ok ($$) { # duplicated from PublicInbox::NNTP
-        my ($cb, $argc) = @_;
-        my $tot = prototype $cb;
-        my ($nreq, undef) = split(';', $tot);
-        $nreq = ($nreq =~ tr/$//) - 1;
-        $tot = ($tot =~ tr/$//) - 1;
-        ($argc <= $tot && $argc >= $nreq);
-}
-
 # returns 1 if we can continue, 0 if not due to buffered writes or disconnect
 sub process_line ($$) {
         my ($self, $l) = @_;
@@ -1222,48 +1163,11 @@ sub process_line ($$) {
         my $err = $@;
         if ($err && $self->{sock}) {
                 $l =~ s/\r?\n//s;
-                err($self, 'error from: %s (%s)', $l, $err);
+                warn("error from: $l ($err)\n");
                 $tag //= '*';
-                $res = "$tag BAD program fault - command not performed\r\n";
+                $res = \"$tag BAD program fault - command not performed\r\n";
         }
-        return 0 unless defined $res;
-        $self->write($res);
-}
-
-sub long_step {
-        my ($self) = @_;
-        # wbuf is unset or empty, here; {long} may add to it
-        my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
-        my $more = eval { $cb->($self, @args) };
-        if ($@ || !$self->{sock}) { # something bad happened...
-                delete $self->{long_cb};
-                my $elapsed = now() - $t0;
-                if ($@) {
-                        err($self,
-                            "%s during long response[$fd] - %0.6f",
-                            $@, $elapsed);
-                }
-                out($self, " deferred[$fd] aborted - %0.6f", $elapsed);
-                $self->close;
-        } elsif ($more) { # $self->{wbuf}:
-                $self->update_idle_time;
-
-                # control passed to git_async_cat if $more == \undef
-                requeue_once($self) if !ref($more);
-        } else { # all done!
-                delete $self->{long_cb};
-                my $elapsed = now() - $t0;
-                my $fd = fileno($self->{sock});
-                out($self, " deferred[$fd] done - %0.6f", $elapsed);
-                my $wbuf = $self->{wbuf}; # do NOT autovivify
-
-                $self->requeue unless $wbuf && @$wbuf;
-        }
-}
-
-sub err ($$;@) {
-        my ($self, $fmt, @args) = @_;
-        printf { $self->{imapd}->{err} } $fmt."\n", @args;
+        defined($res) ? $self->write($res) : 0;
 }
 
 sub out ($$;@) {
@@ -1271,25 +1175,12 @@ sub out ($$;@) {
         printf { $self->{imapd}->{out} } $fmt."\n", @args;
 }
 
-sub long_response ($$;@) {
-        my ($self, $cb, @args) = @_; # cb returns true if more, false if done
-
-        my $sock = $self->{sock} or return;
-        # make sure we disable reading during a long response,
-        # clients should not be sending us stuff and making us do more
-        # work while we are stream a response to them
-        $self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
-        long_step($self); # kick off!
-        undef;
-}
-
 # callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
 sub event_step {
         my ($self) = @_;
-
+        local $SIG{__WARN__} = $self->{imapd}->{warn_cb};
         return unless $self->flush_write && $self->{sock} && !$self->{long_cb};
 
-        $self->update_idle_time;
         # only read more requests if we've drained the write buffer,
         # otherwise we can be buffering infinitely w/o backpressure
 
@@ -1315,17 +1206,12 @@ sub event_step {
 
         return $self->close if $r < 0;
         $self->rbuf_idle($rbuf);
-        $self->update_idle_time;
 
         # maybe there's more pipelined data, or we'll have
         # to register it for socket-readiness notifications
         $self->requeue unless $pending;
 }
 
-sub compressed { undef }
-
-sub zflush {} # overridden by IMAPdeflate
-
 # RFC 4978
 sub cmd_compress ($$$) {
         my ($self, $tag, $alg) = @_;
@@ -1335,33 +1221,33 @@ sub cmd_compress ($$$) {
         # CRIME made TLS compression obsolete
         # return "$tag NO [COMPRESSIONACTIVE]\r\n" if $self->tls_compressed;
 
-        PublicInbox::IMAPdeflate->enable($self, $tag);
+        PublicInbox::IMAPdeflate->enable($self) or return
+                                \"$tag BAD failed to activate compression\r\n";
+        PublicInbox::DS::write($self, \"$tag OK DEFLATE active\r\n");
         $self->requeue;
         undef
 }
 
 sub cmd_starttls ($$) {
         my ($self, $tag) = @_;
-        my $sock = $self->{sock} or return;
-        if ($sock->can('stop_SSL') || $self->compressed) {
+        (($self->{sock} // return)->can('stop_SSL') || $self->compressed) and
                 return "$tag BAD TLS or compression already enabled\r\n";
-        }
-        my $opt = $self->{imapd}->{accept_tls} or
+        $self->{imapd}->{ssl_ctx_opt} or
                 return "$tag BAD can not initiate TLS negotiation\r\n";
         $self->write(\"$tag OK begin TLS negotiation now\r\n");
-        $self->{sock} = IO::Socket::SSL->start_SSL($sock, %$opt);
+        PublicInbox::TLS::start($self->{sock}, $self->{imapd});
         $self->requeue if PublicInbox::DS::accept_tls_step($self);
         undef;
 }
 
-# for graceful shutdown in PublicInbox::Daemon:
-sub busy {
-        my ($self, $now) = @_;
+sub busy { # for graceful shutdown in PublicInbox::Daemon:
+        my ($self) = @_;
         if (defined($self->{-idle_tag})) {
                 $self->write(\"* BYE server shutting down\r\n");
                 return; # not busy anymore
         }
-        ($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now));
+        defined($self->{rbuf}) || defined($self->{wbuf}) ||
+                !$self->write(\"* BYE server shutting down\r\n");
 }
 
 sub close {
@@ -1381,4 +1267,8 @@ our @ISA = qw(PublicInbox::IMAP);
 
 sub logged_in { 0 }
 
+package PublicInbox::IMAPdeflate;
+use PublicInbox::DSdeflate;
+our @ISA = qw(PublicInbox::DSdeflate PublicInbox::IMAP);
+
 1;
diff --git a/lib/PublicInbox/IMAPClient.pm b/lib/PublicInbox/IMAPClient.pm
index 33deee9e..56001517 100644
--- a/lib/PublicInbox/IMAPClient.pm
+++ b/lib/PublicInbox/IMAPClient.pm
@@ -4,17 +4,19 @@
 #
 # The license for this file differs from the rest of public-inbox.
 #
-# Workaround some bugs in upstream Mail::IMAPClient when
+# Workaround some bugs in upstream Mail::IMAPClient <= 3.42 when
 # compression is enabled:
 # - reference cycle: https://rt.cpan.org/Ticket/Display.html?id=132654
 # - read starvation: https://rt.cpan.org/Ticket/Display.html?id=132720
 package PublicInbox::IMAPClient;
 use strict;
 use parent 'Mail::IMAPClient';
-use Errno qw(EAGAIN);
+unless (eval('use Mail::IMAPClient 3.43')) {
+require Errno;
+no warnings 'once';
 
 # RFC4978 COMPRESS
-sub compress {
+*compress = sub {
     my ($self) = @_;
 
     # BUG? strict check on capability commented out for now...
@@ -101,7 +103,7 @@ sub compress {
             # I/O readiness notifications (select, poll).  Refactoring
             # callers will be needed in the unlikely case somebody wants
             # to use edge-triggered notifications (EV_CLEAR, EPOLLET).
-            $! = EAGAIN;
+            $! = Errno::EAGAIN();
             return undef;
         }
 
@@ -114,6 +116,7 @@ sub compress {
     };
 
     return $self;
-}
+};
+} # $Mail::IMAPClient::VERSION < 3.43
 
 1;
diff --git a/lib/PublicInbox/IMAPD.pm b/lib/PublicInbox/IMAPD.pm
index 3c211ee1..42dc2a9f 100644
--- a/lib/PublicInbox/IMAPD.pm
+++ b/lib/PublicInbox/IMAPD.pm
@@ -1,12 +1,11 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# represents an IMAPD (currently a singleton),
-# see script/public-inbox-imapd for how it is used
+# represents an IMAPD, see script/public-inbox-imapd for how it is used
 package PublicInbox::IMAPD;
 use strict;
+use v5.10.1;
 use PublicInbox::Config;
-use PublicInbox::ConfigIter;
 use PublicInbox::InboxIdle;
 use PublicInbox::IMAP;
 use PublicInbox::DummyInbox;
@@ -15,102 +14,84 @@ my $dummy = bless { uidvalidity => 0 }, 'PublicInbox::DummyInbox';
 sub new {
         my ($class) = @_;
         bless {
-                mailboxes => {},
+                # mailboxes => {},
                 err => \*STDERR,
                 out => \*STDOUT,
-                # accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
-                # pi_config => PublicInbox::Config
+                # ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... }
+                # pi_cfg => PublicInbox::Config
                 # idler => PublicInbox::InboxIdle
         }, $class;
 }
 
-sub imapd_refresh_ibx { # pi_config->each_inbox cb
-        my ($ibx, $imapd) = @_;
-        my $ngname = $ibx->{newsgroup} or return;
-        if (ref $ngname) {
-                warn 'multiple newsgroups not supported: '.
-                        join(', ', @$ngname). "\n";
-                return;
-        } elsif ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
-                 $ngname =~ /\.[0-9]+\z/) {
-                warn "mailbox name invalid: newsgroup=`$ngname'\n";
-                return;
-        }
-        $ibx->over or return;
-        $ibx->{over} = undef;
-        my $mm = $ibx->mm or return;
-        $ibx->{mm} = undef;
-
-        # RFC 3501 2.3.1.1 -  "A good UIDVALIDITY value to use in
-        # this case is a 32-bit representation of the creation
-        # date/time of the mailbox"
-        defined($ibx->{uidvalidity} = $mm->created_at) or return;
-        PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max // 0);
-
-        # preload to avoid fragmentation:
-        $ibx->description;
-        $ibx->base_url;
-
-        # ensure dummies are selectable
-        my $dummies = $imapd->{dummies};
-        do {
-                $dummies->{$ngname} = $dummy;
-        } while ($ngname =~ s/\.[^\.]+\z//);
-}
+sub _refresh_ibx { # pi_cfg->each_inbox cb
+        my ($ibx, $imapd, $cache, $dummies) = @_;
+        my $ngname = $ibx->{newsgroup} // return;
 
-sub imapd_refresh_finalize {
-        my ($imapd, $pi_config) = @_;
-        my $mailboxes;
-        if (my $next = delete $imapd->{imapd_next}) {
-                $imapd->{mailboxes} = delete $next->{mailboxes};
-                $mailboxes = delete $next->{dummies};
-        } else {
-                $mailboxes = delete $imapd->{dummies};
-        }
-        %$mailboxes = (%$mailboxes, %{$imapd->{mailboxes}});
-        $imapd->{mailboxes} = $mailboxes;
-        $imapd->{inboxlist} = [
-                map {
-                        my $no = $mailboxes->{$_} == $dummy ? '' : 'No';
-                        my $u = $_; # capitalize "INBOX" for user-familiarity
-                        $u =~ s/\Ainbox(\.|\z)/INBOX$1/i;
-                        qq[* LIST (\\Has${no}Children) "." $u\r\n]
-                } keys %$mailboxes
-        ];
-        $imapd->{pi_config} = $pi_config;
-        if (my $idler = $imapd->{idler}) {
-                $idler->refresh($pi_config);
+        if ($ngname =~ /\.[0-9]+\z/) { # don't confuse with 50K slices
+                warn "E: mailbox name invalid: newsgroup=`$ngname' (ignored)\n";
+                return;
         }
-}
-
-sub imapd_refresh_step { # pi_config->iterate_start cb
-        my ($pi_config, $section, $imapd) = @_;
-        if (defined($section)) {
-                return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
-                my $ibx = $pi_config->lookup_name($1) or return;
-                imapd_refresh_ibx($ibx, $imapd->{imapd_next});
-        } else { # undef == "EOF"
-                imapd_refresh_finalize($imapd, $pi_config);
+        my $ce = $cache->{$ngname};
+        %$ibx = (%$ibx, %$ce) if $ce;
+        # only valid if msgmap and over works:
+        if (defined($ibx->uidvalidity)) {
+                # fill ->{mailboxes}:
+                PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx);
+                # preload to avoid fragmentation:
+                $ibx->description;
+                # ensure dummies are selectable:
+                do {
+                        $dummies->{$ngname} = $dummy;
+                } while ($ngname =~ s/\.[^\.]+\z//);
         }
+        delete @$ibx{qw(mm over)};
 }
 
 sub refresh_groups {
         my ($self, $sig) = @_;
-        my $pi_config = PublicInbox::Config->new;
-        if ($sig) { # SIGHUP is handled through the event loop
-                $self->{imapd_next} = { dummies => {}, mailboxes => {} };
-                my $iter = PublicInbox::ConfigIter->new($pi_config,
-                                                \&imapd_refresh_step, $self);
-                $iter->event_step;
-        } else { # initial start is synchronous
-                $self->{dummies} = {};
-                $pi_config->each_inbox(\&imapd_refresh_ibx, $self);
-                imapd_refresh_finalize($self, $pi_config);
+        my $pi_cfg = PublicInbox::Config->new;
+        require PublicInbox::IMAPsearchqp;
+        $self->{mailboxes} = $pi_cfg->{-imap_mailboxes} // do {
+                my $mailboxes = $self->{mailboxes} = {};
+                my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
+                my $dummies = {};
+                $pi_cfg->each_inbox(\&_refresh_ibx, $self, $cache, $dummies);
+                %$mailboxes = (%$dummies, %$mailboxes);
+                @{$pi_cfg->{-imap_mailboxlist}} = map { $_->[2] }
+                        sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
+                        map {
+                                # capitalize "INBOX" for user-familiarity
+                                my $u = $_;
+                                $u =~ s/\Ainbox(\.|\z)/INBOX$1/i;
+                                if ($mailboxes->{$_} == $dummy) {
+                                        [ $u, -1,
+                                          qq[* LIST (\\HasChildren) "." $u\r\n]]
+                                } else {
+                                        $u =~ /\A(.+)\.([0-9]+)\z/ or die
+"BUG: `$u' has no slice digit(s)";
+                                        [ $1, $2 + 0, '* LIST '.
+                                          qq[(\\HasNoChildren) "." $u\r\n] ]
+                                }
+                        } keys %$mailboxes;
+                $pi_cfg->{-imap_mailboxes} = $mailboxes;
+        };
+        $self->{mailboxlist} = $pi_cfg->{-imap_mailboxlist} //
+                        die 'BUG: no mailboxlist';
+        $self->{pi_cfg} = $pi_cfg;
+        if (my $idler = $self->{idler}) {
+                $idler->refresh($pi_cfg);
         }
 }
 
 sub idler_start {
-        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
+}
+
+sub event_step { # called vai requeue for low-priority IMAP clients
+        my ($self) = @_;
+        my $imap = shift(@{$self->{-authed_q}}) // return;
+        PublicInbox::DS::requeue($self) if scalar(@{$self->{-authed_q}});
+        $imap->event_step; # PublicInbox::IMAP::event_step
 }
 
 1;
diff --git a/lib/PublicInbox/IMAPTracker.pm b/lib/PublicInbox/IMAPTracker.pm
index be9caf76..4efa8a7e 100644
--- a/lib/PublicInbox/IMAPTracker.pm
+++ b/lib/PublicInbox/IMAPTracker.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::IMAPTracker;
 use strict;
@@ -65,7 +65,7 @@ sub new {
         my ($class, $url) = @_;
 
         # original name for compatibility with old setups:
-        my $dbname = PublicInbox::Config->config_dir() . "/imap.sqlite3";
+        my $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3';
 
         # use the new XDG-compliant name for new setups:
         if (!-f $dbname) {
@@ -75,8 +75,11 @@ sub new {
         }
         if (!-f $dbname) {
                 require File::Path;
-                require File::Basename;
-                File::Path::mkpath(File::Basename::dirname($dbname));
+                require PublicInbox::Syscall;
+                my ($dir) = ($dbname =~ m!(.*?/)[^/]+\z!);
+                File::Path::mkpath($dir);
+                PublicInbox::Syscall::nodatacow_dir($dir);
+                open my $fh, '+>>', $dbname or die "failed to open $dbname: $!";
         }
         my $self = bless { lock_path => "$dbname.lock", url => $url }, $class;
         $self->lock_acquire;
diff --git a/lib/PublicInbox/IMAPdeflate.pm b/lib/PublicInbox/IMAPdeflate.pm
deleted file mode 100644
index b98a069d..00000000
--- a/lib/PublicInbox/IMAPdeflate.pm
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# TODO: reduce duplication from PublicInbox::NNTPdeflate
-
-# RFC 4978
-package PublicInbox::IMAPdeflate;
-use strict;
-use warnings;
-use 5.010_001;
-use base qw(PublicInbox::IMAP);
-use Compress::Raw::Zlib;
-
-my %IN_OPT = (
-        -Bufsize => 1024,
-        -WindowBits => -15, # RFC 1951
-        -AppendOutput => 1,
-);
-
-# global deflate context and buffer
-my $zbuf = \(my $buf = '');
-my $zout;
-{
-        my $err;
-        ($zout, $err) = Compress::Raw::Zlib::Deflate->new(
-                # nnrpd (INN) and Compress::Raw::Zlib favor MemLevel=9,
-                # the zlib C library and git use MemLevel=8 as the default
-                # -MemLevel => 9,
-                -Bufsize => 65536, # same as nnrpd
-                -WindowBits => -15, # RFC 1951
-                -AppendOutput => 1,
-        );
-        $err == Z_OK or die "Failed to initialize zlib deflate stream: $err";
-}
-
-sub enable {
-        my ($class, $self, $tag) = @_;
-        my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%IN_OPT);
-        if ($err != Z_OK) {
-                $self->err("Inflate->new failed: $err");
-                $self->write(\"$tag BAD failed to activate compression\r\n");
-                return;
-        }
-        $self->write(\"$tag OK DEFLATE active\r\n");
-        bless $self, $class;
-        $self->{zin} = $in;
-}
-
-# overrides PublicInbox::NNTP::compressed
-sub compressed { 1 }
-
-sub do_read ($$$$) {
-        my ($self, $rbuf, $len, $off) = @_;
-
-        my $zin = $self->{zin} or return; # closed
-        my $doff;
-        my $dbuf = delete($self->{dbuf}) // '';
-        $doff = length($dbuf);
-        my $r = PublicInbox::DS::do_read($self, \$dbuf, $len, $doff) or return;
-
-        # Workaround inflate bug appending to OOK scalars:
-        # <https://rt.cpan.org/Ticket/Display.html?id=132734>
-        # We only have $off if the client is pipelining, and pipelining
-        # is where our substr() OOK optimization in event_step makes sense.
-        if ($off) {
-                my $copy = $$rbuf;
-                undef $$rbuf;
-                $$rbuf = $copy;
-        }
-
-        # assert(length($$rbuf) == $off) as far as NNTP.pm is concerned
-        # -ConsumeInput is true, so $dbuf is automatically emptied
-        my $err = $zin->inflate($dbuf, $rbuf);
-        if ($err == Z_OK) {
-                $self->{dbuf} = $dbuf if $dbuf ne '';
-                $r = length($$rbuf) and return $r;
-                # nothing ready, yet, get more, later
-                $self->requeue;
-        } else {
-                delete $self->{zin};
-                $self->close;
-        }
-        0;
-}
-
-# override PublicInbox::DS::msg_more
-sub msg_more ($$) {
-        my $self = $_[0];
-
-        # $_[1] may be a reference or not for ->deflate
-        my $err = $zout->deflate($_[1], $zbuf);
-        $err == Z_OK or die "->deflate failed $err";
-        1;
-}
-
-sub zflush ($) {
-        my ($self) = @_;
-
-        my $deflated = $zbuf;
-        $zbuf = \(my $next = '');
-
-        my $err = $zout->flush($deflated, Z_FULL_FLUSH);
-        $err == Z_OK or die "->flush failed $err";
-
-        # We can still let the lower socket layer do buffering:
-        PublicInbox::DS::msg_more($self, $$deflated);
-}
-
-# compatible with PublicInbox::DS::write, so $_[1] may be a reference or not
-sub write ($$) {
-        my $self = $_[0];
-        return PublicInbox::DS::write($self, $_[1]) if ref($_[1]) eq 'CODE';
-
-        my $deflated = $zbuf;
-        $zbuf = \(my $next = '');
-
-        # $_[1] may be a reference or not for ->deflate
-        my $err = $zout->deflate($_[1], $deflated);
-        $err == Z_OK or die "->deflate failed $err";
-        $err = $zout->flush($deflated, Z_FULL_FLUSH);
-        $err == Z_OK or die "->flush failed $err";
-
-        # We can still let the socket layer do buffering:
-        PublicInbox::DS::write($self, $deflated);
-}
-
-1;
diff --git a/lib/PublicInbox/IMAPsearchqp.pm b/lib/PublicInbox/IMAPsearchqp.pm
index 190fefb9..0c37220c 100644
--- a/lib/PublicInbox/IMAPsearchqp.pm
+++ b/lib/PublicInbox/IMAPsearchqp.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # IMAP search query parser.  cf RFC 3501
 
@@ -124,7 +124,7 @@ sub ON {
         my ($self, $item) = @_;
         my $ts = yyyymmdd($item);
         my $end = $ts + 86399; # no leap day
-        push @{$self->{xap}}, "ts:$ts..$end";
+        push @{$self->{xap}}, "rt:$ts..$end";
         my $sql = $self->{sql} or return 1;
         $$sql .= " AND ts >= $ts AND ts <= $end";
 }
@@ -132,7 +132,7 @@ sub ON {
 sub BEFORE {
         my ($self, $item) = @_;
         my $ts = yyyymmdd($item);
-        push @{$self->{xap}}, "ts:..$ts";
+        push @{$self->{xap}}, "rt:..$ts";
         my $sql = $self->{sql} or return 1;
         $$sql .= " AND ts <= $ts";
 }
@@ -140,7 +140,7 @@ sub BEFORE {
 sub SINCE {
         my ($self, $item) = @_;
         my $ts = yyyymmdd($item);
-        push @{$self->{xap}}, "ts:$ts..";
+        push @{$self->{xap}}, "rt:$ts..";
         my $sql = $self->{sql} or return 1;
         $$sql .= " AND ts >= $ts";
 }
@@ -165,7 +165,7 @@ sub msn_set {
 # things that should not match
 sub impossible {
         my ($self) = @_;
-        push @{$self->{xap}}, 'bytes:..0';
+        push @{$self->{xap}}, 'z:..0';
         my $sql = $self->{sql} or return 1;
         $$sql .= ' AND num < 0';
 }
@@ -217,8 +217,8 @@ BEFORE_date : 'BEFORE' date { $q->BEFORE(\%item) }
 
 MSN_set : sequence_set { $q->msn_set($item{sequence_set}) }
 UID_set : "UID" sequence_set { $q->uid_set($item{sequence_set}) }
-LARGER_number : "LARGER" number { $q->xap_only("bytes:$item{number}..") }
-SMALLER_number : "SMALLER" number { $q->xap_only("bytes:..$item{number}") }
+LARGER_number : "LARGER" number { $q->xap_only("z:$item{number}..") }
+SMALLER_number : "SMALLER" number { $q->xap_only("z:..$item{number}") }
 
 DELETED : "DELETED" { $q->impossible }
 OLD : "OLD" { $q->impossible }
@@ -279,6 +279,8 @@ sub parse {
         my $sql = '';
         %$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
         # $::RD_TRACE = 1;
+        local $::RD_ERRORS = undef;
+        local $::RD_WARN = undef;
         my $res = eval { $prd->search_key(uc($query)) };
         return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
         return 'BAD unexpected result' if !$res || $res != $q;
diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm
new file mode 100644
index 00000000..8640f112
--- /dev/null
+++ b/lib/PublicInbox/IO.pm
@@ -0,0 +1,152 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# supports reaping of children tied to a pipe or socket
+package PublicInbox::IO;
+use v5.12;
+use parent qw(IO::Handle Exporter);
+use PublicInbox::DS qw(awaitpid);
+our @EXPORT_OK = qw(poll_in read_all try_cat write_file);
+use Carp qw(croak);
+use IO::Poll qw(POLLIN);
+use Errno qw(EINTR EAGAIN);
+use PublicInbox::OnDestroy;
+# don't autodie in top-level for Perl 5.16.3 (and maybe newer versions)
+# we have our own ->close, so we scope autodie into each sub
+
+sub waitcb { # awaitpid callback
+        my ($pid, $errref, $cb, @args) = @_;
+        $$errref = $?; # sets .cerr for _close
+        $cb->($pid, @args) if $cb; # may clobber $?
+}
+
+sub attach_pid {
+        my ($io, $pid, @cb_arg) = @_;
+        bless $io, __PACKAGE__;
+        # we share $err (and not $self) with awaitpid to avoid a ref cycle
+        my $e = \(my $err);
+        ${*$io}{pi_io_reap} = [ $PublicInbox::OnDestroy::fork_gen, $pid, $e ];
+        awaitpid($pid, \&waitcb, $e, @cb_arg);
+        $io;
+}
+
+sub attached_pid {
+        my ($io) = @_;
+        ${${*$io}{pi_io_reap} // []}[1];
+}
+
+sub can_reap {
+        my ($io) = @_;
+        ${${*$io}{pi_io_reap} // [-1]}[0] == $PublicInbox::OnDestroy::fork_gen;
+}
+
+# caller cares about error result if they call close explicitly
+# reap->[2] may be set before this is called via waitcb
+sub close {
+        my ($io) = @_;
+        my $ret = $io->SUPER::close;
+        my $reap = delete ${*$io}{pi_io_reap};
+        return $ret if ($reap->[0] // -1) != $PublicInbox::OnDestroy::fork_gen;
+        if (defined ${$reap->[2]}) { # reap_pids already reaped asynchronously
+                $? = ${$reap->[2]};
+        } else { # wait synchronously
+                my $w = awaitpid($reap->[1]);
+        }
+        $? ? '' : $ret;
+}
+
+sub DESTROY {
+        my ($io) = @_;
+        my $reap = delete ${*$io}{pi_io_reap};
+        if (($reap->[0] // -1) == $PublicInbox::OnDestroy::fork_gen) {
+                $io->SUPER::close;
+                ${$reap->[2]} // awaitpid($reap->[1]);
+        }
+        $io->SUPER::DESTROY;
+}
+
+sub write_file ($$@) { # mode, filename, LIST (for print)
+        use autodie qw(open close);
+        open(my $fh, shift, shift);
+        print $fh @_;
+        defined(wantarray) && !wantarray ? $fh : close $fh;
+}
+
+sub poll_in ($;$) {
+        IO::Poll::_poll($_[1] // -1, fileno($_[0]), my $ev = POLLIN);
+}
+
+sub read_all ($;$$$) { # pass $len=0 to read until EOF for :utf8 handles
+        use autodie qw(read);
+        my ($io, $len, $bref, $off) = @_;
+        $bref //= \(my $buf);
+        $off //= 0;
+        my $r = 0;
+        if (my $left = $len //= -s $io) { # known size (binmode :raw/:unix)
+                do { # retry for binmode :unix
+                        $r = read($io, $$bref, $left, $off += $r) or croak(
+                                "read($io) premature EOF ($left/$len remain)");
+                } while ($left -= $r);
+        } else { # read until EOF
+                while (($r = read($io, $$bref, 65536, $off += $r))) {}
+        }
+        wantarray ? split(/^/sm, $$bref) : $$bref
+}
+
+sub try_cat ($) {
+        my ($path) = @_;
+        open(my $fh, '<', $path) or return '';
+        read_all $fh;
+}
+
+# TODO: move existing HTTP/IMAP/NNTP/POP3 uses of rbuf here
+sub my_bufread {
+        my ($io, $len) = @_;
+        my $rbuf = ${*$io}{pi_io_rbuf} //= \(my $new = '');
+        my $left = $len - length($$rbuf);
+        my $r;
+        while ($left > 0) {
+                $r = sysread($io, $$rbuf, $left, length($$rbuf));
+                if ($r) {
+                        $left -= $r;
+                } elsif (defined($r)) { # EOF
+                        return 0;
+                } else {
+                        next if ($! == EAGAIN and poll_in($io));
+                        next if $! == EINTR; # may be set by sysread or poll_in
+                        return; # unrecoverable error
+                }
+        }
+        my $no_pad = substr($$rbuf, 0, $len, '');
+        delete(${*$io}{pi_io_rbuf}) if $$rbuf eq '';
+        \$no_pad;
+}
+
+# always uses "\n"
+sub my_readline {
+        my ($io) = @_;
+        my $rbuf = ${*$io}{pi_io_rbuf} //= \(my $new = '');
+        while (1) {
+                if ((my $n = index($$rbuf, "\n")) >= 0) {
+                        my $ret = substr($$rbuf, 0, $n + 1, '');
+                        delete(${*$io}{pi_io_rbuf}) if $$rbuf eq '';
+                        return $ret;
+                }
+                my $r = sysread($io, $$rbuf, 65536, length($$rbuf));
+                if (!defined($r)) {
+                        next if ($! == EAGAIN and poll_in($io));
+                        next if $! == EINTR; # may be set by sysread or poll_in
+                        return; # unrecoverable error
+                } elsif ($r == 0) { # return whatever's left on EOF
+                        delete(${*$io}{pi_io_rbuf});
+                        return $$rbuf;
+                } # else { continue
+        }
+}
+
+sub has_rbuf {
+        my ($io) = @_;
+        defined(${*$io}{pi_io_rbuf});
+}
+
+1;
diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
new file mode 100644
index 00000000..ed6d27fd
--- /dev/null
+++ b/lib/PublicInbox/IPC.pm
@@ -0,0 +1,452 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# base class for remote IPC calls and workqueues, requires Storable or Sereal
+# - ipc_do and ipc_worker_* is for a single worker/producer and uses pipes
+# - wq_io_do and wq_worker* is for a single producer and multiple workers,
+#   using SOCK_SEQPACKET for work distribution
+# use ipc_do when you need work done on a certain process
+# use wq_io_do when your work can be done on any idle worker
+package PublicInbox::IPC;
+use v5.12;
+use parent qw(Exporter);
+use autodie qw(close pipe read socketpair sysread);
+use Carp qw(croak);
+use PublicInbox::DS qw(awaitpid);
+use PublicInbox::Spawn;
+use PublicInbox::OnDestroy;
+use PublicInbox::WQWorker;
+use Socket qw(AF_UNIX SOCK_STREAM SOCK_SEQPACKET);
+my $MY_MAX_ARG_STRLEN = 4096 * 33; # extra 4K for serialization
+our @EXPORT_OK = qw(ipc_freeze ipc_thaw nproc_shards);
+my ($enc, $dec);
+# ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+
+# and eliminate method call overhead
+BEGIN {
+        eval {
+                require Sereal::Encoder;
+                require Sereal::Decoder;
+                Sereal::Encoder->import('sereal_encode_with_object');
+                Sereal::Decoder->import('sereal_decode_with_object');
+                ($enc, $dec) = (Sereal::Encoder->new, Sereal::Decoder->new);
+        };
+};
+
+if ($enc && $dec) { # should be custom ops
+        *ipc_freeze = sub ($) { sereal_encode_with_object $enc, $_[0] };
+        *ipc_thaw = sub ($) { sereal_decode_with_object $dec, $_[0], my $ret };
+} else {
+        require Storable;
+        *ipc_freeze = \&Storable::freeze;
+        *ipc_thaw = \&Storable::thaw;
+}
+
+our $recv_cmd = PublicInbox::Spawn->can('recv_cmd4');
+our $send_cmd = PublicInbox::Spawn->can('send_cmd4') // do {
+        require PublicInbox::CmdIPC4;
+        $recv_cmd //= PublicInbox::CmdIPC4->can('recv_cmd4');
+        PublicInbox::CmdIPC4->can('send_cmd4');
+} // do {
+        require PublicInbox::Syscall;
+        $recv_cmd //= PublicInbox::Syscall->can('recv_cmd4');
+        PublicInbox::Syscall->can('send_cmd4');
+};
+
+sub _get_rec ($) {
+        my ($r) = @_;
+        my $len = <$r> // return;
+        chop($len) eq "\n" or croak "no LF byte in $len";
+        my $n = read($r, my $buf, $len);
+        $n == $len or croak "short read: $n != $len";
+        ipc_thaw($buf);
+}
+
+sub _send_rec ($$) {
+        my ($w, $ref) = @_;
+        my $buf = ipc_freeze($ref);
+        print $w length($buf), "\n", $buf or croak "print: $!";
+}
+
+sub ipc_return ($$$) {
+        my ($w, $ret, $exc) = @_;
+        _send_rec($w, $exc ? bless(\$exc, 'PublicInbox::IPC::Die') : $ret);
+}
+
+sub ipc_worker_loop ($$$) {
+        my ($self, $r_req, $w_res) = @_;
+        my ($rec, $wantarray, $sub, @args);
+        local $/ = "\n";
+        while ($rec = _get_rec($r_req)) {
+                ($wantarray, $sub, @args) = @$rec;
+                # no waiting if client doesn't care,
+                # this is the overwhelmingly likely case
+                if (!defined($wantarray)) {
+                        eval { $self->$sub(@args) };
+                        warn "$$ die: $@ (from nowait $sub)\n" if $@;
+                } elsif ($wantarray) {
+                        my @ret = eval { $self->$sub(@args) };
+                        ipc_return($w_res, \@ret, $@);
+                } else { # '' => wantscalar
+                        my $ret = eval { $self->$sub(@args) };
+                        ipc_return($w_res, \$ret, $@);
+                }
+        }
+}
+
+sub exit_exception { exit(!!$@) }
+
+# starts a worker if Sereal or Storable is installed
+sub ipc_worker_spawn {
+        my ($self, $ident, $oldset, $fields, @cb_args) = @_;
+        return if ($self->{-ipc_ppid} // -1) == $$; # idempotent
+        delete(@$self{qw(-ipc_req -ipc_res -ipc_ppid -ipc_pid)});
+        pipe(my $r_req, my $w_req);
+        pipe(my $r_res, my $w_res);
+        my $sigset = $oldset // PublicInbox::DS::block_signals();
+        $self->ipc_atfork_prepare;
+        my $pid = PublicInbox::DS::fork_persist;
+        if ($pid == 0) {
+                delete @$self{qw(-wq_s1 -wq_s2 -wq_workers -wq_ppid)};
+                $w_req = $r_res = undef;
+                $w_res->autoflush(1);
+                $SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT));
+                local $0 = $ident;
+                # ensure we properly exit even if warn() dies:
+                my $end = on_destroy \&exit_exception;
+                eval {
+                        $fields //= {};
+                        local @$self{keys %$fields} = values(%$fields);
+                        my $on_destroy = $self->ipc_atfork_child;
+                        local @SIG{keys %SIG} = values %SIG;
+                        PublicInbox::DS::sig_setmask($sigset);
+                        ipc_worker_loop($self, $r_req, $w_res);
+                };
+                warn "worker $ident PID:$$ died: $@\n" if $@;
+                undef $end; # trigger exit
+        }
+        PublicInbox::DS::sig_setmask($sigset) unless $oldset;
+        $r_req = $w_res = undef;
+        $w_req->autoflush(1);
+        $self->{-ipc_req} = $w_req;
+        $self->{-ipc_res} = $r_res;
+        $self->{-ipc_ppid} = $$;
+        awaitpid($pid, \&ipc_worker_reap, $self, @cb_args);
+        $self->{-ipc_pid} = $pid;
+}
+
+sub ipc_worker_reap { # awaitpid callback
+        my ($pid, $self, $cb, @args) = @_;
+        delete $self->{-wq_workers}->{$pid};
+        return $cb->($pid, $self, @args) if $cb;
+        return if !$?;
+        my $s = $? & 127;
+        # TERM(15) is our default exit signal, PIPE(13) is likely w/ pager
+        warn "$self->{-wq_ident} PID:$pid died \$?=$?\n" if $s != 15 && $s != 13
+}
+
+# for base class, override in sub classes
+sub ipc_atfork_prepare {}
+
+sub wq_atexit_child {}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $io = delete($self->{-ipc_atfork_child_close}) or return;
+        close($_) for @$io;
+        undef;
+}
+
+# idempotent, can be called regardless of whether worker is active or not
+sub ipc_worker_stop {
+        my ($self) = @_;
+        my ($pid, $ppid) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
+        my ($w_req, $r_res) = delete(@$self{qw(-ipc_req -ipc_res)});
+        if (!$w_req && !$r_res) {
+                die "unexpected PID:$pid without IPC pipes" if $pid;
+                return; # idempotent
+        }
+        die 'no PID with IPC pipes' unless $pid;
+        $w_req = $r_res = undef;
+        awaitpid($pid) if $$ == $ppid; # for non-event loop
+}
+
+sub _wait_return ($$) {
+        my ($r_res, $sub) = @_;
+        my $ret = _get_rec($r_res) // die "no response on $sub";
+        die $$ret if ref($ret) eq 'PublicInbox::IPC::Die';
+        wantarray ? @$ret : $$ret;
+}
+
+# call $self->$sub(@args), on a worker if ipc_worker_spawn was used
+sub ipc_do {
+        my ($self, $sub, @args) = @_;
+        if (my $w_req = $self->{-ipc_req}) { # run in worker
+                if (defined(wantarray)) {
+                        my $r_res = $self->{-ipc_res} or die 'no ipc_res';
+                        _send_rec($w_req, [ wantarray, $sub, @args ]);
+                        _wait_return($r_res, $sub);
+                } else { # likely, fire-and-forget into pipe
+                        _send_rec($w_req, [ undef , $sub, @args ]);
+                }
+        } else { # run locally
+                $self->$sub(@args);
+        }
+}
+
+# needed when there's multiple IPC workers and the parent forking
+# causes newer siblings to inherit older siblings sockets
+sub ipc_sibling_atfork_child {
+        my ($self) = @_;
+        my ($pid, undef) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
+        delete(@$self{qw(-ipc_req -ipc_res)});
+        $pid == $$ and die "BUG: $$ ipc_atfork_child called on itself";
+}
+
+sub recv_and_run {
+        my ($self, $s2, $len, $full_stream) = @_;
+        my @fds = $recv_cmd->($s2, my $buf, $len // $MY_MAX_ARG_STRLEN);
+        return if scalar(@fds) && !defined($fds[0]);
+        my $n = length($buf) or return 0;
+        my $nfd = 0;
+        for my $fd (@fds) {
+                open(my $cmdfh, '+<&=', $fd);
+                $self->{$nfd++} = $cmdfh;
+                $cmdfh->autoflush(1);
+        }
+        while ($full_stream && $n < $len) {
+                my $r = sysread($s2, $buf, $len - $n, $n);
+                croak "read EOF after $n/$len bytes" if $r == 0;
+                $n = length($buf);
+        }
+        # Sereal dies on truncated data, Storable returns undef
+        my $args = ipc_thaw($buf) // die "thaw error on buffer of size: $n";
+        undef $buf;
+        my $sub = shift @$args;
+        eval { $self->$sub(@$args) };
+        warn "$$ $0 wq_worker: $sub: $@" if $@;
+        delete @$self{0..($nfd-1)};
+        $n;
+}
+
+sub sock_defined { # PublicInbox::DS::post_loop_do CB
+        my ($wqw) = @_;
+        defined($wqw->{sock});
+}
+
+sub wq_worker_loop ($$$) {
+        my ($self, $bcast2, $oldset) = @_;
+        my $wqw = PublicInbox::WQWorker->new($self, $self->{-wq_s2});
+        PublicInbox::WQWorker->new($self, $bcast2) if $bcast2;
+        local @PublicInbox::DS::post_loop_do = (\&sock_defined, $wqw);
+        my $sig = delete($self->{wq_sig});
+        $sig->{CHLD} //= \&PublicInbox::DS::enqueue_reap;
+        PublicInbox::DS::event_loop($sig, $oldset);
+        PublicInbox::DS->Reset;
+}
+
+sub do_sock_stream { # via wq_io_do, for big requests
+        my ($self, $len) = @_;
+        recv_and_run($self, my $s2 = delete $self->{0}, $len, 1);
+}
+
+sub wq_broadcast {
+        my ($self, $sub, @args) = @_;
+        my $wkr = $self->{-wq_workers} or Carp::confess('no -wq_workers');
+        my $buf = ipc_freeze([$sub, @args]);
+        for my $bcast1 (values %$wkr) {
+                my $sock = $bcast1 // $self->{-wq_s1} // next;
+                send($sock, $buf, 0) // croak "send: $!";
+                # XXX shouldn't have to deal with EMSGSIZE here...
+        }
+}
+
+sub stream_in_full ($$$) {
+        my ($s1, $fds, $buf) = @_;
+        socketpair(my $r, my $w, AF_UNIX, SOCK_STREAM, 0);
+        my $n = $send_cmd->($s1, [ fileno($r) ],
+                        ipc_freeze(['do_sock_stream', length($buf)]),
+                        0) // croak "sendmsg: $!";
+        undef $r;
+        $n = $send_cmd->($w, $fds, $buf, 0) // croak "sendmsg: $!";
+        print $w substr($buf, $n) if $n < length($buf); # need > 2G on Linux
+        close $w; # autodies
+}
+
+sub wq_io_do { # always async
+        my ($self, $sub, $ios, @args) = @_;
+        my $s1 = $self->{-wq_s1} or Carp::confess('no -wq_s1');
+        my $fds = [ map { fileno($_) } @$ios ];
+        my $buf = ipc_freeze([$sub, @args]);
+        if (length($buf) > $MY_MAX_ARG_STRLEN) {
+                stream_in_full($s1, $fds, $buf);
+        } else {
+                my $n = $send_cmd->($s1, $fds, $buf, 0);
+                return if defined($n); # likely
+                $!{ETOOMANYREFS} and croak "sendmsg: $! (check RLIMIT_NOFILE)";
+                $!{EMSGSIZE} ? stream_in_full($s1, $fds, $buf) :
+                        croak("sendmsg: $!");
+        }
+}
+
+sub wq_sync_run {
+        my ($self, $wantarray, $sub, @args) = @_;
+        if ($wantarray) {
+                my @ret = eval { $self->$sub(@args) };
+                ipc_return($self->{0}, \@ret, $@);
+        } else { # '' => wantscalar
+                my $ret = eval { $self->$sub(@args) };
+                ipc_return($self->{0}, \$ret, $@);
+        }
+}
+
+sub wq_do {
+        my ($self, $sub, @args) = @_;
+        if (defined(wantarray)) {
+                pipe(my $r, my $w);
+                wq_io_do($self, 'wq_sync_run', [ $w ], wantarray, $sub, @args);
+                undef $w;
+                _wait_return($r, $sub);
+        } else {
+                wq_io_do($self, $sub, [], @args);
+        }
+}
+
+sub prepare_nonblock {
+        ($_[0]->{-wq_s1} // die 'BUG: no {-wq_s1}')->blocking(0);
+        require PublicInbox::WQBlocked;
+}
+
+sub wq_nonblock_do { # always async
+        my ($self, $sub, @args) = @_;
+        my $buf = ipc_freeze([$sub, @args]);
+        if ($self->{wqb}) { # saturated once, assume saturated forever
+                $self->{wqb}->flush_send($buf);
+        } else {
+                $send_cmd->($self->{-wq_s1}, [], $buf, 0) //
+                        ($!{EAGAIN} ? PublicInbox::WQBlocked->new($self, $buf)
+                                        : croak("sendmsg: $!"));
+        }
+}
+
+sub _wq_worker_start {
+        my ($self, $oldset, $fields, $one, @cb_args) = @_;
+        my ($bcast1, $bcast2);
+        $one or socketpair($bcast1, $bcast2, AF_UNIX, SOCK_SEQPACKET, 0);
+        my $pid = PublicInbox::DS::fork_persist;
+        if ($pid == 0) {
+                undef $bcast1;
+                delete @$self{qw(-wq_s1 -wq_ppid)};
+                $self->{-wq_worker_nr} =
+                                keys %{delete($self->{-wq_workers}) // {}};
+                $SIG{$_} = 'DEFAULT' for (qw(TTOU TTIN TERM QUIT INT CHLD));
+                local $0 = $one ? $self->{-wq_ident} :
+                        "$self->{-wq_ident} $self->{-wq_worker_nr}";
+                # ensure we properly exit even if warn() dies:
+                my $end = on_destroy \&exit_exception;
+                eval {
+                        $fields //= {};
+                        local @$self{keys %$fields} = values(%$fields);
+                        my $on_destroy = $self->ipc_atfork_child;
+                        local @SIG{keys %SIG} = values %SIG;
+                        wq_worker_loop($self, $bcast2, $oldset);
+                };
+                warn "worker $self->{-wq_ident} PID:$$ died: $@" if $@;
+                undef $end; # trigger exit
+        } else {
+                $self->{-wq_workers}->{$pid} = $bcast1;
+                awaitpid($pid, \&ipc_worker_reap, $self, @cb_args);
+        }
+}
+
+# starts workqueue workers if Sereal or Storable is installed
+sub wq_workers_start {
+        my ($self, $ident, $nr_workers, $oldset, $fields, @cb_args) = @_;
+        ($send_cmd && $recv_cmd) or return;
+        return if $self->{-wq_s1}; # idempotent
+        socketpair($self->{-wq_s1}, $self->{-wq_s2},AF_UNIX, SOCK_SEQPACKET, 0);
+        $self->ipc_atfork_prepare;
+        $nr_workers //= $self->{-wq_nr_workers}; # was set earlier
+        my $sigset = $oldset // PublicInbox::DS::block_signals();
+        $self->{-wq_workers} = {};
+        $self->{-wq_ident} = $ident;
+        my $one = $nr_workers == 1;
+        $self->{-wq_nr_workers} = $nr_workers;
+        for (1..$nr_workers) {
+                _wq_worker_start($self, $sigset, $fields, $one, @cb_args);
+        }
+        PublicInbox::DS::sig_setmask($sigset) unless $oldset;
+        $self->{-wq_ppid} = $$;
+}
+
+sub wq_close {
+        my ($self) = @_;
+        if (my $wqb = delete $self->{wqb}) {
+                $wqb->enq_close;
+        }
+        delete @$self{qw(-wq_s1 -wq_s2)} or return;
+        return if ($self->{-wq_ppid} // -1) != $$;
+        awaitpid($_) for keys %{$self->{-wq_workers}};
+}
+
+sub wq_kill {
+        my ($self, $sig) = @_;
+        kill($sig // 'TERM', keys %{$self->{-wq_workers}});
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        my $ppid = $self->{-wq_ppid};
+        wq_kill($self) if $ppid && $ppid == $$;
+        wq_close($self);
+        ipc_worker_stop($self);
+}
+
+# _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl,
+# emitted using: $^X devel/sysdefs-list
+my %NPROCESSORS_ONLN = (
+        linux => 84,
+        freebsd => 58,
+        dragonfly => 58,
+        openbsd => 503,
+        netbsd => 1002
+);
+
+sub detect_nproc () {
+        my $n = $NPROCESSORS_ONLN{$^O};
+        return POSIX::sysconf($n) if defined $n;
+
+        # getconf(1) is POSIX, but *NPROCESSORS* vars are not even if
+        # glibc, {Free,Net,Open}BSD all support them.
+        for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
+                `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
+        }
+        # note: GNU nproc(1) checks CPU affinity, which is nice but
+        # isn't remotely portable
+        undef
+}
+
+# SATA storage lags behind what CPUs are capable of, so relying on
+# nproc(1) can be misleading and having extra Xapian shards is a
+# waste of FDs and space.  It can also lead to excessive IO latency
+# and slow things down.  Users on NVME or other fast storage can
+# use the NPROC env or switches in our script/public-inbox-* programs
+# to increase Xapian shards
+our $NPROC_MAX_DEFAULT = 4;
+
+sub nproc_shards ($) {
+        my ($creat_opt) = @_;
+        my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH';
+        $n //= $ENV{NPROC};
+        if (!$n) {
+                # assume 2 cores if not detectable or zero
+                state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2;
+                $n = $NPROC_DETECTED;
+                $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
+        }
+
+        # subtract for the main process and git-fast-import
+        $n -= 1;
+        $n < 1 ? 1 : $n;
+}
+
+1;
diff --git a/lib/PublicInbox/IdxStack.pm b/lib/PublicInbox/IdxStack.pm
index ce75b46a..7681ee6f 100644
--- a/lib/PublicInbox/IdxStack.pm
+++ b/lib/PublicInbox/IdxStack.pm
@@ -1,25 +1,35 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # temporary stack for public-inbox-index
+# FIXME: needs to support multi-hash in the same repo once git itself can
 package PublicInbox::IdxStack;
-use v5.10.1;
-use strict;
+use v5.12;
 use Fcntl qw(:seek);
-use constant FMT => eval { pack('Q', 1) } ? 'A1QQH*' : 'A1IIH*';
+use constant PACK_FMT => eval { pack('Q', 1) } ? 'A1QQH*H*' : 'A1IIH*H*';
+use autodie qw(open seek);
+use PublicInbox::IO qw(read_all);
 
 # start off in write-only mode
 sub new {
-        open(my $io, '+>', undef) or die "open: $!";
+        open(my $io, '+>', undef);
+        # latest_cmt is still useful when the newest revision is a `d'(elete),
+        # otherwise we favor $sync->{latest_cmt} for checkpoints and {quit}
         bless { wr => $io, latest_cmt => $_[1] }, __PACKAGE__
 }
 
 # file_char = [d|m]
 sub push_rec {
-        my ($self, $file_char, $at, $ct, $blob_oid) = @_;
-        my $rec = pack(FMT, $file_char, $at, $ct, $blob_oid);
-        $self->{rec_size} //= length($rec);
-        print { $self->{wr} } $rec or die "print: $!";
+        my ($self, $file_char, $at, $ct, $blob_oid, $cmt_oid) = @_;
+        my $rec = pack(PACK_FMT, $file_char, $at, $ct, $blob_oid, $cmt_oid);
+        $self->{unpack_fmt} // do {
+                my $len = length($cmt_oid);
+                my $fmt = PACK_FMT;
+                $fmt =~ s/H\*/H$len/g;
+                $self->{rec_size} = length($rec);
+                $self->{unpack_fmt} = $fmt;
+        };
+        print { $self->{wr} } $rec;
         $self->{tot_size} += length($rec);
 }
 
@@ -41,12 +51,8 @@ sub pop_rec {
         my $sz = $self->{rec_size} or return;
         my $rec_pos = $self->{tot_size} -= $sz;
         return if $rec_pos < 0;
-        my $io = $self->{rd};
-        seek($io, $rec_pos, SEEK_SET) or die "seek: $!";
-        my $r = read($io, my $buf, $sz);
-        defined($r) or die "read: $!";
-        $r == $sz or die "read($r != $sz)";
-        unpack(FMT, $buf);
+        seek($self->{rd}, $rec_pos, SEEK_SET);
+        unpack($self->{unpack_fmt}, read_all($self->{rd}, $sz));
 }
 
 1;
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 1a226cc7..fefc282a 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # git fast-import-based ssoma-mda MDA replacement
@@ -6,10 +6,9 @@
 # and public-inbox-watch. Not the WWW or NNTP code which only
 # requires read-only access.
 package PublicInbox::Import;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::Lock);
-use v5.10.1;
-use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::Spawn qw(run_die run_qx spawn);
 use PublicInbox::MID qw(mids mid2path);
 use PublicInbox::Address;
 use PublicInbox::Smsg;
@@ -18,14 +17,27 @@ use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::MDA;
 use PublicInbox::Eml;
 use POSIX qw(strftime);
+use autodie qw(socketpair);
+use Carp qw(croak);
+use Socket qw(AF_UNIX SOCK_STREAM);
+use PublicInbox::IO qw(read_all);
+
+sub default_branch () {
+        state $default_branch = do {
+                my $h = run_qx([qw(git config --global init.defaultBranch)],
+                                 { GIT_CONFIG => undef });
+                chomp $h;
+                $h eq '' ? 'refs/heads/master' : "refs/heads/$h";
+        }
+}
 
 sub new {
         # we can't change arg order, this is documented in POD
         # and external projects may rely on it:
         my ($class, $git, $name, $email, $ibx) = @_;
-        my $ref = 'refs/heads/master';
+        my $ref;
         if ($ibx) {
-                $ref = $ibx->{ref_head} // 'refs/heads/master';
+                $ref = $ibx->{ref_head};
                 $name //= $ibx->{name};
                 $email //= $ibx->{-primary_address};
                 $git //= $ibx->git;
@@ -34,7 +46,7 @@ sub new {
                 git => $git,
                 ident => "$name <$email>",
                 mark => 1,
-                ref => $ref,
+                ref => $ref // default_branch,
                 ibx => $ibx,
                 path_type => '2/38', # or 'v2'
                 lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this
@@ -45,41 +57,36 @@ sub new {
 # idempotent start function
 sub gfi_start {
         my ($self) = @_;
-
-        return ($self->{in}, $self->{out}) if $self->{pid};
-
-        my (@ret, $out_r, $out_w);
-        pipe($out_r, $out_w) or die "pipe failed: $!";
+        my $io = $self->{io};
+        return $io if $io;
+        socketpair($io, my $s2, AF_UNIX, SOCK_STREAM, 0);
+        $io->autoflush(1);
 
         $self->lock_acquire;
         eval {
                 my ($git, $ref) = @$self{qw(git ref)};
                 local $/ = "\n";
                 chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
+                die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?;
                 if ($self->{path_type} ne '2/38' && $self->{tip}) {
-                        local $/ = "\0";
-                        my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
-                        chomp @t;
-                        $self->{-tree} = { map { $_ => 1 } @t };
+                        my $t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
+                        die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?;
+                        $self->{-tree} = { map { $_ => 1 } split(/\0/, $t) };
                 }
-                my @cmd = ('git', "--git-dir=$git->{git_dir}",
-                        qw(fast-import --quiet --done --date-format=raw));
-                my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
-                $out_w->autoflush(1);
-                $self->{in} = $in_r;
-                $self->{out} = $out_w;
-                $self->{pid} = $pid;
+                my $gfi = $git->cmd(qw(fast-import
+                                        --quiet --done --date-format=raw));
+                my $pid = spawn($gfi, undef, { 0 => $s2, 1 => $s2 });
                 $self->{nchg} = 0;
-                @ret = ($in_r, $out_w);
+                $self->{io} = PublicInbox::IO::attach_pid($io, $pid);
         };
         if ($@) {
                 $self->lock_release;
                 die $@;
         }
-        @ret;
+        $self->{io};
 }
 
-sub wfail () { die "write to fast-import failed: $!" }
+sub wfail () { croak "write to fast-import failed: $!" }
 
 sub now_raw () { time . ' +0000' }
 
@@ -91,60 +98,43 @@ sub norm_body ($) {
 }
 
 # only used for v1 (ssoma) inboxes
-sub _check_path ($$$$) {
-        my ($r, $w, $tip, $path) = @_;
+sub _check_path ($$$) {
+        my ($io, $tip, $path) = @_;
         return if $tip eq '';
-        print $w "ls $tip $path\n" or wfail;
+        print $io "ls $tip $path\n" or wfail;
         local $/ = "\n";
-        defined(my $info = <$r>) or die "EOF from fast-import: $!";
+        my $info = <$io> // die "EOF from fast-import: $!";
         $info =~ /\Amissing / ? undef : $info;
 }
 
-sub _cat_blob ($$$) {
-        my ($r, $w, $oid) = @_;
-        print $w "cat-blob $oid\n" or wfail;
+sub _cat_blob ($$) {
+        my ($io, $oid) = @_;
+        print $io "cat-blob $oid\n" or wfail;
         local $/ = "\n";
-        my $info = <$r>;
-        defined $info or die "EOF from fast-import / cat-blob: $!";
+        my $info = <$io> // die "EOF from fast-import / cat-blob: $!";
         $info =~ /\A[a-f0-9]{40,} blob ([0-9]+)\n\z/ or return;
-        my $left = $1;
-        my $offset = 0;
-        my $buf = '';
-        my $n;
-        while ($left > 0) {
-                $n = read($r, $buf, $left, $offset);
-                defined($n) or die "read cat-blob failed: $!";
-                $n == 0 and die 'fast-export (cat-blob) died';
-                $left -= $n;
-                $offset += $n;
-        }
-        $n = read($r, my $lf, 1);
-        defined($n) or die "read final byte of cat-blob failed: $!";
-        die "bad read on final byte: <$lf>" if $lf ne "\n";
-
-        # fixup some bugginess in old versions:
-        $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+        my $buf = read_all($io, my $len = $1 + 1);
+        my $lf = chop $buf;
+        croak "bad read on final byte: <$lf>" if $lf ne "\n";
         \$buf;
 }
 
 sub cat_blob {
         my ($self, $oid) = @_;
-        my ($r, $w) = $self->gfi_start;
-        _cat_blob($r, $w, $oid);
+        _cat_blob($self->{io} // return, $oid);
 }
 
 sub check_remove_v1 {
-        my ($r, $w, $tip, $path, $mime) = @_;
+        my ($io, $tip, $path, $mime) = @_;
 
-        my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef);
+        my $info = _check_path($io, $tip, $path) or return ('MISSING',undef);
         $info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info";
         my $oid = $1;
-        my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed";
-        my $cur = PublicInbox::Eml->new($msg);
-        my $cur_s = $cur->header('Subject');
-        $cur_s = '' unless defined $cur_s;
-        my $cur_m = $mime->header('Subject');
-        $cur_m = '' unless defined $cur_m;
+        my $bref = _cat_blob($io, $oid) or die "BUG: cat-blob $1 failed";
+        PublicInbox::Eml::strip_from($$bref);
+        my $cur = PublicInbox::Eml->new($bref);
+        my $cur_s = $cur->header('Subject') // '';
+        my $cur_m = $mime->header('Subject') // '';
         if ($cur_s ne $cur_m || norm_body($cur) ne norm_body($mime)) {
                 return ('MISMATCH', $cur);
         }
@@ -153,16 +143,15 @@ sub check_remove_v1 {
 
 sub checkpoint {
         my ($self) = @_;
-        return unless $self->{pid};
-        print { $self->{out} } "checkpoint\n" or wfail;
+        print { $self->{io} // return } "checkpoint\n" or wfail;
         undef;
 }
 
 sub progress {
         my ($self, $msg) = @_;
-        return unless $self->{pid};
-        print { $self->{out} } "progress $msg\n" or wfail;
-        readline($self->{in}) eq "progress $msg\n" or die
+        my $io = $self->{io} or return;
+        print $io "progress $msg\n" or wfail;
+        readline($io) eq "progress $msg\n" or die
                 "progress $msg not received\n";
         undef;
 }
@@ -172,14 +161,14 @@ sub _update_git_info ($$) {
         # for compatibility with existing ssoma installations
         # we can probably remove this entirely by 2020
         my $git_dir = $self->{git}->{git_dir};
-        my @cmd = ('git', "--git-dir=$git_dir");
+        my @cmd = @{$self->{git}->cmd};
         my $index = "$git_dir/ssoma.index";
         if (-e $index && !$ENV{FAST}) {
                 my $env = { GIT_INDEX_FILE => $index };
                 run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env);
         }
-        eval { run_die([@cmd, 'update-server-info']) };
         my $ibx = $self->{ibx};
+        eval { run_die([@cmd, 'update-server-info']) } if $ibx;
         if ($ibx && $ibx->version == 1 && -d "$ibx->{inboxdir}/public-inbox" &&
                                 eval { require PublicInbox::SearchIdx }) {
                 eval {
@@ -188,7 +177,10 @@ sub _update_git_info ($$) {
                 };
                 warn "$ibx->{inboxdir} index failed: $@\n" if $@;
         }
-        eval { run_die([@cmd, qw(gc --auto)]) } if $do_gc;
+        if ($do_gc) {
+                my @quiet = (-t STDERR ? () : '-q');
+                eval { run_die([@cmd, qw(gc --auto), @quiet]) }
+        }
 }
 
 sub barrier {
@@ -209,10 +201,9 @@ sub barrier {
 # used for v2
 sub get_mark {
         my ($self, $mark) = @_;
-        die "not active\n" unless $self->{pid};
-        my ($r, $w) = $self->gfi_start;
-        print $w "get-mark $mark\n" or wfail;
-        defined(my $oid = <$r>) or die "get-mark failed, need git 2.6.0+\n";
+        my $io = $self->{io} or croak "not active\n";
+        print $io "get-mark $mark\n" or wfail;
+        my $oid = <$io> // die "get-mark failed, need git 2.6.0+\n";
         chomp($oid);
         $oid;
 }
@@ -229,11 +220,11 @@ sub remove {
         my $path_type = $self->{path_type};
         my ($path, $err, $cur, $blob);
 
-        my ($r, $w) = $self->gfi_start;
+        my $io = gfi_start($self);
         my $tip = $self->{tip};
         if ($path_type eq '2/38') {
                 $path = mid2path(v1_mid0($mime));
-                ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
+                ($err, $cur) = check_remove_v1($io, $tip, $path, $mime);
                 return ($err, $cur) if $err;
         } else {
                 my $sref;
@@ -245,7 +236,7 @@ sub remove {
                 }
                 my $len = length($$sref);
                 $blob = $self->{mark}++;
-                print $w "blob\nmark :$blob\ndata $len\n",
+                print $io "blob\nmark :$blob\ndata $len\n",
                         $$sref, "\n" or wfail;
         }
 
@@ -253,22 +244,22 @@ sub remove {
         my $commit = $self->{mark}++;
         my $parent = $tip =~ /\A:/ ? $tip : undef;
         unless ($parent) {
-                print $w "reset $ref\n" or wfail;
+                print $io "reset $ref\n" or wfail;
         }
         my $ident = $self->{ident};
         my $now = now_raw();
         $msg //= 'rm';
         my $len = length($msg) + 1;
-        print $w "commit $ref\nmark :$commit\n",
+        print $io "commit $ref\nmark :$commit\n",
                 "author $ident $now\n",
                 "committer $ident $now\n",
                 "data $len\n$msg\n\n",
                 'from ', ($parent ? $parent : $tip), "\n" or wfail;
         if (defined $path) {
-                print $w "D $path\n\n" or wfail;
+                print $io "D $path\n\n" or wfail;
         } else {
-                clean_tree_v2($self, $w, 'd');
-                print $w "M 100644 :$blob d\n\n" or wfail;
+                clean_tree_v2($self, $io, 'd');
+                print $io "M 100644 :$blob d\n\n" or wfail;
         }
         $self->{nchg}++;
         (($self->{tip} = ":$commit"), $cur);
@@ -328,11 +319,40 @@ sub extract_cmt_info ($;$) {
 }
 
 # kill potentially confusing/misleading headers
+our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length),
+                        qw(Status X-Status));
+our $DROP_UNIQUE_UNSUB;
 sub drop_unwanted_headers ($) {
-        my ($mime) = @_;
+        my ($eml) = @_;
+        for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) {
+                $eml->header_set($_);
+        }
 
-        $mime->header_set($_) for qw(Bytes Lines Content-Length Status);
-        $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS;
+        # We don't want public-inbox readers to be able to unsubcribe the
+        # address which does archiving.  WARNING: this breaks DKIM if the
+        # mailing list sender follows RFC 8058, section 4; but breaking DKIM
+        # (or have senders ignore RFC 8058 sec. 4) is preferable to having
+        # saboteurs unsubscribing independent archivists:
+        if ($DROP_UNIQUE_UNSUB && grep(/\AList-Unsubscribe=One-Click\z/,
+                                $eml->header_raw('List-Unsubscribe-Post'))) {
+                for (qw(List-Unsubscribe-Post List-Unsubscribe)) {
+                        $eml->header_set($_)
+                }
+        }
+}
+
+sub load_config ($;$) {
+        my ($cfg, $do_exit) = @_;
+        my $v = $cfg->{lc 'publicinboxImport.dropUniqueUnsubscribe'};
+        if (defined $v) {
+                $DROP_UNIQUE_UNSUB = $cfg->git_bool($v) // do {
+                        warn <<EOM;
+E: publicinboxImport.dropUniqueUnsubscribe=$v in $cfg->{-f} is not boolean
+EOM
+                        $do_exit //= \&CORE::exit;
+                        $do_exit->(78); # EX_CONFIG
+                };
+        }
 }
 
 # used by V2Writable, too
@@ -356,11 +376,11 @@ sub v1_mid0 ($) {
         $mids->[0];
 }
 sub clean_tree_v2 ($$$) {
-        my ($self, $w, $keep) = @_;
+        my ($self, $io, $keep) = @_;
         my $tree = $self->{-tree} or return; #v2 only
         delete $tree->{$keep};
         foreach (keys %$tree) {
-                print $w "D $_\n" or wfail;
+                print $io "D $_\n" or wfail;
         }
         %$tree = ($keep => 1);
 }
@@ -379,10 +399,10 @@ sub add {
                 $path = 'm';
         }
 
-        my ($r, $w) = $self->gfi_start;
+        my $io = gfi_start($self);
         my $tip = $self->{tip};
         if ($path_type eq '2/38') {
-                _check_path($r, $w, $tip, $path) and return;
+                _check_path($io, $tip, $path) and return;
         }
 
         drop_unwanted_headers($mime);
@@ -396,48 +416,51 @@ sub add {
         my $raw_email = $mime->{-public_inbox_raw} // $mime->as_string;
         my $n = length($raw_email);
         $self->{bytes_added} += $n;
-        print $w "blob\nmark :$blob\ndata ", $n, "\n" or wfail;
-        print $w $raw_email, "\n" or wfail;
+        print $io "blob\nmark :$blob\ndata $n\n", $raw_email, "\n" or wfail;
 
         # v2: we need this for Xapian
         if ($smsg) {
                 $smsg->{blob} = $self->get_mark(":$blob");
-                $smsg->{raw_bytes} = $n;
-                $smsg->{-raw_email} = \$raw_email;
+                $smsg->set_bytes($raw_email, $n);
+                if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
+                        my $eidx_git = delete $smsg->{-eidx_git};
+
+                        # we need this sharedkv to dedupe blobs added in the
+                        # same fast-import transaction
+                        my $u = $self->{uniq_skv} //= do {
+                                require PublicInbox::SharedKV;
+                                my $x = PublicInbox::SharedKV->new;
+                                $x->dbh;
+                                $x;
+                        };
+                        return if !$u->set_maybe($smsg->oidbin, 1);
+                        return if (!$oidx->vivify_xvmd($smsg) &&
+                                        $eidx_git->check($smsg->{blob}));
+                }
         }
         my $ref = $self->{ref};
         my $commit = $self->{mark}++;
         my $parent = $tip =~ /\A:/ ? $tip : undef;
 
         unless ($parent) {
-                print $w "reset $ref\n" or wfail;
+                print $io "reset $ref\n" or wfail;
         }
 
-        print $w "commit $ref\nmark :$commit\n",
+        print $io "commit $ref\nmark :$commit\n",
                 "author $author $at\n",
-                "committer $self->{ident} $ct\n" or wfail;
-        print $w "data ", (length($subject) + 1), "\n",
+                "committer $self->{ident} $ct\n",
+                "data ", (length($subject) + 1), "\n",
                 $subject, "\n\n" or wfail;
         if ($tip ne '') {
-                print $w 'from ', ($parent ? $parent : $tip), "\n" or wfail;
+                print $io 'from ', ($parent ? $parent : $tip), "\n" or wfail;
         }
-        clean_tree_v2($self, $w, $path);
-        print $w "M 100644 :$blob $path\n\n" or wfail;
+        clean_tree_v2($self, $io, $path);
+        print $io "M 100644 :$blob $path\n\n" or wfail;
         $self->{nchg}++;
         $self->{tip} = ":$commit";
 }
 
-sub run_die ($;$$) {
-        my ($cmd, $env, $rdr) = @_;
-        my $pid = spawn($cmd, $env, $rdr);
-        waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish';
-        $? == 0 or die join(' ', @$cmd) . " failed: $?\n";
-}
-
-my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n",
-                'description' => <<EOD,
-Unnamed repository; edit this file 'description' to name the repository.
-EOD
+my @INIT_FILES = ('HEAD' => undef, # filled in at runtime
                 'config' => <<EOC);
 [core]
         repositoryFormatVersion = 0
@@ -448,32 +471,37 @@ EOD
 EOC
 
 sub init_bare {
-        my ($dir) = @_; # or self
+        my ($dir, $head, $fmt) = @_; # or self
         $dir = $dir->{git}->{git_dir} if ref($dir);
         require File::Path;
-        File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
-        for (my $i = 0; $i < @INIT_FILES; $i++) {
-                my $f = $dir.'/'.$INIT_FILES[$i++];
+        File::Path::make_path(map { $dir.$_ } qw(/objects/info /refs/heads));
+        $INIT_FILES[1] //= 'ref: '.default_branch."\n";
+        my @fn_contents = @INIT_FILES;
+        $fn_contents[1] = "ref: refs/heads/$head\n" if defined $head;
+        $fn_contents[3] = <<EOM if defined($fmt) && $fmt ne 'sha1';
+[core]
+        repositoryFormatVersion = 1
+        filemode = true
+        bare = true
+[extensions]
+        objectFormat = $fmt
+EOM
+        while (my ($fn, $contents) = splice(@fn_contents, 0, 2)) {
+                my $f = $dir.'/'.$fn;
                 next if -f $f;
-                open my $fh, '>', $f or die "open $f: $!";
-                print $fh $INIT_FILES[$i] or die "print $f: $!";
-                close $fh or die "close $f: $!";
+                PublicInbox::IO::write_file '>', $f, $contents;
         }
 }
 
 # true if locked and active
-sub active { !!$_[0]->{out} }
+sub active { !!$_[0]->{io} }
 
 sub done {
         my ($self) = @_;
-        my $w = delete $self->{out} or return;
+        my $io = delete $self->{io} or return;
         eval {
-                my $r = delete $self->{in} or die 'BUG: missing {in} when done';
-                print $w "done\n" or wfail;
-                my $pid = delete $self->{pid} or
-                                die 'BUG: missing {pid} when done';
-                waitpid($pid, 0) == $pid or die 'fast-import did not finish';
-                $? == 0 or die "fast-import failed: $?";
+                print $io "done\n" or wfail;
+                $io->close or croak "close fast-import \$?=$?"; # reaps
         };
         my $wait_err = $@;
         my $nchg = delete $self->{nchg};
@@ -486,16 +514,10 @@ sub done {
         die $wait_err if $wait_err;
 }
 
-sub atfork_child {
-        my ($self) = @_;
-        foreach my $f (qw(in out)) {
-                next unless defined($self->{$f});
-                close $self->{$f} or die "failed to close import[$f]: $!\n";
-        }
-}
+sub atfork_child { (delete($_[0]->{io}) // return)->close }
 
-sub digest2mid ($$) {
-        my ($dig, $hdr) = @_;
+sub digest2mid ($$;$) {
+        my ($dig, $hdr, $fallback_time) = @_;
         my $b64 = $dig->clone->b64digest;
         # Make our own URLs nicer:
         # See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648
@@ -504,7 +526,7 @@ sub digest2mid ($$) {
         # Add a date prefix to prevent a leading '-' in case that trips
         # up some tools (e.g. if a Message-ID were a expected as a
         # command-line arg)
-        my $dt = msg_datestamp($hdr);
+        my $dt = msg_datestamp($hdr, $fallback_time);
         $dt = POSIX::strftime('%Y%m%d%H%M%S', gmtime($dt));
         "$dt.$b64" . '@z';
 }
@@ -545,7 +567,7 @@ sub replace_oids {
         my $git = $self->{git};
         my @export = (qw(fast-export --no-data --use-done-feature), $old);
         my $rd = $git->popen(@export);
-        my ($r, $w) = $self->gfi_start;
+        my $io = gfi_start($self);
         my @buf;
         my $nreplace = 0;
         my @oids;
@@ -556,17 +578,14 @@ sub replace_oids {
                         push @buf, "reset $tmp\n";
                 } elsif (/^commit (?:.+)/) {
                         if (@buf) {
-                                print $w @buf or wfail;
+                                print $io @buf or wfail;
                                 @buf = ();
                         }
                         push @buf, "commit $tmp\n";
                 } elsif (/^data ([0-9]+)/) {
                         # only commit message, so $len is small:
-                        my $len = $1; # + 1 for trailing "\n"
                         push @buf, $_;
-                        my $n = read($rd, my $buf, $len) or die "read: $!";
-                        $len == $n or die "short read ($n < $len)";
-                        push @buf, $buf;
+                        push @buf, read_all($rd, my $len = $1);
                 } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
                         my ($oid, $path) = ($1, $2);
                         $tree->{$path} = 1;
@@ -593,7 +612,7 @@ sub replace_oids {
                                 rewrite_commit($self, \@oids, \@buf, $mime);
                                 $nreplace++;
                         }
-                        print $w @buf, "\n" or wfail;
+                        print $io @buf, "\n" or wfail;
                         @buf = ();
                 } elsif ($_ eq "done\n") {
                         $done = 1;
@@ -604,15 +623,15 @@ sub replace_oids {
                         push @buf, $_;
                 }
         }
-        close $rd or die "close fast-export failed: $?";
+        $rd->close or die "E: git @export (\$?=$?)";
         if (@buf) {
-                print $w @buf or wfail;
+                print $io @buf or wfail;
         }
         die 'done\n not seen from fast-export' unless $done;
         chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
         $self->{nchg} = 0; # prevent _update_git_info until update-ref:
         $self->done;
-        my @git = ('git', "--git-dir=$git->{git_dir}");
+        my @git = @{$git->cmd};
 
         run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;
 
diff --git a/lib/PublicInbox/In2Tie.pm b/lib/PublicInbox/In2Tie.pm
index 7dee3627..3689432b 100644
--- a/lib/PublicInbox/In2Tie.pm
+++ b/lib/PublicInbox/In2Tie.pm
@@ -1,10 +1,10 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # used to ensure PublicInbox::DS can call fileno() as a function
 # on Linux::Inotify2 objects
 package PublicInbox::In2Tie;
-use strict;
+use v5.12;
 use Symbol qw(gensym);
 
 sub io {
diff --git a/lib/PublicInbox/In3Event.pm b/lib/PublicInbox/In3Event.pm
new file mode 100644
index 00000000..f93dc0da
--- /dev/null
+++ b/lib/PublicInbox/In3Event.pm
@@ -0,0 +1,24 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# duck-type compatible with Linux::Inotify2::Event for pure Perl
+# PublicInbox::Inotify3 w/o callback support
+package PublicInbox::In3Event;
+use v5.12;
+
+sub w { $_[0]->[2] } # PublicInbox::In3Watch
+sub mask { $_[0]->[0] }
+sub name { $_[0]->[1] }
+
+sub fullname {
+        my ($name, $wname) = ($_[0]->[1], $_[0]->[2]->name);
+        length($name) ? "$wname/$name" : $wname;
+}
+
+my $buf = '';
+while (my ($sym, $mask) = each %PublicInbox::Inotify3::events) {
+        $buf .= "sub $sym { \$_[0]->[0] & $mask }\n";
+}
+eval $buf;
+
+1;
diff --git a/lib/PublicInbox/In3Watch.pm b/lib/PublicInbox/In3Watch.pm
new file mode 100644
index 00000000..bdb91869
--- /dev/null
+++ b/lib/PublicInbox/In3Watch.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# duck-type compatible with Linux::Inotify2::Watch for pure Perl
+# PublicInbox::Inotify3 for our needs, only
+package PublicInbox::In3Watch;
+use v5.12;
+
+sub mask { $_[0]->[1] }
+sub name { $_[0]->[2] }
+
+sub cancel {
+        my ($self) = @_;
+        my ($wd, $in3) = @$self[0, 3];
+        $in3 or return 1; # already canceled
+        pop @$self;
+        $in3->rm_watch($wd);
+}
+
+1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index b0894a7d..dd689221 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -1,102 +1,53 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Represents a public-inbox (which may have multiple mailing addresses)
 package PublicInbox::Inbox;
 use strict;
-use warnings;
+use v5.10.1;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
 use PublicInbox::Eml;
+use List::Util qw(max);
+use Carp qw(croak);
+use PublicInbox::Compat qw(uniqstr);
 
-# Long-running "git-cat-file --batch" processes won't notice
-# unlinked packs, so we need to restart those processes occasionally.
-# Xapian and SQLite file handles are mostly stable, but sometimes an
-# admin will attempt to replace them atomically after compact/vacuum
-# and we need to be prepared for that.
-my $cleanup_timer;
-my $cleanup_avail = -1; # 0, or 1
-my $have_devel_peek;
-my $CLEANUP = {}; # string(inbox) -> inbox
-
-sub git_cleanup ($) {
-        my ($self) = @_;
-        my $git = $self->{git} or return;
-        $git->cleanup;
+# in case DBs get replaced (Xapcmd does it for v1)
+sub check_inodes ($) {
+        for (qw(over mm)) { $_[0]->{$_}->check_inodes if $_[0]->{$_} }
 }
 
-sub cleanup_task () {
-        $cleanup_timer = undef;
-        my $next = {};
-        for my $ibx (values %$CLEANUP) {
-                my $again;
-                if ($have_devel_peek) {
-                        foreach my $f (qw(search)) {
-                                # we bump refcnt by assigning tmp, here:
-                                my $tmp = $ibx->{$f} or next;
-                                next if Devel::Peek::SvREFCNT($tmp) > 2;
-                                delete $ibx->{$f};
-                                # refcnt is zero when tmp is out-of-scope
-                        }
-                }
-                git_cleanup($ibx);
-                if (my $gits = $ibx->{-repo_objs}) {
-                        foreach my $git (@$gits) {
-                                $again = 1 if $git->cleanup;
-                        }
-                }
-                check_inodes($ibx);
-                if ($have_devel_peek) {
-                        $again ||= !!$ibx->{search};
-                }
-                $next->{"$ibx"} = $ibx if $again;
-        }
-        $CLEANUP = $next;
+# search/over/mm hold onto FDs and description+cloneurl may get updated.
+# creating long-lived allocations in the same phase as short-lived
+# allocations also leads to fragmentation, so we don't want some stuff
+# living too long.
+sub do_cleanup {
+        my ($ibx) = @_;
+        my ($srch) = delete @$ibx{qw(search over mm description cloneurl)};
+        $srch //= $ibx; # extsearch
+        delete @$srch{qw(xdb qp)};
 }
 
-sub cleanup_possible () {
+sub _cleanup_later ($) {
         # no need to require DS, here, if it were enabled another
         # module would've require'd it, already
-        eval { PublicInbox::DS::in_loop() } or return 0;
-
-        eval {
-                require Devel::Peek; # needs separate package in Fedora
-                $have_devel_peek = 1;
-        };
-        1;
-}
-
-sub _cleanup_later ($) {
-        my ($self) = @_;
-        $cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
-        return if $cleanup_avail != 1;
-        $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
-        $CLEANUP->{"$self"} = $self;
-}
-
-sub _set_uint ($$$) {
-        my ($opts, $field, $default) = @_;
-        my $val = $opts->{$field};
-        if (defined $val) {
-                $val = $val->[-1] if ref($val) eq 'ARRAY';
-                $val = undef if $val !~ /\A[0-9]+\z/;
-        }
-        $opts->{$field} = $val || $default;
+        eval { PublicInbox::DS::in_loop() } and
+                PublicInbox::DS::add_uniq_timer($_[0]+0, 30, \&do_cleanup, @_)
 }
 
 sub _set_limiter ($$$) {
-        my ($self, $pi_config, $pfx) = @_;
+        my ($self, $pi_cfg, $pfx) = @_;
         my $lkey = "-${pfx}_limiter";
-        $self->{$lkey} ||= do {
+        $self->{$lkey} //= do {
                 # full key is: publicinbox.$NAME.httpbackendmax
                 my $mkey = $pfx.'max';
                 my $val = $self->{$mkey} or return;
                 my $lim;
                 if ($val =~ /\A[0-9]+\z/) {
-                        require PublicInbox::Qspawn;
-                        $lim = PublicInbox::Qspawn::Limiter->new($val);
+                        require PublicInbox::Limiter;
+                        $lim = PublicInbox::Limiter->new($val);
                 } elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
-                        $lim = $pi_config->limiter($val);
+                        $lim = $pi_cfg->limiter($val);
                         warn "$mkey limiter=$val not found\n" if !$lim;
                 } else {
                         warn "$mkey limiter=$val not understood\n";
@@ -110,48 +61,46 @@ sub new {
         my $v = $opts->{address} ||= [ 'public-inbox@example.com' ];
         my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
         $opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
-        my $pi_config = delete $opts->{-pi_config};
-        _set_limiter($opts, $pi_config, 'httpbackend');
-        _set_uint($opts, 'feedmax', 25);
-        $opts->{nntpserver} ||= $pi_config->{'publicinbox.nntpserver'};
-        my $dir = $opts->{inboxdir};
-        if (defined $dir && -f "$dir/inbox.lock") {
-                $opts->{version} = 2;
+        my $pi_cfg = delete $opts->{-pi_cfg};
+        _set_limiter($opts, $pi_cfg, 'httpbackend');
+        my $fmax = $opts->{feedmax};
+        if (defined($fmax) && $fmax =~ /\A[0-9]+\z/) {
+                $opts->{feedmax} += 0;
+        } else {
+                delete $opts->{feedmax};
         }
-
         # allow any combination of multi-line or comma-delimited hide entries
-        my $hide = {};
-        if (defined(my $h = $opts->{hide})) {
-                foreach my $v (@$h) {
-                        $hide->{$_} = 1 foreach (split(/\s*,\s*/, $v));
-                }
-                $opts->{-hide} = $hide;
+        for $v (@{delete($opts->{hide}) // []}) {
+                $opts->{-'hide_'.$_} = 1 for split(/\s*,\s*/, $v);
         }
         bless $opts, $class;
 }
 
-sub version { $_[0]->{version} // 1 }
+sub version {
+        $_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1
+}
 
 sub git_epoch {
-        my ($self, $epoch) = @_;
-        $self->version == 2 or return;
-        $self->{"$epoch.git"} ||= do {
+        my ($self, $epoch) = @_; # v2-only, callers always supply $epoch
+        $self->{"$epoch.git"} //= do {
                 my $git_dir = "$self->{inboxdir}/git/$epoch.git";
+                return unless -d $git_dir;
                 my $g = PublicInbox::Git->new($git_dir);
-                $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
-                # no cleanup needed, we never cat-file off this, only clone
+                my $lim = $self->{-httpbackend_limiter};
+                $g->{-httpbackend_limiter} = $lim if $lim;
+                # caller must manually cleanup when done
                 $g;
         };
 }
 
 sub git {
         my ($self) = @_;
-        $self->{git} ||= do {
+        $self->{git} //= do {
                 my $git_dir = $self->{inboxdir};
                 $git_dir .= '/all.git' if $self->version == 2;
                 my $g = PublicInbox::Git->new($git_dir);
-                $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
-                _cleanup_later($self);
+                my $lim = $self->{-httpbackend_limiter};
+                $g->{-httpbackend_limiter} = $lim if $lim;
                 $g;
         };
 }
@@ -160,160 +109,167 @@ sub max_git_epoch {
         my ($self) = @_;
         return if $self->version < 2;
         my $cur = $self->{-max_git_epoch};
-        my $changed = git($self)->alternates_changed;
-        if (!defined($cur) || $changed) {
-                git_cleanup($self) if $changed;
+        my $changed;
+        if (!defined($cur) || ($changed = git($self)->alternates_changed)) {
+                $self->{git}->cleanup if $changed;
                 my $gits = "$self->{inboxdir}/git";
                 if (opendir my $dh, $gits) {
-                        my $max = -1;
-                        while (defined(my $git_dir = readdir($dh))) {
-                                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
-                                $max = $1 if $1 > $max;
-                        }
-                        $cur = $self->{-max_git_epoch} = $max if $max >= 0;
-                } else {
-                        warn "opendir $gits failed: $!\n";
+                        my $max = max(map {
+                                substr($_, 0, -4) + 0; # drop ".git" suffix
+                        } grep(/\A[0-9]+\.git\z/, readdir($dh))) // return;
+                        $cur = $self->{-max_git_epoch} = $max;
                 }
         }
         $cur;
 }
 
-sub mm {
+sub mm_file {
         my ($self) = @_;
-        $self->{mm} ||= eval {
+        my $d = $self->{inboxdir};
+        ($self->version >= 2 ? $d : "$d/public-inbox").'/msgmap.sqlite3';
+}
+
+sub mm {
+        my ($self, $req) = @_;
+        $self->{mm} //= eval {
                 require PublicInbox::Msgmap;
-                my $dir = $self->{inboxdir};
-                if ($self->version >= 2) {
-                        PublicInbox::Msgmap->new_file("$dir/msgmap.sqlite3");
-                } else {
-                        PublicInbox::Msgmap->new($dir);
-                }
-        };
+                _cleanup_later($self);
+                PublicInbox::Msgmap->new_file($self);
+        } // ($req ? croak("E: $@") : undef);
 }
 
-sub search ($;$$) {
-        my ($self, $over_only, $ctx) = @_;
-        my $srch = $self->{search} ||= eval {
+sub search {
+        my ($self) = @_;
+        $self->{search} // eval {
                 _cleanup_later($self);
                 require PublicInbox::Search;
-                PublicInbox::Search->new($self);
-        };
-        ($over_only || eval { $srch->xdb }) ? $srch : do {
-                $ctx and $ctx->{env}->{'psgi.errors'}->print(<<EOF);
-`$self->{name}' search went away unexpectedly
-EOF
-                undef;
+                my $srch = PublicInbox::Search->new($self);
+                (eval { $srch->xdb }) ? ($self->{search} = $srch) : undef;
         };
 }
 
+# isrch is preferred for read-only interfaces if available since it
+# reduces kernel cache and FD overhead
+sub isrch { $_[0]->{isrch} // search($_[0]) }
+
 sub over {
-        $_[0]->{over} //= eval {
-                my $srch = search($_[0], 1) or return;
+        my ($self, $req) = @_;
+        $self->{over} // eval {
+                my $srch = $self->{search} // do {
+                        require PublicInbox::Search;
+                        PublicInbox::Search->new($self);
+                };
+                _cleanup_later($self);
                 my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
                 $over->dbh; # may fail
-                $over;
-        };
-}
-
-sub try_cat {
-        my ($path) = @_;
-        my $rv = '';
-        if (open(my $fh, '<', $path)) {
-                local $/;
-                $rv = <$fh>;
-        }
-        $rv;
+                $self->{over} = $over;
+        } // ($req ? croak("E: $@") : undef);
 }
 
 sub description {
         my ($self) = @_;
-        ($self->{description} //= do {
-                my $desc = try_cat("$self->{inboxdir}/description");
-                local $/ = "\n";
-                chomp $desc;
-                utf8::decode($desc);
-                $desc =~ s/\s+/ /smg;
-                $desc eq '' ? undef : $desc;
-        }) // '($INBOX_DIR/description missing)';
+        ($self->{description} //=
+                PublicInbox::Git::cat_desc("$self->{inboxdir}/description")) //
+                '($INBOX_DIR/description missing)';
 }
 
 sub cloneurl {
         my ($self) = @_;
-        ($self->{cloneurl} //= do {
-                my $s = try_cat("$self->{inboxdir}/cloneurl");
-                my @urls = split(/\s+/s, $s);
-                scalar(@urls) ? \@urls : undef
-        }) // [];
+        $self->{cloneurl} // do {
+                my @urls = split(/\s+/s,
+                        PublicInbox::IO::try_cat "$self->{inboxdir}/cloneurl");
+                scalar(@urls) ? ($self->{cloneurl} = \@urls) : undef;
+        } // [];
 }
 
 sub base_url {
         my ($self, $env) = @_; # env - PSGI env
-        if ($env) {
+        if ($env && $env->{'psgi.url_scheme'}) {
                 my $url = PublicInbox::Git::host_prefix_url($env, '');
                 # for mount in Plack::Builder
                 $url .= '/' if $url !~ m!/\z!;
                 return $url .= $self->{name} . '/';
         }
-        # called from a non-PSGI environment (e.g. NNTP/POP3):
-        $self->{-base_url} ||= do {
-                my $url = $self->{url}->[0] or return undef;
-                # expand protocol-relative URLs to HTTPS if we're
-                # not inside a web server
-                $url = "https:$url" if $url =~ m!\A//!;
-                $url .= '/' if $url !~ m!/\z!;
-                $url;
-        };
+        # called from a non-PSGI environment or cross-inbox environment
+        # where multiple inboxes can have different domains
+        my $url = $self->{url} // return undef;
+        $url = $url->[0] // return undef;
+        # expand protocol-relative URLs to HTTPS if we're
+        # not inside a web server
+        substr($url, 0, 0, 'https:') if substr($url, 0, 2) eq '//';
+        $url .= '/' if substr($url, -1, 1) ne '/';
+        $url;
+}
+
+# imapserver, nntpserver configs are used here:
+sub _x_url ($$$) {
+        my ($self, $x, $ctx) = @_; # $x is "imap" or "nntp"
+        # no checking for nntp_usable here, we can point entirely
+        # to non-local servers or users run by a different user
+        my $ns = $self->{"${x}server"} //
+               $ctx->{www}->{pi_cfg}->get_all("publicinbox.${x}server");
+        my $group = $self->{newsgroup};
+        my @urls;
+        if ($ns && $group) {
+                @urls = map {
+                        my $u = m!\A${x}s?://! ? $_ : "$x://$_";
+                        $u .= '/' if $u !~ m!/\z!;
+                        $u.$group;
+                } @$ns;
+        }
+        if (my $mirrors = $self->{"${x}mirror"}) {
+                my @m;
+                for (@$mirrors) {
+                        my $u = m!\A${x}s?://! ? $_ : "$x://$_";
+                        if ($u =~ m!\A${x}s?://[^/]+/?\z!) {
+                                if ($group) {
+                                        $u .= '/' if $u !~ m!/\z!;
+                                        $u .= $group;
+                                } else { # n.b. IMAP and POP3 use "newsgroup"
+                                        warn <<EOM;
+publicinbox.$self->{name}.${x}mirror=$_ missing newsgroup name
+EOM
+                                }
+                        }
+                        # else: allow full URLs like:
+                        # nntp://news.example.com/alt.example
+                        push @m, $u;
+                }
+                @urls = uniqstr @urls, @m;
+        }
+        \@urls;
 }
 
-sub nntp_url {
-        my ($self) = @_;
-        $self->{-nntp_url} ||= do {
-                # no checking for nntp_usable here, we can point entirely
-                # to non-local servers or users run by a different user
-                my $ns = $self->{nntpserver};
+# my ($self, $ctx) = @_;
+sub imap_url { $_[0]->{-imap_url} //= _x_url($_[0], 'imap', $_[1]) }
+sub nntp_url { $_[0]->{-nntp_url} //= _x_url($_[0], 'nntp', $_[1]) }
+
+sub pop3_url {
+        my ($self, $ctx) = @_;
+        $self->{-pop3_url} //= do {
+                my $ps = $self->{'pop3server'} //
+                       $ctx->{www}->{pi_cfg}->get_all('publicinbox.pop3server');
                 my $group = $self->{newsgroup};
                 my @urls;
-                if ($ns && $group) {
-                        $ns = [ $ns ] if ref($ns) ne 'ARRAY';
-                        @urls = map {
-                                my $u = m!\Anntps?://! ? $_ : "nntp://$_";
-                                $u .= '/' if $u !~ m!/\z!;
-                                $u.$group;
-                        } @$ns;
-                }
-
-                my $mirrors = $self->{nntpmirror};
-                if ($mirrors) {
-                        my @m;
-                        foreach (@$mirrors) {
-                                my $u = m!\Anntps?://! ? $_ : "nntp://$_";
-                                if ($u =~ m!\Anntps?://[^/]+/?\z!) {
-                                        if ($group) {
-                                                $u .= '/' if $u !~ m!/\z!;
-                                                $u .= $group;
-                                        } else {
-                                                warn
-"publicinbox.$self->{name}.nntpmirror=$_ missing newsgroup name\n";
-                                        }
-                                }
-                                # else: allow full URLs like:
-                                # nntp://news.example.com/alt.example
-                                push @m, $u;
-                        }
-
-                        # List::Util::uniq requires Perl 5.26+, maybe we
-                        # can use it by 2030 or so
-                        my %seen;
-                        @urls = grep { !$seen{$_}++ } (@urls, @m);
+                ($ps && $group) and
+                        @urls = map { m!\Apop3?s?://! ? $_ : "pop3://$_" } @$ps;
+                if (my $mi = $self->{'pop3mirror'}) {
+                        my @m = map { m!\Apop3?s?://! ? $_ : "pop3://$_" } @$mi;
+                        @urls = uniqstr @urls, @m;
                 }
+                my $n = 0;
+                for (@urls) { $n += s!/+\z!! }
+                warn <<EOM if $n;
+W: pop3server and/or pop3mirror URLs should not end with trailing slash `/'
+EOM
                 \@urls;
-        };
+        }
 }
 
 sub nntp_usable {
         my ($self) = @_;
         my $ret = mm($self) && over($self);
-        $self->{mm} = $self->{over} = $self->{search} = undef;
+        delete @$self{qw(mm over search)};
         $ret;
 }
 
@@ -328,53 +284,42 @@ sub msg_by_smsg ($$) {
 
         # ghosts may have undef smsg (from SearchThread.node) or
         # no {blob} field
-        return unless defined $smsg;
-        defined(my $blob = $smsg->{blob}) or return;
-
-        git($self)->cat_file($blob);
+        $smsg // return;
+        $self->git->cat_file($smsg->{blob} // return);
 }
 
 sub smsg_eml {
         my ($self, $smsg) = @_;
         my $bref = msg_by_smsg($self, $smsg) or return;
         my $eml = PublicInbox::Eml->new($bref);
-        $smsg->populate($eml) unless exists($smsg->{num}); # v1 w/o SQLite
+        $smsg->{num} // $smsg->populate($eml);
         $eml;
 }
 
-sub mid2num($$) {
-        my ($self, $mid) = @_;
-        my $mm = mm($self) or return;
-        $mm->num_for($mid);
-}
-
 sub smsg_by_mid ($$) {
         my ($self, $mid) = @_;
-        my $over = over($self) or return;
-        # favor the Message-ID we used for the NNTP article number:
-        defined(my $num = mid2num($self, $mid)) or return;
-        my $smsg = $over->get_art($num) or return;
-        PublicInbox::Smsg::psgi_cull($smsg);
+        my $over = $self->over or return;
+        my $smsg;
+        if (my $mm = $self->mm) {
+                # favor the Message-ID we used for the NNTP article number:
+                my $num = $mm->num_for($mid) // return;
+                $smsg = $over->get_art($num);
+        } else {
+                my ($id, $prev);
+                $smsg = $over->next_by_mid($mid, \$id, \$prev);
+        }
+        $smsg ? PublicInbox::Smsg::psgi_cull($smsg) : undef;
 }
 
 sub msg_by_mid ($$) {
         my ($self, $mid) = @_;
-
-        over($self) or
-                return msg_by_path($self, mid2path($mid));
-
         my $smsg = smsg_by_mid($self, $mid);
-        $smsg ? msg_by_smsg($self, $smsg) : undef;
-}
-
-sub recent {
-        my ($self, $opts, $after, $before) = @_;
-        over($self)->recent($opts, $after, $before);
+        $smsg ? msg_by_smsg($self, $smsg) : msg_by_path($self, mid2path($mid));
 }
 
 sub modified {
         my ($self) = @_;
-        if (my $over = over($self)) {
+        if (my $over = $self->over) {
                 my $msgs = $over->recent({limit => 1});
                 if (my $smsg = $msgs->[0]) {
                         return $smsg->{ts};
@@ -388,7 +333,7 @@ sub modified {
 # (pathname is NOT public, but prefix is used for Xapian queries)
 sub altid_map ($) {
         my ($self) = @_;
-        $self->{-altid_map} //= eval {
+        eval {
                 require PublicInbox::AltId;
                 my $altid = $self->{altid} or return {};
                 my %h = map {;
@@ -410,22 +355,43 @@ sub unsubscribe_unlock {
         delete $self->{unlock_subs}->{$ident};
 }
 
-sub check_inodes ($) {
-        my ($self) = @_;
-        for (qw(over mm)) { # TODO: search
-                $self->{$_}->check_inodes if $self->{$_};
-        }
-}
-
 # called by inotify
 sub on_unlock {
         my ($self) = @_;
-        check_inodes($self);
+        check_inodes($self); # DB files may be replaced while holding lock
         my $subs = $self->{unlock_subs} or return;
-        for (values %$subs) {
-                eval { $_->on_inbox_unlock($self) };
+        for my $obj (values %$subs) {
+                eval { $obj->on_inbox_unlock($self) };
                 warn "E: $@ ($self->{inboxdir})\n" if $@;
         }
 }
 
+sub uidvalidity { $_[0]->{uidvalidity} //= eval { $_[0]->mm->created_at } }
+
+sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
+
+# only used by NNTP, so we need ->mm anyways
+sub art_min { $_[0]->{-art_min} //= eval { $_[0]->mm(1)->min } }
+
+# used by IMAP, too, which tries to avoid ->mm (but ->{mm} is likely
+# faster since it's smaller iff available)
+sub art_max {
+        $_[0]->{-art_max} //= eval { $_[0]->{mm}->max } //
+                                eval { $_[0]->over(1)->max };
+}
+
+sub mailboxid { # rfc 8474, 8620, 8621
+        my ($self, $imap_slice) = @_;
+        my $pfx = defined($imap_slice) ? $self->{newsgroup} : $self->{name};
+        utf8::encode($pfx); # to octets
+        # RFC 8620, 1.2 recommends not starting with dash or digits
+        # "A good solution to these issues is to prefix every id with a single
+        #  alphabetical character."
+        'M'.join('', map { sprintf('%02x', ord) } split(//, $pfx)) .
+                (defined($imap_slice) ? sprintf('-%x', $imap_slice) : '') .
+                sprintf('-%x', uidvalidity($self) // 0)
+}
+
+sub thing_type { 'public inbox' }
+
 1;
diff --git a/lib/PublicInbox/InboxIdle.pm b/lib/PublicInbox/InboxIdle.pm
index 0cdd2e2a..3c4d4a68 100644
--- a/lib/PublicInbox/InboxIdle.pm
+++ b/lib/PublicInbox/InboxIdle.pm
@@ -1,20 +1,18 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # fields:
-# pi_config: PublicInbox::Config ref
 # inot: Linux::Inotify2-like object
 # pathmap => { inboxdir => [ ibx, watch1, watch2, watch3... ] } mapping
 package PublicInbox::InboxIdle;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::DS);
-use Cwd qw(abs_path);
-use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+use PublicInbox::Syscall qw(EPOLLIN);
 my $IN_MODIFY = 0x02; # match Linux inotify
 my $ino_cls;
-if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
-        $IN_MODIFY = Linux::Inotify2::IN_MODIFY();
-        $ino_cls = 'Linux::Inotify2';
+if ($^O eq 'linux' && eval { require PublicInbox::Inotify }) {
+        $IN_MODIFY = PublicInbox::Inotify::IN_MODIFY();
+        $ino_cls = 'PublicInbox::Inotify';
 } elsif (eval { require PublicInbox::KQNotify }) {
         $IN_MODIFY = PublicInbox::KQNotify::NOTE_WRITE();
         $ino_cls = 'PublicInbox::KQNotify';
@@ -23,46 +21,57 @@ require PublicInbox::In2Tie if $ino_cls;
 
 sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
         my ($ibx, $self) = @_;
-        my $dir = abs_path($ibx->{inboxdir});
-        if (!defined($dir)) {
-                warn "W: $ibx->{inboxdir} not watched: $!\n";
-                return;
-        }
+        my $dir = $ibx->{inboxdir};
         my $inot = $self->{inot};
         my $cur = $self->{pathmap}->{$dir} //= [];
+        my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
 
         # transfer old subscriptions to the current inbox, cancel the old watch
-        if (my $old_ibx = $cur->[0]) {
-                $ibx->{unlock_subs} and
-                        die "BUG: $dir->{unlock_subs} should not exist";
+        my $old_ibx = $cur->[0];
+        $cur->[0] = $ibx;
+        if ($old_ibx) {
+                my $u = $ibx->{unlock_subs};
                 $ibx->{unlock_subs} = $old_ibx->{unlock_subs};
+                %{$ibx->{unlock_subs}} = (%$u, %{$ibx->{unlock_subs}}) if $u;
+
+                # *::Inotify*::Watch::name matches if watches are the
+                # same, no point in replacing a watch of the same name
+                if ($cur->[1]->name eq $lock) {
+                        $self->{on_unlock}->{$lock} = $ibx;
+                        return;
+                }
+                # rare, name changed (v1 inbox converted to v2)
                 $cur->[1]->cancel; # Linux::Inotify2::Watch::cancel
         }
-        $cur->[0] = $ibx;
 
-        my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
         if (my $w = $cur->[1] = $inot->watch($lock, $IN_MODIFY)) {
                 $self->{on_unlock}->{$w->name} = $ibx;
         } else {
                 warn "E: ".ref($inot)."->watch($lock, IN_MODIFY) failed: $!\n";
+                warn <<"" if $!{ENOSPC} && $^O eq 'linux';
+# consider increasing /proc/sys/fs/inotify/max_user_watches
+
         }
 
         # TODO: detect deleted packs (and possibly other files)
 }
 
 sub refresh {
-        my ($self, $pi_config) = @_;
-        $pi_config->each_inbox(\&in2_arm, $self);
+        my ($self, $pi_cfg) = @_;
+        $pi_cfg->each_inbox(\&in2_arm, $self);
 }
 
+# internal API for ease-of-use
+sub watch_inbox { in2_arm($_[1], $_[0]) };
+
 sub new {
-        my ($class, $pi_config) = @_;
+        my ($class, $pi_cfg) = @_;
         my $self = bless {}, $class;
         my $inot;
         if ($ino_cls) {
                 $inot = $ino_cls->new or die "E: $ino_cls->new: $!";
                 my $io = PublicInbox::In2Tie::io($inot);
-                $self->SUPER::new($io, EPOLLIN | EPOLLET);
+                $self->SUPER::new($io, EPOLLIN);
         } else {
                 require PublicInbox::FakeInotify;
                 $inot = PublicInbox::FakeInotify->new;
@@ -70,7 +79,7 @@ sub new {
         $self->{inot} = $inot;
         $self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
         $self->{on_unlock} = {}; # lock path => ibx
-        refresh($self, $pi_config);
+        refresh($self, $pi_cfg) if $pi_cfg;
         PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
         $self;
 }
@@ -78,10 +87,11 @@ sub new {
 sub event_step {
         my ($self) = @_;
         eval {
-                my @events = $self->{inot}->read; # Linux::Inotify2::read
+                my @events = $self->{inot}->read; # PublicInbox::Inotify3::read
                 my $on_unlock = $self->{on_unlock};
                 for my $ev (@events) {
-                        if (my $ibx = $on_unlock->{$ev->fullname}) {
+                        my $fn = $ev->fullname // next; # cancelled
+                        if (my $ibx = $on_unlock->{$fn}) {
                                 $ibx->on_unlock;
                         }
                 }
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 752f1997..8e95cb28 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -1,23 +1,17 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Extends read-only Inbox for writing
 package PublicInbox::InboxWritable;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Inbox Exporter);
+use parent qw(PublicInbox::Inbox PublicInbox::Umask Exporter);
 use PublicInbox::Import;
+use PublicInbox::IO qw(read_all);
 use PublicInbox::Filter::Base qw(REJECT);
 use Errno qw(ENOENT);
-our @EXPORT_OK = qw(eml_from_path warn_ignore_cb);
-
-use constant {
-        PERM_UMASK => 0,
-        OLD_PERM_GROUP => 1,
-        OLD_PERM_EVERYBODY => 2,
-        PERM_GROUP => 0660,
-        PERM_EVERYBODY => 0664,
-};
+our @EXPORT_OK = qw(eml_from_path);
+use Fcntl qw(O_RDONLY O_NONBLOCK);
 
 sub new {
         my ($class, $ibx, $creat_opt) = @_;
@@ -46,12 +40,13 @@ sub _init_v1 {
                 require PublicInbox::Msgmap;
                 my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
                 $sidx->begin_txn_lazy;
+                my $mm = PublicInbox::Msgmap->new_file($self, 1);
                 if (defined $skip_artnum) {
-                        my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
                         $mm->{dbh}->begin_work;
                         $mm->skip_artnum($skip_artnum);
                         $mm->{dbh}->commit;
                 }
+                undef $mm; # ->created_at set
                 $sidx->commit_txn_lazy;
         } else {
                 open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
@@ -64,7 +59,6 @@ sub init_inbox {
         if ($self->version == 1) {
                 my $dir = assert_usable_dir($self);
                 PublicInbox::Import::init_bare($dir);
-                $self->umask_prepare;
                 $self->with_umask(\&_init_v1, $self, $skip_artnum);
         } else {
                 my $v2w = importer($self);
@@ -102,7 +96,7 @@ sub filter {
                         $im->done;
                 }
 
-                my @args = (-inbox => $self);
+                my @args = (ibx => $self);
                 # basic line splitting, only
                 # Perhaps we can have proper quote splitting one day...
                 ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
@@ -118,184 +112,98 @@ sub filter {
         undef;
 }
 
-sub is_maildir_basename ($) {
-        my ($bn) = @_;
-        return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
-        if ($bn =~ /:2,([A-Z]+)\z/i) {
-                my $flags = $1;
-                return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
-        }
-        1;
-}
-
-sub is_maildir_path ($) {
-        my ($path) = @_;
-        my @p = split(m!/+!, $path);
-        (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
-}
-
 sub eml_from_path ($) {
         my ($path) = @_;
-        if (open my $fh, '<', $path) {
-                my $str = do { local $/; <$fh> } or return;
-                PublicInbox::Eml->new(\$str);
+        if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
+                return unless -f $fh && -s _; # no FIFOs or directories
+                PublicInbox::Eml->new(\(my $str = read_all($fh, -s _)));
         } else { # ENOENT is common with Maildir
                 warn "failed to open $path: $!\n" if $! != ENOENT;
                 undef;
         }
 }
 
+sub _each_maildir_eml {
+        my ($fn, $kw, $eml, $im, $self) = @_;
+        return if grep(/\Adraft\z/, @$kw);
+        if ($self && (my $filter = $self->filter($im))) {
+                my $ret = $filter->scrub($eml) or return;
+                return if $ret == REJECT();
+                $eml = $ret;
+        }
+        $im->add($eml);
+}
+
+# XXX does anybody use this?
 sub import_maildir {
         my ($self, $dir) = @_;
-        my $im = $self->importer(1);
-
         foreach my $sub (qw(cur new tmp)) {
                 -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
         }
-        foreach my $sub (qw(cur new)) {
-                opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
-                while (defined(my $fn = readdir($dh))) {
-                        next unless is_maildir_basename($fn);
-                        my $mime = eml_from_path("$dir/$fn") or next;
-
-                        if (my $filter = $self->filter($im)) {
-                                my $ret = $filter->scrub($mime) or return;
-                                return if $ret == REJECT();
-                                $mime = $ret;
-                        }
-                        $im->add($mime);
-                }
-        }
+        my $im = $self->importer(1);
+        my @self = $self->filter($im) ? ($self) : ();
+        require PublicInbox::MdirReader;
+        PublicInbox::MdirReader->new->maildir_each_eml($dir,
+                                        \&_each_maildir_eml, $im, @self);
         $im->done;
 }
 
-# asctime: From example@example.com Fri Jun 23 02:56:55 2000
-my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;
-
-sub mb_add ($$$$) {
-        my ($im, $variant, $filter, $msg) = @_;
-        $$msg =~ s/(\r?\n)+\z/$1/s;
-        if ($variant eq 'mboxrd') {
-                $$msg =~ s/^>(>*From )/$1/gms;
-        } elsif ($variant eq 'mboxo') {
-                $$msg =~ s/^>From /From /gms;
-        }
-        my $mime = PublicInbox::Eml->new($msg);
+sub _mbox_eml_cb { # MboxReader->mbox* callback
+        my ($eml, $im, $filter) = @_;
         if ($filter) {
-                my $ret = $filter->scrub($mime) or return;
+                my $ret = $filter->scrub($eml) or return;
                 return if $ret == REJECT();
-                $mime = $ret;
+                $eml = $ret;
         }
-        $im->add($mime)
+        $im->add($eml);
 }
 
 sub import_mbox {
         my ($self, $fh, $variant) = @_;
-        if ($variant !~ /\A(?:mboxrd|mboxo)\z/) {
-                die "variant must be 'mboxrd' or 'mboxo'\n";
-        }
+        require PublicInbox::MboxReader;
+        my $cb = PublicInbox::MboxReader->reads($variant) or
+                die "$variant not supported\n";
         my $im = $self->importer(1);
-        my $prev = undef;
-        my $msg = '';
-        my $filter = $self->filter;
-        while (defined(my $l = <$fh>)) {
-                if ($l =~ /$from_strict/o) {
-                        if (!defined($prev) || $prev =~ /^\r?$/) {
-                                mb_add($im, $variant, $filter, \$msg) if $msg;
-                                $msg = '';
-                                $prev = $l;
-                                next;
-                        }
-                        warn "W[$.] $l\n";
-                }
-                $prev = $l;
-                $msg .= $l;
-        }
-        mb_add($im, $variant, $filter, \$msg) if $msg;
+        $cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter);
         $im->done;
 }
 
-sub _read_git_config_perm {
-        my ($self) = @_;
-        chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
-        $perm;
-}
-
-sub _git_config_perm {
-        my $self = shift;
-        my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
-        return PERM_UMASK if (!defined($perm) || $perm eq '');
-        return PERM_UMASK if ($perm eq 'umask');
-        return PERM_GROUP if ($perm eq 'group');
-        if ($perm =~ /\A(?:all|world|everybody)\z/) {
-                return PERM_EVERYBODY;
-        }
-        return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
-        return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
-
-        my $i = oct($perm);
-        return PERM_UMASK if ($i == PERM_UMASK);
-        return PERM_GROUP if ($i == OLD_PERM_GROUP);
-        return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);
-
-        if (($i & 0600) != 0600) {
-                die "core.sharedRepository mode invalid: ".
-                    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
-        }
-        ($i & 0666);
-}
-
-sub _umask_for {
-        my ($perm) = @_; # _git_config_perm return value
-        my $rv = $perm;
-        return umask if $rv == 0;
-
-        # set +x bit if +r or +w were set
-        $rv |= 0100 if ($rv & 0600);
-        $rv |= 0010 if ($rv & 0060);
-        $rv |= 0001 if ($rv & 0006);
-        (~$rv & 0777);
+sub cleanup ($) {
+        delete @{$_[0]}{qw(over mm git search)};
 }
 
-sub with_umask {
-        my ($self, $cb, @arg) = @_;
-        my $old = umask $self->{umask};
-        my $rv = eval { $cb->(@arg) };
-        my $err = $@;
-        umask $old;
-        die $err if $err;
-        $rv;
+# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
+sub git_dir_latest {
+        my ($self, $max) = @_;
+        defined($$max = $self->max_git_epoch) ?
+                "$self->{inboxdir}/git/$$max.git" : undef;
 }
 
-sub umask_prepare {
-        my ($self) = @_;
-        my $perm = _git_config_perm($self);
-        my $umask = _umask_for($perm);
-        $self->{umask} = $umask;
-}
+# for unconfigured inboxes
+sub detect_indexlevel ($) {
+        my ($ibx) = @_;
 
-sub cleanup ($) {
-        delete @{$_[0]}{qw(over mm git search)};
-}
+        my $over = $ibx->over;
+        my $srch = $ibx->search;
+        delete @$ibx{qw(over search)}; # don't leave open FDs lying around
 
-# warnings to ignore when handling spam mailboxes and maybe other places
-sub warn_ignore {
-        my $s = "@_";
-        # Email::Address::XS warnings
-        $s =~ /^Argument contains empty address at /
-        || $s =~ /^Element at index [0-9]+ contains /
-        # PublicInbox::MsgTime
-        || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
-        || $s =~ /^bad Date: .+? in /
-}
+        # brand new or never before indexed inboxes default to full
+        return 'full' unless $over;
+        my $l = 'basic';
+        return $l unless $srch;
+        if (my $xdb = $srch->xdb) {
+                $l = 'full';
+                my $m = $xdb->get_metadata('indexlevel');
+                if ($m eq 'medium') {
+                        $l = $m;
+                } elsif ($m ne '') {
+                        warn <<"";
+$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m
 
-# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
-sub warn_ignore_cb {
-        my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
-        sub {
-                return if warn_ignore(@_);
-                $cb->(@_);
+                }
+                $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata');
         }
+        $l;
 }
 
 1;
diff --git a/lib/PublicInbox/Inotify.pm b/lib/PublicInbox/Inotify.pm
new file mode 100644
index 00000000..c4f1ae84
--- /dev/null
+++ b/lib/PublicInbox/Inotify.pm
@@ -0,0 +1,47 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# wrap Linux::Inotify2 XS module, support pure Perl via `syscall' someday
+package PublicInbox::Inotify;
+use v5.12;
+our @ISA;
+BEGIN { # prefer pure Perl since it works out-of-the-box
+        my $isa;
+        for my $m (qw(PublicInbox::Inotify3 Linux::Inotify2)) {
+                eval "require $m";
+                next if $@;
+                $isa = $m;
+        }
+        if ($isa) {
+                push @ISA, $isa;
+                my $buf = '';
+                for (qw(IN_MOVED_TO IN_CREATE IN_DELETE IN_DELETE_SELF
+                                IN_MOVE_SELF IN_MOVED_FROM IN_MODIFY)) {
+                        $buf .= "*$_ = \\&PublicInbox::Inotify3::$_;\n";
+                }
+                eval $buf;
+                die $@ if $@;
+        } else {
+                die <<EOM;
+W: inotify syscall numbers unknown on your platform and
+W: Linux::Inotify2 missing: $@
+W: public-inbox hackers welcome the plain-text output of ./devel/sysdefs-list
+W: at meta\@public-inbox.org
+EOM
+        }
+};
+
+sub new {
+        $_[0]->SUPER::new // do {
+                my $msg = $!{EMFILE} ? <<EOM : "$_[0]->new: $!\n";
+inotify_init/inotify_init1: $!
+You may need to raise the `fs.inotify.max_user_instances' sysctl limit.
+Consult your OS documentation and/or sysctl(8) + sysctl.conf(5) manpages.
+EOM
+                $msg =~ s/^/E: /smg;
+                require Carp;
+                Carp::croak($msg);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Inotify3.pm b/lib/PublicInbox/Inotify3.pm
new file mode 100644
index 00000000..4f337a7a
--- /dev/null
+++ b/lib/PublicInbox/Inotify3.pm
@@ -0,0 +1,115 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Implements most Linux::Inotify2 functionality we need in pure Perl
+# Anonymous sub support isn't supported since it's expensive in the
+# best case and likely leaky in older Perls (e.g. 5.16.3)
+package PublicInbox::Inotify3;
+use v5.12;
+use autodie qw(open);
+use PublicInbox::Syscall ();
+use Carp;
+use Scalar::Util ();
+
+# this fails if undefined no unsupported platforms
+use constant $PublicInbox::Syscall::INOTIFY;
+our %events;
+
+# extracted from devel/sysdefs-list output, these should be arch-independent
+BEGIN {
+%events = (
+        IN_ACCESS => 0x1,
+        IN_ALL_EVENTS => 0xfff,
+        IN_ATTRIB => 0x4,
+        IN_CLOSE => 0x18,
+        IN_CLOSE_NOWRITE => 0x10,
+        IN_CLOSE_WRITE => 0x8,
+        IN_CREATE => 0x100,
+        IN_DELETE => 0x200,
+        IN_DELETE_SELF => 0x400,
+        IN_DONT_FOLLOW => 0x2000000,
+        IN_EXCL_UNLINK => 0x4000000,
+        IN_IGNORED => 0x8000,
+        IN_ISDIR => 0x40000000,
+        IN_MASK_ADD => 0x20000000,
+        IN_MODIFY => 0x2,
+        IN_MOVE => 0xc0,
+        IN_MOVED_FROM => 0x40,
+        IN_MOVED_TO => 0x80,
+        IN_MOVE_SELF => 0x800,
+        IN_ONESHOT => 0x80000000,
+        IN_ONLYDIR => 0x1000000,
+        IN_OPEN => 0x20,
+        IN_Q_OVERFLOW => 0x4000,
+        IN_UNMOUNT => 0x2000,
+);
+} # /BEGIN
+use constant \%events;
+require PublicInbox::In3Event; # uses %events
+require PublicInbox::In3Watch; # uses SYS_inotify_rm_watch
+
+use constant autocancel =>
+        (IN_IGNORED|IN_UNMOUNT|IN_ONESHOT|IN_DELETE_SELF);
+
+sub new {
+        open my $fh, '+<&=', syscall(SYS_inotify_init1, IN_CLOEXEC);
+        bless { fh => $fh }, __PACKAGE__;
+}
+
+sub read {
+        my ($self) = @_;
+        my (@ret, $wd, $mask, $len, $name, $size, $buf);
+        my $r = sysread($self->{fh}, my $rbuf, 8192);
+        if ($r) {
+                while ($r) {
+                        ($wd, $mask, undef, $len) = unpack('lLLL', $rbuf);
+                        $size = 16 + $len; # 16: sizeof(struct inotify_event)
+                        substr($rbuf, 0, 16, '');
+                        $name = $len ? unpack('Z*', substr($rbuf, 0, $len, ''))
+                                        : undef;
+                        $r -= $size;
+                        next if $self->{ignore}->{$wd};
+                        my $ev = bless [$mask, $name], 'PublicInbox::In3Event';
+                        push @ret, $ev;
+                        if (my $w = $self->{w}->{$wd}) {
+                                $ev->[2] = $w;
+                                $w->cancel if $ev->mask & autocancel;
+                        } elsif ($mask & IN_Q_OVERFLOW) {
+                                carp 'E: IN_Q_OVERFLOW, too busy? (non-fatal)'
+                        } else {
+                                carp "BUG? wd:$wd unknown (non-fatal)";
+                        }
+                }
+        } elsif (defined($r) || ($!{EAGAIN} || $!{EINTR})) {
+        } else {
+                croak "inotify read: $!";
+        }
+        delete $self->{ignore};
+        @ret;
+}
+
+sub fileno { CORE::fileno($_[0]->{fh}) }
+
+sub fh { $_[0]->{fh} }
+
+sub blocking { shift->{fh}->blocking(@_) }
+
+sub watch {
+        my ($self, $name, $mask, $cb) = @_;
+        croak "E: $cb not supported" if $cb; # too much memory
+        my $wd = syscall(SYS_inotify_add_watch, $self->fileno, $name, $mask);
+        return if $wd < 0;
+        my $w = bless [ $wd, $mask, $name, $self ], 'PublicInbox::In3Watch';
+        $self->{w}->{$wd} = $w;
+        Scalar::Util::weaken($w->[3]); # ugh
+        $w;
+}
+
+sub rm_watch {
+        my ($self, $wd) = @_;
+        delete $self->{w}->{$wd};
+        $self->{ignore}->{$wd} = 1; # is this needed?
+        syscall(SYS_inotify_rm_watch, $self->fileno, $wd) < 0 ? undef : 1;
+}
+
+1;
diff --git a/lib/PublicInbox/InputPipe.pm b/lib/PublicInbox/InputPipe.pm
new file mode 100644
index 00000000..ee5bda59
--- /dev/null
+++ b/lib/PublicInbox/InputPipe.pm
@@ -0,0 +1,52 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for reading pipes, sockets, and TTYs off the DS event loop
+package PublicInbox::InputPipe;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN);
+
+sub consume {
+        my ($in, $cb, @args) = @_;
+        my $self = bless { cb => $cb, args => \@args }, __PACKAGE__;
+        eval { $self->SUPER::new($in, EPOLLIN) };
+        if ($@) { # regular file (but not w/ select|IO::Poll backends)
+                $self->{-need_rq} = 1;
+                $self->requeue;
+        } elsif (-p _ || -S _) { # O_NONBLOCK for sockets and pipes
+                $in->blocking(0);
+        }
+        $self;
+}
+
+sub close { # idempotent
+        my ($self) = @_;
+        $self->{-need_rq} ? delete($self->{sock}) : $self->SUPER::close
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $r = sysread($self->{sock} // return, my $rbuf, 65536);
+        eval {
+                if ($r) {
+                        $self->{cb}->($self, @{$self->{args}}, $rbuf);
+                        $self->requeue if $self->{-need_rq};
+                } elsif (defined($r)) { # EOF
+                        $self->{cb}->($self, @{$self->{args}}, '');
+                        $self->close
+                } elsif ($!{EAGAIN}) { # rely on EPOLLIN
+                } elsif ($!{EINTR}) { # rely on EPOLLIN for sockets/pipes
+                        $self->requeue if $self->{-need_rq};
+                } else { # another error
+                        $self->{cb}->($self, @{$self->{args}}, undef);
+                        $self->close;
+                }
+        };
+        if ($@) {
+                warn "E: $@";
+                $self->close;
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
new file mode 100644
index 00000000..20808d6d
--- /dev/null
+++ b/lib/PublicInbox/Isearch.pm
@@ -0,0 +1,141 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Provides everything the PublicInbox::Search object does;
+# but uses global ExtSearch (->ALL) with an eidx_key query to
+# emulate per-Inbox search using ->ALL.
+package PublicInbox::Isearch;
+use v5.12;
+use PublicInbox::ExtSearch;
+use PublicInbox::Search;
+
+sub new {
+        my (undef, $ibx, $es) = @_;
+        bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
+}
+
+sub _ibx_id ($) {
+        my ($self) = @_;
+        my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+        $sth->execute($self->{eidx_key});
+        $sth->fetchrow_array //
+                die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
+}
+
+sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }
+
+sub eidx_mset_prep ($$) {
+        my ($self, $opt) = @_;
+        my %opt = $opt ? %$opt : ();
+        $opt{eidx_key} = $self->{eidx_key};
+        my $uid_range = $opt{uid_range} or return \%opt;
+        my ($beg, $end) = @$uid_range;
+        my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+        my $dbh = $self->{es}->over->dbh;
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+        $sth->execute($ibx_id, $beg, $end);
+        my @r = ($sth->fetchrow_array);
+
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+        $sth->execute($ibx_id, $beg, $end);
+        $r[1] = $sth->fetchrow_array;
+        if (defined($r[1]) && defined($r[0])) {
+                $opt{limit} = $r[1] - $r[0] + 1;
+        } else {
+                $r[1] //= $self->{es}->xdb->get_lastdocid;
+                $r[0] //= 0;
+        }
+        $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+        \%opt;
+}
+
+sub mset {
+        my ($self, $str, $opt) = @_;
+        $self->{es}->mset($str, eidx_mset_prep $self, $opt);
+}
+
+sub async_mset {
+        my ($self, $str, $opt, $cb, @args) = @_;
+        $opt = eidx_mset_prep $self, $opt;
+        $self->{es}->async_mset($str, $opt, $cb, @args);
+}
+
+sub mset_to_artnums {
+        my ($self, $mset, $opt) = @_;
+        my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
+        my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+        my $qmarks = join(',', map { '?' } @$docids);
+        if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
+                my $range = '';
+                my @r;
+                if (my $r = $opt->{uid_range}) {
+                        $range = 'AND xnum >= ? AND xnum <= ?';
+                        @r = @$r;
+                }
+                return $self->{es}->over->dbh->
+                        selectcol_arrayref(<<"", undef, $ibx_id, @$docids, @r);
+SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range
+ORDER BY xnum ASC
+
+        }
+
+        my $rows = $self->{es}->over->dbh->
+                        selectall_arrayref(<<"", undef, $ibx_id, @$docids);
+SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks)
+
+        my $i = -1;
+        my %order = map { $_ => ++$i } @$docids;
+        my @xnums;
+        for my $row (@$rows) { # @row = ($docid, $xnum)
+                my $idx = delete($order{$row->[0]}) // next;
+                $xnums[$idx] = $row->[1];
+        }
+        if (scalar keys %order) {
+                warn "W: $self->{es}->{topdir} #",
+                        join(', ', sort { $a <=> $b } keys %order),
+                        " not mapped to `$self->{eidx_key}'\n";
+                warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+                @xnums = grep { defined } @xnums;
+        }
+        \@xnums;
+}
+
+sub mset_to_smsg {
+        my ($self, $ibx, $mset) = @_; # $ibx is a real inbox, not eidx
+        my $xnums = mset_to_artnums($self, $mset);
+        my $i = -1;
+        my %order = map { $_ => ++$i } @$xnums;
+        my $unordered = $ibx->over->get_all(@$xnums);
+        my @msgs;
+        for my $smsg (@$unordered) {
+                my $idx = delete($order{$smsg->{num}}) // do {
+                        warn "W: $ibx->{inboxdir} #$smsg->{num}\n";
+                        next;
+                };
+                $msgs[$idx] = $smsg;
+        }
+        if (scalar keys %order) {
+                warn "W: $ibx->{inboxdir} #",
+                        join(', ', sort { $a <=> $b } keys %order),
+                        " no longer valid\n";
+                warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+        }
+        wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
+}
+
+sub has_threadid { 1 }
+
+sub help { $_[0]->{es}->help }
+
+sub xh_args { # prep getopt args to feed to xap_helper.h socket
+        my ($self, $opt) = @_; # TODO uid_range
+        ($self->{es}->xh_args, '-O', $self->{eidx_key});
+}
+
+1;
diff --git a/lib/PublicInbox/KQNotify.pm b/lib/PublicInbox/KQNotify.pm
index c7740df2..2efa887d 100644
--- a/lib/PublicInbox/KQNotify.pm
+++ b/lib/PublicInbox/KQNotify.pm
@@ -1,48 +1,35 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # implements the small subset of Linux::Inotify2 functionality we use
 # using IO::KQueue on *BSD systems.
 package PublicInbox::KQNotify;
-use strict;
+use v5.12;
+use parent qw(PublicInbox::FakeInotify);
 use IO::KQueue;
 use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
-use PublicInbox::FakeInotify;
-use Time::HiRes qw(stat);
+use Errno qw(ENOENT);
 
 # NOTE_EXTEND detects rename(2), NOTE_WRITE detects link(2)
 sub MOVED_TO_OR_CREATE () { NOTE_EXTEND|NOTE_WRITE }
 
 sub new {
         my ($class) = @_;
-        bless { dskq => PublicInbox::DSKQXS->new, watch => {} }, $class;
+        bless { dskq => PublicInbox::DSKQXS->new }, $class;
 }
 
 sub watch {
         my ($self, $path, $mask) = @_;
-        my ($fh, $watch);
-        if (-d $path) {
-                opendir($fh, $path) or return;
-                my @st = stat($fh);
-                $watch = bless [ $fh, $path, $st[10] ],
-                        'PublicInbox::KQNotify::Watchdir';
-        } else {
-                open($fh, '<', $path) or return;
-                $watch = bless [ $fh, $path ],
-                        'PublicInbox::KQNotify::Watch';
-        }
-        my $ident = fileno($fh);
-        $self->{dskq}->{kq}->EV_SET($ident, # ident
+        my $dir_delete = $mask & NOTE_DELETE ? 1 : 0;
+        my $w = $self->watch_open($path, \$dir_delete) or return;
+        $w->[2] = pop @$w; # ctime is unused by this subclass
+        my $ident = fileno($w->[2]) // die "BUG: bad fileno $w->[2]: $!";
+        $self->{dskq}->{kq}->EV_SET($ident, # ident (fd)
                 EVFILT_VNODE, # filter
                 EV_ADD | EV_CLEAR, # flags
                 $mask, # fflags
-                0, 0); # data, udata
-        if ($mask == NOTE_WRITE || $mask == MOVED_TO_OR_CREATE) {
-                $self->{watch}->{$ident} = $watch;
-        } else {
-                die "TODO Not implemented: $mask";
-        }
-        $watch;
+                0, $dir_delete); # data, udata
+        $self->{watch}->{$ident} = $w;
 }
 
 # emulate Linux::Inotify::fileno
@@ -59,38 +46,31 @@ sub blocking {}
 # behave like Linux::Inotify2->read
 sub read {
         my ($self) = @_;
-        my @kevents = $self->{dskq}->{kq}->kevent(0);
         my $events = [];
-        for my $kev (@kevents) {
+        for my $kev ($self->{dskq}->{kq}->kevent(0)) {
                 my $ident = $kev->[KQ_IDENT];
-                my $mask = $kev->[KQ_FFLAGS];
-                my ($dh, $path, $old_ctime) = @{$self->{watch}->{$ident}};
-                if (!defined($old_ctime)) {
-                        push @$events,
-                                bless(\$path, 'PublicInbox::FakeInotify::Event')
-                } elsif ($mask & MOVED_TO_OR_CREATE) {
-                        my @new_st = stat($path) or next;
-                        $self->{watch}->{$ident}->[3] = $new_st[10]; # ctime
-                        rewinddir($dh);
-                        PublicInbox::FakeInotify::on_new_files($events, $dh,
-                                                        $path, $old_ctime);
+                my $w = $self->{watch}->{$ident} or next;
+                if (!@$w) { # cancelled
+                        delete($self->{watch}->{$ident});
+                        next;
+                }
+                my $dir_delete = $kev->[KQ_UDATA];
+                my ($old_dev, $old_ino, $fh, $path) = @$w;
+                my @new_st = stat($path);
+                warn "W: stat($path): $!\n" if !@new_st && $! != ENOENT;
+                if (!@new_st || "$old_dev $old_ino" ne "@new_st[0,1]") {
+                        push(@$events, $self->gone($ident, $path));
+                        next;
+                }
+                if (-d _) {
+                        rewinddir($fh);
+                        $self->on_dir_change($events, $fh, $path, $dir_delete);
+                } else {
+                        push @$events, bless(\$path,
+                                        'PublicInbox::FakeInotify::Event');
                 }
         }
         @$events;
 }
 
-package PublicInbox::KQNotify::Watch;
-use strict;
-
-sub name { $_[0]->[1] }
-
-sub cancel { close $_[0]->[0] or die "close: $!" }
-
-package PublicInbox::KQNotify::Watchdir;
-use strict;
-
-sub name { $_[0]->[1] }
-
-sub cancel { closedir $_[0]->[0] or die "closedir: $!" }
-
 1;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
new file mode 100644
index 00000000..e9a0de6c
--- /dev/null
+++ b/lib/PublicInbox/LEI.pm
@@ -0,0 +1,1605 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Backend for `lei' (local email interface).  Unlike the C10K-oriented
+# PublicInbox::Daemon, this is designed exclusively to handle trusted
+# local clients with read/write access to the FS and use as many
+# system resources as the local user has access to.
+package PublicInbox::LEI;
+use v5.12;
+use parent qw(PublicInbox::DS PublicInbox::LeiExternal
+        PublicInbox::LeiQuery);
+use autodie qw(bind chdir open pipe socket socketpair syswrite unlink);
+use Getopt::Long ();
+use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un);
+use Errno qw(EPIPE EAGAIN ECONNREFUSED ENOENT ECONNRESET);
+use Cwd qw(getcwd);
+use POSIX qw(strftime);
+use IO::Handle ();
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Config;
+use PublicInbox::Syscall qw(EPOLLIN);
+use PublicInbox::Spawn qw(run_wait popen_rd run_qx);
+use PublicInbox::Lock;
+use PublicInbox::Eml;
+use PublicInbox::Import;
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::OnDestroy;
+use PublicInbox::IPC;
+use Time::HiRes qw(stat); # ctime comparisons for config cache
+use File::Path ();
+use File::Spec;
+use Carp qw(carp);
+use Sys::Syslog qw(openlog syslog closelog);
+our $quit = \&CORE::exit;
+our ($current_lei, $errors_log, $listener, $oldset, $dir_idle);
+my $GLP = Getopt::Long::Parser->new;
+$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+my $GLP_PASS = Getopt::Long::Parser->new;
+$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through));
+
+our (%PATH2CFG, # persistent for socket daemon
+$MDIR2CFGPATH, # location => { /path/to/config => [ ino watches ] }
+$OPT, # shared between optparse and opt_dash callback (for Getopt::Long)
+$daemon_pid
+);
+
+# TBD: this is a documentation mechanism to show a subcommand
+# (may) pass options through to another command:
+sub pass_through { $GLP_PASS }
+
+sub opt_dash ($$) { # callback runs inside optparse
+        my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)'
+        my ($key) = ($spec =~ m/\A([a-z]+)/g);
+        my $cb = sub { # Getopt::Long "<>" catch-all handler
+                my ($arg) = @_;
+                if ($arg =~ /\A-($re_str)\z/) {
+                        $OPT->{$key} = $1;
+                } elsif ($arg eq '--') { # "--" arg separator, ignore first
+                        push @{$OPT->{-argv}}, $arg if $OPT->{'--'}++;
+                # lone (single) dash is handled elsewhere
+                } elsif (substr($arg, 0, 1) eq '-') {
+                        if ($OPT->{'--'}) {
+                                push @{$OPT->{-argv}}, $arg;
+                        } else {
+                                die "bad argument: $arg\n";
+                        }
+                } else {
+                        push @{$OPT->{-argv}}, $arg;
+                }
+        };
+        ($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long
+}
+
+# rel2abs preserves symlinks in parent, unlike abs_path
+sub rel2abs {
+        my ($self, $p) = @_;
+        if (index($p, '/') == 0) { # already absolute
+                $p =~ tr!/!/!s; # squeeze redundant slashes
+                chop($p) if substr($p, -1, 1) eq '/';
+                return $p;
+        }
+        my $pwd = $self->{env}->{PWD};
+        if (defined $pwd) {
+                if (my @st_pwd = stat($pwd)) {
+                        my @st_cwd = stat($self->{3}) or die "stat({3}): $!";
+                        "@st_pwd[1,0]" eq "@st_cwd[1,0]" or
+                                $self->{env}->{PWD} = $pwd = undef;
+                } else { # PWD was invalid
+                        $self->{env}->{PWD} = $pwd = undef;
+                }
+        }
+        $pwd //= $self->{env}->{PWD} = getcwd() // die "getcwd: $!";
+        File::Spec->rel2abs($p, $pwd);
+}
+
+# abs_path resolves symlinks in parent iff all parents exist
+sub abs_path { Cwd::abs_path($_[1]) // rel2abs(@_) }
+
+sub canonpath_harder {
+        my $p = $_[-1]; # $_[0] may be self
+        $p = File::Spec->canonpath($p);
+        $p =~ m!(?:/*|\A)\.\.(?:/*|\z)! && -e $p ? Cwd::abs_path($p) : $p;
+}
+
+sub share_path ($) { # $HOME/.local/share/lei/$FOO
+        my ($self) = @_;
+        rel2abs($self, ($self->{env}->{XDG_DATA_HOME} //
+                ($self->{env}->{HOME} // '/nonexistent').'/.local/share')
+                .'/lei');
+}
+
+sub store_path ($) { share_path($_[0]) . '/store' }
+
+sub _config_path ($) {
+        my ($self) = @_;
+        rel2abs($self, ($self->{env}->{XDG_CONFIG_HOME} //
+                ($self->{env}->{HOME} // '/nonexistent').'/.config')
+                .'/lei/config');
+}
+
+sub cache_dir ($) {
+        my ($self) = @_;
+        rel2abs($self, ($self->{env}->{XDG_CACHE_HOME} //
+                ($self->{env}->{HOME} // '/nonexistent').'/.cache')
+                .'/lei');
+}
+
+sub url_folder_cache {
+        my ($self) = @_;
+        require PublicInbox::SharedKV; # URI => updated_at_sec_
+        PublicInbox::SharedKV->new(cache_dir($self).'/uri_folder');
+}
+
+sub ale {
+        my ($self) = @_;
+        $self->{ale} // do {
+                require PublicInbox::LeiALE;
+                my $cfg = $self->_lei_cfg(1);
+                $self->{ale} = $cfg->{ale} //= PublicInbox::LeiALE->new($self);
+        };
+}
+
+sub index_opt {
+        # TODO: drop underscore variants everywhere, they're undocumented
+        qw(fsync|sync! jobs|j=i indexlevel|L=s compact
+        max_size|max-size=s sequential-shard
+        batch_size|batch-size=s skip-docdata)
+}
+
+my @c_opt = qw(c=s@ C=s@ quiet|q);
+my @net_opt = (qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt());
+my @lxs_opt = qw(remote! local! external! include|I=s@ exclude=s@ only|O=s@
+        import-remote!);
+
+# we don't support -C as an alias for --find-copies since it's already
+# used for chdir
+our @diff_opt = qw(unified|U=i output-indicator-new=s output-indicator-old=s
+        output-indicator-context=s indent-heuristic!
+        minimal patience histogram anchored=s@ diff-algorithm=s
+        color-moved:s color-moved-ws=s no-color-moved no-color-moved-ws
+        word-diff:s word-diff-regex=s color-words:s no-renames
+        rename-empty! check ws-error-highlight=s full-index binary
+        abbrev:i break-rewrites|B:s find-renames|M:s find-copies:s
+        find-copies-harder irreversible-delete|D l=i diff-filter=s
+        S=s G=s find-object=s pickaxe-all pickaxe-regex R
+        relative:s text|a ignore-cr-at-eol ignore-space-at-eol
+        ignore-space-change|b ignore-all-space|w ignore-blank-lines
+        inter-hunk-context=i function-context|W exit-code ext-diff
+        no-ext-diff textconv! src-prefix=s dst-prefix=s no-prefix
+        line-prefix=s);
+
+# we generate shell completion + help using %CMD and %OPTDESC,
+# see lei__complete() and PublicInbox::LeiHelp
+# command => [ positional_args, 1-line description, Getopt::Long option spec ]
+our %CMD = ( # sorted in order of importance/use:
+'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms',
+        'stdin|', # /|\z/ must be first for lone dash
+        @lxs_opt, @net_opt,
+        qw(save! output|mfolder|o=s format|f=s dedupe|d=s threads|t+
+        thread-id|T=s
+        sort|s=s reverse|r offset=i pretty jobs|j=s globoff|g augment|a
+        import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+
+        shared color! mail-sync!), @c_opt, opt_dash('limit|n=i', '[0-9]+') ],
+
+'up' => [ 'OUTPUT...|--all', 'update saved search',
+        qw(jobs|j=s lock=s@ alert=s@ mua=s verbose|v+ exclude=s@
+        remote-fudge-time=s all:s remote! local! external!), @net_opt, @c_opt ],
+
+'lcat' => [ '--stdin|MSGID_OR_URL...', 'display local copy of message(s)',
+        'stdin|', # /|\z/ must be first for lone dash
+        # some of these options are ridiculous for lcat
+        @lxs_opt, @net_opt,
+        qw(output|mfolder|o=s format|f=s dedupe|d=s threads|t+
+        sort|s=s reverse|r offset=i jobs|j=s globoff|g augment|a
+        import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+
+        color!), @c_opt, opt_dash('limit|n=i', '[0-9]+') ],
+
+'blob' => [ 'OID', 'show a git blob, reconstructing from mail if necessary',
+        qw(git-dir=s@ cwd! verbose|v+ mail! oid-a|A=s path-a|a=s path-b|b=s),
+        @lxs_opt, @net_opt, @c_opt ],
+
+'rediff' => [ '--stdin|LOCATION...',
+                'regenerate a diff with different options',
+        'stdin|', # /|\z/ must be first for lone dash
+        qw(git-dir=s@ cwd! verbose|v+ color:s no-color drq:1 dequote-only:1
+        order-file=s), @diff_opt, @lxs_opt, @net_opt, @c_opt ],
+
+'mail-diff' => [ '--stdin|LOCATION...', 'diff the contents of emails',
+        'stdin|', # /|\z/ must be first for lone dash
+        qw(verbose|v+ in-format|F=s color:s no-color raw-header),
+        @diff_opt, @net_opt, @c_opt ],
+
+'add-external' => [ 'LOCATION',
+        'add/set priority of a publicinbox|extindex for extra matches',
+        qw(boost=i mirror=s inbox-version=i epoch=s verbose|v+),
+        @c_opt, index_opt(), @net_opt ],
+'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
+        qw(format|f=s z|0 globoff|g invert-match|v local remote), @c_opt ],
+'ls-label' => [ '', 'list labels', qw(z|0 stats:s), @c_opt ],
+'ls-mail-sync' => [ '[FILTER]', 'list mail sync folders',
+                qw(z|0 globoff|g invert-match|v local remote), @c_opt ],
+'ls-mail-source' => [ 'URL', 'list IMAP or NNTP mail source folders',
+                qw(z|0 ascii l pretty url), @net_opt, @c_opt ],
+'forget-external' => [ 'LOCATION...|--prune',
+        'exclude further results from a publicinbox|extindex',
+        qw(prune), @c_opt ],
+
+'ls-search' => [ '[PREFIX]', 'list saved search queries',
+                qw(format|f=s pretty l ascii z|0), @c_opt ],
+'forget-search' => [ 'OUTPUT...|--prune', 'forget a saved search',
+                qw(verbose|v+ prune:s), @c_opt ],
+'edit-search' => [ 'OUTPUT', "edit saved search via `git config --edit'",
+                        @c_opt ],
+'rm' => [ '--stdin|LOCATION...',
+        'remove a message from the index and prevent reindexing',
+        'stdin|', # /|\z/ must be first for lone dash
+        qw(in-format|F=s lock=s@ commit-delay=i), @net_opt, @c_opt ],
+'plonk' => [ '--threads|--from=IDENT',
+        'exclude mail matching From: or threads from non-Message-ID searches',
+        qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
+tag => [ 'KEYWORDS... LOCATION...|--stdin',
+        'set/unset keywords and/or labels on message(s)',
+        qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@ commit-delay=i),
+        @net_opt, @c_opt, pass_through('-kw:foo for delete') ],
+
+'purge-mailsource' => [ 'LOCATION|--all',
+        'remove imported messages from IMAP, Maildirs, and MH',
+        qw(exact! all jobs:i indexed), @c_opt ],
+
+'add-watch' => [ 'LOCATION... [LABELS...]',
+        'watch for new messages and flag changes',
+        qw(poll-interval=s state=s recursive|r), @c_opt ],
+'rm-watch' => [ 'LOCATION...', 'remove specified watch(es)',
+        qw(recursive|r), @c_opt ],
+'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
+                qw(l z|0), @c_opt ],
+'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
+'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
+'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
+        qw(prune), @c_opt ],
+
+'reindex' => [ '', 'reindex all locally-indexed messages', @c_opt ],
+
+'index' => [ 'LOCATION... [LABELS...]', 'one-time index from URL or filesystem',
+        qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
+        verbose|v+ incremental!), @net_opt, # mainly for --proxy=
+         @c_opt ],
+import => [ 'LOCATION...|--stdin [LABELS...]',
+        'one-time import/update from URL or filesystem',
+        qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
+        lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
+        commit-delay=i sort|s:s@),
+        @net_opt, @c_opt ],
+'forget-mail-sync' => [ 'LOCATION...',
+        'forget sync information for a mail folder', @c_opt ],
+'refresh-mail-sync' => [ 'LOCATION...|--all',
+        'prune dangling sync data for a mail folder', 'all:s',
+                @net_opt, @c_opt ],
+'export-kw' => [ 'LOCATION...|--all',
+        'one-time export of keywords of sync sources',
+        qw(all:s mode=s), @net_opt, @c_opt ],
+'convert' => [ 'LOCATION...|--stdin',
+        'one-time conversion from URL or filesystem to another format',
+        qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
+                rsyncable sort|s:s@),
+        @net_opt, @c_opt ],
+'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
+        "use a patch to generate a query for `lei q --stdin'",
+        qw(stdin| in-format|F=s want|w=s@ uri debug), @net_opt, @c_opt ],
+'config' => [ '[...]', sub {
+                'git-config(1) wrapper for '._config_path($_[0]). "\n" .
+        '-l/--list and other common git-config uses are supported'
+        }, qw(config-file|system|global|file|f=s), # for conflict detection
+         qw(edit|e c=s@ C=s@), pass_through('git config') ],
+'inspect' => [ 'ITEMS...|--stdin', 'inspect lei/store and/or local external',
+        qw(stdin| pretty ascii dir|d=s), @c_opt ],
+
+'init' => [ '[DIRNAME]', sub {
+        "initialize storage, default: ".store_path($_[0]);
+        }, @c_opt ],
+'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon',
+        # "-C DIR" conflicts with -CHLD, here, and chdir makes no sense, here
+        opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ],
+'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
+'help' => [ '[SUBCOMMAND]', 'show help' ],
+
+# TODO
+#'reorder-local-store-and-break-history' => [ '[REFNAME]',
+#        'rewrite git history in an attempt to improve compression',
+#        qw(gc!), @c_opt ],
+#'fuse-mount' => [ 'PATHNAME', 'expose lei/store as Maildir(s)', @c_opt ],
+#
+# internal commands are prefixed with '_'
+'_complete' => [ '[...]', 'internal shell completion helper',
+                pass_through('everything') ],
+); # @CMD
+
+# switch descriptions, try to keep consistent across commands
+# $spec: Getopt::Long option specification
+# $spec => [@ALLOWED_VALUES (default is first), $description],
+# $spec => $description
+# "$SUB_COMMAND TAB $spec" => as above
+my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
+                        'specify message input format' ];
+my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+my $sort_out = [ 'VAL|received|relevance|docid',
+                "order of results is `--output'-dependent"];
+my $sort_in = [ 'sequence|mtime|size', 'sort input (format-dependent)' ];
+
+# we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
+my %OPTDESC = (
+'help|h' => 'show this built-in help',
+'c=s@' => [ 'NAME=VALUE', 'set config option' ],
+'C=s@' => [ 'DIR', 'chdir to specify to directory' ],
+'quiet|q' => 'be quiet',
+'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none',
+        'mbox(5) locking method(s) to use (default: fcntl,dotlock)' ],
+
+'incremental!        import' => 'import already seen IMAP and NNTP articles',
+'globoff|g' => "do not match locations using '*?' wildcards ".
+                "and\xa0'[]'\x{a0}ranges",
+'invert-match|v' => 'select non-matching lines',
+'color!' => 'disable color (for --format=text)',
+'verbose|v+' => 'be more verbose',
+'external!' => 'do not use externals',
+'mail!' => 'do not look in mail storage for OID',
+'cwd!' => 'do not look in git repo of current working directory',
+'oid-a|A=s' => 'pre-image OID',
+'path-a|a=s' => 'pre-image pathname associated with OID',
+'path-b|b=s' => 'post-image pathname associated with OID',
+'git-dir=s@' => 'additional git repository to scan',
+'dir|d=s        inspect' =>
+        'specify a inboxdir, extindex topdir or Xapian shard',
+'proxy=s' => [ 'PROTO://HOST[:PORT]', # shared with curl(1)
+        "proxy for (e.g. `socks5h://0:9050')" ],
+'torsocks=s' => ['VAL|auto|no|yes',
+                'whether or not to wrap git and curl commands with torsocks'],
+'no-torsocks' => 'alias for --torsocks=no',
+'save!' =>  "do not save a search for `lei up'",
+'import-remote!' => 'do not memoize remote messages into local store',
+'import-before!' => 'do not import before writing to output (DANGEROUS)',
+
+'type=s' => [ 'any|mid|git', 'disambiguate type' ],
+
+'dedupe|d=s' => ['STRATEGY|content|oid|mid|none',
+                'deduplication strategy'],
+'threads|t+' =>
+        'return all messages in the same threads as the actual match(es)',
+
+'want|w=s@' => [ 'PREFIX|dfpost|dfn', # common ones in help...
+                'search prefixes to extract (default: dfpost7)' ],
+'uri        p2q' => [ 'URI escape output' ],
+
+'alert=s@' => ['CMD,:WINCH,:bell,<any command>',
+        'run command(s) or perform ops when done writing to output ' .
+        '(default: ":WINCH,:bell" with --mua and Maildir/IMAP output, ' .
+        'nothing otherwise)' ],
+
+'augment|a' => 'augment --output destination instead of clobbering',
+
+'output|mfolder|o=s' => [ 'MFOLDER',
+        "destination (e.g.\xa0`/path/to/Maildir', ".
+        "or\xa0`-'\x{a0}for\x{a0}stdout)" ],
+'mua=s' => [ 'CMD',
+        "MUA to run on --output Maildir or mbox (e.g.\xa0`mutt\xa0-f\xa0%f')" ],
+'new-only        import' => 'only import new messages from IMAP source',
+
+'inbox-version=i' => [ 'NUM|1|2',
+                'force a public-inbox version with --mirror'],
+'mirror=s' => [ 'URL', 'mirror a public-inbox'],
+
+# public-inbox-index options
+'fsync!' => 'speed up indexing after --mirror, risk index corruption',
+'compact' => 'run compact index after mirroring',
+'indexlevel|L=s' => [ 'LEVEL|full|medium|basic',
+        "indexlevel with --mirror (default: full)" ],
+'max_size|max-size=s' => [ 'SIZE',
+        'do not index messages larger than SIZE (default: infinity)' ],
+'batch_size|batch-size=s' => [ 'SIZE',
+        'flush changes to OS after given number of bytes (default: 1m)' ],
+'sequential-shard' =>
+        'index Xapian shards sequentially for slow storage',
+'skip-docdata' =>
+        'drop compatibility w/ public-inbox <1.6 to save ~1.5% space',
+
+'format|f=s        q' => [
+        'OUT|maildir|mboxrd|mboxcl2|mboxcl|mboxo|html|json|jsonl|concatjson',
+                'specify output format, default depends on --output'],
+'exclude=s@        q' => [ 'LOCATION',
+                'exclude specified external(s) from search' ],
+'include|I=s@        q' => [ 'LOCATION',
+                'include specified external(s) in search' ],
+'only|O=s@        q' => [ 'LOCATION',
+                'only use specified external(s) for search' ],
+'jobs|j=s' => [ 'JOBSPEC',
+                'control number of query and writer jobs' .
+                "integers delimited by `,', either of which may be omitted"
+                ],
+'jobs|j=i        add-external' => 'set parallelism when indexing after --mirror',
+
+'in-format|F=s' => $stdin_formats,
+'format|f=s        ls-search' => ['OUT|json|jsonl|concatjson',
+                        'listing output format' ],
+'l        ls-search' => 'long listing format',
+'l        ls-watch' => 'long listing format',
+'l        ls-mail-source' => 'long listing format',
+'url        ls-mail-source' => 'show full URL of newsgroup or IMAP folder',
+'format|f=s        ls-external' => $ls_format,
+
+'prune:s        forget-search' =>
+        ['TYPE|local|remote', 'prune all, remote or local folders' ],
+
+'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
+'offset=i' => ['OFF', 'search result offset (default: 0)'],
+
+'sort|s=s        q' => $sort_out,
+'sort|s=s        lcat' => $sort_out,
+'sort|s:s@        convert' => $sort_in,
+'sort|s:s@        import' => $sort_in,
+'reverse|r' => 'reverse search results', # like sort(1)
+
+'boost=i' => 'increase/decrease priority of results (default: 0)',
+
+'local' => 'limit operations to the local filesystem',
+'local!' => 'exclude results from the local filesystem',
+'remote' => 'limit operations to those requiring network access',
+'remote!' => 'prevent operations requiring network access',
+
+# up, refresh-mail-sync, export-kw
+'all:s' => ['TYPE|local|remote', 'all remote or local folders' ],
+
+'remote-fudge-time=s' => [ 'INTERVAL',
+        'look for mail INTERVAL older than the last successful query' ],
+
+'mid=s' => 'specify the Message-ID of a message',
+'oid=s' => 'specify the git object ID of a message',
+
+'recursive|r' => 'scan directories/mailboxes/newsgroups recursively',
+'exclude=s' => 'exclude mailboxes/newsgroups based on pattern',
+'include=s' => 'include mailboxes/newsgroups based on pattern',
+
+'exact' => 'operate on exact header matches only',
+'exact!' => 'rely on content match instead of exact header matches',
+
+'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
+
+'kw!' => 'disable/enable importing keywords (aka "flags")',
+
+# xargs, env, use "-0", git(1) uses "-z".  We support z|0 everywhere
+'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+
+'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ],
+'edit|e        config' => 'open an editor to modify the lei config file',
+); # %OPTDESC
+
+my %CONFIG_KEYS = (
+        'leistore.dir' => 'top-level storage location',
+);
+
+my @WQ_KEYS = qw(lxs l2m ikw pmd wq1 lne v2w); # internal workers
+
+sub _drop_wq {
+        my ($self) = @_;
+        for my $wq (grep(defined, delete(@$self{@WQ_KEYS}))) {
+                $wq->wq_kill(-POSIX::SIGTERM());
+                $wq->DESTROY;
+        }
+}
+
+# pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE
+sub x_it ($$) {
+        my ($self, $code) = @_;
+        # make sure client sees stdout before exit
+        $self->{1}->autoflush(1) if $self->{1};
+        stop_pager($self);
+        if ($self->{pkt_op_p}) { # worker => lei-daemon
+                $self->{pkt_op_p}->pkt_do('x_it', $code);
+                exit($code >> 8) if $$ != $daemon_pid;
+        } elsif ($self->{sock}) { # lei->daemon => lei(1) client
+                send($self->{sock}, "x_it $code", 0);
+        } elsif ($quit == \&CORE::exit) { # an admin (one-shot) command
+                exit($code >> 8);
+        } # else ignore if client disconnected
+        $self->dclose if $$ == $daemon_pid;
+}
+
+sub err ($;@) {
+        my $self = shift;
+        my $err = $self->{2} // ($self->{pgr} // [])->[2] // *STDERR{GLOB};
+        my @eor = (substr($_[-1]//'', -1, 1) eq "\n" ? () : ("\n"));
+        print $err @_, @eor and return;
+        my $old_err = delete $self->{2};
+        $old_err->close if $! == EPIPE && $old_err;
+        $err = $self->{2} = ($self->{pgr} // [])->[2] // *STDERR{GLOB};
+        print $err @_, @eor or print STDERR @_, @eor;
+}
+
+sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }
+
+sub qfin { # show message on finalization (LeiFinmsg)
+        my ($lei, $msg) = @_;
+        return if $lei->{opt}->{quiet};
+        $lei->{fmsg} ? push(@{$lei->{fmsg}}, "$msg\n") : qerr($lei, $msg);
+}
+
+sub fail_handler ($;$$) {
+        my ($lei, $code, $io) = @_;
+        $io->close if $io; # needed to avoid warnings on SIGPIPE
+        _drop_wq($lei);
+        x_it($lei, $code // (1 << 8));
+}
+
+sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers
+        fail_handler($_[0], 13, delete $_[0]->{1});
+}
+
+sub fail ($;@) {
+        my ($lei, @msg) = @_;
+        my $exit_code = ($msg[0]//'') =~ /\A-?[0-9]+\z/ ? shift(@msg) : undef;
+        local $current_lei = $lei;
+        $lei->{failed}++;
+        if (@msg) {
+                push @msg, "\n" if substr($msg[-1], -1, 1);
+                warn @msg;
+        }
+        $lei->{pkt_op_p}->pkt_do('fail_handler') if $lei->{pkt_op_p};
+        x_it($lei, $exit_code // (1 << 8));
+        undef;
+}
+
+sub out ($;@) {
+        my $self = shift;
+        return if print { $self->{1} // return } @_; # likely
+        return note_sigpipe($self, 1) if $! == EPIPE;
+        my $err = "error writing to output: $!";
+        delete $self->{1};
+        fail($self, $err);
+}
+
+sub puts ($;@) { out(shift, map { "$_\n" } @_) }
+
+sub child_error { # passes non-fatal curl exit codes to user
+        my ($self, $child_error, $msg) = @_; # child_error is $?
+        local $current_lei = $self;
+        $child_error ||= 1 << 8;
+        warn(substr($msg, -1, 1) eq "\n" ? $msg : "$msg\n") if defined $msg;
+        $self->{child_error} ||= $child_error;
+        if ($self->{pkt_op_p}) { # to top lei-daemon
+                $self->{pkt_op_p}->pkt_do('child_error', $child_error);
+        } elsif ($self->{sock}) { # to lei(1) client
+                send($self->{sock}, "child_error $child_error", 0);
+        } # else noop if client disconnected
+}
+
+sub note_sigpipe { # triggers sigpipe_handler
+        my ($self, $fd) = @_;
+        delete($self->{$fd})->close; # explicit close silences Perl warning
+        $self->{pkt_op_p}->pkt_do('sigpipe_handler') if $self->{pkt_op_p};
+        x_it($self, 13);
+}
+
+sub _lei_atfork_child {
+        my ($self, $persist) = @_;
+        # we need to explicitly close things which are on stack
+        my $cfg = $self->{cfg};
+        delete @$cfg{qw(-watches -lei_note_event)};
+        if ($persist) {
+                open $self->{3}, '<', '/';
+                fchdir($self);
+                close($_) for (grep(defined, delete @$self{qw(0 1 2 sock)}));
+                delete $cfg->{-lei_store};
+        } else { # worker, Net::NNTP (Net::Cmd) uses STDERR directly
+                open STDERR, '+>&='.fileno($self->{2}); # idempotent w/ fileno
+                STDERR->autoflush(1);
+                $self->{2} = \*STDERR;
+                POSIX::setpgid(0, $$) // die "setpgid(0, $$): $!";
+        }
+        close($_) for (grep(defined, delete @$self{qw(old_1 au_done)}));
+        close($_) for (@{delete($self->{-socks}) // []});
+        if (my $op_c = delete $self->{pkt_op_c}) {
+                close(delete $op_c->{sock});
+        }
+        if (my $pgr = delete $self->{pgr}) {
+                close($_) for (@$pgr[1,2]);
+        }
+        close $listener if $listener;
+        undef $listener;
+        $dir_idle->force_close if $dir_idle;
+        undef $dir_idle;
+        %PATH2CFG = ();
+        $MDIR2CFGPATH = undef;
+        eval 'no warnings; undef $PublicInbox::LeiNoteEvent::to_flush';
+        undef $errors_log;
+        $quit = \&CORE::exit;
+        if (!$self->{-eml_noisy}) { # only "lei import" sets this atm
+                my $cb = $SIG{__WARN__} // \&CORE::warn;
+                $SIG{__WARN__} = sub {
+                        $cb->(@_) unless PublicInbox::Eml::warn_ignore(@_)
+                };
+        }
+        $SIG{TERM} = sub { exit(128 + 15) };
+        $current_lei = $persist ? undef : $self; # for SIG{__WARN__}
+}
+
+sub _delete_pkt_op { # OnDestroy callback to prevent leaks on die
+        my ($self) = @_;
+        if (my $op = delete $self->{pkt_op_c}) { # in case of die
+                $op->close; # PublicInbox::PktOp::close
+        }
+        my $pkt_op_p = delete($self->{pkt_op_p}) or return;
+        close $pkt_op_p->{op_p};
+}
+
+sub pkt_op_pair {
+        my ($self) = @_;
+        require PublicInbox::PktOp;
+        my $end = on_destroy \&_delete_pkt_op, $self;
+        @$self{qw(pkt_op_c pkt_op_p)} = PublicInbox::PktOp->pair;
+        $end;
+}
+
+sub incr {
+        my $lei = shift;
+        $lei->{incr_pid} = $$ if @_;
+        while (my ($f, $n) = splice(@_, 0, 2)) { $lei->{$f} += $n }
+}
+
+sub pkt_ops {
+        my ($lei, $ops) = @_;
+        $ops->{fail_handler} = [ $lei ];
+        $ops->{sigpipe_handler} = [ $lei ];
+        $ops->{x_it} = [ $lei ];
+        $ops->{child_error} = [ $lei ];
+        $ops->{incr} = [ $lei ];
+        $ops;
+}
+
+sub workers_start {
+        my ($lei, $wq, $jobs, $ops, $flds) = @_;
+        $ops //= {};
+        ($wq->can('net_merge_all_done') && $lei->{auth}) and
+                $lei->{auth}->op_merge($ops, $wq, $lei);
+        pkt_ops($lei, $ops);
+        $ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ];
+        my $end = $lei->pkt_op_pair;
+        my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker";
+        $flds->{lei} = $lei;
+        $wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds,
+                $wq->can('_wq_done_wait') // \&wq_done_wait, $lei);
+        delete $lei->{pkt_op_p};
+        my $op_c = delete $lei->{pkt_op_c};
+        @$end = ();
+        $lei->event_step_init;
+        ($op_c, $ops);
+}
+
+# call this when we're ready to wait on events and yield to other clients
+sub wait_wq_events {
+        my ($lei, $op_c, $ops) = @_;
+        my $wq1 = $lei->{wq1};
+        ($wq1 && $wq1->can('net_merge_all_done') && !$lei->{auth}) and
+                $wq1->net_merge_all_done;
+        for my $wq (grep(defined, @$lei{qw(ikw pmd)})) { # auxiliary WQs
+                $wq->wq_close;
+        }
+        $wq1->{lei_sock} = $lei->{sock} if $wq1;
+        $op_c->{ops} = $ops;
+}
+
+sub wq1_start {
+        my ($lei, $wq, $jobs) = @_;
+        my ($op_c, $ops) = workers_start($lei, $wq, $jobs // 1);
+        $lei->{wq1} = $wq;
+        wait_wq_events($lei, $op_c, $ops); # net_merge_all_done if !{auth}
+}
+
+sub _help {
+        require PublicInbox::LeiHelp;
+        PublicInbox::LeiHelp::call($_[0], $_[1], \%CMD, \%OPTDESC);
+}
+
+sub optparse ($$$) {
+        my ($self, $cmd, $argv) = @_;
+        # allow _complete --help to complete, not show help
+        return 1 if substr($cmd, 0, 1) eq '_';
+        $self->{cmd} = $cmd;
+        local $OPT = $self->{opt} //= {};
+        my $info = $CMD{$cmd} // [ '[...]' ];
+        my ($proto, undef, @spec) = @$info;
+        my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP;
+        push @spec, qw(help|h);
+        my $lone_dash;
+        if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
+                $lone_dash = $spec[0];
+                $OPT->{$spec[0]} = \(my $var);
+                push @spec, '' => \$var;
+        }
+        $glp->getoptionsfromarray($argv, $OPT, @spec) or
+                return _help($self, "bad arguments or options for $cmd");
+        return _help($self) if $OPT->{help};
+
+        push @$argv, @{$OPT->{-argv}} if defined($OPT->{-argv});
+
+        # "-" aliases "stdin" or "clear"
+        $OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash;
+
+        if ($proto =~ s/\s*\[?(?:KEYWORDS|LABELS)\.\.\.\]?\s*//g) {
+                require PublicInbox::LeiInput;
+                my @err = PublicInbox::LeiInput::vmd_mod_extract($self, $argv);
+                return $self->fail(join("\n", @err)) if @err;
+        }
+
+        my $i = 0;
+        my $POS_ARG = '[A-Z][A-Z0-9_]+';
+        my ($err, $inf);
+        my @args = split(/ /, $proto);
+        for my $var (@args) {
+                if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args;
+                        $inf = defined($argv->[$i]) and last;
+                        $var =~ s/\.\.\.\z//;
+                        $err = "$var not supplied";
+                } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i
+                        $argv->[$i++] // ($err = "$var not supplied");
+                } elsif ($var =~ /\.\.\.\]\z/) { # optional args start
+                        $inf = 1;
+                        last;
+                } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg
+                        $i++;
+                } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin
+                        $inf = 1 if index($var, '...') > 0;
+                        my @or = split(/\|/, $var);
+                        my $ok;
+                        for my $o (@or) {
+                                if ($o =~ /\A--([a-z0-9\-]+)/) {
+                                        my $sw = $1;
+                                        # assume pipe/regular file on stdin
+                                        # w/o args means stdin
+                                        if ($sw eq 'stdin' && !@$argv &&
+                                                        (-p $self->{0} ||
+                                                         -f _)) {
+                                                $OPT->{stdin} //= 1;
+                                        }
+                                        $ok = defined($OPT->{$sw}) and last;
+                                } elsif (defined($argv->[$i])) {
+                                        $ok = 1;
+                                        $i++;
+                                        last;
+                                } # else continue looping
+                        }
+                        last if $ok;
+                        my $last = pop @or;
+                        $err = join(', ', @or) . " or $last must be set";
+                } else {
+                        warn "BUG: can't parse `$var' in $proto";
+                }
+                last if $err;
+        }
+        if (!$inf && scalar(@$argv) > scalar(@args)) {
+                $err //= 'too many arguments';
+        }
+        $err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1;
+}
+
+sub lazy_cb ($$$) { # $pfx is _complete_ or lei_
+        my ($self, $cmd, $pfx) = @_;
+        my $ucmd = $cmd;
+        $ucmd =~ tr/-/_/;
+        my $cb;
+        $cb = $self->can($pfx.$ucmd) and return $cb;
+        my $base = $ucmd;
+        $base =~ s/_([a-z])/\u$1/g;
+        my $pkg = "PublicInbox::Lei\u$base";
+        ($INC{"PublicInbox/Lei\u$base.pm"} // eval("require $pkg")) ?
+                $pkg->can($pfx.$ucmd) : undef;
+}
+
+sub do_env {
+        my $lei = shift;
+        fchdir($lei);
+        my $cb = shift // return ($lei, %{$lei->{env}}) ;
+        local ($current_lei, %ENV) = ($lei, %{$lei->{env}});
+        $cb = $lei->can($cb) if !ref($cb); # $cb may be a scalar sub name
+        eval { $cb->($lei, @_) };
+        $lei->fail($@) if $@;
+}
+
+sub dispatch {
+        my ($self, $cmd, @argv) = @_;
+        local ($current_lei, %ENV) = do_env($self);
+        $self->{2}->autoflush(1); # keep stdout buffered until x_it|DESTROY
+        return _help($self, 'no command given') unless defined($cmd);
+        # do not support Getopt bundling for this
+        while ($cmd eq '-C' || $cmd eq '-c') {
+                my $v = shift(@argv) // return fail($self, $cmd eq '-C' ?
+                                        '-C DIRECTORY' : '-c <name>=<value>');
+                push @{$self->{opt}->{substr($cmd, 1, 1)}}, $v;
+                $cmd = shift(@argv) // return _help($self, 'no command given');
+        }
+        if (my $cb = lazy_cb(__PACKAGE__, $cmd, 'lei_')) {
+                optparse($self, $cmd, \@argv) or return;
+                if (my $chdir = $self->{opt}->{C}) {
+                        for my $d (@$chdir) {
+                                next if $d eq ''; # same as git(1)
+                                chdir $d;
+                        }
+                        open($self->{3}, '<', '.');
+                }
+                $cb->($self, @argv);
+        } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
+                $GLP->getoptionsfromarray([$cmd, @argv], {}, qw(help|h C=s@))
+                        or return _help($self, 'bad arguments or options');
+                _help($self);
+        } else {
+                fail($self, "`$cmd' is not an lei command");
+        }
+}
+
+sub _lei_cfg ($;$) {
+        my ($self, $creat) = @_;
+        return $self->{cfg} if $self->{cfg};
+        my $f = _config_path($self);
+        my @st = stat($f);
+        my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
+        my ($sto, $sto_dir, $watches, $lne, $cfg);
+        if ($cfg = $PATH2CFG{$f}) { # reuse existing object in common case
+                ($cur_st eq $cfg->{-st} && !$self->{opt}->{c}) and
+                        return ($self->{cfg} = $cfg);
+                # reuse some fields below if they match:
+                ($sto, $sto_dir, $watches, $lne) =
+                                @$cfg{qw(-lei_store leistore.dir -watches
+                                        -lei_note_event)};
+        }
+        if (!@st) {
+                unless ($creat) { # any commands which write to cfg must creat
+                        $cfg = PublicInbox::Config->git_config_dump(
+                                                        '/dev/null', $self);
+                        return ($self->{cfg} = $cfg);
+                }
+                my ($cfg_dir) = ($f =~ m!(.*?/)[^/]+\z!);
+                File::Path::mkpath($cfg_dir);
+                open my $fh, '>>', $f;
+                @st = stat($fh) or die "fstat($f): $!\n";
+                $cur_st = pack('dd', $st[10], $st[7]);
+                qerr($self, "# $f created") if $self->{cmd} ne 'config';
+        }
+        $cfg = PublicInbox::Config->git_config_dump($f, $self);
+        $cfg->{-st} = $cur_st;
+        if ($sto && canonpath_harder($sto_dir // store_path($self))
+                        eq canonpath_harder($cfg->{'leistore.dir'} //
+                                                store_path($self))) {
+                $cfg->{-lei_store} = $sto;
+                $cfg->{-lei_note_event} = $lne;
+                $cfg->{-watches} = $watches if $watches;
+        }
+        if (scalar(keys %PATH2CFG) > 5) {
+                # FIXME: use inotify/EVFILT_VNODE to detect unlinked configs
+                delete(@PATH2CFG{grep(!-f, keys %PATH2CFG)});
+        }
+        $self->{cfg} = $self->{opt}->{c} ? $cfg : ($PATH2CFG{$f} = $cfg);
+        refresh_watches($self);
+        $cfg;
+}
+
+sub _lei_store ($;$) {
+        my ($self, $creat) = @_;
+        my $cfg = _lei_cfg($self, $creat) // return;
+        $cfg->{-lei_store} //= do {
+                require PublicInbox::LeiStore;
+                my $dir = $cfg->{'leistore.dir'} // store_path($self);
+                return unless $creat || -d $dir;
+                PublicInbox::LeiStore->new($dir, { creat => $creat });
+        };
+}
+
+# returns true on success, undef
+# argv[0] eq `+e' means errors do not ->fail # (like `sh +e')
+sub _config {
+        my ($self, @argv) = @_;
+        my $err_ok = ($argv[0] // '') eq '+e' ? shift(@argv) : undef;
+        my %env;
+        my %opt = map { $_ => $self->{$_} } (0..2);
+        my $cfg = _lei_cfg($self, 1);
+        my $opt_c = delete local $cfg->{-opt_c};
+        my @file_arg;
+        if ($opt_c) {
+                my ($set, $get, $nondash);
+                for (@argv) { # order matters for git-config
+                        if (!$nondash) {
+                                if (/\A--(?:add|rename-section|remove-section|
+                                                replace-all|
+                                                unset-all|unset)\z/x) {
+                                        ++$set;
+                                } elsif ($_ eq '-l' || $_ eq '--list' ||
+                                                /\A--get/) {
+                                        ++$get;
+                                } elsif (/\A-/) { # -z and such
+                                } else {
+                                        ++$nondash;
+                                }
+                        } else {
+                                ++$nondash;
+                        }
+                }
+                if ($set || ($nondash//0) > 1 && !$get) {
+                        @file_arg = ('-f', $cfg->{-f});
+                        $env{GIT_CONFIG} = $file_arg[1];
+                } else { # OK, we can use `-c n=v' for read-only
+                        $cfg->{-opt_c} = $opt_c;
+                        $env{GIT_CONFIG} = undef;
+                }
+        }
+        my $cmd = $cfg->config_cmd(\%env, \%opt);
+        push @$cmd, @file_arg, @argv;
+        run_wait($cmd, \%env, \%opt) ? ($err_ok ? undef : fail($self, $?)) : 1;
+}
+
+sub lei_daemon_pid { puts shift, $daemon_pid }
+
+sub lei_daemon_kill {
+        my ($self) = @_;
+        my $sig = $self->{opt}->{signal} // 'TERM';
+        kill($sig, $$) or fail($self, "kill($sig, $$): $!");
+}
+
+# Shell completion helper.  Used by lei-completion.bash and hopefully
+# other shells.  Try to do as much here as possible to avoid redundancy
+# and improve maintainability.
+sub lei__complete {
+        my ($self, @argv) = @_; # argv = qw(lei and any other args...)
+        shift @argv; # ignore "lei", the entire command is sent
+        @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h -C);
+        my $cmd = shift @argv;
+        my $info = $CMD{$cmd} // do { # filter matching commands
+                @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD);
+                return;
+        };
+        my ($proto, undef, @spec) = @$info;
+        my $cur = pop @argv;
+        my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./;
+        if (substr(my $_cur = $cur // '-', 0, 1) eq '-') { # --switches
+                # gross special case since the only git-config options
+                # Consider moving to a table if we need more special cases
+                # we use Getopt::Long for are the ones we reject, so these
+                # are the ones we don't reject:
+                if ($cmd eq 'config') {
+                        puts $self, grep(/$re/, keys %CONFIG_KEYS);
+                        @spec = qw(add z|null get get-all unset unset-all
+                                replace-all get-urlmatch
+                                remove-section rename-section
+                                name-only list|l edit|e
+                                get-color-name get-colorbool);
+                        # fall-through
+                }
+                # generate short/long names from Getopt::Long specs
+                puts $self, grep(/$re/, qw(--help -h -C), map {
+                        if (s/[:=].+\z//) { # req/optional args, e.g output|o=i
+                        } elsif (s/\+\z//) { # verbose|v+
+                        } elsif (s/!\z//) {
+                                # negation: mail! => no-mail|mail
+                                s/([\w\-]+)/$1|no-$1/g
+                        }
+                        map {
+                                my $x = length > 1 ? "--$_" : "-$_";
+                                $x eq $_cur ? () : $x;
+                        } grep(!/_/, split(/\|/, $_, -1)) # help|h
+                } grep { $OPTDESC{"$_\t$cmd"} || $OPTDESC{$_} } @spec);
+        } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) {
+                puts $self, grep(/$re/, keys %CONFIG_KEYS);
+        }
+
+        # switch args (e.g. lei q -f mbox<TAB>)
+        if (($argv[-1] // $cur // '') =~ /\A--?([\w\-]+)\z/) {
+                my $opt = quotemeta $1;
+                puts $self, map {
+                        my $v = $OPTDESC{$_};
+                        my @v = ref($v) ? split(/\|/, $v->[0]) : ();
+                        # get rid of ALL CAPS placeholder (e.g "OUT")
+                        # (TODO: completion for external paths)
+                        shift(@v) if scalar(@v) && uc($v[0]) eq $v[0];
+                        @v;
+                } grep(/\A(?:[\w-]+\|)*$opt\b.*?(?:\t$cmd)?\z/, keys %OPTDESC);
+        }
+        if (my $cb = lazy_cb($self, $cmd, '_complete_')) {
+                puts $self, $cb->($self, @argv, $cur ? ($cur) : ());
+        }
+        # TODO: URLs, pathnames, OIDs, MIDs, etc...  See optparse() for
+        # proto parsing.
+}
+
+sub exec_buf ($$) {
+        my ($argv, $env) = @_;
+        my $argc = scalar @$argv;
+        my $buf = 'exec '.join("\0", scalar(@$argv), @$argv);
+        while (my ($k, $v) = each %$env) { $buf .= "\0$k=$v" };
+        $buf;
+}
+
+sub start_mua {
+        my ($self) = @_;
+        if ($self->{ovv}->{fmt} =~ /\A(?:maildir)\z/) { # TODO: IMAP
+                refresh_watches($self);
+        }
+        my $mua = $self->{opt}->{mua} // return;
+        my $mfolder = $self->{ovv}->{dst};
+        my (@cmd, $replaced);
+        if ($mua =~ /\A(?:mutt|mailx|mail|neomutt)\z/) {
+                @cmd = ($mua, '-f');
+        # TODO: help wanted: other common FOSS MUAs
+        } else {
+                require Text::ParseWords;
+                @cmd = Text::ParseWords::shellwords($mua);
+                # mutt uses '%f' for open-hook with compressed mbox, we follow
+                @cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd;
+        }
+        push @cmd, $mfolder unless defined($replaced);
+        if ($self->{sock}) { # lei(1) client process runs it
+                # restore terminal: echo $query | lei q --stdin --mua=...
+                my $io = [];
+                $io->[0] = $self->{1} if $self->{opt}->{stdin} && -t $self->{1};
+                send_exec_cmd($self, $io, \@cmd, {});
+        }
+
+        # kick wait_startq:
+        syswrite($self->{au_done}, 'q') if $self->{lxs} && $self->{au_done};
+
+        return unless -t $self->{2}; # XXX how to determine non-TUI MUAs?
+        $self->{opt}->{quiet} = 1;
+        delete $self->{-progress};
+        delete $self->{opt}->{verbose};
+}
+
+sub send_exec_cmd { # tell script/lei to execute a command
+        my ($self, $io, $cmd, $env) = @_;
+        $PublicInbox::IPC::send_cmd->(
+                        $self->{sock} // die('lei client gone'),
+                        [ map { fileno($_) } @$io ],
+                        exec_buf($cmd, $env), 0) //
+                Carp::croak("sendmsg: $!");
+}
+
+sub poke_mua { # forces terminal MUAs to wake up and hopefully notice new mail
+        my ($self) = @_;
+        my $alerts = $self->{opt}->{alert} // return;
+        my $sock = $self->{sock};
+        while (my $op = shift(@$alerts)) {
+                if ($op eq ':WINCH') {
+                        # hit the process group that started the MUA
+                        send($sock, '-WINCH', 0) if $sock;
+                } elsif ($op eq ':bell') {
+                        out($self, "\a");
+                } elsif ($op =~ /(?<!\\),/) { # bare ',' (not ',,')
+                        push @$alerts, split(/(?<!\\),/, $op);
+                } elsif ($op =~ m!\A([/a-z0-9A-Z].+)!) {
+                        my $cmd = $1; # run an arbitrary command
+                        require Text::ParseWords;
+                        $cmd = [ Text::ParseWords::shellwords($cmd) ];
+                        send($sock, exec_buf($cmd, {}), 0) if $sock;
+                } else {
+                        warn("W: unsupported --alert=$op\n"); # non-fatal
+                }
+        }
+}
+
+my %path_to_fd = ('/dev/stdin' => 0, '/dev/stdout' => 1, '/dev/stderr' => 2);
+$path_to_fd{"/dev/fd/$_"} = $_ for (0..2);
+
+# this also normalizes the path
+sub path_to_fd {
+        my ($self, $path) = @_;
+        $path = rel2abs($self, $path);
+        $path =~ tr!/!/!s;
+        $path_to_fd{$path} // (
+                ($path =~ m!\A/(?:dev|proc/self)/fd/[0-9]+\z!) ?
+                        fail($self, "cannot open $path from daemon") : -1
+        );
+}
+
+# caller needs to "-t $self->{1}" to check if tty
+sub start_pager {
+        my ($self, $new_env) = @_;
+        chomp(my $pager = run_qx([qw(git var GIT_PAGER)]));
+        warn "`git var PAGER' error: \$?=$?" if $?;
+        return if $pager eq 'cat' || $pager eq '';
+        $new_env //= {};
+        $new_env->{LESS} //= 'FRX';
+        $new_env->{LV} //= '-c';
+        $new_env->{MORE} = $new_env->{LESS} if $^O eq 'freebsd';
+        my $rdr = { 1 => $self->{1}, 2 => $self->{2} };
+        CORE::pipe($rdr->{0}, my $wpager) or return warn "pipe: $!";
+        my $pgr = [ undef, @$rdr{1, 2} ];
+        my $env = $self->{env};
+        if ($self->{sock}) { # lei(1) process runs it
+                delete @$new_env{keys %$env}; # only set iff unset
+                send_exec_cmd($self, [ @$rdr{0..2} ], [$pager], $new_env);
+        } else {
+                die 'BUG: start_pager w/o socket';
+        }
+        $self->{1} = $wpager;
+        $self->{2} = $wpager if -t $self->{2};
+        $env->{GIT_PAGER_IN_USE} = 'true'; # we may spawn git
+        $self->{pgr} = $pgr;
+}
+
+# display a message for user before spawning full-screen $VISUAL
+sub pgr_err {
+        my ($self, @msg) = @_;
+        return warn(@msg) unless $self->{sock} && -t $self->{2};
+        start_pager($self, { LESS => 'RX' }); # no 'F' so we prompt
+        say { $self->{2} } @msg, '# -quit pager to continue-';
+        $self->{2}->autoflush(1);
+        stop_pager($self);
+        send($self->{sock}, 'wait', 0); # wait for user to quit pager
+}
+
+sub stop_pager {
+        my ($self) = @_;
+        my $pgr = delete($self->{pgr}) or return;
+        $self->{2} = $pgr->[2];
+        delete($self->{1})->close if $self->{1};
+        $self->{1} = $pgr->[1];
+}
+
+sub accept_dispatch { # Listener {post_accept} callback
+        my ($sock) = @_; # ignore other
+        $sock->autoflush(1);
+        my $self = bless { sock => $sock }, __PACKAGE__;
+        vec(my $rvec = '', fileno($sock), 1) = 1;
+        select($rvec, undef, undef, 60) or
+                return send($sock, 'timed out waiting to recv FDs', 0);
+        # (4096 * 33) >MAX_ARG_STRLEN
+        my @fds = $PublicInbox::IPC::recv_cmd->($sock, my $buf, 4096 * 33) or
+                return; # EOF
+        if (!defined($fds[0])) {
+                warn(my $msg = "recv_cmd failed: $!");
+                return send($sock, $msg, 0);
+        } else {
+                my $i = 0;
+                open($self->{$i++}, '+<&=', $_) for @fds;
+                $i == 4 or return send($sock, 'not enough FDs='.($i-1), 0)
+        }
+        # $ENV_STR = join('', map { "\0$_=$ENV{$_}" } keys %ENV);
+        # $buf = "$argc\0".join("\0", @ARGV).$ENV_STR."\0\0";
+        substr($buf, -2, 2, '') eq "\0\0" or  # s/\0\0\z//
+                return send($sock, 'request command truncated', 0);
+        my ($argc, @argv) = split(/\0/, $buf, -1);
+        undef $buf;
+        my %env = map { split(/=/, $_, 2) } splice(@argv, $argc);
+        $self->{env} = \%env;
+        eval { dispatch($self, @argv) };
+        $self->fail($@) if $@;
+}
+
+sub dclose {
+        my ($self) = @_;
+        local $current_lei = $self;
+        delete $self->{-progress};
+        _drop_wq($self) if $self->{failed};
+        $self->close if $self->{-event_init_done}; # PublicInbox::DS::close
+}
+
+# for long-running results
+sub event_step {
+        my ($self) = @_;
+        local %ENV = %{$self->{env}};
+        local $current_lei = $self;
+        eval {
+                my @fds = $PublicInbox::IPC::recv_cmd->(
+                        $self->{sock} // return, my $buf, 4096);
+                if (scalar(@fds) == 1 && !defined($fds[0])) {
+                        return if $! == EAGAIN;
+                        die "recvmsg: $!" if $! != ECONNRESET;
+                        @fds = (); # for open loop below:
+                }
+                for (@fds) { open my $rfh, '+<&=', $_ }
+                if ($buf eq '') {
+                        _drop_wq($self); # EOF, client disconnected
+                        dclose($self);
+                        $buf = 'TERM';
+                }
+                if ($buf =~ /\A(?:STOP|CONT|TERM)\z/) {
+                        my $sig = "-$buf";
+                        for my $wq (grep(defined, @$self{@WQ_KEYS})) {
+                                $wq->wq_kill($sig);
+                        }
+                } else {
+                        die "unrecognized client signal: $buf";
+                }
+                my $s = $self->{-socks} // []; # lei up --all
+                @$s = grep { send($_, $buf, 0) } @$s;
+        };
+        if (my $err = $@) {
+                eval { $self->fail($err) };
+                dclose($self);
+        }
+}
+
+sub event_step_init {
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        $self->{-event_init_done} // do { # persist til $ops done
+                $sock->blocking(0);
+                $self->SUPER::new($sock, EPOLLIN);
+                $self->{-event_init_done} = $sock;
+        };
+}
+
+sub oldset { $oldset }
+
+sub dump_and_clear_log {
+        if (defined($errors_log) && -s STDIN && seek(STDIN, 0, SEEK_SET)) {
+                openlog('lei-daemon', 'pid,nowait,nofatal,ndelay', 'user');
+                chomp(my @lines = <STDIN>);
+                truncate(STDIN, 0) or
+                        syslog('warning', "ftruncate (%s): %m", $errors_log);
+                for my $l (@lines) { syslog('warning', '%s', $l) }
+                closelog(); # don't share across fork
+        }
+}
+
+sub cfg2lei ($) {
+        my ($cfg) = @_;
+        my $lei = bless { env => { %{$cfg->{-env}} } }, __PACKAGE__;
+        open($lei->{0}, '<&', \*STDIN);
+        open($lei->{1}, '>>&', \*STDOUT);
+        open($lei->{2}, '>>&', \*STDERR);
+        open($lei->{3}, '<', '/');
+        socketpair(my $x, my $y, AF_UNIX, SOCK_SEQPACKET, 0);
+        $lei->{sock} = $x;
+        require PublicInbox::LeiSelfSocket;
+        PublicInbox::LeiSelfSocket->new($y); # adds to event loop
+        $lei;
+}
+
+sub note_event ($@) { # runs lei_note_event for a given config file
+        my ($cfg_f, @args) = @_;
+        my $cfg = $PATH2CFG{$cfg_f} // return;
+        eval { cfg2lei($cfg)->dispatch('note-event', @args) };
+        carp "E: note-event $cfg_f: $@\n" if $@;
+}
+
+sub dir_idle_handler ($) { # PublicInbox::DirIdle callback
+        my ($ev) = @_; # Linux::Inotify2::Event or duck type
+        my $fn = $ev->fullname;
+        if ($fn =~ m!\A(.+)/(new|cur)/([^/]+)\z!) { # Maildir file
+                my ($loc, $new_cur, $bn) = ("maildir:$1", $2, $3);
+                $new_cur = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM;
+                for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+                        note_event($cfg_f, $loc, $new_cur, $bn, $fn);
+                }
+        } elsif ($fn =~ m!\A(.+)/([0-9]+)\z!) { # MH mail message file
+                my ($loc, $n, $new_cur) = ("mh:$1", $2, '+');
+                $new_cur = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM;
+                for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+                        note_event($cfg_f, $loc, $new_cur, $n, $fn);
+                }
+        } elsif ($fn =~ m!\A(.+)/\.mh_sequences\z!) { # reread flags
+                my $loc = "mh:$1";
+                for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+                        note_event($cfg_f, $loc, '.mh_sequences')
+                }
+        } # else we don't care
+        if ($ev->can('cancel') && ($ev->IN_IGNORE || $ev->IN_UNMOUNT)) {
+                $ev->cancel;
+        }
+        if ($fn =~ m!\A(.+)/(?:new|cur)\z! && !-e $fn) {
+                delete $MDIR2CFGPATH->{"maildir:$1"};
+        }
+        if (!-e $fn) { # config file, Maildir, or MH dir gone
+                delete $_->{$fn} for values %$MDIR2CFGPATH; # config file
+                delete @$MDIR2CFGPATH{"maildir:$fn", "mh:$fn"};
+                delete $PATH2CFG{$fn};
+        }
+}
+
+sub can_stay_alive { # PublicInbox::DS::post_loop_do cb
+        my ($path, $dev_ino_expect) = @_;
+        if (my @st = defined($$path) ? stat($$path) : ()) {
+                if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) {
+                        warn "$$path dev/ino changed, quitting\n";
+                        $$path = undef;
+                }
+        } elsif (defined($$path)) { # ENOENT is common
+                warn "stat($$path): $!, quitting ...\n" if $! != ENOENT;
+                undef $$path;
+                $quit->();
+        }
+        return 1 if defined($$path);
+        my $n = PublicInbox::DS::close_non_busy() or do {
+                eval 'PublicInbox::LeiNoteEvent::flush_task()';
+                # drop stores only if no clients
+                for my $cfg (values %PATH2CFG) {
+                        my $lne = delete($cfg->{-lei_note_event});
+                        $lne->wq_close if $lne;
+                        my $sto = delete($cfg->{-lei_store}) // next;
+                        eval { $sto->wq_do('done') if $sto->{-wq_s1} };
+                        warn "E: $@ (dropping store for $cfg->{-f})" if $@;
+                        $sto->wq_close;
+                }
+        };
+        # returns true: continue, false: stop
+        $n + scalar(keys(%PublicInbox::DS::AWAIT_PIDS));
+}
+
+# lei(1) calls this when it can't connect
+sub lazy_start {
+        my ($path, $errno, $narg) = @_;
+        local ($errors_log, $listener);
+        my ($sock_dir) = ($path =~ m!\A(.+?)/[^/]+\z!);
+        $errors_log = "$sock_dir/errors.log";
+        my $addr = pack_sockaddr_un($path);
+        my $lk = PublicInbox::Lock->new($errors_log);
+        umask(077) // die("umask(077): $!");
+        $lk->lock_acquire;
+        socket($listener, AF_UNIX, SOCK_SEQPACKET, 0);
+        if ($errno == ECONNREFUSED || $errno == ENOENT) {
+                return if connect($listener, $addr); # another process won
+                unlink($path) if $errno == ECONNREFUSED && -S $path;
+        } else {
+                $! = $errno; # allow interpolation to stringify in die
+                die "connect($path): $!";
+        }
+        bind($listener, $addr);
+        $lk->lock_release;
+        undef $lk;
+        my @st = stat($path) or die "stat($path): $!";
+        my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
+        local $oldset = PublicInbox::DS::block_signals(POSIX::SIGALRM);
+        die "incompatible narg=$narg" if $narg != 5;
+        $PublicInbox::IPC::send_cmd or die <<"";
+(Socket::MsgHdr || Inline::C) missing/unconfigured (narg=$narg);
+
+        require PublicInbox::Listener;
+        require PublicInbox::PktOp;
+        (-p STDOUT) or die "E: stdout must be a pipe\n";
+        open(STDIN, '+>>', $errors_log);
+        STDIN->autoflush(1);
+        dump_and_clear_log();
+        POSIX::setsid() > 0 or die "setsid: $!";
+        my $pid = PublicInbox::OnDestroy::fork_tmp;
+        return if $pid;
+        $0 = "lei-daemon $path";
+        local (%PATH2CFG, $MDIR2CFGPATH);
+        local $daemon_pid = $$;
+        $listener->blocking(0);
+        my $exit_code;
+        my $pil = PublicInbox::Listener->new($listener, \&accept_dispatch);
+        local $quit = do {
+                my (undef, $eof_p) = PublicInbox::PktOp->pair;
+                sub {
+                        $exit_code //= eval("POSIX::SIG$_[0] + 128") if @_;
+                        $dir_idle->close if $dir_idle; # EPOLL_CTL_DEL
+                        $dir_idle = undef; # let RC take care of it
+                        eval 'PublicInbox::LeiNoteEvent::flush_task()';
+                        my $lis = $pil or exit($exit_code // 0);
+                        # closing eof_p triggers \&noop wakeup
+                        $listener = $eof_p = $pil = $path = undef;
+                        $lis->close; # DS::close
+                };
+        };
+        my $sig = { CHLD => \&PublicInbox::DS::enqueue_reap };
+        $sig->{$_} = $quit for qw(QUIT INT TERM);
+        $sig->{$_} = \&PublicInbox::Config::noop for qw(HUP USR1 USR2);
+        require PublicInbox::DirIdle;
+        local $dir_idle = PublicInbox::DirIdle->new(sub {
+                # just rely on wakeup to hit post_loop_do
+                dir_idle_handler($_[0]) if $_[0]->fullname ne $path;
+        });
+        $dir_idle->add_watches([$sock_dir]);
+        local @PublicInbox::DS::post_loop_do = (\&can_stay_alive,
+                                                \$path, $dev_ino_expect);
+        # STDIN was redirected to /dev/null above, closing STDERR and
+        # STDOUT will cause the calling `lei' client process to finish
+        # reading the <$daemon> pipe.
+        local $SIG{__WARN__} = sub {
+                $current_lei ? err($current_lei, @_) : warn(
+                  strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(time))," $$ ", @_);
+        };
+        local $SIG{PIPE} = 'IGNORE';
+        local $SIG{ALRM} = 'IGNORE';
+        open STDERR, '>&STDIN';
+        open STDOUT, '>&STDIN';
+        # $daemon pipe to `lei' closed, main loop begins:
+        eval { PublicInbox::DS::event_loop($sig, $oldset) };
+        warn "event loop error: $@\n" if $@;
+        dump_and_clear_log();
+        exit($exit_code // 0);
+}
+
+sub busy { 1 } # prevent daemon-shutdown if client is connected
+
+# ensures stdout hits the FS before sock disconnects so a client
+# can immediately reread it
+sub DESTROY {
+        my ($self) = @_;
+        if (defined($self->{incr_pid}) && $self->{incr_pid} == $$) {
+                for my $k (sort(grep(/\A-nr_/, keys %$self))) {
+                        my $nr = $self->{$k};
+                        substr($k, 0, length('-nr_'), '');
+                        $self->child_error(0, "$nr $k messages");
+                }
+        }
+        $self->{1}->autoflush(1) if $self->{1};
+        stop_pager($self);
+        dump_and_clear_log();
+        # preserve $? for ->fail or ->x_it code
+}
+
+sub wq_done_wait { # awaitpid cb (via wq_eof)
+        my ($pid, $wq, $lei) = @_;
+        local $current_lei = $lei;
+        my $err_type = $lei->{-err_type};
+        $? and $lei->child_error($?,
+                $err_type ? "$err_type errors during $lei->{cmd} \$?=$?" : ());
+        $lei->dclose;
+}
+
+sub fchdir {
+        my ($lei) = @_;
+        chdir($lei->{3} // die 'BUG: lei->{3} (CWD) gone');
+}
+
+sub wq_eof { # EOF callback for main daemon
+        my ($lei, $wq_fld) = @_;
+        local $current_lei = $lei;
+        my $wq = delete $lei->{$wq_fld // 'wq1'};
+        $lei->sto_barrier_request($wq);
+        $wq // $lei->fail; # already failed
+}
+
+sub watch_state_ok ($) {
+        my ($state) = $_[-1]; # $_[0] may be $self
+        $state =~ /\Apause|(?:import|index|tag)-(?:ro|rw)\z/;
+}
+
+sub cancel_dir_watch ($$$) {
+        my ($type, $d, $cfg_f) = @_;
+        my $loc = "$type:".canonpath_harder($d);
+        my $w = delete $MDIR2CFGPATH->{$loc}->{$cfg_f};
+        delete $MDIR2CFGPATH->{$loc} if !(keys %{$MDIR2CFGPATH->{$loc}});
+        $_->cancel for @$w;
+}
+
+sub add_dir_watch ($$$) {
+        my ($type, $d, $cfg_f) = @_;
+        $d = canonpath_harder($d);
+        my $loc = "$type:$d";
+        my @dirs = $type eq 'mh' ? ($d) : ("$d/cur", "$d/new");
+        if (!exists($MDIR2CFGPATH->{$loc}->{$cfg_f})) {
+                my @w = $dir_idle->add_watches(\@dirs, 1);
+                push @{$MDIR2CFGPATH->{$loc}->{$cfg_f}}, @w if @w;
+        }
+}
+
+sub refresh_watches {
+        my ($lei) = @_;
+        $dir_idle or return;
+        my $cfg = _lei_cfg($lei) or return;
+        my $old = $cfg->{-watches};
+        my $watches = $cfg->{-watches} //= {};
+        my %seen;
+        my $cfg_f = $cfg->{'-f'};
+        for my $w (grep(/\Awatch\..+\.state\z/, keys %$cfg)) {
+                my $loc = substr($w, length('watch.'), -length('.state'));
+                require PublicInbox::LeiWatch;
+                $watches->{$loc} //= PublicInbox::LeiWatch->new($loc);
+                $seen{$loc} = undef;
+                my $state = $cfg->get_1("watch.$loc.state");
+                if (!watch_state_ok($state)) {
+                        warn("watch.$loc.state=$state not supported\n");
+                } elsif ($loc =~ /\A(maildir|mh):(.+)\z/i) {
+                        my ($type, $d) = ($1, $2);
+                        $state eq 'pause' ?
+                                cancel_dir_watch($type, $d, $cfg_f) :
+                                add_dir_watch($type, $d, $cfg_f);
+                } else { # TODO: imap/nntp/jmap
+                        $lei->child_error(0, "E: watch $loc not supported, yet")
+                }
+        }
+
+        # add all known Maildir folders as implicit watches
+        my $lms = $lei->lms;
+        if ($lms) {
+                $lms->lms_write_prepare;
+                for my $loc ($lms->folders(qr/\A(?:maildir|mh):/)) {
+                        my $old = $loc;
+                        my ($type, $d) = split /:/, $loc, 2;
+                        # fixup old bugs while we're iterating:
+                        $d = canonpath_harder($d);
+                        $loc = "$type:$d";
+                        $lms->rename_folder($old, $loc) if $old ne $loc;
+                        next if $watches->{$loc}; # may be set to pause
+                        require PublicInbox::LeiWatch;
+                        $watches->{$loc} = PublicInbox::LeiWatch->new($loc);
+                        $seen{$loc} = undef;
+                        add_dir_watch($type, $d, $cfg_f);
+                }
+        }
+        if ($old) { # cull old non-existent entries
+                for my $loc (keys %$old) {
+                        next if exists $seen{$loc};
+                        delete $old->{$loc};
+                        if ($loc =~ /\A(maildir|mh):(.+)\z/i) {
+                                cancel_dir_watch($1, $2, $cfg_f);
+                        } else { # TODO: imap/nntp/jmap
+                                $lei->child_error(0, "E: watch $loc TODO");
+                        }
+                }
+        }
+        if (scalar keys %$watches) {
+                $cfg->{-env} //= { %{$lei->{env}}, PWD => '/' }; # for cfg2lei
+        } else {
+                delete $cfg->{-watches};
+        }
+}
+
+# TODO: support SHA-256
+sub git_oid {
+        my $eml = $_[-1];
+        $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
+        git_sha(1, $eml);
+}
+
+sub lms {
+        my ($lei, $creat) = @_;
+        my $sto = $lei->{sto} // _lei_store($lei) // return;
+        require PublicInbox::LeiMailSync;
+        my $f = "$sto->{priv_eidx}->{topdir}/mail_sync.sqlite3";
+        (-f $f || $creat) ? PublicInbox::LeiMailSync->new($f) : undef;
+}
+
+sub sto_barrier_request {
+        my ($lei, $wq) = @_;
+        return unless $lei->{sto} && $lei->{sto}->{-wq_s1};
+        local $current_lei = $lei;
+        if (my $n = $lei->{opt}->{'commit-delay'}) {
+                eval { $lei->{sto}->wq_do('schedule_commit', $n) };
+        } else {
+                my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
+                my $errfh = $lei->{2} // *STDERR{GLOB};
+                my @io = $s ? ($errfh, $s) : ($errfh);
+                eval { $lei->{sto}->wq_io_do('barrier', \@io, 1) };
+        }
+        warn($@) if $@;
+}
+
+sub cfg_dump ($$) {
+        my ($lei, $f) = @_;
+        my $ret = eval { PublicInbox::Config->git_config_dump($f, $lei) };
+        return $ret if !$@;
+        warn($@);
+        undef;
+}
+
+sub request_umask {
+        my ($lei) = @_;
+        my $s = $lei->{sock} // return;
+        send($s, 'umask', 0) // die "send: $!";
+        vec(my $rvec = '', fileno($s), 1) = 1;
+        select($rvec, undef, undef, 2) or die 'timeout waiting for umask';
+        recv($s, my $v, 5, 0) // die "recv: $!";
+        (my $u, $lei->{client_umask}) = unpack('AV', $v);
+        $u eq 'u' or warn "E: recv $v has no umask";
+}
+
+sub _stdin_cb { # PublicInbox::InputPipe::consume callback for --stdin
+        my (undef, $lei, $cb) = @_; # $_[-1] = $rbuf
+        $_[1] // return $lei->fail("error reading stdin: $!");
+        $lei->{stdin_buf} .= $_[-1];
+        do_env($lei, $cb) if $_[-1] eq '';
+}
+
+sub slurp_stdin {
+        my ($lei, $cb) = @_;
+        require PublicInbox::InputPipe;
+        my $in = $lei->{0};
+        if (-t $in) { # run cat via script/lei and read from it
+                $in = undef;
+                pipe($in, my $wr);
+                say { $lei->{2} } '# enter query, Ctrl-D when done';
+                send_exec_cmd($lei, [ $lei->{0}, $wr ], ['cat'], {});
+        }
+        PublicInbox::InputPipe::consume($in, \&_stdin_cb, $lei, $cb);
+}
+
+1;
diff --git a/lib/PublicInbox/LI2Wrap.pm b/lib/PublicInbox/LI2Wrap.pm
new file mode 100644
index 00000000..d4792b25
--- /dev/null
+++ b/lib/PublicInbox/LI2Wrap.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Wrapper for Linux::Inotify2 < 2.3 which lacked ->fh and auto-close
+# Remove this when supported LTS/enterprise distros are all
+# Linux::Inotify2 >= 2.3
+package PublicInbox::LI2Wrap;
+use v5.12;
+our @ISA = qw(Linux::Inotify2);
+
+sub wrapclose {
+        my ($inot) = @_;
+        my $fd = $inot->fileno;
+        open my $fh, '<&=', $fd or die "open <&= $fd $!";
+        bless $inot, __PACKAGE__;
+}
+
+sub DESTROY {} # no-op
+
+1
diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm
new file mode 100644
index 00000000..ce03f5b4
--- /dev/null
+++ b/lib/PublicInbox/LeiALE.pm
@@ -0,0 +1,106 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# All Locals Ever: track lei/store + externals ever used as
+# long as they're on an accessible FS.  Includes "lei q" --include
+# and --only targets that haven't been through "lei add-external".
+# Typically: ~/.cache/lei/all_locals_ever.git
+package PublicInbox::LeiALE;
+use v5.12;
+use parent qw(PublicInbox::LeiSearch PublicInbox::Lock);
+use PublicInbox::Git;
+use autodie qw(close open rename seek truncate);
+use PublicInbox::Import;
+use PublicInbox::OnDestroy;
+use PublicInbox::LeiXSearch;
+use Fcntl qw(SEEK_SET);
+
+sub _new {
+        my ($d) = @_;
+        PublicInbox::Import::init_bare($d, 'ale');
+        bless {
+                git => PublicInbox::Git->new($d),
+                lock_path => "$d/lei_ale.state", # dual-duty lock + state
+                ibxish => [], # Inbox and ExtSearch (and LeiSearch) objects
+        }, __PACKAGE__
+}
+
+sub new {
+        my ($self, $lei) = @_;
+        ref($self) or $self = _new($lei->cache_dir . '/all_locals_ever.git');
+        my $lxs = PublicInbox::LeiXSearch->new;
+        my $sto = $lei->_lei_store;
+        $lxs->prepare_external($sto->search) if $sto;
+        for my $loc ($lei->externals_each) { # locals only
+                $lxs->prepare_external($loc) if -d $loc;
+        }
+        $self->refresh_externals($lxs, $lei);
+        $self;
+}
+
+sub over {} # undef for xoids_for
+
+sub overs_all { # for xoids_for (called only in lei workers?)
+        my ($self) = @_;
+        my $fgen = $PublicInbox::OnDestroy::fork_gen ;
+        if (($self->{fgen} // $fgen) != $fgen) {
+                delete($_->{over}) for @{$self->{ibxish}};
+        }
+        $self->{fgen} = $fgen;
+        grep(defined, map { $_->over } @{$self->{ibxish}});
+}
+
+sub refresh_externals {
+        my ($self, $lxs, $lei) = @_;
+        $self->git->cleanup;
+        my $lk = $self->lock_for_scope;
+        my $cur_lxs = ref($lxs)->new;
+        my $orig = PublicInbox::IO::read_all $self->{lockfh};
+        my $new = '';
+        my $old = '';
+        my $gone = 0;
+        my %seen_ibxish; # $dir => any-defined value
+        for my $dir (split(/\n/, $orig)) {
+                if (-d $dir && -r _ && $cur_lxs->prepare_external($dir)) {
+                        $seen_ibxish{$dir} //= length($old .= "$dir\n");
+                } else {
+                        ++$gone;
+                }
+        }
+        my @ibxish = $cur_lxs->locals;
+        for my $x ($lxs->locals) {
+                my $d = $lei->canonpath_harder($x->{inboxdir} // $x->{topdir});
+                $seen_ibxish{$d} //= do {
+                        $new .= "$d\n";
+                        push @ibxish, $x;
+                };
+        }
+        if ($new ne '' || $gone) {
+                $self->{lockfh}->autoflush(1);
+                if ($gone) {
+                        seek($self->{lockfh}, 0, SEEK_SET);
+                        truncate($self->{lockfh}, 0);
+                } else {
+                        $old = '';
+                }
+                print { $self->{lockfh} } $old, $new or die "print: $!";
+        }
+        $new = '';
+        my $f = $self->git->{git_dir}.'/objects/info/alternates';
+        $old = PublicInbox::IO::try_cat $f;
+        for my $x (@ibxish) {
+                $new .= $lei->canonpath_harder($x->git->{git_dir})."/objects\n";
+        }
+        $self->{ibxish} = \@ibxish;
+        return if $old eq $new;
+
+        # this needs to be atomic since child processes may start
+        # git-cat-file at any time
+        my $tmp = "$f.$$.tmp";
+        open my $fh, '>', $tmp;
+        print $fh $new;
+        close $fh;
+        rename($tmp, $f)
+}
+
+1;
diff --git a/lib/PublicInbox/LeiAddExternal.pm b/lib/PublicInbox/LeiAddExternal.pm
new file mode 100644
index 00000000..5eef206c
--- /dev/null
+++ b/lib/PublicInbox/LeiAddExternal.pm
@@ -0,0 +1,72 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei add-external" command
+package PublicInbox::LeiAddExternal;
+use strict;
+use v5.10.1;
+
+sub _finish_add_external {
+        my ($lei, $location) = @_;
+        my $new_boost = $lei->{opt}->{boost} // 0;
+        my $key = "external.$location.boost";
+        my $cur_boost = $lei->_lei_cfg(1)->{$key};
+        return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+        $lei->_config($key, $new_boost);
+}
+
+sub lei_add_external {
+        my ($lei, $location) = @_;
+        my $mirror = $lei->{opt}->{mirror} // do {
+                my @fail;
+                for my $sw ($lei->index_opt, $lei->curl_opt,
+                                qw(no-torsocks torsocks inbox-version)) {
+                        my ($f) = (split(/|/, $sw, 2))[0];
+                        next unless defined $lei->{opt}->{$f};
+                        $f = length($f) == 1 ? "-$f" : "--$f";
+                        push @fail, $f;
+                }
+                if (scalar(@fail) == 1) {
+                        return $lei->("@fail requires --mirror");
+                } elsif (@fail) {
+                        my $last = pop @fail;
+                        my $fail = join(', ', @fail);
+                        return $lei->("@fail and $last require --mirror");
+                }
+                undef;
+        };
+        $location = $lei->ext_canonicalize($location);
+        if (defined($mirror) && -d $location) {
+                $lei->fail(<<""); # TODO: did you mean "update-external?"
+--mirror destination `$location' already exists
+
+        } elsif (-d $location) {
+                index($location, "\n") >= 0 and
+                        return $lei->fail("`\\n' not allowed in `$location'");
+        }
+        if ($location !~ m!\Ahttps?://! && !-d $location) {
+                $mirror // return $lei->fail("$location not a directory");
+                index($location, "\n") >= 0 and
+                        return $lei->fail("`\\n' not allowed in `$location'");
+                $mirror = $lei->ext_canonicalize($mirror);
+                require PublicInbox::LeiMirror;
+                PublicInbox::LeiMirror->start($lei, $mirror => $location);
+        } else {
+                _finish_add_external($lei, $location);
+        }
+}
+
+sub _complete_add_external { # for bash, this relies on "compopt -o nospace"
+        my ($lei, @argv) = @_;
+        my $cfg = $lei->_lei_cfg or return ();
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        require URI;
+        map {
+                my $u = URI->new(substr($_, length('external.')));
+                my ($base) = ($u->path =~ m!((?:/?.*)?/)[^/]+/?\z!);
+                $u->path($base);
+                $match_cb->($u->as_string);
+        } grep(m!\Aexternal\.https?://!, @{$cfg->{-section_order}});
+}
+
+1;
diff --git a/lib/PublicInbox/LeiAddWatch.pm b/lib/PublicInbox/LeiAddWatch.pm
new file mode 100644
index 00000000..e2be5cee
--- /dev/null
+++ b/lib/PublicInbox/LeiAddWatch.pm
@@ -0,0 +1,42 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei add-watch" command
+package PublicInbox::LeiAddWatch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiInput);
+
+sub lei_add_watch {
+        my ($lei, @argv) = @_;
+        my $cfg = $lei->_lei_cfg(1);
+        my $self = bless {}, __PACKAGE__;
+        $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
+        my $state = $lei->{opt}->{'state'} // 'import-rw';
+        $lei->watch_state_ok($state) or
+                return $lei->fail("invalid state: $state");
+        $self->prepare_inputs($lei, \@argv) or return;
+        my @vmd;
+        while (my ($type, $vals) = each %{$lei->{vmd_mod}}) {
+                push @vmd, "$type:$_" for @$vals;
+        }
+        my $vmd0 = shift @vmd;
+        for my $w (@{$self->{inputs}}) {
+                # clobber existing, allow multiple
+                if (defined($vmd0)) {
+                        $lei->_config("watch.$w.vmd", '--replace-all', $vmd0)
+                                or return;
+                        for my $v (@vmd) {
+                                $lei->_config("watch.$w.vmd", $v) or return;
+                        }
+                }
+                next if defined $cfg->{"watch.$w.state"};
+                $lei->_config("watch.$w.state", $state) or return;
+        }
+        $lei->_lei_store(1); # create
+        $lei->lms(1)->lms_write_prepare->add_folders(@{$self->{inputs}});
+        delete $lei->{cfg}; # force reload
+        $lei->refresh_watches;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiAuth.pm b/lib/PublicInbox/LeiAuth.pm
new file mode 100644
index 00000000..020dd125
--- /dev/null
+++ b/lib/PublicInbox/LeiAuth.pm
@@ -0,0 +1,72 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Authentication worker for anything that needs auth for read/write IMAP
+# and read-only NNTP access
+#
+# timelines
+# lei-daemon              |  LeiAuth worker #0      | other WQ workers
+# ----------------------------------------------------------
+# spawns all workers ---->[ workers all start and run ipc_atfork_child ]
+#                         | do_auth_atfork          | wq_worker_loop sleep
+#                         | # reads .netrc          |
+#                         | # queries git-credential|
+#                         | send net_merge_continue |
+#                         |         |               |
+#                         |         v               |
+# recv net_merge_continue <---------/               |
+#            |            |                         |
+#            v            |                         |
+# broadcast net_merge_all [ all workers (including LeiAuth worker #0) ]
+#                         [ LeiAuth worker #0 becomes just another WQ worker ]
+#                         |
+# call net_merge_all_done ->-> do per-WQ-class defined actions
+package PublicInbox::LeiAuth;
+use v5.12;
+
+sub do_auth_atfork { # used by IPC WQ workers
+        my ($self, $wq) = @_;
+        return if $wq->{-wq_worker_nr} != 0; # only first worker calls this
+        my $lei = $wq->{lei};
+        my $net = $lei->{net};
+        if ($net->{-auth_done}) { # from previous worker... (ugly)
+                $lei->{pkt_op_p}->pkt_do('net_merge_continue', $net) or
+                                $lei->fail("pkt_do net_merge_continue: $!");
+                return;
+        }
+        eval { # fill auth info (may prompt user or read netrc)
+                my $mics = $net->imap_common_init($lei);
+                my $nn = $net->nntp_common_init($lei);
+                # broadcast successful auth info to lei-daemon:
+                $net->{-auth_done} = 1;
+                $lei->{pkt_op_p}->pkt_do('net_merge_continue', $net) or
+                                die "pkt_do net_merge_continue: $!";
+                $net->{mics_cached} = $mics if $mics;
+                $net->{nn_cached} = $nn if $nn;
+        };
+        $lei->fail($@) if $@;
+}
+
+sub net_merge_all { # called in wq worker via wq_broadcast
+        my ($wq, $net_new) = @_;
+        my $net = $wq->{lei}->{net};
+        %$net = (%$net, %$net_new);
+}
+
+# called by top-level lei-daemon when first worker is done with auth
+# passes updated net auth info to current workers
+sub net_merge_continue {
+        my ($lei, $wq, $net_new) = @_;
+        $wq->{-net_new} = $net_new; # for "lei up"
+        $wq->wq_broadcast('PublicInbox::LeiAuth::net_merge_all', $net_new);
+        $wq->net_merge_all_done($lei); # defined per-WQ
+}
+
+sub op_merge { # prepares PktOp->pair ops
+        my ($self, $ops, $wq, $lei) = @_;
+        $ops->{net_merge_continue} = [ \&net_merge_continue, $lei, $wq ];
+}
+
+sub new { bless \(my $x), __PACKAGE__ }
+
+1;
diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm
new file mode 100644
index 00000000..7b2ea434
--- /dev/null
+++ b/lib/PublicInbox/LeiBlob.pm
@@ -0,0 +1,179 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei blob $OID" command
+# TODO: this doesn't scan submodules, but maybe it should
+package PublicInbox::LeiBlob;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Spawn qw(run_wait run_qx which);
+use PublicInbox::DS;
+use PublicInbox::Eml;
+use PublicInbox::Git;
+use PublicInbox::IO qw(read_all);
+
+sub get_git_dir ($$) {
+        my ($lei, $d) = @_;
+        return $d if -d "$d/objects" && -d "$d/refs" && -e "$d/HEAD";
+
+        my $cmd = [ qw(git rev-parse --git-dir) ];
+        my $opt = { '-C' => $d };
+        if (defined($lei->{opt}->{cwd})) { # --cwd used, report errors
+                $opt->{2} = $lei->{2};
+        } else { # implicit --cwd, quiet errors
+                open $opt->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+        }
+        chomp(my $git_dir = run_qx($cmd, {GIT_DIR => undef}, $opt));
+        $? ? undef : $git_dir;
+}
+
+sub solver_user_cb { # called by solver when done
+        my ($res, $self) = @_;
+        my $lei = $self->{lei};
+        my $log_buf = delete $lei->{'log_buf'};
+        $$log_buf =~ s/^/# /sgm;
+        ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf);
+        $lei->qerr($$log_buf);
+        my ($git, $oid, $type, $size, $di) = @$res;
+
+        # don't try to support all the git-show(1) options for non-blob,
+        # this is just a convenience:
+        $type ne 'blob' and warn <<EOM;
+# $oid is a $type of $size bytes in:\n#\t$git->{git_dir}
+EOM
+        my $cmd = $git->cmd('show', $oid);
+        my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} };
+        run_wait($cmd, $lei->{env}, $rdr) and $lei->child_error($?);
+}
+
+sub do_solve_blob { # via wq_do
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        my $git_dirs = $lei->{opt}->{'git-dir'};
+        my $hints = {};
+        for my $x (qw(oid-a path-a path-b)) {
+                my $v = $lei->{opt}->{$x} // next;
+                $x =~ tr/-/_/;
+                $hints->{$x} = $v;
+        }
+        open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
+        $lei->{log_buf} = \$log_buf;
+        my $git = $lei->{ale}->git;
+        my @rmt = map {
+                PublicInbox::LeiRemote->new($lei, $_)
+        } $self->{lxs}->remotes;
+        my $solver = bless {
+                gits => [ map {
+                                PublicInbox::Git->new($lei->rel2abs($_))
+                        } @$git_dirs ],
+                user_cb => \&solver_user_cb,
+                uarg => $self,
+                # -cur_di, -msg => temporary fields for Qspawn callbacks
+                inboxes => [ $self->{lxs}->locals, @rmt ],
+        }, 'PublicInbox::SolverGit';
+        local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+        $solver->solve($lei->{env}, $log, $self->{oid_b}, $hints);
+}
+
+sub cat_attach_i { # Eml->each_part callback
+        my ($part, $depth, $idx) = @{$_[0]};
+        my $lei = $_[1];
+        my $want = $lei->{-attach_idx} // return;
+        return if $idx ne $want; # [0-9]+(?:\.[0-9]+)+
+        delete $lei->{-attach_idx};
+        $lei->out($part->body);
+}
+
+sub extract_attach ($$$) {
+        my ($lei, $blob, $bref) = @_;
+        my $eml = PublicInbox::Eml->new($bref);
+        $eml->each_part(\&cat_attach_i, $lei, 1);
+        my $idx = delete $lei->{-attach_idx};
+        defined($idx) and return $lei->fail(<<EOM);
+E: attachment $idx not found in $blob
+EOM
+}
+
+sub lei_blob {
+        my ($lei, $blob) = @_;
+        $lei->start_pager if -t $lei->{1};
+        my $opt = $lei->{opt};
+        my $has_hints = grep(defined, @$opt{qw(oid-a path-a path-b)});
+        my $lxs;
+        if ($blob =~ s/:([0-9\.]+)\z//) {
+                $lei->{-attach_idx} = $1;
+                $opt->{mail} = 1;
+        }
+
+        # first, see if it's a blob returned by "lei q" JSON output:k
+        if ($opt->{mail} // ($has_hints ? 0 : 1)) {
+                if (grep(defined, @$opt{qw(include only)})) {
+                        $lxs = $lei->lxs_prepare;
+                        $lei->ale->refresh_externals($lxs, $lei);
+                }
+                my $rdr = {};
+                if ($opt->{mail}) {
+                        open $rdr->{2}, '+>', undef or die "open: $!";
+                } else {
+                        open $rdr->{2}, '>', '/dev/null' or die "open: $!";
+                }
+                my $cmd = $lei->ale->git->cmd('cat-file', 'blob', $blob);
+                my $cerr;
+                if (defined $lei->{-attach_idx}) {
+                        my $buf = run_qx($cmd, $lei->{env}, $rdr);
+                        return extract_attach($lei, $blob, \$buf) unless $?;
+                        $cerr = $?;
+                } else {
+                        $rdr->{1} = $lei->{1}; # write directly to client
+                        $cerr = run_wait($cmd, $lei->{env}, $rdr) or return;
+                }
+                # fall back to unimported ('lei index') and inflight blobs
+                my $lms = $lei->lms;
+                my $bref = ($lms ? $lms->local_blob($blob, 1) : undef) // do {
+                        my $sto = $lei->{sto} // $lei->_lei_store;
+                        $sto && $sto->{-wq_s1} ? $sto->wq_do('cat_blob', $blob)
+                                                : undef;
+                };
+                $bref and return $lei->{-attach_idx} ?
+                                        extract_attach($lei, $blob, $bref) :
+                                        $lei->out($$bref);
+                if ($opt->{mail}) {
+                        seek($rdr->{2}, 0, 0);
+                        return $lei->child_error($cerr, read_all($rdr->{2}));
+                } # else: fall through to solver below
+        }
+
+        # maybe it's a non-email (code) blob from a coderepo
+        my $git_dirs = $opt->{'git-dir'} //= [];
+        if ($opt->{'cwd'} // 1) {
+                my $cgd = get_git_dir($lei, '.');
+                unshift(@$git_dirs, $cgd) if defined $cgd;
+        }
+        return $lei->fail('no --git-dir to try') unless @$git_dirs;
+        unless ($lxs) {
+                $lxs = $lei->lxs_prepare or return;
+                $lei->ale->refresh_externals($lxs, $lei);
+        }
+        if ($lxs->remotes) {
+                require PublicInbox::LeiRemote;
+                $lei->{curl} //= which('curl') or return
+                        $lei->fail('curl needed for '.join(', ',$lxs->remotes));
+                $lei->_lei_store(1)->write_prepare($lei);
+        }
+        require PublicInbox::SolverGit;
+        my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__;
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $self->wq_io_do('do_solve_blob', []);
+        $self->wq_close;
+        $lei->wait_wq_events($op_c, $ops);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child;
+        $self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiConfig.pm b/lib/PublicInbox/LeiConfig.pm
new file mode 100644
index 00000000..a50ff2b6
--- /dev/null
+++ b/lib/PublicInbox/LeiConfig.pm
@@ -0,0 +1,55 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::LeiConfig; # subclassed by LeiEditSearch
+use v5.12;
+use PublicInbox::PktOp;
+use Fcntl qw(SEEK_SET);
+use autodie qw(open seek);
+use PublicInbox::IO qw(read_all);
+
+sub cfg_do_edit ($;$) {
+        my ($self, $reason) = @_;
+        my $lei = $self->{lei};
+        $lei->pgr_err($reason) if defined $reason;
+        my $cmd = [ qw(git config --edit -f), $self->{-f} ];
+        my $env = { GIT_CONFIG => $self->{-f} };
+        $self->cfg_edit_begin if $self->can('cfg_edit_begin');
+        # run in script/lei foreground
+        my ($op_c, $op_p) = PublicInbox::PktOp->pair;
+        # $op_p will EOF when $EDITOR is done
+        $op_c->{ops} = { '' => [\&cfg_edit_done, $lei, $self] };
+        $lei->send_exec_cmd([ @$lei{qw(0 1 2)}, $op_p->{op_p} ], $cmd, $env);
+}
+
+sub cfg_edit_done { # PktOp lei->do_env cb
+        my ($lei, $self) = @_;
+        open my $fh, '+>', undef;
+        my $cfg = do {
+                local $lei->{2} = $fh;
+                $lei->cfg_dump($self->{-f});
+        } or do {
+                seek($fh, 0, SEEK_SET);
+                return cfg_do_edit($self, read_all($fh));
+        };
+        $self->cfg_verify($cfg) if $self->can('cfg_verify');
+}
+
+sub lei_config {
+        my ($lei, @argv) = @_;
+        $lei->{opt}->{'config-file'} and return $lei->fail(
+                "config file switches not supported by `lei config'");
+        if ($lei->{opt}->{edit}) {
+                @argv and return $lei->fail(
+'--edit must be used without other arguments');
+                $lei->{opt}->{c} and return $lei->fail(
+"`-c $lei->{opt}->{c}->[0]' not allowed with --edit");
+                my $f = $lei->_lei_cfg(1)->{-f};
+                cfg_do_edit(bless { lei => $lei, -f => $f }, __PACKAGE__);
+        } elsif (@argv) { # let git-config do error-checking
+                $lei->_config(@argv);
+        } else {
+                $lei->_help('no options given');
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
new file mode 100644
index 00000000..4d4fceb2
--- /dev/null
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -0,0 +1,87 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei convert" sub-command
+package PublicInbox::LeiConvert;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::LeiOverview;
+use PublicInbox::DS;
+
+# /^input_/ subs are used by PublicInbox::LeiInput
+
+sub input_mbox_cb { # MboxReader callback
+        my ($eml, $self) = @_;
+        my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
+        $eml->header_set($_) for qw(Status X-Status);
+        $self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml) = @_;
+        $self->{wcb}->(undef, {}, $eml);
+}
+
+sub input_maildir_cb {
+        my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored
+        $self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
+sub input_mh_cb {
+        my ($dn, $bn, $kw, $eml, $self) = @_;
+        $self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
+sub process_inputs { # via wq_do
+        my ($self) = @_;
+        local $PublicInbox::DS::in_loop = 0; # force synchronous awaitpid
+        $self->SUPER::process_inputs;
+        my $lei = $self->{lei};
+        my $l2m = delete $lei->{l2m};
+        delete $self->{wcb}; # may close connections
+        $l2m->finish_output($lei) if $l2m;
+        if (my $v2w = delete $lei->{v2w}) { $v2w->done } # may die
+        my $nr_w = delete($l2m->{-nr_write}) // 0;
+        my $d = (delete($l2m->{-nr_seen}) // 0) - $nr_w;
+        $d = $d ? " ($d duplicates)" : '';
+        $lei->qerr("# converted $nr_w messages$d");
+}
+
+sub lei_convert { # the main "lei convert" method
+        my ($lei, @inputs) = @_;
+        $lei->{opt}->{kw} //= 1;
+        $lei->{opt}->{dedupe} //= 'none';
+        $lei->{input_opt}->{sort} = 1; # for LeiToMail conflict check
+        my $self = bless {}, __PACKAGE__;
+        my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
+        $lei->{l2m} or return
+                $lei->fail('--output unspecified or is not a mail destination');
+        my $devfd = $lei->path_to_fd($ovv->{dst}) // return;
+        $lei->{opt}->{augment} = 1 if $devfd < 0;
+        $self->prepare_inputs($lei, \@inputs) or return;
+        # n.b. {net} {auth} is handled by l2m worker
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $self->wq_io_do('process_inputs', []);
+        $self->wq_close;
+        $lei->wait_wq_events($op_c, $ops);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child;
+        my $l2m = $lei->{l2m};
+        if (my $net = $lei->{net}) { # may prompt user once
+                $net->{mics_cached} = $net->imap_common_init($lei);
+                $net->{nn_cached} = $net->nntp_common_init($lei);
+        }
+        $l2m->pre_augment($lei);
+        $l2m->do_augment($lei);
+        $l2m->post_augment($lei);
+        $self->{wcb} = $l2m->write_cb($lei);
+        $self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm
new file mode 100644
index 00000000..48c66ee9
--- /dev/null
+++ b/lib/PublicInbox/LeiCurl.pm
@@ -0,0 +1,85 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common option and torsocks(1) wrapping for curl(1)
+# Eventually, we may support using libcurl via Inline::C and/or
+# WWW::Curl; but curl(1) is most prevalent and widely-installed.
+# n.b. curl may support a daemon/client model like lei someday:
+#   https://github.com/curl/curl/wiki/curl-tool-master-client
+package PublicInbox::LeiCurl;
+use v5.12;
+use PublicInbox::Spawn qw(which);
+use PublicInbox::Config;
+
+# Ensures empty strings are quoted, we don't need more
+# sophisticated quoting than for empty strings: curl -d ''
+use overload '""' => sub {
+        join(' ', map { $_ eq '' ?  "''" : $_ } @{$_[0]});
+};
+
+my %lei2curl = (
+        'curl-config=s@' => 'config|K=s@',
+);
+
+# prepares a common command for curl(1) based on $lei command
+sub new {
+        my ($cls, $lei, $curl) = @_;
+        $curl //= which('curl') // return $lei->fail('curl not found');
+        my $opt = $lei->{opt};
+        my @cmd = ($curl, qw(-gSf));
+        $cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q"
+        $cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too
+        for my $o ($lei->curl_opt) {
+                if (my $lei_spec = $lei2curl{$o}) {
+                        $o = $lei_spec;
+                }
+                $o =~ s/\|[a-z0-9]\b//i; # remove single char short option
+                if ($o =~ s/=[is]@\z//) {
+                        my $ary = $opt->{$o} or next;
+                        push @cmd, map { ("--$o", $_) } @$ary;
+                } elsif ($o =~ s/=[is]\z//) {
+                        my $val = $opt->{$o} // next;
+                        push @cmd, "--$o", $val;
+                } elsif ($opt->{$o}) {
+                        push @cmd, "--$o";
+                }
+        }
+        push @cmd, '-v' if $opt->{verbose}; # lei uses this itself
+        bless \@cmd, $cls;
+}
+
+sub torsocks { # useful for "git clone" and "git fetch", too
+        my ($self, $lei, $uri)= @_;
+        my $opt = $lei->{opt};
+        $opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
+        my $torsocks = $opt->{torsocks} //= 'auto';
+        if ($torsocks eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+                ($PublicInbox::Config::LD_PRELOAD//'') !~ m!/libtorsocks\b!) {
+                # "auto" continues anyways if torsocks is missing;
+                # a proxy may be specified via CLI, curlrc,
+                # environment variable, or even firewall rule
+                [ ($lei->{torsocks} //= which('torsocks')) // () ]
+        } elsif (PublicInbox::Config::git_bool($torsocks)) {
+                my $x = $lei->{torsocks} //= which('torsocks');
+                $x or return $lei->fail(<<EOM);
+--torsocks=yes specified but torsocks not found in PATH=$ENV{PATH}
+EOM
+                [ $x ];
+        } else { # the common case for current Internet :<
+                [];
+        }
+}
+
+# completes the result of cmd() for $uri
+sub for_uri {
+        my ($self, $lei, $uri, @opt) = @_;
+        my $pfx = torsocks($self, $lei, $uri) or return; # error
+        if ($uri->scheme =~ /\Ahttps?\z/i) {
+                my $cfg = $lei->_lei_cfg;
+                my $p = $cfg ? $cfg->urlmatch('http.Proxy', $$uri, 1) : undef;
+                push(@opt, '--proxy', $p) if defined($p);
+        }
+        bless [ @$pfx, @$self, @opt, $uri->as_string ], ref($self);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
new file mode 100644
index 00000000..eda54d79
--- /dev/null
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -0,0 +1,137 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::LeiDedupe;
+use v5.12;
+use PublicInbox::ContentHash qw(content_hash content_digest git_sha);
+use PublicInbox::SHA qw(sha256);
+
+# n.b. mutt sets most of these headers not sure about Bytes
+our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes);
+
+# best-effort regeneration of OID when augmenting existing results
+sub _regen_oid ($) {
+        my ($eml) = @_;
+        my @stash; # stash away headers we shouldn't have in git
+        for my $k (@OID_IGNORE) {
+                my @v = $eml->header_raw($k) or next;
+                push @stash, [ $k, \@v ];
+                $eml->header_set($k); # restore below
+        }
+        my $dig = git_sha(1, $eml);
+        for my $kv (@stash) { # restore stashed headers
+                my ($k, @v) = @$kv;
+                $eml->header_set($k, @v);
+        }
+        $dig->digest;
+}
+
+sub _oidbin ($) { defined($_[0]) ? pack('H*', $_[0]) : undef }
+
+sub smsg_hash ($) {
+        my ($smsg) = @_;
+        my $x = join("\0", @$smsg{qw(from to cc ds subject references mid)});
+        utf8::encode($x);
+        sha256($x);
+}
+
+# the paranoid option
+sub dedupe_oid ($) {
+        my ($skv) = @_;
+        (sub { # may be called in a child process
+                my ($eml, $oidhex) = @_;
+                $skv->set_maybe(_oidbin($oidhex) // _regen_oid($eml), '');
+        }, sub {
+                my ($smsg) = @_;
+                $skv->set_maybe(_oidbin($smsg->{blob}), '');
+        });
+}
+
+# dangerous if there's duplicate messages with different Message-IDs
+sub dedupe_mid ($) {
+        my ($skv) = @_;
+        (sub { # may be called in a child process
+                my ($eml, $oidhex) = @_;
+                # lei supports non-public drafts w/o Message-ID
+                my $mid = $eml->header_raw('Message-ID') // _oidbin($oidhex) //
+                        content_hash($eml);
+                $skv->set_maybe($mid, '');
+        }, sub {
+                my ($smsg) = @_;
+                my $mid = $smsg->{mid};
+                $mid = undef if $mid eq '';
+                $mid //= smsg_hash($smsg) // _oidbin($smsg->{blob});
+                $skv->set_maybe($mid, '');
+        });
+}
+
+# our default deduplication strategy (used by v2, also)
+sub dedupe_content ($) {
+        my ($skv) = @_;
+        (sub { # may be called in a child process
+                my ($eml) = @_; # $oidhex = $_[1], ignored
+
+                # we must account for Message-ID via hash_mids, since
+                # (unlike v2 dedupe) Message-ID is not accounted for elsewhere:
+                $skv->set_maybe(content_digest($eml, PublicInbox::SHA->new(256),
+                                1 # hash_mids
+                                )->digest, '');
+        }, sub {
+                my ($smsg) = @_;
+                $skv->set_maybe(smsg_hash($smsg), '');
+        });
+}
+
+# no deduplication at all
+sub true { 1 }
+sub dedupe_none ($) { (\&true, \&true) }
+
+sub new {
+        my ($cls, $lei) = @_;
+        my $dd = $lei->{opt}->{dedupe} // 'content';
+        my $dst = $lei->{ovv}->{dst};
+
+        # allow "none" to bypass Eml->new if writing to directory:
+        return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
+        my $m = "dedupe_$dd";
+        $cls->can($m) or die "unsupported dedupe strategy: $dd\n";
+        my $skv;
+        if ($dd ne 'none') {
+                require PublicInbox::SharedKV;
+                $skv = PublicInbox::SharedKV->new;
+        }
+        # [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
+        bless [ $skv, undef, undef, $m ], $cls;
+}
+
+# returns true on seen messages according to the deduplication strategy,
+# returns false if unseen
+sub is_dup {
+        my ($self, $eml, $smsg) = @_;
+        !$self->[1]->($eml, $smsg ? $smsg->{blob} : undef);
+}
+
+sub is_smsg_dup {
+        my ($self, $smsg) = @_;
+        !$self->[2]->($smsg);
+}
+
+sub prepare_dedupe {
+        my ($self) = @_;
+        my $skv = $self->[0];
+        $self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
+        $skv ? $skv->dbh : undef;
+}
+
+sub pause_dedupe {
+        my ($self) = @_;
+        my $skv = $self->[0] or return;
+        $skv->dbh_release;
+        delete($skv->{dbh}) if $skv;
+}
+
+sub has_entries {
+        my $skv = $_[0]->[0] or return undef;
+        $skv->has_entries;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiEditSearch.pm b/lib/PublicInbox/LeiEditSearch.pm
new file mode 100644
index 00000000..bcf7c105
--- /dev/null
+++ b/lib/PublicInbox/LeiEditSearch.pm
@@ -0,0 +1,76 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei edit-search" edit a saved search following "lei q --save"
+package PublicInbox::LeiEditSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiSavedSearch;
+use PublicInbox::LeiUp;
+use parent qw(PublicInbox::LeiConfig);
+
+sub cfg_edit_begin {
+        my ($self) = @_;
+        if (ref($self->{lss}->{-cfg}->{'lei.q.output'})) {
+                delete $self->{lss}->{-cfg}->{'lei.q.output'}; # invalid
+                $self->{lei}->pgr_err(<<EOM);
+$self->{lss}->{-f} has multiple values of lei.q.output
+please remove redundant ones
+EOM
+        }
+}
+
+sub cfg_verify {
+        my ($self, $cfg) = @_;
+        my $new_out = $cfg->{'lei.q.output'} // '';
+        return $self->cfg_do_edit(<<EOM) if ref $new_out;
+$self->{-f} has multiple values of lei.q.output
+EOM
+        return $self->cfg_do_edit(<<EOM) if $new_out eq '';
+$self->{-f} needs lei.q.output
+EOM
+        my $lss = $self->{lss};
+        my $old_out = $lss->{-cfg}->{'lei.q.output'} // return;
+        return if $old_out eq $new_out;
+        my $lei = $self->{lei};
+        my $old_path = $old_out;
+        my $new_path = $new_out;
+        s!$PublicInbox::LeiSavedSearch::LOCAL_PFX!! for ($old_path, $new_path);
+        my $dir_old = $lss->can('lss_dir_for')->($lei, \$old_path, 1);
+        my $dir_new = $lss->can('lss_dir_for')->($lei, \$new_path);
+        return if $dir_new eq $dir_old;
+
+        ($old_out =~ m!\Av2:!i || $new_out =~ m!\Av2:!) and
+                return $self->cfg_do_edit(<<EOM);
+conversions from/to v2 inboxes not supported at this time
+EOM
+        return $self->cfg_do_edit(<<EOM) if -e $dir_new;
+lei.q.output changed from `$old_out' to `$new_out'
+However, $dir_new exists
+EOM
+        # start the conversion asynchronously
+        my $old_sq = PublicInbox::Config::squote_maybe($old_out);
+        my $new_sq = PublicInbox::Config::squote_maybe($new_out);
+        $lei->puts("lei.q.output changed from $old_sq to $new_sq");
+        $lei->qerr("# lei convert $old_sq -o $new_sq");
+        my $v = !$lei->{opt}->{quiet};
+        $lei->{opt} = { output => $new_out, verbose => $v };
+        require PublicInbox::LeiConvert;
+        PublicInbox::LeiConvert::lei_convert($lei, $old_out);
+
+        $lei->fail(<<EOM) if -e $dir_old && !rename($dir_old, $dir_new);
+E: rename($dir_old, $dir_new) error: $!
+EOM
+}
+
+sub lei_edit_search {
+        my ($lei, $out) = @_;
+        my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return;
+        my $f = $lss->{-f};
+        my $self = bless { lei => $lei, lss => $lss, -f => $f }, __PACKAGE__;
+        $self->cfg_do_edit;
+}
+
+*_complete_edit_search = \&PublicInbox::LeiUp::_complete_up;
+
+1;
diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm
new file mode 100644
index 00000000..16f069da
--- /dev/null
+++ b/lib/PublicInbox/LeiExportKw.pm
@@ -0,0 +1,147 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei export-kw" sub-command
+package PublicInbox::LeiExportKw;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use Errno qw(EEXIST ENOENT);
+use PublicInbox::Syscall qw(rename_noreplace);
+
+sub export_kw_md { # LeiMailSync->each_src callback
+        my ($oidbin, $id, $self, $mdir) = @_;
+        my $sto_kw = $self->{lse}->oidbin_keywords($oidbin) or return;
+        my $bn = $$id;
+        my ($md_kw, $unknown, @try);
+        if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
+                ($md_kw, $unknown) = PublicInbox::MdirReader::flags2kw($1);
+                @try = qw(cur new);
+        } else {
+                $unknown = [];
+                @try = qw(new cur);
+        }
+        if ($self->{-merge_kw} && $md_kw) { # merging keywords is the default
+                @$sto_kw{keys %$md_kw} = values(%$md_kw);
+        }
+        $bn .= ':2,'.
+                PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown);
+        return if $bn eq $$id;
+        my $dst = "$mdir/cur/$bn";
+        my $lei = $self->{lei};
+        for my $d (@try) {
+                my $src = "$mdir/$d/$$id";
+                if (rename_noreplace($src, $dst)) { # success
+                        $self->{lms}->mv_src("maildir:$mdir",
+                                                $oidbin, $id, $bn);
+                        return; # success
+                } elsif ($! == EEXIST) { # lost race with lei/store?
+                        return;
+                } elsif ($! != ENOENT) {
+                        $lei->child_error(0,
+                                "E: rename_noreplace($src -> $dst): $!");
+                } # else loop @try
+        }
+        my $e = $!;
+        # both tries failed
+        my $oidhex = unpack('H*', $oidbin);
+        my $src = "$mdir/{".join(',', @try)."}/$$id";
+        $lei->child_error(0, "rename_noreplace($src -> $dst) ($oidhex): $e");
+        for (@try) { return if -e "$mdir/$_/$$id" }
+        $self->{lms}->clear_src("maildir:$mdir", $id);
+}
+
+sub export_kw_imap { # LeiMailSync->each_src callback
+        my ($oidbin, $id, $self, $mic) = @_;
+        my $sto_kw = $self->{lse}->oidbin_keywords($oidbin) or return;
+        $self->{imap_mod_kw}->($self->{nwr}, $mic, $id, [ keys %$sto_kw ]);
+}
+
+# overrides PublicInbox::LeiInput::input_path_url
+sub input_path_url {
+        my ($self, $input, @args) = @_;
+        $self->{lms}->lms_write_prepare;
+        if ($input =~ /\Amaildir:(.+)/i) {
+                my $mdir = $1;
+                require PublicInbox::LeiToMail; # kw2suffix
+                $self->{lms}->each_src($input, \&export_kw_md, $self, $mdir);
+        } elsif ($input =~ m!\Aimaps?://!i) {
+                my $uri = PublicInbox::URIimap->new($input);
+                my $mic = $self->{nwr}->mic_for_folder($uri);
+                if ($mic && !$self->{nwr}->can_store_flags($mic)) {
+                        my $m = "$input does not support PERMANENTFLAGS";
+                        if (defined $self->{lei}->{opt}->{all}) {
+                                $self->{lei}->qerr("# $m");
+                        } else { # set error code if user explicitly requested
+                                $self->{lei}->child_error(0, "E: $m");
+                        }
+                        return;
+                }
+                if ($mic) {
+                        $self->{lms}->each_src($$uri, \&export_kw_imap,
+                                                $self, $mic);
+                        $mic->expunge;
+                } else {
+                        $self->{lei}->child_error(0, "$input unavailable: $@");
+                }
+        } else { die "BUG: $input not supported" }
+}
+
+sub lei_export_kw {
+        my ($lei, @folders) = @_;
+        my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
+lei/store uninitialized, see lei-import(1)
+EOM
+        my $lms = $lei->lms or return $lei->fail(<<EOM);
+lei mail_sync uninitialized, see lei-import(1)
+EOM
+        if (defined(my $all = $lei->{opt}->{all})) { # --all=<local|remote>
+                $lms->group2folders($lei, $all, \@folders) or return;
+                @folders = grep(/\A(?:maildir|imaps?):/i, @folders);
+        } else {
+                $lms->arg2folder($lei, \@folders); # may die
+        }
+        $lms->lms_pause;
+        my $self = bless { lse => $sto->search, lms => $lms }, __PACKAGE__;
+        $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
+        $self->prepare_inputs($lei, \@folders) or return;
+        if (my @ro = grep(!/\A(?:maildir|imaps?):/i, @folders)) {
+                return $lei->fail("cannot export to read-only folders: @ro");
+        }
+        my $m = $lei->{opt}->{mode} // 'merge';
+        if ($m eq 'merge') { # default
+                $self->{-merge_kw} = 1;
+        } elsif ($m eq 'set') {
+        } else {
+                return $lei->fail(<<EOM);
+--mode=$m not supported (`set' or `merge')
+EOM
+        }
+        if (my $net = $lei->{net}) {
+                require PublicInbox::NetWriter;
+                $self->{nwr} = bless $net, 'PublicInbox::NetWriter';
+                $self->{imap_mod_kw} = $net->can($self->{-merge_kw} ?
+                                        'imap_add_kw' : 'imap_set_kw');
+                $self->{nwr}->{-skip_creat} = 1;
+        }
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self);
+}
+
+sub _complete_export_kw {
+        my ($lei, @argv) = @_;
+        my $lms = $lei->lms or return ();
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        # filter-out read-only sources:
+        my @k = grep(m!(?:maildir|imaps?):!,
+                        $lms->folders($argv[-1] // undef, 1));
+        my @m = map { $match_cb->($_) } @k;
+        @m ? @m : @k;
+}
+
+no warnings 'once';
+
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
new file mode 100644
index 00000000..31b9bd1e
--- /dev/null
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -0,0 +1,119 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-external commands of lei
+package PublicInbox::LeiExternal;
+use strict;
+use v5.10.1;
+use PublicInbox::Config qw(glob2re);
+
+sub externals_each {
+        my ($self, $cb, @arg) = @_;
+        my $cfg = $self->_lei_cfg;
+        my %boost;
+        for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
+                my $loc = substr($sec, length('external.'));
+                $boost{$loc} = $cfg->{"$sec.boost"};
+        }
+        return \%boost if !wantarray && !$cb;
+
+        # highest boost first, but stable for alphabetic tie break
+        use sort 'stable';
+        my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
+        if (ref($cb) eq 'CODE') {
+                for my $loc (@order) {
+                        $cb->(@arg, $loc, $boost{$loc});
+                }
+        } elsif (ref($cb) eq 'HASH') {
+                %$cb = %boost;
+        }
+        @order; # scalar or array
+}
+
+sub ext_canonicalize {
+        my $location = $_[-1]; # $_[0] may be $lei
+        if ($location !~ m!\Ahttps?://!) {
+                PublicInbox::Config::rel2abs_collapsed($location);
+        } else {
+                require URI;
+                my $uri = URI->new($location)->canonical;
+                my $path = $uri->path . '/';
+                $path =~ tr!/!/!s; # squeeze redundant '/'
+                $uri->path($path);
+                $uri->as_string;
+        }
+}
+
+# get canonicalized externals list matching $loc
+# $is_exclude denotes it's for --exclude
+# otherwise it's for --only/--include is assumed
+sub get_externals {
+        my ($self, $loc, $is_exclude) = @_;
+        return (ext_canonicalize($loc)) if -e $loc;
+        my @m;
+        my @cur = externals_each($self);
+        my $do_glob = !$self->{opt}->{globoff}; # glob by default
+        if ($do_glob && (my $re = glob2re($loc))) {
+                @m = grep(m!$re/?\z!, @cur);
+                return @m if scalar(@m);
+        } elsif (index($loc, '/') < 0) { # exact basename match:
+                @m = grep(m!/\Q$loc\E/?\z!, @cur);
+                return @m if scalar(@m) == 1;
+        } elsif ($is_exclude) { # URL, maybe:
+                my $canon = ext_canonicalize($loc);
+                @m = grep(m!\A\Q$canon\E\z!, @cur);
+                return @m if scalar(@m) == 1;
+        } else { # URL:
+                return (ext_canonicalize($loc));
+        }
+        if (scalar(@m) == 0) {
+                die "`$loc' is unknown\n";
+        } else {
+                die("`$loc' is ambiguous:\n", map { "\t$_\n" } @m, "\n");
+        }
+}
+
+sub canonicalize_excludes {
+        my ($lei, $excludes) = @_;
+        my %x;
+        for my $loc (@$excludes) {
+                my @l = get_externals($lei, $loc, 1);
+                $x{$_} = 1 for @l;
+        }
+        \%x;
+}
+
+# returns an anonymous sub which returns an array of potential results
+sub complete_url_prepare {
+        my $argv = $_[-1]; # $_[0] may be $lei
+        # Workaround bash default COMP_WORDBREAKS splitting URLs to
+        # ['https', ':', '//', ...].  COMP_WORDBREAKS is global for all
+        # completions loaded, not just ours, so we can't change it.
+        # cf. contrib/completion/lei-completion.bash
+        my ($pfx, $cur)  = ('', pop(@$argv) // '');
+        if (@$argv) {
+                my @x = @$argv;
+                if ($cur =~ /\A[:;=]\z/) { # COMP_WORDBREAKS + URL union
+                        push @x, $cur;
+                        $cur = '';
+                }
+                while (@x && $pfx !~ m!\A(?: (?:[\+\-]?(?:L|kw):) |
+                                (?:(?:imap|nntp|http)s?:) |
+                                (?:--\w?\z)|(?:-\w?\z) )!x) {
+                        $pfx = pop(@x).$pfx;
+                }
+        }
+        my $re = qr!\A\Q$pfx\E(\Q$cur\E.*)!;
+        my $match_cb = sub {
+                # the "//;" here (for AUTH=ANONYMOUS) interacts badly with
+                # bash tab completion, strip it out for now since our commands
+                # work w/o it.  Not sure if there's a better solution...
+                $_[0] =~ s!//;AUTH=ANONYMOUS\@!//!i;
+                # only return the part specified on the CLI
+                # don't duplicate if already 100% completed
+                $_[0] =~ $re ? ($cur eq $1 ? () : $1) : ()
+        };
+        wantarray ? ($pfx, $cur, $match_cb) : $match_cb;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiFinmsg.pm b/lib/PublicInbox/LeiFinmsg.pm
new file mode 100644
index 00000000..7ed58c24
--- /dev/null
+++ b/lib/PublicInbox/LeiFinmsg.pm
@@ -0,0 +1,22 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Finalization messages, used to queue up a bunch of messages which
+# only get written out on ->DESTROY
+package PublicInbox::LeiFinmsg;
+use strict;
+use v5.10.1;
+
+sub new {
+        my ($cls, $lei) = @_;
+        bless [ @$lei{qw(2 sock)}, $$ ], $cls;
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        my ($stderr, $sock, $pid) = splice(@$self, 0, 3);
+        print $stderr @$self if $pid == $$;
+        # script/lei disconnects when $sock SvREFCNT drops to zero
+}
+
+1;
diff --git a/lib/PublicInbox/LeiForgetExternal.pm b/lib/PublicInbox/LeiForgetExternal.pm
new file mode 100644
index 00000000..c8d1df38
--- /dev/null
+++ b/lib/PublicInbox/LeiForgetExternal.pm
@@ -0,0 +1,40 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei forget-external" command
+package PublicInbox::LeiForgetExternal;
+use strict;
+use v5.10.1;
+
+sub lei_forget_external {
+        my ($lei, @locations) = @_;
+        my $cfg = $lei->_lei_cfg or
+                return $lei->fail('no externals configured');
+        my %seen;
+        for my $loc (@locations) {
+                for my $l ($loc, $lei->ext_canonicalize($loc)) {
+                        next if $seen{$l}++;
+                        my $key = "external.$l.boost";
+                        delete($cfg->{$key});
+                        if ($lei->_config('+e', '--unset', $key)) {
+                                $lei->qerr("# $l forgotten ");
+                        } elsif (($? >> 8) == 5) {
+                                warn("# $l not found\n");
+                        } else {
+                                $lei->child_error($?, "# --unset $key error");
+                        }
+                }
+        }
+}
+
+# shell completion helper called by lei__complete
+sub _complete_forget_external {
+        my ($lei, @argv) = @_;
+        my $cfg = $lei->_lei_cfg or return ();
+        my ($pfx, $cur, $match_cb) = $lei->complete_url_prepare(\@argv);
+        map {
+                $match_cb->(substr($_, length('external.')));
+        } grep(/\Aexternal\.\Q$pfx$cur/, @{$cfg->{-section_order}});
+}
+
+1;
diff --git a/lib/PublicInbox/LeiForgetMailSync.pm b/lib/PublicInbox/LeiForgetMailSync.pm
new file mode 100644
index 00000000..762910ed
--- /dev/null
+++ b/lib/PublicInbox/LeiForgetMailSync.pm
@@ -0,0 +1,26 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei forget-mail-sync" drop synchronization information
+# TODO: figure out what to do about "lei index" users having
+# dangling references.  Perhaps just documenting "lei index"
+# use being incompatible with "forget-mail-sync" use is
+# sufficient.
+
+package PublicInbox::LeiForgetMailSync;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiRefreshMailSync;
+
+sub lei_forget_mail_sync {
+        my ($lei, @folders) = @_;
+        my $lms = $lei->lms or return;
+        $lms->lms_write_prepare;
+        $lms->arg2folder($lei, \@folders); # may die
+        $lms->forget_folders(@folders);
+}
+
+*_complete_forget_mail_sync =
+        \&PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync;
+
+1;
diff --git a/lib/PublicInbox/LeiForgetSearch.pm b/lib/PublicInbox/LeiForgetSearch.pm
new file mode 100644
index 00000000..dd358ae1
--- /dev/null
+++ b/lib/PublicInbox/LeiForgetSearch.pm
@@ -0,0 +1,79 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei forget-search" forget/remove a saved search "lei q --save"
+package PublicInbox::LeiForgetSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiUp);
+use PublicInbox::LeiSavedSearch;
+use File::Path ();
+use SelectSaver;
+
+sub do_forget_search {
+        my ($lei, @outs) = @_;
+        my @dirs; # paths in ~/.local/share/lei/saved-search/
+        my $cwd;
+        for my $o (@outs) {
+                my $d = PublicInbox::LeiSavedSearch::lss_dir_for($lei, \$o, 1);
+                if (-e $d) {
+                        push @dirs, $d
+                } else { # keep going, like rm(1):
+                        $cwd //= $lei->rel2abs('.');
+                        warn "--save was not used with $o cwd=$cwd\n";
+                }
+        }
+        my $save;
+        my $opt = { safe => 1 };
+        if ($lei->{opt}->{verbose}) {
+                $opt->{verbose} = 1;
+                $save = SelectSaver->new($lei->{2});
+        }
+        File::Path::remove_tree(@dirs, $opt);
+        $lei->child_error(0) if defined $cwd;
+}
+
+sub lei_forget_search {
+        my ($lei, @outs) = @_;
+        my $prune = $lei->{opt}->{prune};
+        $prune // return do_forget_search($lei, @outs);
+        return $lei->fail("--prune and @outs incompatible") if @outs;
+        my @tmp = PublicInbox::LeiSavedSearch::list($lei);
+        my $self = bless { -mail_sync => 1 }, __PACKAGE__;
+        $self->filter_lss($lei, $prune) // return
+                        $lei->fail("only --prune=$prune not understood");
+        if ($self->{o_remote}) { # setup lei->{auth}
+                $self->prepare_inputs($lei, $self->{o_remote}) or return;
+        }
+        $lei->wq1_start($self);
+}
+
+sub do_prune {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        for my $o (@{$self->{o_local} // []}) {
+                next if -e $o;
+                $lei->qerr("# pruning $o");
+                eval { do_forget_search($lei, $o) };
+                $lei->child_error(0, "E: $@") if $@;
+        }
+        for my $o (@{$self->{o_remote} // []}) {
+                my $uri = PublicInbox::URIimap->new($o);
+                next if $lei->{net}->mic_for_folder($uri);
+                $lei->qerr("# pruning $uri");
+                eval { do_forget_search($lei, $o) };
+                $lei->child_error(0, "E: $@") if $@;
+        }
+}
+
+# called in top-level lei-daemon when LeiAuth is done
+sub net_merge_all_done {
+        my ($self) = @_;
+        $self->wq_do('do_prune');
+        $self->wq_close;
+}
+
+*_wq_done_wait = \&PublicInbox::LEI::wq_done_wait;
+*_complete_forget_search = \&PublicInbox::LeiUp::_complete_up;
+
+1;
diff --git a/lib/PublicInbox/LeiHelp.pm b/lib/PublicInbox/LeiHelp.pm
new file mode 100644
index 00000000..fa0e7866
--- /dev/null
+++ b/lib/PublicInbox/LeiHelp.pm
@@ -0,0 +1,103 @@
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# -h/--help support for lei
+package PublicInbox::LeiHelp;
+use strict;
+use v5.10.1;
+use Text::Wrap qw(wrap);
+
+my %NOHELP = map { $_ => 1 } qw(mfolder);
+
+sub call {
+        my ($self, $errmsg, $CMD, $OPTDESC) = @_;
+        my $cmd = $self->{cmd} // 'COMMAND';
+        my @info = @{$CMD->{$cmd} // [ '...', '...' ]};
+        my @top = ($cmd, shift(@info) // ());
+        my $cmd_desc = shift(@info);
+        $cmd_desc = $cmd_desc->($self) if ref($cmd_desc) eq 'CODE';
+        $cmd_desc =~ s/default: /default:\xa0/;
+        my @opt_desc;
+        my $lpad = 2;
+        for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS)
+                my $desc = $OPTDESC->{"$sw\t$cmd"} // $OPTDESC->{$sw} // next;
+                my $arg_vals = '';
+                ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';
+
+                # lower-case is a keyword (e.g. `content', `oid'),
+                # ALL_CAPS is a string description (e.g. `PATH')
+                if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) {
+                        $desc .= " (default:\xa0`$1')";
+                } else {
+                        $desc =~ s/default: /default:\xa0/;
+                }
+                my (@vals, @s, @l);
+                my $x = $sw;
+                if ($x =~ s/!\z//) { # solve! => --no-solve
+                        $x =~ s/(\A|\|)/$1no-/g
+                } elsif ($x =~ s/\+\z//) { # verbose|v+
+                } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s"
+                        @vals = (' [', undef, ']');
+                } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s"
+                        @vals = (' ', undef);
+                } # else: no args $x = 'threads|t'
+
+                # we support underscore options from public-inbox-* commands;
+                # but they've never been documented and will likely go away.
+                # $x = help|h
+                for (grep { !/_/ && !$NOHELP{$_} } split(/\|/, $x)) {
+                        length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_");
+                }
+                if (!scalar(@vals)) { # no args 'threads|t'
+                } elsif ($arg_vals =~ s/\A([A-Z_=]+)\b//) { # "NAME"
+                        $vals[1] = $1;
+                } else {
+                        $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE"
+                }
+                if ($arg_vals =~ /([,\|])/) {
+                        my $sep = $1;
+                        my @allow = split(/\Q$sep\E/, $arg_vals);
+                        my $must = $sep eq '|' ? 'Must' : 'Can';
+                        @allow = map { length $_ ? "`$_'" : () } @allow;
+                        my $last = pop @allow;
+                        $desc .= "\n$must be one of: " .
+                                join(', ', @allow) . " or $last";
+                }
+                my $lhs = join(', ', @s, @l) . join('', @vals);
+                if ($x =~ /\|\z/) { # "stdin|" or "clear|"
+                        $lhs =~ s/\A--/- , --/;
+                } else {
+                        $lhs =~ s/\A--/    --/; # pad if no short options
+                }
+                $lpad = length($lhs) if length($lhs) > $lpad;
+                push @opt_desc, $lhs, $desc;
+        }
+        my $msg = $errmsg ? "E: $errmsg\n" : '';
+        $msg .= <<EOF;
+usage: lei @top
+$cmd_desc
+
+EOF
+        $lpad += 2;
+        local $Text::Wrap::columns = 78 - $lpad;
+        # local $Text::Wrap::break = ; # don't break on nbsp (\xa0)
+        my $padding = ' ' x ($lpad + 2);
+        while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) {
+                $msg .= '  '.pack("A$lpad", $lhs);
+                $rhs = wrap('', '', $rhs);
+                $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines
+                $msg .= $rhs;
+                $msg .= "\n";
+        }
+        my $fd = $errmsg ? 2 : 1;
+        $self->start_pager if -t $self->{$fd};
+        $msg =~ s/\xa0/ /gs; # convert NBSP to SP
+        print { $self->{$fd} } $msg;
+        $self->x_it($errmsg ? (1 << 8) : 0); # stderr => failure
+        undef;
+}
+
+# the "lei help" command
+sub lei_help { $_[0]->_help }
+
+1;
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
new file mode 100644
index 00000000..5521188c
--- /dev/null
+++ b/lib/PublicInbox/LeiImport.pm
@@ -0,0 +1,163 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei import" sub-command
+package PublicInbox::LeiImport;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::Compat qw(uniqstr);
+
+# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml, $vmd) = @_;
+        my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+        if (my $all_vmd = $self->{all_vmd}) {
+                @$vmd{keys %$all_vmd} = values %$all_vmd;
+        }
+        $self->{lei}->{sto}->wq_do('set_eml', $eml, $vmd, $xoids);
+}
+
+sub input_mbox_cb { # MboxReader callback
+        my ($eml, $self) = @_;
+        my $vmd;
+        if ($self->{-import_kw}) {
+                my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
+                $vmd = { kw => $kw } if scalar(@$kw);
+        }
+        input_eml_cb($self, $eml, $vmd);
+}
+
+sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
+        my ($self, $f, $fl) = @_;
+        my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+                die "BUG: $f was not from a Maildir?\n";
+        my $kw = PublicInbox::MdirReader::flags2kw($fl);
+        substr($folder, 0, 0) = 'maildir:'; # add prefix
+        my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+        my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+        my @oidbin = $lms ? $lms->name_oidbin($folder, $bn) : ();
+        @oidbin > 1 and warn("W: $folder/*/$$bn not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my @docids = sort { $a <=> $b } uniqstr
+                        map { $lse->over->oidbin_exists($_) } @oidbin;
+        my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+        if (scalar @docids) {
+                $lse->kw_changed(undef, $kw, \@docids) or return;
+        }
+        if (my $eml = eml_from_path($f)) {
+                $vmd->{sync_info} = [ $folder, \$bn ] if $self->{-mail_sync};
+                $self->input_eml_cb($eml, $vmd);
+        }
+}
+
+sub input_mh_cb {
+        my ($mhdir, $n, $kw, $eml, $self) = @_;
+        substr($mhdir, 0, 0) = 'mh:'; # add prefix
+        my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+        my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+        my @oidbin = $lms ? $lms->num_oidbin($mhdir, $n) : ();
+        @oidbin > 1 and warn("W: $mhdir/$n not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my @docids = sort { $a <=> $b } uniqstr
+                        map { $lse->over->oidbin_exists($_) } @oidbin;
+        if (scalar @docids) {
+                $lse->kw_changed(undef, $kw, \@docids) or return;
+        }
+        if (defined $eml) {
+                my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+                $vmd->{sync_info} = [ $mhdir, $n + 0 ] if $self->{-mail_sync};
+                $self->input_eml_cb($eml, $vmd);
+        }
+        # TODO:
+        # elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+        #        $ikw->wq_io_do('ck_update_kw', [], "mh:$dir", $uid, $kw);
+}
+
+sub input_net_cb { # imap_each / nntp_each
+        my ($uri, $uid, $kw, $eml, $self) = @_;
+        if (defined $eml) {
+                my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+                $vmd->{sync_info} = [ $$uri, $uid ] if $self->{-mail_sync};
+                $self->input_eml_cb($eml, $vmd);
+        } elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+                # we send $uri as a bare SCALAR and not a URIimap ref to
+                # reduce socket traffic:
+                $ikw->wq_io_do('ck_update_kw', [], $$uri, $uid, $kw);
+        }
+}
+
+sub do_import_index ($$@) {
+        my ($self, $lei, @inputs) = @_;
+        my $sto = $lei->_lei_store(1);
+        $sto->write_prepare($lei);
+        $self->{-import_kw} = $lei->{opt}->{kw} // 1;
+        $self->{all_vmd} = $lei->{vmd_mod} if keys %{$lei->{vmd_mod}};
+        $lei->ale; # initialize for workers to read (before LeiPmdir->new)
+        $self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1;
+        $self->prepare_inputs($lei, \@inputs) or return;
+
+        my $j = $lei->{opt}->{jobs} // 0;
+        $j =~ /\A([0-9]+),[0-9]+\z/ and $j = $1 + 0;
+        $j ||= scalar(@{$self->{inputs}}) || 1;
+        my $ikw;
+        my $net = $lei->{net};
+        if ($net) {
+                # $j = $net->net_concurrency($j); TODO
+                if ($lei->{opt}->{incremental} // 1) {
+                        $net->{incremental} = 1;
+                        $net->{-lms_rw} = $lei->lms // 0;
+                        if ($self->{-import_kw} && $net->{-lms_rw} &&
+                                        !$lei->{opt}->{'new-only'} &&
+                                        $net->{imap_order}) {
+                                require PublicInbox::LeiImportKw;
+                                $ikw = PublicInbox::LeiImportKw->new($lei);
+                                $net->{each_old} = 1;
+                        }
+                }
+        } else {
+                my $nproc = $self->detect_nproc;
+                $j = $nproc if $j > $nproc;
+        }
+        ($lei->{opt}->{'new-only'} && (!$net || !$net->{imap_order})) and
+                warn "# --new-only is only for IMAP\n";
+        $lei->{-eml_noisy} = 1;
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self, $j);
+}
+
+sub lei_import { # the main "lei import" method
+        my ($lei, @inputs) = @_;
+        my $self = bless {}, __PACKAGE__;
+        do_import_index($self, $lei, @inputs);
+}
+
+sub _complete_import {
+        my ($lei, @argv) = @_;
+        my $has_arg = @argv;
+        my ($pfx, $cur, $match_cb) = $lei->complete_url_prepare(\@argv);
+        my @try = $has_arg ? ($pfx.$cur, $argv[-1]) : ($argv[-1]);
+        push(@try, undef) if defined $try[-1];
+        my (@f, @k);
+        for (@try) {
+                @k = $lei->url_folder_cache->keys($_, 1) and last;
+        }
+        my @L = eval { $lei->_lei_store->search->all_terms('L') };
+        push(@k, map { "+L:$_" } @L);
+        if (my $lms = $lei->lms) {
+                for (@try) {
+                        @f = $lms->folders($_, 1) and last;
+                }
+                push @k, @f;
+        }
+        my @m = map { $match_cb->($_) } @k;
+        @m ? @m : @k;
+}
+
+no warnings 'once';
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
new file mode 100644
index 00000000..765e23cd
--- /dev/null
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -0,0 +1,53 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# WQ worker for dealing with LeiImport IMAP flags on already-imported messages
+# WQ key: {ikw}
+package PublicInbox::LeiImportKw;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Compat qw(uniqstr);
+
+sub new {
+        my ($cls, $lei) = @_;
+        my $self = bless { -wq_ident => 'lei import_kw worker' }, $cls;
+        my $j = $self->detect_nproc // 4;
+        $j = 4 if $j > 4;
+        my ($op_c, $ops) = $lei->workers_start($self, $j);
+        $op_c->{ops} = $ops; # for PktOp->event_step
+        $self->{lei_sock} = $lei->{sock};
+        $lei->{ikw} = $self;
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child;
+        my $net = delete $lei->{net} // die 'BUG: no lei->{net}';
+        $self->{sto} = $lei->{sto} // die 'BUG: no lei->{sto}';
+        $self->{verbose} = $lei->{opt}->{verbose};
+        $self->{lse} = $self->{sto}->search;
+        $self->{over} = $self->{lse}->over;
+        $self->{-lms_rw} = $net->{-lms_rw} || die 'BUG: net->{-lms_rw} FALSE';
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub ck_update_kw { # via wq_io_do
+        my ($self, $url, $uid, $kw) = @_;
+        my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
+        my $uid_url = index($url, 'mh:') == 0 ? $url.$uid : "$url/;UID=$uid";
+        @oidbin > 1 and warn("W: $uid_url not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my @docids = sort { $a <=> $b } uniqstr
+                map { $self->{over}->oidbin_exists($_) } @oidbin;
+        $self->{lse}->kw_changed(undef, $kw, \@docids) or return;
+        $self->{verbose} and $self->{lei}->qerr("# $uid_url => @$kw\n");
+        $self->{sto}->wq_do('set_eml_vmd', undef, { kw => $kw }, \@docids);
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+        $_[0]->wq_eof('ikw');
+}
+
+1;
diff --git a/lib/PublicInbox/LeiIndex.pm b/lib/PublicInbox/LeiIndex.pm
new file mode 100644
index 00000000..0e329e58
--- /dev/null
+++ b/lib/PublicInbox/LeiIndex.pm
@@ -0,0 +1,46 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei index" sub-command, this is similar to
+# "lei import" but doesn't put a git blob into ~/.local/share/lei/store
+package PublicInbox::LeiIndex;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::LeiImport;
+
+# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
+sub input_eml_cb { # used by input_maildir_cb and input_net_cb
+        my ($self, $eml, $vmd) = @_;
+        my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+        if (my $all_vmd = $self->{all_vmd}) {
+                @$vmd{keys %$all_vmd} = values %$all_vmd;
+        }
+        $self->{lei}->{sto}->wq_do('index_eml_only', $eml, $vmd, $xoids);
+}
+
+sub input_fh { # overrides PublicInbox::LeiInput::input_fh
+        my ($self, $ifmt, $fh, $input, @args) = @_;
+        $self->{lei}->child_error(0, <<EOM);
+$input ($ifmt) not yet supported, try `lei import'
+EOM
+}
+
+sub lei_index {
+        my ($lei, @argv) = @_;
+        $lei->{opt}->{'mail-sync'} = 1;
+        my $self = bless {}, __PACKAGE__;
+        PublicInbox::LeiImport::do_import_index($self, $lei, @argv);
+}
+
+no warnings 'once';
+no strict 'refs';
+for my $m (qw(pmdir_cb input_net_cb input_mh_cb)) {
+        *$m = PublicInbox::LeiImport->can($m);
+}
+
+*_complete_index = \&PublicInbox::LeiImport::_complete_import;
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiInit.pm b/lib/PublicInbox/LeiInit.pm
new file mode 100644
index 00000000..94897e61
--- /dev/null
+++ b/lib/PublicInbox/LeiInit.pm
@@ -0,0 +1,40 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for the "lei init" command, not sure if it's even needed...
+package PublicInbox::LeiInit;
+use v5.10.1;
+
+sub lei_init {
+        my ($self, $dir) = @_;
+        my $cfg = $self->_lei_cfg(1);
+        my $cur = $cfg->{'leistore.dir'};
+        $dir //= $self->store_path;
+        $dir = $self->rel2abs($dir);
+        my @cur = stat($cur) if defined($cur);
+        $cur = $self->canonpath_harder($cur // $dir);
+        my @dir = stat($dir);
+        my $exists = "# leistore.dir=$cur already initialized" if @dir;
+        if (@cur) {
+                if ($cur eq $dir) {
+                        $self->_lei_store(1)->done;
+                        return $self->qerr($exists);
+                }
+
+                # some folks like symlinks and bind mounts :P
+                if (@dir && "@cur[1,0]" eq "@dir[1,0]") {
+                        $self->_config('leistore.dir', $dir) or return;
+                        $self->_lei_store(1)->done;
+                        return $self->qerr("$exists (as $cur)");
+                }
+                return $self->fail(<<"");
+E: leistore.dir=$cur already initialized and it is not $dir
+
+        }
+        $self->_config('leistore.dir', $dir) or return;
+        $self->_lei_store(1)->done;
+        $exists //= "# leistore.dir=$dir newly initialized";
+        $self->qerr($exists);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
new file mode 100644
index 00000000..c388f7dc
--- /dev/null
+++ b/lib/PublicInbox/LeiInput.pm
@@ -0,0 +1,544 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# parent class for LeiImport, LeiConvert, LeiIndex
+package PublicInbox::LeiInput;
+use v5.12;
+use PublicInbox::DS;
+use PublicInbox::Spawn qw(which popen_rd);
+use PublicInbox::InboxWritable qw(eml_from_path);
+
+# JMAP RFC 8621 4.1.1
+# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
+our @KW = (qw(seen answered flagged draft), # widely-compatible
+        qw(forwarded), # IMAP + Maildir
+        qw(phishing junk notjunk)); # rarely supported
+
+# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
+# but is it good idea?  Stick to the system and reserved ones, for now.
+# The widely-compatible ones map to IMAP system flags, Maildir flags
+# and mbox Status/X-Status headers.
+my %KW = map { $_ => 1 } @KW;
+my $L_MAX = 244; # Xapian term limit - length('L')
+
+# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
+# "Servers MAY reject names that violate server policy"
+my %ERR = (
+        L => sub {
+                my ($label) = @_;
+                length($label) >= $L_MAX and
+                        return "`$label' too long (must be <= $L_MAX)";
+                $label =~ /[A-Z]/ and
+                        return "`$label' must be lowercase";
+                $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z} ?
+                        undef : "`$label' is invalid";
+        },
+        kw => sub {
+                my ($kw) = @_;
+                $KW{$kw} ? undef : <<EOM;
+`$kw' is not one of: `seen', `flagged', `answered', `draft'
+`junk', `notjunk', `phishing' or `forwarded'
+EOM
+        }
+);
+
+sub check_input_format ($;$) {
+        my ($lei, $files) = @_;
+        my $opt_key = 'in-format';
+        my $fmt = $lei->{opt}->{$opt_key};
+        if (!$fmt) {
+                my $err = $files ? "regular file(s):\n@$files" : '--stdin';
+                return $lei->fail("--$opt_key unset for $err");
+        }
+        return 1 if $fmt eq 'eml';
+        require PublicInbox::MboxLock if $files;
+        require PublicInbox::MboxReader;
+        PublicInbox::MboxReader->reads($fmt) or
+                return $lei->fail("--$opt_key=$fmt unrecognized");
+        1;
+}
+
+sub input_mbox_cb { # base MboxReader callback
+        my ($eml, $self) = @_;
+        $eml->header_set($_) for (qw(Status X-Status));
+        $self->input_eml_cb($eml);
+}
+
+sub input_maildir_cb {
+        my ($fn, $kw, $eml, $self) = @_;
+        $self->input_eml_cb($eml);
+}
+
+sub input_mh_cb {
+        my ($dn, $n, $kw, $eml, $self) = @_;
+        $self->input_eml_cb($eml);
+}
+
+sub input_net_cb { # imap_each, nntp_each cb
+        my ($url, $uid, $kw, $eml, $self) = @_;
+        $self->input_eml_cb($eml);
+}
+
+# import a single file handle of $name
+# Subclass must define ->input_eml_cb and ->input_mbox_cb
+sub input_fh {
+        my ($self, $ifmt, $fh, $name, @args) = @_;
+        if ($ifmt eq 'eml') {
+                my $buf = eval { PublicInbox::IO::read_all $fh, 0 };
+                my $e = $@;
+                return $self->{lei}->child_error($?, <<"") if !$fh->close || $e;
+error reading $name: $! (\$?=$?) (\$@=$e)
+
+                PublicInbox::Eml::strip_from($buf);
+
+                # a user may feed just a body: git diff | lei rediff -U9
+                if ($self->{-force_eml}) {
+                        my $eml = PublicInbox::Eml->new($buf);
+                        substr($buf, 0, 0) = "\n\n" if !$eml->{bdy};
+                }
+                $self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args);
+        } else {
+                # prepare_inputs already validated $ifmt
+                my $cb = PublicInbox::MboxReader->reads($ifmt) //
+                                die "BUG: bad fmt=$ifmt";
+                $cb->(undef, $fh, $self->can('input_mbox_cb'), $self, @args);
+        }
+}
+
+# handles mboxrd endpoints described in Documentation/design_notes.txt
+sub handle_http_input ($$@) {
+        my ($self, $url, @args) = @_;
+        my $lei = $self->{lei} or die 'BUG: {lei} missing';
+        my $curl_opt = delete $self->{"-curl-$url"} or
+                                die("BUG: $url curl options not prepared");
+        my $uri = pop @$curl_opt;
+        my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+        push @$curl, '-s', @$curl_opt;
+        my $cmd = $curl->for_uri($lei, $uri);
+        $lei->qerr("# $cmd");
+        my $fh = popen_rd($cmd, undef, { 2 => $lei->{2} });
+        grep(/\A--compressed\z/, @$curl) or
+                $fh = IO::Uncompress::Gunzip->new($fh,
+                                        MultiStream => 1, AutoClose => 1);
+        eval { $self->input_fh('mboxrd', $fh, $url, @args) };
+        my $err = $@ ? ": $@" : '';
+        $lei->child_error($?, "@$cmd failed$err") if $err || $?;
+}
+
+sub oid2eml { # git->cat_async cb
+        my ($bref, $oid, $type, $size, $self) = @_;
+        if ($type eq 'blob') {
+                $self->input_eml_cb(PublicInbox::Eml->new($bref));
+        } else {
+                warn "W: $oid is type=$type\n";
+        }
+}
+
+sub each_ibx_eml_unindexed {
+        my ($self, $ibx, @args) = @_;
+        $ibx->isa('PublicInbox::Inbox') or return $self->{lei}->fail(<<EOM);
+unindexed extindex $ibx->{topdir} not supported
+EOM
+        require PublicInbox::SearchIdx;
+        my $n = $ibx->max_git_epoch;
+        my @g = defined($n) ? map { $ibx->git_epoch($_) } (0..$n) : ($ibx->git);
+        my $sync = { D => {}, ibx => $ibx }; # D => {} filters out deletes
+        my ($f, $at, $ct, $oid, $cmt);
+        for my $git (grep defined, @g) {
+                my $s = PublicInbox::SearchIdx::log2stack($sync, $git, 'HEAD');
+                while (($f, $at, $ct, $oid, $cmt) = $s->pop_rec) {
+                        $git->cat_async($oid, \&oid2eml, $self) if $f eq 'm';
+                }
+                $git->cleanup; # wait all
+        }
+}
+
+sub each_ibx_eml {
+        my ($self, $ibx, @args) = @_; # TODO: is @args used at all?
+        my $over = $ibx->over or return each_ibx_eml_unindexed(@_);
+        my $git = $ibx->git;
+        my $prev = 0;
+        my $smsg;
+        my $ids = $over->ids_after(\$prev);
+        while (@$ids) {
+                for (@$ids) {
+                        $smsg = $over->get_art($_) // next;
+                        $git->cat_async($smsg->{blob}, \&oid2eml, $self);
+                }
+                $ids = $over->ids_after(\$prev);
+        }
+        $git->cat_async_wait;
+}
+
+sub input_path_url {
+        my ($self, $input, @args) = @_;
+        my $lei = $self->{lei};
+        my $ifmt = lc($lei->{opt}->{'in-format'} // '');
+        # TODO auto-detect?
+        if ($input =~ m!\Aimaps?://!i) {
+                $lei->{net}->imap_each($input, $self->can('input_net_cb'),
+                                                $self, @args);
+                return;
+        } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
+                $lei->{net}->nntp_each($input, $self->can('input_net_cb'),
+                                                $self, @args);
+                return;
+        } elsif ($input =~ m!\Ahttps?://!i) {
+                handle_http_input($self, $input, @args);
+                return;
+        }
+
+        # local-only below
+        my $ifmt_pfx = '';
+        if ($input =~ s!\A([a-z0-9]+):!!i) {
+                $ifmt_pfx = "$1:";
+                $ifmt = lc($1);
+        } elsif ($input =~ /\.(?:patch|eml)\z/i) {
+                $ifmt = 'eml';
+        } elsif ($input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z} && -f $input) {
+                my $bn = $1;
+                my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
+                return if index($fl, 'T') >= 0;
+                return $self->pmdir_cb($input, $fl) if $self->can('pmdir_cb');
+                my $eml = eml_from_path($input) or return
+                        $lei->qerr("# $input not readable");
+                my $kw = PublicInbox::MdirReader::flags2kw($fl);
+                $self->can('input_maildir_cb')->($input, $kw, $eml, $self);
+                return;
+        }
+        my $devfd = $lei->path_to_fd($input) // return;
+        if ($devfd >= 0) {
+                $self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
+        } elsif ($devfd < 0 && $input =~ m{\A(.+/)([0-9]+)\z} && -f $input) {
+                my ($dn, $n) = ($1, $2);
+                my $mhr = PublicInbox::MHreader->new($dn, $lei->{3});
+                $mhr->mh_read_one($n, $self->can('input_mh_cb'), $self);
+        } elsif (-f $input && $ifmt eq 'eml') {
+                open my $fh, '<', $input or
+                                        return $lei->fail("open($input): $!");
+                $self->input_fh($ifmt, $fh, $input, @args);
+        } elsif (-f _) {
+                my $m = $lei->{opt}->{'lock'} //
+                        PublicInbox::MboxLock->defaults;
+                my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
+                my $zsfx = PublicInbox::MboxReader::zsfx($input);
+                if ($zsfx) {
+                        my $in = delete $mbl->{fh};
+                        $mbl->{fh} =
+                             PublicInbox::MboxReader::zsfxcat($in, $zsfx, $lei);
+                }
+                local $PublicInbox::DS::in_loop = 0 if $zsfx; # awaitpid
+                $self->input_fh($ifmt, $mbl->{fh}, $input, @args);
+        } elsif (-d _ && $ifmt eq 'maildir') {
+                my $mdr = PublicInbox::MdirReader->new;
+                if (my $pmd = $self->{pmd}) {
+                        $mdr->maildir_each_file($input,
+                                                $pmd->can('each_mdir_fn'),
+                                                $pmd, @args);
+                } else {
+                        $mdr->maildir_each_eml($input,
+                                                $self->can('input_maildir_cb'),
+                                                $self, @args);
+                }
+        } elsif (-d _ && $ifmt eq 'mh') {
+                my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
+                $mhr->{sort} = $lei->{opt}->{sort} // [ 'sequence'];
+                $mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
+        } elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
+                my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
+                each_ibx_eml($self, $ibx, @args);
+        } elsif (-d _ && $ifmt eq 'extindex') {
+                my $esrch = PublicInbox::ExtSearch->new($input);
+                each_ibx_eml($self, $esrch, @args);
+        } elsif ($self->{missing_ok} && !-e $input) { # don't ->fail
+                if ($lei->{cmd} eq 'p2q') {
+                        my $fp = [ qw(git format-patch --stdout -1), $input ];
+                        my $rdr = { 2 => $lei->{2} };
+                        my $fh = popen_rd($fp, undef, $rdr);
+                        eval { $self->input_fh('eml', $fh, $input, @args) };
+                        my $err = $@ ? ": $@" : '';
+                        $lei->child_error($?, "@$fp failed$err") if $err || $?;
+                } else {
+                        $self->folder_missing("$ifmt:$input");
+                }
+        } else {
+                $lei->fail("$ifmt_pfx$input unsupported (TODO)");
+        }
+}
+
+# subclasses should overrride this (see LeiRefreshMailSync)
+sub folder_missing { die "BUG: ->folder_missing undefined for $_[0]" }
+
+sub bad_http ($$;$) {
+        my ($lei, $url, $alt) = @_;
+        my $x = $alt ? "did you mean <$alt>?" : 'download and import manually';
+        $lei->fail("E: <$url> not recognized, $x");
+}
+
+sub prepare_http_input ($$$) {
+        my ($self, $lei, $url) = @_;
+        require URI;
+        require PublicInbox::MboxReader;
+        require PublicInbox::LeiCurl;
+        require IO::Uncompress::Gunzip;
+        $self->{curl} //= which('curl') or
+                                return $lei->fail("curl missing for <$url>");
+        my $uri = URI->new($url);
+        my $path = $uri->path;
+        my %qf = $uri->query_form;
+        my @curl_opt;
+        if ($path =~ m!/(?:t\.mbox\.gz|all\.mbox\.gz)\z!) {
+                # OK
+        } elsif ($path =~ m!/raw\z!) {
+                push @curl_opt, '--compressed';
+        # convert search query to mboxrd request since they require POST
+        # this is only intended for PublicInbox::WWW, and will false-positive
+        # on many other search engines... oh well
+        } elsif (defined $qf{'q'}) {
+                $qf{x} = 'm';
+                $uri->query_form(\%qf);
+                push @curl_opt, '-d', '';
+                $$uri ne $url and $lei->qerr(<<"");
+# <$url> rewritten to <$$uri> with HTTP POST
+
+        # try to provide hints for /$INBOX/$MSGID/T/ and /$INBOX/
+        } elsif ($path =~ s!/[tT]/\z!/t.mbox.gz! ||
+                        $path =~ s!/t\.atom\z!/t.mbox.gz! ||
+                        $path =~ s!/([^/]+\@[^/]+)/\z!/$1/raw!) {
+                $uri->path($path);
+                return bad_http($lei, $url, $$uri);
+        } else {
+                return bad_http($lei, $url);
+        }
+        $self->{"-curl-$url"} = [ @curl_opt, $uri ]; # for handle_http_input
+}
+
+sub add_dir ($$$$) {
+        my ($lei, $istate, $ifmt, $input) = @_;
+        if ($istate->{-may_sync}) {
+                $$input = "$ifmt:".$lei->abs_path($$input);
+                push @{$istate->{-sync}->{ok}}, $$input if $istate->{-sync};
+        } else {
+                substr($$input, 0, 0) = "$ifmt:"; # prefix
+        }
+        push @{$istate->{$ifmt}}, $$input;
+}
+
+sub prepare_inputs { # returns undef on error
+        my ($self, $lei, $inputs) = @_;
+        my $in_fmt = $lei->{opt}->{'in-format'};
+        my $sync = $lei->{opt}->{'mail-sync'} ? {} : undef; # using LeiMailSync
+        my $may_sync = $sync || $self->{-mail_sync};
+        if ($lei->{opt}->{stdin}) {
+                @$inputs and return
+                        $lei->fail("--stdin and @$inputs do not mix");
+                check_input_format($lei) or return;
+                push @$inputs, '/dev/stdin';
+                push @{$sync->{no}}, '/dev/stdin' if $sync;
+        }
+        my $net = $lei->{net}; # NetWriter may be created by l2m
+        my @f;
+        my $istate = { -sync => $sync, -may_sync => $may_sync };
+        # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
+        for my $input (@$inputs) {
+                my $input_path = $input;
+                if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
+                        require PublicInbox::NetReader;
+                        $net //= PublicInbox::NetReader->new;
+                        $net->add_url($input, $self->{-ls_ok});
+                        push @{$sync->{ok}}, $input if $sync;
+                } elsif ($input_path =~ m!\Ahttps?://!i) { # mboxrd.gz
+                        # TODO: how would we detect r/w JMAP?
+                        push @{$sync->{no}}, $input if $sync;
+                        prepare_http_input($self, $lei, $input_path) or return;
+                } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
+                        my $ifmt = lc $1;
+                        if (($in_fmt // $ifmt) ne $ifmt) {
+                                return $lei->fail(<<"");
+--in-format=$in_fmt and `$ifmt:' conflict
+
+                        }
+                        ($sync && $ifmt !~ /\A(?:maildir|mh)\z/i) and
+                                push(@{$sync->{no}}, $input);
+                        my $devfd = $lei->path_to_fd($input_path) // return;
+                        if ($devfd >= 0 || (-f $input_path || -p _)) {
+                                require PublicInbox::MboxLock;
+                                require PublicInbox::MboxReader;
+                                PublicInbox::MboxReader->reads($ifmt) or return
+                                        $lei->fail("$ifmt not supported");
+                        } elsif (-d $input_path) { # TODO extindex
+                                $ifmt =~ /\A(?:maildir|mh|v1|v2|extindex)\z/ or
+                                        return$lei->fail("$ifmt not supported");
+                                $input = $input_path;
+                                add_dir $lei, $istate, $ifmt, \$input;
+                        } elsif ($self->{missing_ok} &&
+                                        $ifmt =~ /\A(?:maildir|mh)\z/ &&
+                                        !-e $input_path) {
+                                # for "lei rm-watch" on missing Maildir
+                                $may_sync and $input = "$ifmt:".
+                                                $lei->abs_path($input_path);
+                        } else {
+                                my $m = "Unable to handle $input";
+                                $input =~ /\A(?:L|kw):/ and
+                                        $m .= ", did you mean +$input?";
+                                return $lei->fail($m);
+                        }
+                } elsif ($input =~ /\.(?:eml|patch)\z/i && -f $input) {
+                        lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+                        push @{$sync->{no}}, $input if $sync;
+                } elsif ($input =~ m{\A(.+)/(new|cur)/([^/]+)\z} && -f $input) {
+                        # single file in a Maildir
+                        my ($mdir, $nc, $bn) = ($1, $2, $3);
+                        my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
+                        return $lei->fail(<<EOM) if !-d $other;
+No `$other' directory for `$input'
+EOM
+                        lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+                        if ($sync) {
+                                $input = $lei->abs_path($mdir) . "/$nc/$bn";
+                                push @{$sync->{ok}}, $input;
+                        }
+                        require PublicInbox::MdirReader;
+                } else {
+                        my $devfd = $lei->path_to_fd($input) // return;
+                        if ($devfd < 0 && $input =~ m{\A(.+)/([0-9]+)\z} &&
+                                        -f $input) { # single file in MH dir
+                                my ($mh, $n) = ($1, $2);
+                                lc($in_fmt//'eml') eq 'eml' or
+                                                return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+                                if ($sync) {
+                                        $input = $lei->abs_path($mh)."/$n";
+                                        push @{$sync->{ok}}, $input;
+                                }
+                                require PublicInbox::MHreader;
+                        } elsif ($devfd >= 0 || -f $input || -p _) {
+                                push @{$sync->{no}}, $input if $sync;
+                                push @f, $input;
+                        } elsif (-d "$input/new" && -d "$input/cur") {
+                                add_dir $lei, $istate, 'maildir', \$input;
+                        } elsif (-e "$input/inbox.lock") {
+                                add_dir $lei, $istate, 'v2', \$input;
+                        } elsif (-e "$input/ssoma.lock") {
+                                add_dir $lei, $istate, 'v1', \$input;
+                        } elsif (-e "$input/ei.lock") {
+                                add_dir $lei, $istate, 'extindex', \$input;
+                        } elsif (-f "$input/.mh_sequences") {
+                                add_dir $lei, $istate, 'mh', \$input;
+                        } elsif ($self->{missing_ok} && !-e $input) {
+                                if ($lei->{cmd} eq 'p2q') {
+                                        # will run "git format-patch"
+                                } elsif ($may_sync) { # for lei rm-watch
+                                        # FIXME: support MH, here
+                                        $input = 'maildir:'.
+                                                $lei->abs_path($input);
+                                }
+                        } else {
+                                return $lei->fail("Unable to handle $input")
+                        }
+                }
+        }
+        if (@f) { check_input_format($lei, \@f) or return }
+        if ($sync && $sync->{no}) {
+                return $lei->fail(<<"") if !$sync->{ok};
+--mail-sync specified but no inputs support it
+
+                # non-fatal if some inputs support support sync
+                warn("# --mail-sync will only be used for @{$sync->{ok}}\n");
+                warn("# --mail-sync is not supported for: @{$sync->{no}}\n");
+        }
+        if ($net) {
+                $net->{-can_die} = 1;
+                if (my $err = $net->errors($lei)) {
+                        return $lei->fail($err);
+                }
+                $net->{quiet} = $lei->{opt}->{quiet};
+                require PublicInbox::LeiAuth;
+                $lei->{auth} //= PublicInbox::LeiAuth->new;
+                $lei->{net} //= $net;
+        }
+        if (my $md = $istate->{maildir}) {
+                require PublicInbox::MdirReader;
+                if ($self->can('pmdir_cb')) {
+                        require PublicInbox::LeiPmdir;
+                        $self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self);
+                }
+                grep(!m!\Amaildir:/!i, @$md) and die "BUG: @$md (no pfx)";
+
+                # start watching Maildirs ASAP
+                if ($may_sync && $lei->{sto}) {
+                        $lei->lms(1)->lms_write_prepare->add_folders(@$md);
+                        $lei->refresh_watches;
+                }
+        }
+        if (my $mh = $istate->{mh}) {
+                require PublicInbox::MHreader;
+                grep(!m!\Amh:!i, @$mh) and die "BUG: @$mh (no pfx)";
+                if ($may_sync && $lei->{sto}) {
+                        $lei->lms(1)->lms_write_prepare->add_folders(@$mh);
+                        # $lei->refresh_watches; TODO
+                }
+        }
+        require PublicInbox::ExtSearch if $istate->{extindex};
+        $self->{inputs} = $inputs;
+}
+
+sub process_inputs {
+        my ($self) = @_;
+        my $err;
+        for my $input (@{$self->{inputs}}) {
+                eval { $self->input_path_url($input) };
+                next unless $@;
+                $err = "$input: $@";
+                last;
+        }
+        # always commit first, even on error partial work is acceptable for
+        # lei <import|tag|convert>
+        $self->{lei}->sto_barrier_request;
+        $self->{lei}->fail($err) if $err;
+}
+
+sub input_only_atfork_child {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child;
+        PublicInbox::IPC::ipc_atfork_child($self);
+        $lei->{auth}->do_auth_atfork($self) if $lei->{auth};
+        undef;
+}
+
+# alias this as "net_merge_all_done" to use as an LeiAuth callback
+sub input_only_net_merge_all_done {
+        my ($self) = @_;
+        $self->wq_io_do('process_inputs');
+        $self->wq_close;
+}
+
+# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
+# for update_xvmd -> update_vmd
+# returns something like { "+L" => [ @Labels ], ... }
+sub vmd_mod_extract {
+        my ($lei, $argv) = @_;
+        my (@new_argv, @err);
+        for my $x (@$argv) {
+                if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
+                        my ($op, $pfx, $val) = ($1, $2, $3);
+                        if (my $err = $ERR{$pfx}->($val)) {
+                                push @err, $err;
+                        } else { # set "+kw", "+L", "-L", "-kw"
+                                push @{$lei->{vmd_mod}->{$op.$pfx}}, $val;
+                        }
+                } else {
+                        push @new_argv, $x;
+                }
+        }
+        @$argv = @new_argv;
+        @err;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
new file mode 100644
index 00000000..576ab2c7
--- /dev/null
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -0,0 +1,293 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei inspect" general purpose inspector for stuff in SQLite and
+# Xapian.  Will eventually be useful with plain public-inboxes,
+# not just lei/store.  This is totally half-baked at the moment
+# but useful for testing.
+package PublicInbox::LeiInspect;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Config;
+use PublicInbox::MID qw(mids);
+use PublicInbox::NetReader qw(imap_uri nntp_uri);
+use PublicInbox::LeiOverview;
+*iso8601 = \&PublicInbox::LeiOverview::iso8601;
+
+sub _json_prep ($) {
+        my ($smsg) = @_;
+        $smsg->{$_} += 0 for qw(bytes lines); # integerize
+        $smsg->{dt} = iso8601($smsg->{ds}) if defined($smsg->{ds});
+        $smsg->{rt} = iso8601($smsg->{ts}) if defined($smsg->{ts});
+        +{ %$smsg } # unbless and scalarize
+}
+
+sub inspect_blob ($$) {
+        my ($lei, $oidhex) = @_;
+        my $ent = {};
+        if (my $lse = $lei->{lse}) {
+                my $oidbin = pack('H*', $oidhex);
+                my @docids = $lse ? $lse->over->oidbin_exists($oidbin) : ();
+                $ent->{'lei/store'} = \@docids if @docids;
+                my $lms = $lei->lms;
+                if (my $loc = $lms ? $lms->locations_for($oidbin) : undef) {
+                        $ent->{'mail-sync'} = $loc;
+                }
+        }
+        $ent;
+}
+
+sub inspect_imap_uid ($$) {
+        my ($lei, $uid_uri) = @_;
+        my $ent = {};
+        my $lms = $lei->lms or return $ent;
+        my @oidhex = $lms->imap_oidhex($lei, $uid_uri);
+        $ent->{$$uid_uri} = @oidhex == 1 ? $oidhex[0] :
+                        ((@oidhex == 0) ? undef : \@oidhex);
+        $ent;
+}
+
+sub inspect_nntp_range {
+        my ($lei, $uri) = @_;
+        my ($ng, $beg, $end) = $uri->group;
+        $uri = $uri->clone;
+        $uri->group($ng);
+        my $ent = {};
+        my $ret = { "$uri" => $ent };
+        my $lms = $lei->lms or return $ret;
+        my $folders = [ $$uri ];
+        eval { $lms->arg2folder($lei, $folders) };
+        $lei->qerr("# no folders match $$uri (non-fatal)") if $@;
+        $end //= $beg;
+        for my $art ($beg..$end) {
+                my @oidhex = map { unpack('H*', $_) }
+                        $lms->num_oidbin($folders->[0], $art);
+                $ent->{$art} = @oidhex == 1 ? $oidhex[0] :
+                                ((@oidhex == 0) ? undef : \@oidhex);
+        }
+        $ret;
+}
+
+sub inspect_sync_folder ($$) {
+        my ($lei, $folder) = @_;
+        my $ent = {};
+        my $lms = $lei->lms or return $ent;
+        my $folders = [ $folder ];
+        eval { $lms->arg2folder($lei, $folders) };
+        $lei->qerr("# no folders match $folder (non-fatal)") if $@;
+        for my $f (@$folders) {
+                $ent->{$f} = $lms->location_stats($f); # may be undef
+        }
+        $ent
+}
+
+sub _inspect_doc ($$) {
+        my ($ent, $doc) = @_;
+        my $data = $doc->get_data;
+        $ent->{data_length} = length($data);
+        $ent->{description} = $doc->get_description;
+        $ent->{$_} = $doc->$_ for (qw(termlist_count values_count));
+        my $cur = $doc->termlist_begin;
+        my $end = $doc->termlist_end;
+        for (; $cur != $end; $cur++) {
+                my $tn = $cur->get_termname;
+                $tn =~ s/\A([A-Z]+)// or warn "$tn no prefix! (???)";
+                my $term = ($1 // '');
+                push @{$ent->{terms}->{$term}}, $tn;
+        }
+        $cur = $doc->values_begin;
+        $end = $doc->values_end;
+        for (; $cur != $end; $cur++) {
+                my $n = $cur->get_valueno;
+                my $v = $cur->get_value;
+                my $iv = PublicInbox::Search::sortable_unserialise($v);
+                $v = $iv + 0 if defined $iv;
+                # not using ->[$n] since we may have large gaps in $n
+                $ent->{'values'}->{$n} = $v;
+        }
+        $ent;
+}
+
+sub inspect_docid ($$;$) {
+        my ($lei, $docid, $ent) = @_;
+        require PublicInbox::Search;
+        $ent //= {};
+        my $xdb;
+        if ($xdb = delete $ent->{xdb}) { # from inspect_num
+        } elsif (defined(my $dir = $lei->{opt}->{dir})) {
+                no warnings 'once';
+                $xdb = $PublicInbox::Search::X{Database}->new($dir);
+        } elsif ($lei->{lse}) {
+                $xdb = $lei->{lse}->xdb;
+        }
+        $xdb or return $lei->fail('no Xapian DB');
+        my $doc = $xdb->get_document($docid); # raises
+        $ent->{docid} = $docid;
+        _inspect_doc($ent, $doc);
+}
+
+sub dir2ibx ($$) {
+        my ($lei, $dir) = @_;
+        if (-f "$dir/ei.lock") {
+                require PublicInbox::ExtSearch;
+                PublicInbox::ExtSearch->new($dir);
+        } elsif (-f "$dir/inbox.lock" || -d "$dir/public-inbox") {
+                require PublicInbox::Inbox; # v2, v1
+                bless { inboxdir => $dir }, 'PublicInbox::Inbox';
+        } else {
+                $lei->fail("no (indexed) inbox or extindex at $dir");
+        }
+}
+
+sub inspect_num ($$) {
+        my ($lei, $num) = @_;
+        my ($docid, $ibx);
+        my $ent = { num => $num };
+        if (defined(my $dir = $lei->{opt}->{dir})) {
+                $ibx = dir2ibx($lei, $dir) or return;
+                if (my $srch = $ibx->search) {
+                        $ent->{xdb} = $srch->xdb and
+                                $docid = $srch->num2docid($num);
+                }
+        } elsif ($lei->{lse}) {
+                $ibx = $lei->{lse};
+                $lei->{lse}->xdb; # set {nshard} for num2docid
+                $docid = $lei->{lse}->num2docid($num);
+        }
+        if ($ibx && $ibx->over) {
+                my $smsg = $ibx->over->get_art($num);
+                $ent->{smsg} = _json_prep($smsg) if $smsg;
+        }
+        defined($docid) ? inspect_docid($lei, $docid, $ent) : $ent;
+}
+
+sub inspect_mid ($$) {
+        my ($lei, $mid) = @_;
+        my $ibx;
+        my $ent = { mid => $mid };
+        if (defined(my $dir = $lei->{opt}->{dir})) {
+                $ibx = dir2ibx($lei, $dir)
+        } else {
+                $ibx = $lei->{lse};
+        }
+        if ($ibx && $ibx->over) {
+                my ($id, $prev);
+                while (my $smsg = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
+                        push @{$ent->{smsg}}, _json_prep($smsg);
+                }
+        }
+        if ($ibx && $ibx->search) {
+                my $mset = $ibx->search->mset(qq{mid:"$mid"});
+                for (sort { $a->get_docid <=> $b->get_docid } $mset->items) {
+                        my $tmp = { docid => $_->get_docid };
+                        _inspect_doc($tmp, $_->get_document);
+                        push @{$ent->{xdoc}}, $tmp;
+                }
+        }
+        $ent;
+}
+
+sub inspect1 ($$$) {
+        my ($lei, $item, $more) = @_;
+        my $ent;
+        if ($item =~ /\Ablob:(.+)/) {
+                $ent = inspect_blob($lei, $1);
+        } elsif ($item =~ m!\A(?:maildir|mh):!i || -d $item) {
+                $ent = inspect_sync_folder($lei, $item);
+        } elsif ($item =~ m!\Adocid:([0-9]+)\z!) {
+                $ent = inspect_docid($lei, $1 + 0);
+        } elsif ($item =~ m!\Anum:([0-9]+)\z!) {
+                $ent = inspect_num($lei, $1 + 0);
+        } elsif ($item =~ m!\A(?:mid|m):(.+)\z!) {
+                $ent = inspect_mid($lei, $1);
+        } elsif (my $iuri = imap_uri($item)) {
+                if (defined($iuri->uid)) {
+                        $ent = inspect_imap_uid($lei, $iuri);
+                } else {
+                        $ent = inspect_sync_folder($lei, $item);
+                }
+        } elsif (my $nuri = nntp_uri($item)) {
+                if (defined(my $mid = $nuri->message)) {
+                        $ent = inspect_mid($lei, $mid);
+                } else {
+                        my ($group, $beg, $end) = $nuri->group;
+                        if (defined($beg)) {
+                                $ent = inspect_nntp_range($lei, $nuri);
+                        } else {
+                                $ent = inspect_sync_folder($lei, $item);
+                        }
+                }
+        } else { # TODO: more things
+                return $lei->fail("$item not understood");
+        }
+        $lei->out($lei->{json}->encode($ent));
+        $lei->out(',') if $more;
+        1;
+}
+
+sub inspect_argv { # via wq_do
+        my ($self) = @_;
+        my ($lei, $argv) = delete @$self{qw(lei argv)};
+        my $multi = scalar(@$argv) > 1;
+        $lei->{1}->autoflush(0);
+        $lei->out('[') if $multi;
+        while (defined(my $x = shift @$argv)) {
+                eval { inspect1($lei, $x, scalar(@$argv)) or return };
+                warn "E: $@\n" if $@;
+        }
+        $lei->out(']') if $multi;
+}
+
+sub inspect_start ($$) {
+        my ($lei, $argv) = @_;
+        my $self = bless { lei => $lei, argv => $argv }, __PACKAGE__;
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $lei->wait_wq_events($op_c, $ops);
+        $self->wq_do('inspect_argv');
+        $self->wq_close;
+}
+
+sub do_inspect { # lei->do_env cb
+        my ($lei) = @_;
+        my $str = delete $lei->{stdin_buf};
+        PublicInbox::Eml::strip_from($str);
+        my $eml = PublicInbox::Eml->new(\$str);
+        inspect_start($lei, [ 'blob:'.$lei->git_oid($eml)->hexdigest,
+                        map { "mid:$_" } @{mids($eml)} ]);
+}
+
+sub lei_inspect {
+        my ($lei, @argv) = @_;
+        $lei->{json} = ref(PublicInbox::Config::json())->new->utf8->canonical;
+        $lei->{lse} = ($lei->{opt}->{external} // 1) ? do {
+                my $sto = $lei->_lei_store;
+                $sto ? $sto->search : undef;
+        } : undef;
+        my $isatty = -t $lei->{1};
+        $lei->{json}->pretty(1)->indent(2) if $lei->{opt}->{pretty} || $isatty;
+        $lei->start_pager if $isatty;
+        if ($lei->{opt}->{stdin}) {
+                return $lei->fail(<<'') if @argv;
+no args allowed on command-line with --stdin
+
+                $lei->slurp_stdin(\&do_inspect);
+        } else {
+                inspect_start($lei, \@argv);
+        }
+}
+
+sub _complete_inspect {
+        require PublicInbox::LeiRefreshMailSync;
+        PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync(@_);
+        # TODO: message-ids?, blobs? could get expensive...
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child;
+        $self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
new file mode 100644
index 00000000..274a9605
--- /dev/null
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -0,0 +1,160 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# lcat: local cat, display a local message by Message-ID or blob,
+# extracting from URL necessary
+# "lei lcat <URL|SPEC>"
+package PublicInbox::LeiLcat;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiViewText;
+use URI::Escape qw(uri_unescape);
+use PublicInbox::MID qw($MID_EXTRACT);
+
+sub lcat_folder ($$;$$) {
+        my ($lei, $folder, $beg, $end) = @_;
+        my $lms = $lei->{-lms_rw} //= $lei->lms // return;
+        my $folders = [ $folder ];
+        eval { $lms->arg2folder($lei, $folders) };
+        return $lei->child_error(0, "# unknown folder: $folder") if $@;
+        my %range;
+        if (defined($beg)) { # NNTP article range
+                $range{min} = $beg;
+                $range{max} = $end // $beg;
+        }
+        for my $f (@$folders) {
+                my $fid = $lms->fid_for($f);
+                push @{$lei->{lcat_todo}}, { fid => $fid, %range };
+        }
+}
+
+sub lcat_imap_uri ($$) {
+        my ($lei, $uri) = @_;
+        # cf. LeiXSearch->lcat_dump
+        my $lms = $lei->{-lms_rw} //= $lei->lms // return;
+        if (defined $uri->uid) {
+                push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri);
+        } elsif (defined(my $fid = $lms->fid_for($$uri))) {
+                push @{$lei->{lcat_todo}}, { fid => $fid };
+        } else {
+                lcat_folder($lei, $$uri);
+        }
+}
+
+sub lcat_nntp_uri ($$) {
+        my ($lei, $uri) = @_;
+        my $mid = $uri->message; # already unescaped by URI::news
+        return "mid:$mid" if defined($mid);
+        my $lms = $lei->{-lms_rw} //= $lei->lms // return;
+        my ($ng, $beg, $end) = $uri->group;
+        $uri->group($ng);
+        lcat_folder($lei, $$uri, $beg, $end);
+        '""';
+}
+
+sub extract_1 ($$) {
+        my ($lei, $x) = @_;
+        if ($x =~ m!\b(maildir:.+)!i) {
+                lcat_folder($lei, $1);
+                '""'; # blank query, using {lcat_todo}
+        } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) {
+                my ($u, $scheme) = ($1, $2);
+                $u =~ s/[\>\]\)\,\.\;]+\z//;
+                if ($scheme =~ m!\A(imaps?)\z!i) {
+                        require PublicInbox::URIimap;
+                        lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
+                        return '""'; # blank query, using {lcat_todo}
+                } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) {
+                        require PublicInbox::URInntps;
+                        $u = PublicInbox::URInntps->new($u);
+                        return lcat_nntp_uri($lei, $u);
+                } # http, or something else:
+                require URI;
+                $u = URI->new($u);
+                my $p = $u->path;
+                my $term;
+                if ($p =~ m!([^/]+\@[^/]+)!) { # common msgid pattern
+                        $term = 'mid:'.uri_unescape($1);
+
+                        # is it a URL which returns the full thread?
+                        if ($u->scheme =~ /\Ahttps?/i &&
+                                $p =~ m!/(?:T/?|t/?|t\.mbox\.gz|t\.atom)\b!) {
+
+                                $lei->{mset_opt}->{threads} = 1;
+                        }
+                } elsif ($u->scheme =~ /\Ahttps?/i &&
+                                # some msgids don't have '@', see if it looks like
+                                # a public-inbox URL:
+                                $p =~ m!/([^/]+)/(raw|t/?|T/?|
+                                        t\.mbox\.gz|t\.atom)\z!x) {
+                        $lei->{mset_opt}->{threads} = 1 if $2 && $2 ne 'raw';
+                        $term = 'mid:'.uri_unescape($1);
+                }
+                $term;
+        } elsif ($x =~ $MID_EXTRACT) { # <$MSGID>
+                "mid:$1";
+        } elsif ($x =~ /\b((?:m|mid):\S+)/) { # our own prefixes (and mairix)
+                $1;
+        } elsif ($x =~ /\bid:(\S+)/) { # notmuch convention
+                "mid:$1";
+        } elsif ($x =~ /\bblob:([0-9a-f]{7,})\b/) {
+                push @{$lei->{lcat_todo}}, $1; # cf. LeiToMail->wq_atexit_child
+                '""'; # blank query
+        } else {
+                undef;
+        }
+}
+
+sub extract_all {
+        my ($lei, @argv) = @_;
+        my $strict = !$lei->{opt}->{stdin};
+        my @q;
+        for my $x (@argv) {
+                if (my $term = extract_1($lei, $x)) {
+                        push @q, $term;
+                } elsif ($strict) {
+                        return $lei->fail(<<"");
+could not extract Message-ID from $x
+
+                }
+        }
+        delete $lei->{-lms_rw};
+        @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
+}
+
+sub do_lcat { # lei->do_env cb
+        my ($lei) = @_;
+        my @argv = split(/\s+/, delete($lei->{stdin_buf}));
+        $lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return;
+        $lei->_start_query;
+}
+
+sub lei_lcat {
+        my ($lei, @argv) = @_;
+        my $lxs = $lei->lxs_prepare or return;
+        $lei->ale->refresh_externals($lxs, $lei);
+        $lei->_lei_store(1);
+        my $opt = $lei->{opt};
+        my %mset_opt;
+        $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
+        $opt->{sort} //= 'relevance';
+        $mset_opt{relevance} = 1;
+        $lei->{mset_opt} = \%mset_opt;
+        $opt->{'format'} //= 'text' unless defined($opt->{output});
+        if ($lei->{opt}->{stdin}) {
+                return $lei->fail(<<'') if @argv;
+no args allowed on command-line with --stdin
+
+                return $lei->slurp_stdin(\&do_lcat);
+        }
+        $lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return;
+        $lei->_start_query;
+}
+
+sub _complete_lcat {
+        require PublicInbox::LeiRefreshMailSync;
+        PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync(@_);
+        # TODO: message-ids?, blobs? could get expensive...
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLsExternal.pm b/lib/PublicInbox/LeiLsExternal.pm
new file mode 100644
index 00000000..2cdd0c4d
--- /dev/null
+++ b/lib/PublicInbox/LeiLsExternal.pm
@@ -0,0 +1,34 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei ls-external" command
+package PublicInbox::LeiLsExternal;
+use strict;
+use v5.10.1;
+use PublicInbox::Config qw(glob2re);
+
+# TODO: does this need JSON output?
+sub lei_ls_external {
+        my ($lei, $filter) = @_;
+        my $do_glob = !$lei->{opt}->{globoff}; # glob by default
+        my ($OFS, $ORS) = $lei->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+        $filter //= '*';
+        my $re = $do_glob ? glob2re($filter) : undef;
+        $re .= '/?\\z' if defined $re;
+        $re //= index($filter, '/') < 0 ?
+                        qr!/\Q$filter\E/?\z! : # exact basename match
+                        qr/\Q$filter\E/; # grep -F semantics
+        my @ext = $lei->externals_each(my $boost = {});
+        @ext = $lei->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
+                                        : grep(/$re/, @ext);
+        if ($lei->{opt}->{'local'} && !$lei->{opt}->{remote}) {
+                @ext = grep(!m!\A[a-z\+]+://!, @ext);
+        } elsif ($lei->{opt}->{remote} && !$lei->{opt}->{'local'}) {
+                @ext = grep(m!\A[a-z\+]+://!, @ext);
+        }
+        for my $loc (@ext) {
+                $lei->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLsLabel.pm b/lib/PublicInbox/LeiLsLabel.pm
new file mode 100644
index 00000000..474224d4
--- /dev/null
+++ b/lib/PublicInbox/LeiLsLabel.pm
@@ -0,0 +1,17 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei ls-label" command
+package PublicInbox::LeiLsLabel;
+use strict;
+use v5.10.1;
+
+sub lei_ls_label { # the "lei ls-label" method
+        my ($lei, @argv) = @_;
+        # TODO: document stats/counts (expensive)
+        my @L = eval { $lei->_lei_store->search->all_terms('L') };
+        my $ORS = $lei->{opt}->{z} ? "\0" : "\n";
+        $lei->out(map { $_.$ORS } @L);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLsMailSource.pm b/lib/PublicInbox/LeiLsMailSource.pm
new file mode 100644
index 00000000..ab6c1e60
--- /dev/null
+++ b/lib/PublicInbox/LeiLsMailSource.pm
@@ -0,0 +1,126 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# command for listing NNTP groups and IMAP folders,
+# handy for users with git-credential-helper configured
+# TODO: list JMAP labels
+package PublicInbox::LeiLsMailSource;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+
+sub input_path_url { # overrides LeiInput version
+        my ($self, $url) = @_;
+        # TODO: support ndjson and other JSONs we support elsewhere
+        my $lei = $self->{lei};
+        my $json = $lei->{json};
+        my $ORS = $self->{lei}->{opt}->{z} ? "\0" : "\n";
+        my @f;
+        if ($url =~ m!\Aimaps?://!i) {
+                my $uri = PublicInbox::URIimap->new($url);
+                my $sec = $lei->{net}->can('uri_section')->($uri);
+                my $mic = $lei->{net}->mic_get($uri) or
+                        return $lei->err("E: $uri");
+                my $l = $mic->folders_hash($uri->path); # server-side filter
+                @$l = map { $_->[2] } # undo Schwartzian transform below:
+                        sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
+                        map { # prepare to sort -imapd slices numerically
+                                $_->{name} =~ /\A(.+?)\.([0-9]+)\z/ ?
+                                [ $1, $2 + 0, $_ ] : [ $_->{name}, -1, $_ ];
+                        } @$l;
+                @f = map { "$sec/$_->{name}" } @$l;
+                if ($json) {
+                        $_->{url} = "$sec/$_->{name}" for @$l;
+                        $lei->puts($json->encode($l));
+                } else {
+                        if ($self->{lei}->{opt}->{url}) {
+                                $_->{name} = "$sec/$_->{name}" for @$l;
+                        }
+                        $lei->out(join($ORS, (map { $_->{name} } @$l), ''));
+                }
+        } elsif ($url =~ m!\A(?:nntps?|s?news)://!i) {
+                my $uri = PublicInbox::URInntps->new($url);
+                my $nn = $lei->{net}->nn_get($uri) or
+                        return $lei->err("E: $uri");
+                # $l = name => description
+                my $l = $nn->newsgroups($uri->group) // return $lei->err(<<EOM);
+E: $uri LIST NEWSGROUPS: ${\($lei->{net}->ndump($nn->message))}
+E: login may be required, try adding `-c nntp.debug' to your command
+EOM
+                my $sec = $lei->{net}->can('uri_section')->($uri);
+                if ($json) {
+                        my $all = $nn->list;
+                        my @x;
+                        for my $ng (sort keys %$l) {
+                                my $desc = $l->{$ng};
+
+# we need to drop CR ourselves iff using IO::Socket::SSL since
+# Net::Cmd::getline doesn't get used by Net::NNTP if TLS is in play, noted in:
+# <https://rt.cpan.org/Ticket/Display.html?id=129966>
+                                $desc =~ s/\r\z//;
+
+                                my ($high, $low, $status) = @{$all->{$ng}};
+                                push @x, { name => $ng, url => "$sec/$ng",
+                                        low => $low + 0,
+                                        high => $high + 0, status => $status,
+                                        description => $desc };
+                        }
+                        @f = map { "$sec/$_" } keys %$all;
+                        $lei->puts($json->encode(\@x));
+                } else {
+                        @f = map { "$sec/$_" } keys %$l;
+                        if ($self->{lei}->{opt}->{url}) {
+                                $lei->out(join($ORS, sort(@f), ''));
+                        } else {
+                                $lei->out(join($ORS, sort(keys %$l), ''));
+                        }
+                }
+        } else { die "BUG: $url not supported" }
+        if (@f) {
+                my $fc = $lei->url_folder_cache;
+                my $lk = $fc->lock_for_scope;
+                $fc->dbh->begin_work;
+                my $now = time;
+                $fc->set($_, $now) for @f;
+                $fc->dbh->commit;
+        }
+}
+
+sub lei_ls_mail_source {
+        my ($lei, $url, $pfx) = @_;
+        $url =~ m!\A(?:imaps?|nntps?|s?news)://!i or return
+                $lei->fail('only NNTP and IMAP URLs supported');
+        my $self = bless { pfx => $pfx, -ls_ok => 1 }, __PACKAGE__;
+        $self->{cfg} = $lei->_lei_cfg; # may be undef
+        $self->prepare_inputs($lei, [ $url ]) or return;
+        my $isatty = -t $lei->{1};
+        if ($lei->{opt}->{l}) {
+                my $json = ref(PublicInbox::Config->json)->new->utf8->canonical;
+                $lei->{json} = $json;
+                $json->ascii(1) if $lei->{opt}->{ascii};
+                $json->pretty(1)->indent(2) if $isatty || $lei->{opt}->{pretty};
+        }
+        $lei->start_pager if $isatty;
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self);
+}
+
+sub _complete_ls_mail_source {
+        my ($lei, @argv) = @_;
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        my @k = $lei->url_folder_cache->keys($argv[-1] // undef, 1);
+        my @m = map { $match_cb->($_) } @k;
+        my %f = map { $_ => 1 } (@m ? @m : @k);
+        if (my $lms = $lei->lms) {
+                @k = $lms->folders($argv[-1] // undef, 1);
+                @m = map { $match_cb->($_) } grep(m!\A[a-z]+://!, @k);
+                if (@m) { @f{@m} = @m } else { @f{@k} = @k }
+        }
+        keys %f;
+}
+
+no warnings 'once';
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiLsMailSync.pm b/lib/PublicInbox/LeiLsMailSync.pm
new file mode 100644
index 00000000..1400d488
--- /dev/null
+++ b/lib/PublicInbox/LeiLsMailSync.pm
@@ -0,0 +1,31 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei ls-mail-sync" sub-command
+package PublicInbox::LeiLsMailSync;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiMailSync;
+use PublicInbox::Config qw(glob2re);
+
+sub lei_ls_mail_sync {
+        my ($lei, $filter) = @_;
+        my $lms = $lei->lms or return;
+        my $opt = $lei->{opt};
+        my $re = $opt->{globoff} ? undef : glob2re($filter // '*');
+        $re .= '/?\\z' if defined $re;
+        $re //= index($filter, '/') < 0 ?
+                        qr!/\Q$filter\E/?\z! : # exact basename match
+                        qr/\Q$filter\E/; # grep -F semantics
+        my @f = $lms->folders;
+        @f = $opt->{'invert-match'} ? grep(!/$re/, @f) : grep(/$re/, @f);
+        if ($opt->{'local'} && !$opt->{remote}) {
+                @f = grep(!m!\A[a-z\+]+://!i, @f);
+        } elsif ($opt->{remote} && !$opt->{'local'}) {
+                @f = grep(m!\A[a-z\+]+://!i, @f);
+        }
+        my $ORS = $opt->{z} ? "\0" : "\n";
+        $lei->out(join($ORS, @f, ''));
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLsSearch.pm b/lib/PublicInbox/LeiLsSearch.pm
new file mode 100644
index 00000000..0193e590
--- /dev/null
+++ b/lib/PublicInbox/LeiLsSearch.pm
@@ -0,0 +1,109 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei ls-search" to display results saved via "lei q --save"
+package PublicInbox::LeiLsSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiSavedSearch;
+use parent qw(PublicInbox::IPC);
+
+sub do_ls_search_long {
+        my ($self, $pfx) = @_;
+        # TODO: share common JSON output code with LeiOverview
+        my $json = $self->{json}->new->utf8->canonical;
+        my $lei = $self->{lei};
+        $json->ascii(1) if $lei->{opt}->{ascii};
+        my $fmt = $lei->{opt}->{'format'};
+        $lei->{1}->autoflush(0);
+        my $ORS = "\n";
+        my $pretty = $lei->{opt}->{pretty};
+        my $EOR;  # TODO: compact pretty like "lei q"
+        if ($fmt =~ /\A(concat)?json\z/ && $pretty) {
+                $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
+        }
+        if ($fmt eq 'json') {
+                $lei->out('[');
+                $ORS = ",\n";
+        }
+        my @x = sort(grep(/\A\Q$pfx/, PublicInbox::LeiSavedSearch::list($lei)));
+        while (my $x = shift @x) {
+                $ORS = '' if !scalar(@x);
+                my $lss = PublicInbox::LeiSavedSearch->up($lei, $x) or next;
+                my $cfg = $lss->{-cfg};
+                my $ent = {
+                        q => $cfg->get_all('lei.q'),
+                        output => $cfg->{'lei.q.output'},
+                };
+                for my $k ($lss->ARRAY_FIELDS) {
+                        my $ary = $cfg->get_all("lei.q.$k") // next;
+                        $ent->{$k} = $ary;
+                }
+                for my $k ($lss->BOOL_FIELDS) {
+                        my $val = $cfg->{"lei.q.$k"} // next;
+                        $ent->{$k} = $val;
+                }
+                if (defined $EOR) { # pretty, but compact
+                        $EOR = "\n}" if !scalar(@x);
+                        my $buf = "{\n";
+                        $buf .= join(",\n", map {;
+                                my $f = $_;
+                                if (my $v = $ent->{$f}) {
+                                        $v = $json->encode([$v]);
+                                        qq{  "$f": }.substr($v, 1, -1);
+                                } else {
+                                        ();
+                                }
+                        # key order by importance
+                        } (qw(output q), $lss->ARRAY_FIELDS,
+                                $lss->BOOL_FIELDS) );
+                        $lei->out($buf .= $EOR);
+                } else {
+                        $lei->out($json->encode($ent), $ORS);
+                }
+        }
+        if ($fmt eq 'json') {
+                $lei->out("]\n");
+        } elsif ($fmt eq 'concatjson') {
+                $lei->out("\n");
+        }
+}
+
+sub bg_worker ($$$) {
+        my ($lei, $pfx, $json) = @_;
+        my $self = bless { json => $json }, __PACKAGE__;
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $self->wq_io_do('do_ls_search_long', [], $pfx);
+        $self->wq_close;
+        $lei->wait_wq_events($op_c, $ops);
+}
+
+sub lei_ls_search {
+        my ($lei, $pfx) = @_;
+        my $fmt = $lei->{opt}->{'format'} // '';
+        if ($lei->{opt}->{l}) {
+                $lei->{opt}->{'format'} //= $fmt = 'json';
+        }
+        my $json;
+        my $tty = -t $lei->{1};
+        $lei->start_pager if $tty;
+        if ($fmt =~ /\A(ldjson|ndjson|jsonl|(?:concat)?json)\z/) {
+                $lei->{opt}->{pretty} //= $tty;
+                $json = ref(PublicInbox::Config->json);
+        } elsif ($fmt ne '') {
+                return $lei->fail("unknown format: $fmt");
+        }
+        my $ORS = "\n";
+        if ($lei->{opt}->{z}) {
+                return $lei->fail('-z and --format do not mix') if $json;
+                $ORS = "\0";
+        }
+        $pfx //= '';
+        return bg_worker($lei, $pfx, $json) if $json;
+        for (sort(grep(/\A\Q$pfx/, PublicInbox::LeiSavedSearch::list($lei)))) {
+                $lei->out($_, $ORS);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/LeiLsWatch.pm b/lib/PublicInbox/LeiLsWatch.pm
new file mode 100644
index 00000000..f96dc4ec
--- /dev/null
+++ b/lib/PublicInbox/LeiLsWatch.pm
@@ -0,0 +1,15 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::LeiLsWatch;
+use strict;
+use v5.10.1;
+
+sub lei_ls_watch {
+        my ($lei) = @_;
+        my $cfg = $lei->_lei_cfg or return;
+        my @w = (join("\n", keys %$cfg) =~ m/^watch\.(.+?)\.state$/sgm);
+        $lei->puts(join("\n", @w)) if @w;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiMailDiff.pm b/lib/PublicInbox/LeiMailDiff.pm
new file mode 100644
index 00000000..af6ecf82
--- /dev/null
+++ b/lib/PublicInbox/LeiMailDiff.pm
@@ -0,0 +1,44 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# The "lei mail-diff" sub-command, diffs input contents against
+# the first message of input
+package PublicInbox::LeiMailDiff;
+use v5.12;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput PublicInbox::MailDiff);
+use PublicInbox::Spawn qw(run_wait);
+require PublicInbox::LeiRediff;
+
+sub diff_a ($$) {
+        my ($self, $eml) = @_;
+        my $dir = "$self->{tmp}/N".(++$self->{nr});
+        $self->dump_eml($dir, $eml);
+        my $cmd = [ qw(git diff --no-index) ];
+        my $lei = $self->{lei};
+        PublicInbox::LeiRediff::_lei_diff_prepare($lei, $cmd);
+        push @$cmd, qw(-- a), "N$self->{nr}";
+        my $rdr = { -C => "$self->{tmp}" };
+        @$rdr{1, 2} = @$lei{1, 2};
+        run_wait($cmd, $lei->{env}, $rdr) and $lei->child_error($?);
+}
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml) = @_;
+        $self->{tmp} ? diff_a($self, $eml) : $self->prep_a($eml);
+}
+
+sub lei_mail_diff {
+        my ($lei, @argv) = @_;
+        my $self = bless {}, __PACKAGE__;
+        $self->prepare_inputs($lei, \@argv) or return;
+        my $isatty = -t $lei->{1};
+        $lei->{opt}->{color} //= $isatty;
+        $lei->start_pager if $isatty;
+        $lei->{-err_type} = 'non-fatal';
+        $self->{-raw_hdr} = $lei->{opt}->{'raw-header'};
+        $lei->wq1_start($self);
+}
+
+no warnings 'once';
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+1;
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
new file mode 100644
index 00000000..c498421c
--- /dev/null
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -0,0 +1,712 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
+package PublicInbox::LeiMailSync;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use PublicInbox::Compat qw(uniqstr);
+use DBI qw(:sql_types); # SQL_BLOB
+use PublicInbox::ContentHash qw(git_sha);
+use Carp ();
+use PublicInbox::Git qw(%HEXLEN2SHA);
+use PublicInbox::IO qw(read_all);
+
+sub dbh_new {
+        my ($self) = @_;
+        my $f = $self->{filename};
+        my $creat = !-s $f;
+        if ($creat) {
+                require PublicInbox::Syscall;
+                open my $fh, '+>>', $f or Carp::croak "open($f): $!";
+                PublicInbox::Syscall::nodatacow_fh($fh);
+        }
+        my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
+                AutoCommit => 1,
+                RaiseError => 1,
+                PrintError => 0,
+                sqlite_use_immediate_transaction => 1,
+        });
+        # no sqlite_unicode, here, all strings are binary
+        create_tables($self, $dbh);
+        $dbh->do('PRAGMA journal_mode = WAL') if $creat;
+        $dbh->do('PRAGMA case_sensitive_like = ON');
+        $dbh;
+}
+
+sub new {
+        my ($cls, $f) = @_;
+        bless {
+                filename => $f,
+                fmap => {},
+                lock_path => "$f.flock",
+        }, $cls;
+}
+
+sub lms_write_prepare { ($_[0]->{dbh} //= dbh_new($_[0])); $_[0] }
+
+sub lms_pause {
+        my ($self) = @_;
+        $self->{fmap} = {};
+        my $dbh = delete $self->{dbh};
+        eval { $dbh->do('PRAGMA optimize') } if $dbh;
+}
+
+sub create_tables {
+        my ($self, $dbh) = @_;
+        my $lk = $self->lock_for_scope;
+
+        $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS folders (
+        fid INTEGER PRIMARY KEY,
+        loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */
+        UNIQUE (loc)
+)
+
+        $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS blob2num (
+        oidbin VARBINARY NOT NULL,
+        fid INTEGER NOT NULL, /* folder ID */
+        uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
+        /* not UNIQUE(fid, uid), since we may have broken servers */
+        UNIQUE (oidbin, fid, uid)
+)
+
+        # speeds up LeiImport->ck_update_kw (for "lei import") by 5-6x:
+        $dbh->do(<<'');
+CREATE INDEX IF NOT EXISTS idx_fid_uid ON blob2num(fid,uid)
+
+        $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS blob2name (
+        oidbin VARBINARY NOT NULL,
+        fid INTEGER NOT NULL, /* folder ID */
+        name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
+        /* not UNIQUE(fid, name), since we may have broken software */
+        UNIQUE (oidbin, fid, name)
+)
+
+        # speeds up LeiImport->pmdir_cb (for "lei import") by ~6x:
+        $dbh->do(<<'');
+CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
+
+}
+
+# used to fixup pre-1.7.0 folders
+sub update_fid ($$$) {
+        my ($dbh, $fid, $loc) = @_;
+        my $sth = $dbh->prepare(<<'');
+UPDATE folders SET loc = ? WHERE fid = ?
+
+        $sth->bind_param(1, $loc, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->execute;
+}
+
+sub get_fid ($$$) {
+        my ($sth, $folder, $dbh) = @_;
+        $sth->bind_param(1, $folder, SQL_BLOB);
+        $sth->execute;
+        my ($fid) = $sth->fetchrow_array;
+        if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
+                my $del = $dbh->prepare_cached(<<'');
+DELETE FROM folders WHERE loc = ? AND fid != ?
+
+                $del->execute($folder, $fid);
+        } else {
+                $sth->bind_param(1, $folder, SQL_VARCHAR);
+                $sth->execute; # fixup old stuff
+                ($fid) = $sth->fetchrow_array;
+                update_fid($dbh, $fid, $folder) if defined($fid);
+        }
+        $fid;
+}
+
+sub fid_for {
+        my ($self, $folder, $creat) = @_;
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+        my $fid = get_fid($sth, $folder, $dbh);
+        return $fid if defined($fid);
+
+        # caller had trailing slash (LeiToMail)
+        if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
+                $fid = get_fid($sth, $folder, $dbh);
+                if (defined $fid) {
+                        update_fid($dbh, $fid, $folder);
+                        return $fid;
+                }
+        # sometimes we stored trailing slash..
+        } elsif ($folder =~ m!\A(?:maildir|mh):!i) {
+                $fid = get_fid($sth, $folder, $dbh);
+                if (defined $fid) {
+                        update_fid($dbh, $fid, $folder);
+                        return $fid;
+                }
+        } elsif ($creat && $folder =~ m!\Aimaps?://!i) {
+                require PublicInbox::URIimap;
+                my $uri = PublicInbox::URIimap->new($folder);
+                $uri->uidvalidity //
+                        Carp::croak("BUG: $folder has no UIDVALIDITY");
+                defined($uri->uid) and Carp::confess("BUG: $folder has UID");
+        }
+
+        return unless $creat;
+        ($fid) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');
+
+        $fid += 1;
+        # in case we're reusing, clobber existing stale refs:
+        $dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
+        $dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);
+
+        $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $folder, SQL_BLOB);
+        $sth->execute;
+
+        $fid;
+}
+
+sub add_folders {
+        my ($self, @folders) = @_;
+        my $lk = $self->lock_for_scope;
+        for my $f (@folders) { $self->{fmap}->{$f} //= fid_for($self, $f, 1) }
+}
+
+sub set_src {
+        my ($self, $oidbin, $folder, $id) = @_;
+        my $lk = $self->lock_for_scope;
+        my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
+        my $dbh = $self->{dbh};
+        my ($sth, @param3, $del_old);
+        if (ref($id)) { # scalar name
+                @param3 = ($$id, SQL_BLOB);
+                $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
+
+                $del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2name WHERE oidbin = ? AND fid = ? AND name = ?
+
+        } else { # numeric ID (IMAP UID, MH number)
+                @param3 = ($id);
+                $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
+
+                $del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2num WHERE oidbin = ? AND fid = ? AND uid = ?
+
+        }
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->bind_param(3, @param3);
+        my $ret = $sth->execute;
+        $del_old->execute($oidbin, $fid, $param3[0]);
+        $ret;
+}
+
+sub clear_src {
+        my ($self, $folder, $id) = @_;
+        my $lk = $self->lock_for_scope;
+        my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
+        my ($sth, @param3);
+        if (ref($id)) { # scalar name
+                @param3 = ($$id, SQL_BLOB);
+                $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM blob2name WHERE fid = ? AND name = ?
+
+        } else {
+                @param3 = ($id);
+                $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM blob2num WHERE fid = ? AND uid = ?
+
+        }
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, @param3);
+        my $ret = $sth->execute;
+
+        # older versions may not have used SQL_BLOB:
+        if (defined($ret) && $ret == 0 && scalar(@param3) == 2) {
+                $sth->bind_param(1, $fid);
+                $sth->bind_param(2, $param3[0]);
+                $ret = $sth->execute;
+        }
+        $ret;
+}
+
+# Maildir-only
+sub mv_src {
+        my ($self, $folder, $oidbin, $id, $newbn) = @_;
+        my $lk = $self->lock_for_scope;
+        my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
+        $self->{dbh}->begin_work;
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?
+
+        # eval since unique constraint may fail due to race
+        $sth->bind_param(1, $newbn, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->bind_param(3, $oidbin, SQL_BLOB);
+        $sth->bind_param(4, $$id, SQL_BLOB);
+        my $nr = eval { $sth->execute };
+        if (!defined($nr) || $nr == 0) { # $nr may be `0E0'
+                # delete from old, pre-SQL_BLOB rows:
+                my $del_old = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM blob2name WHERE fid = ? AND oidbin = ? AND name = ?
+
+                $del_old->execute($fid, $oidbin, $$id); # missing-OK
+                $del_old->execute($fid, $oidbin, $newbn); # ditto
+
+                # may race with a clear_src, ensure new value exists
+                $sth = $self->{dbh}->prepare_cached(<<'');
+INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
+
+                $sth->bind_param(1, $oidbin, SQL_BLOB);
+                $sth->bind_param(2, $fid);
+                $sth->bind_param(3, $newbn, SQL_BLOB);
+                $sth->execute;
+        }
+        $self->{dbh}->commit;
+}
+
+# read-only, iterates every oidbin + UID or name for a given folder
+sub each_src {
+        my ($self, $folder, $cb, @args) = @_;
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        my ($fid, @rng);
+        my $and_ge_le = '';
+        if (ref($folder) eq 'HASH') {
+                $fid = $folder->{fid} // die "BUG: no `fid'";
+                @rng = grep(defined, @$folder{qw(min max)});
+                $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
+        } else {
+                $fid = $self->{fmap}->{$folder} //=
+                        fid_for($self, $folder) // return;
+        }
+
+        # minimize implicit txn time to avoid blocking writers by
+        # batching SELECTs.  This looks wonky but is necessary since
+        # $cb-> may access the DB on its own.
+        my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
+SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
+ORDER BY _rowid_ ASC LIMIT 1000
+
+        my $min = @$ary ? $ary->[-1]->[0] : undef;
+        while (defined $min) {
+                for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }
+
+                $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
+SELECT _rowid_,oidbin,uid FROM blob2num
+WHERE fid = ? $and_ge_le AND _rowid_ > ?
+ORDER BY _rowid_ ASC LIMIT 1000
+
+                $min = @$ary ? $ary->[-1]->[0] : undef;
+        }
+
+        $ary = $dbh->selectall_arrayref(<<'', undef, $fid);
+SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ?
+ORDER BY _rowid_ ASC LIMIT 1000
+
+        $min = @$ary ? $ary->[-1]->[0] : undef;
+        while (defined $min) {
+                for my $row (@$ary) { $cb->($row->[1], \($row->[2]), @args) }
+
+                $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
+SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ? AND _rowid_ > ?
+ORDER BY _rowid_ ASC LIMIT 1000
+
+                $min = @$ary ? $ary->[-1]->[0] : undef;
+        }
+}
+
+sub location_stats {
+        my ($self, $folder) = @_;
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        my $fid;
+        my $ret = {};
+        $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder) // return;
+        my ($row) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT COUNT(name) FROM blob2name WHERE fid = ?
+
+        $ret->{'name.count'} = $row if $row;
+        my $ntype = ($folder =~ m!\A(?:nntps?|s?news)://!i) ? 'article' :
+                (($folder =~ m!\Aimaps?://!i) ? 'uid' : "TODO<$folder>");
+        for my $op (qw(count min max)) {
+                ($row) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT $op(uid) FROM blob2num WHERE fid = ?
+
+                $row or last;
+                $ret->{"$ntype.$op"} = $row;
+        }
+        $ret;
+}
+
+# must be called with lock
+sub _forget_fids ($;@) {
+        my $dbh = shift;
+        $dbh->begin_work;
+        for my $t (qw(blob2name blob2num folders)) {
+                my $sth = $dbh->prepare_cached("DELETE FROM $t WHERE fid = ?");
+                $sth->execute($_) for @_;
+        }
+        $dbh->commit;
+}
+
+# returns a { location => [ list-of-ids-or-names ] } mapping
+sub locations_for {
+        my ($self, $oidbin) = @_;
+        my ($fid, $sth, $id, %fid2id, %seen);
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->execute;
+        while (my ($fid, $uid) = $sth->fetchrow_array) {
+                push @{$fid2id{$fid}}, $uid;
+                $seen{"$uid.$fid"} = 1;
+        }
+
+        # deal with 1.7.0 DBs :<
+        $sth->bind_param(1, $oidbin, SQL_VARCHAR);
+        $sth->execute;
+        while (my ($fid, $uid) = $sth->fetchrow_array) {
+                next if $seen{"$uid.$fid"};
+                push @{$fid2id{$fid}}, $uid;
+        }
+
+        %seen = ();
+        $sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->execute;
+        while (my ($fid, $name) = $sth->fetchrow_array) {
+                push @{$fid2id{$fid}}, $name;
+                $seen{"$fid.$name"} = 1;
+        }
+
+        # deal with 1.7.0 DBs :<
+        $sth->bind_param(1, $oidbin, SQL_VARCHAR);
+        $sth->execute;
+        while (my ($fid, $name) = $sth->fetchrow_array) {
+                next if $seen{"$fid.$name"};
+                push @{$fid2id{$fid}}, $name;
+        }
+
+        $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
+        my $ret = {};
+        my $drop_fids = $dbh->{ReadOnly} ? undef : {};
+        while (my ($fid, $ids) = each %fid2id) {
+                $sth->execute($fid);
+                my ($loc) = $sth->fetchrow_array;
+                unless (defined $loc) {
+                        my $del = '';
+                        if ($drop_fids) {
+                                $del = ' (deleting)';
+                                $drop_fids->{$fid} = $fid;
+                        }
+                        my $oidhex = unpack('H*', $oidbin);
+                        warn "E: fid=$fid for $oidhex stale/unknown:\n", map {
+                                        'E: '.(ref() ? $$_ : "#$_")."$del\n";
+                                } @$ids;
+                        next;
+                }
+                $ret->{$loc} = $ids;
+        }
+        if ($drop_fids && scalar(values %$drop_fids)) {
+                my $lk = $self->lock_for_scope;
+                _forget_fids($self->{dbh}, values %$drop_fids);
+        }
+        scalar(keys %$ret) ? $ret : undef;
+}
+
+# returns a list of folders used for completion
+sub folders {
+        my ($self, @pfx) = @_;
+        my $sql = 'SELECT loc FROM folders';
+        my $re;
+        if (defined($pfx[0])) {
+                $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
+                if (ref($pfx[0])) { # assume qr// "Regexp"
+                        $re = $pfx[0];
+                } else {
+                        $re = !!$pfx[1] ? '.*' : '';
+                        $re .= quotemeta($pfx[0]);
+                        $re .= '.*';
+                }
+        }
+        my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
+        $sth->bind_param(1, $re) if defined($re);
+        $sth->execute;
+        map { $_->[0] } @{$sth->fetchall_arrayref};
+}
+
+sub blob_mismatch ($$$) {
+        my ($f, $oidhex, $rawref) = @_;
+        my $sha = $HEXLEN2SHA{length($oidhex)};
+        my $got = git_sha($sha, $rawref)->hexdigest;
+        $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
+sub local_blob {
+        my ($self, $oidhex, $vrfy) = @_;
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        my $oidbin = pack('H*', $oidhex);
+
+        my $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.name FROM blob2name b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ?
+
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
+        while (my ($d, $n) = $b2n->fetchrow_array) {
+                substr($d, 0, length('maildir:')) = '';
+                # n.b. both mbsync and offlineimap use ":2," as a suffix
+                # in "new/", despite (from what I understand of reading
+                # <https://cr.yp.to/proto/maildir.html>), the ":2," only
+                # applies to files in "cur/".
+                my @try = $n =~ /:2,[a-zA-Z]+\z/ ? qw(cur new) : qw(new cur);
+                for my $x (@try) {
+                        my $f = "$d/$x/$n";
+                        open my $fh, '<', $f or next;
+                        # some (buggy) Maildir writers are non-atomic:
+                        my $raw = read_all($fh, -s $fh // next);
+                        next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
+                        return \$raw;
+                }
+        }
+
+        # MH, except `uid' is not always unique (can be packed)
+        $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
+
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
+        while (my ($f, $n) = $b2n->fetchrow_array) {
+                $f =~ s/\Amh://s or die "BUG: not MH: $f";
+                $f .= "/$n";
+                open my $fh, '<', $f or next;
+                my $raw = read_all($fh, -s $fh // next);
+                next if blob_mismatch $f, $oidhex, \$raw;
+                return \$raw;
+        }
+        undef;
+}
+
+sub match_imap_url {
+        my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
+        $all //= [ $self->folders ];
+        require PublicInbox::URIimap;
+        my $want = PublicInbox::URIimap->new($url)->canonical;
+        my ($s, $h, $mb) = ($want->scheme, $want->host, $want->mailbox);
+        my @uri = map { PublicInbox::URIimap->new($_)->canonical }
+                grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$mb\E\b!, @$all);
+        my @match;
+        for my $x (@uri) {
+                next if $x->mailbox ne $want->mailbox;
+                next if $x->host ne $want->host;
+                next if $x->port != $want->port;
+                my $x_uidval = $x->uidvalidity;
+                next if ($want->uidvalidity // $x_uidval) != $x_uidval;
+
+                # allow nothing in want to possibly match ";AUTH=ANONYMOUS"
+                if (defined($x->auth) && !defined($want->auth) &&
+                                !defined($want->user)) {
+                        push @match, $x;
+                # or maybe user was forgotten on CLI:
+                } elsif (defined($x->user) && !defined($want->user)) {
+                        push @match, $x;
+                } elsif (($x->user//"\0") eq ($want->user//"\0")) {
+                        push @match, $x;
+                }
+        }
+        return @match if wantarray;
+        scalar(@match) <= 1 ? $match[0] :
+                        "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
+}
+
+sub match_nntp_url ($$$) {
+        my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
+        $all //= [ $self->folders ];
+        require PublicInbox::URInntps;
+        my $want = PublicInbox::URInntps->new($url)->canonical;
+        my ($s, $h, $p) = ($want->scheme, $want->host, $want->port);
+        my $ng = $want->group; # force scalar (no article ranges)
+        my @uri = map { PublicInbox::URInntps->new($_)->canonical }
+                grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$ng\E\b!, @$all);
+        my @match;
+        for my $x (@uri) {
+                next if $x->group ne $ng || $x->host ne $h || $x->port != $p;
+                # maybe user was forgotten on CLI:
+                if (defined($x->userinfo) && !defined($want->userinfo)) {
+                        push @match, $x;
+                } elsif (($x->userinfo//"\0") eq ($want->userinfo//"\0")) {
+                        push @match, $x;
+                }
+        }
+        return @match if wantarray;
+        scalar(@match) <= 1 ? $match[0] :
+                        "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
+}
+
+# returns undef on failure, number on success
+sub group2folders {
+        my ($self, $lei, $all, $folders) = @_;
+        return $lei->fail(<<EOM) if @$folders;
+--all= not compatible with @$folders on command-line
+EOM
+        my %x = map { $_ => $_ } split(/,/, $all);
+        my @ok = grep(defined, delete(@x{qw(local remote), ''}));
+        push(@ok, '') if $all eq '';
+        my @no = keys %x;
+        if (@no) {
+                @no = (join(',', @no));
+                return $lei->fail(<<EOM);
+--all=@no not accepted (must be `local' and/or `remote')
+EOM
+        }
+        my @all = $self->folders;
+        for my $ok (@ok) {
+                if ($ok eq 'local') {
+                        push @$folders, grep(!m!\A[a-z0-9\+]+://!i, @all);
+                } elsif ($ok eq 'remote') {
+                        push @$folders, grep(m!\A[a-z0-9\+]+://!i, @all);
+                } elsif ($ok ne '') {
+                        return $lei->fail("--all=$all not understood");
+                } else {
+                        push @$folders, @all;
+                }
+        }
+        @$folders = uniqstr @$folders;
+        scalar(@$folders) || $lei->fail(<<EOM);
+no --mail-sync folders known to lei
+EOM
+}
+
+# map CLI args to folder table entries, returns undef on failure
+sub arg2folder {
+        my ($self, $lei, $folders) = @_;
+        my @all = $self->folders;
+        my %all = map { $_ => 1 } @all;
+        my @no;
+        for (@$folders) {
+                next if $all{$_}; # ok
+                if (m!\A(maildir|mh):(.+)!i) {
+                        my $type = lc $1;
+                        my $d = "$type:".$lei->abs_path($2);
+                        push(@no, $_) unless $all{$d};
+                        $_ = $d;
+                } elsif (-d "$_/new" && -d "$_/cur") {
+                        my $d = 'maildir:'.$lei->abs_path($_);
+                        push(@no, $_) unless $all{$d};
+                        $_ = $d;
+                } elsif (m!\Aimaps?://!i) {
+                        my $orig = $_;
+                        my $res = match_imap_url($self, $orig, \@all);
+                        if (ref $res) {
+                                $_ = $$res;
+                                $lei->qerr(<<EOM);
+# using `$res' instead of `$orig'
+EOM
+                        } else {
+                                warn($res, "\n") if defined $res;
+                                push @no, $orig;
+                        }
+                } elsif (m!\A(?:nntps?|s?news)://!i) {
+                        my $orig = $_;
+                        my $res = match_nntp_url($self, $orig, \@all);
+                        if (ref $res) {
+                                $_ = $$res;
+                                $lei->qerr(<<EOM);
+# using `$res' instead of `$orig'
+EOM
+                        } else {
+                                warn($res, "\n") if defined $res;
+                                push @no, $orig;
+                        }
+                } else {
+                        push @no, $_;
+                }
+        }
+        if (@no) {
+                my $no = join("\n\t", @no);
+                die <<EOF;
+No sync information for: $no
+Run `lei ls-mail-sync' to display valid choices
+EOF
+        }
+}
+
+sub forget_folders {
+        my ($self, @folders) = @_;
+        my $lk = $self->lock_for_scope;
+        _forget_fids($self->{dbh}, map {
+                delete($self->{fmap}->{$_}) //
+                        fid_for($self, $_) // ();
+        } @folders);
+}
+
+# only used for changing canonicalization errors
+sub rename_folder {
+        my ($self, $old, $new) = @_;
+        my $lk = $self->lock_for_scope;
+        my $ofid = delete($self->{fmap}->{$old}) //
+                fid_for($self, $old) // return;
+        eval {
+                $self->{dbh}->do(<<EOM, undef, $new, $ofid);
+UPDATE folders SET loc = ? WHERE fid = ?
+EOM
+        };
+        if ($@ =~ /\bunique\b/i) {
+                my $nfid = $self->{fmap}->{$new} // fid_for($self, $new);
+                for my $t (qw(blob2name blob2num)) {
+                        $self->{dbh}->do(<<EOM, undef, $nfid, $ofid);
+UPDATE OR REPLACE $t SET fid = ? WHERE fid = ?
+EOM
+                }
+                $self->{dbh}->do(<<EOM, undef, $ofid);
+DELETE FROM folders WHERE fid = ?
+EOM
+        }
+}
+
+sub num_oidbin ($$$) {
+        my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY if IMAP
+        my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return ();
+        my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
+EOM
+        $sth->execute($fid, $uid);
+        # for public-inbox <= 1.7.0:
+        uniqstr(map { $_->[0] } @{$sth->fetchall_arrayref});
+}
+
+sub name_oidbin ($$$) {
+        my ($self, $mdir, $nm) = @_;
+        my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
+        my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
+EOM
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $nm, SQL_BLOB);
+        $sth->execute;
+        my @bin = map { $_->[0] } @{$sth->fetchall_arrayref};
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $nm, SQL_VARCHAR);
+        $sth->execute;
+        my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
+        uniqstr @bin, @old # for public-inbox <= 1.7.0
+}
+
+sub imap_oidhex {
+        my ($self, $lei, $uid_uri) = @_;
+        my $mailbox_uri = $uid_uri->clone;
+        $mailbox_uri->uid(undef);
+        my $folders = [ $$mailbox_uri ];
+        eval { $self->arg2folder($lei, $folders) };
+        $lei->qerr("# no sync information for $mailbox_uri") if $@;
+        map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid)
+}
+
+1;
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
new file mode 100644
index 00000000..e7c265bd
--- /dev/null
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -0,0 +1,1386 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei add-external --mirror" support (also "public-inbox-clone");
+package PublicInbox::LeiMirror;
+use v5.12;
+use parent qw(PublicInbox::IPC);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use IO::Compress::Gzip qw(gzip $GzipError);
+use PublicInbox::Spawn qw(spawn run_wait run_die run_qx);
+use PublicInbox::IO qw(write_file);
+use File::Path ();
+use File::Temp ();
+use File::Spec ();
+use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
+use Carp qw(croak);
+use URI;
+use PublicInbox::Config qw(glob2re);
+use PublicInbox::Inbox;
+use PublicInbox::LeiCurl;
+use PublicInbox::OnDestroy;
+use PublicInbox::SHA qw(sha256_hex sha_all);
+use POSIX qw(strftime);
+use PublicInbox::Admin qw(fmt_localtime);
+use autodie qw(chdir chmod close open pipe readlink
+                seek symlink sysopen sysseek truncate unlink);
+use PublicInbox::Git qw(git_exe);
+
+our $LIVE; # pid => callback
+our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]]
+our $TODO; # reference => [ non-fgrp mirror objects ]
+our @PUH; # post-update hooks
+
+sub keep_going ($) {
+        $LIVE && (!$_[0]->{lei}->{child_error} ||
+                $_[0]->{lei}->{opt}->{'keep-going'});
+}
+
+sub _wq_done_wait { # awaitpid cb (via wq_eof)
+        my ($pid, $mrr, $lei) = @_;
+        if ($?) {
+                $lei->child_error($?);
+        } elsif (!$lei->{child_error}) {
+                if (!$mrr->{dry_run} && $lei->{cmd} ne 'public-inbox-clone') {
+                        require PublicInbox::LeiAddExternal;
+                        PublicInbox::LeiAddExternal::_finish_add_external(
+                                                        $lei, $mrr->{dst});
+                }
+                $lei->qerr("# mirrored $mrr->{src} => $mrr->{dst}");
+        }
+        $lei->dclose;
+}
+
+# for old installations without manifest.js.gz
+sub try_scrape {
+        my ($self, $fallback_manifest) = @_;
+        my $uri = URI->new($self->{src});
+        my $lei = $self->{lei};
+        my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+        my $cmd = $curl->for_uri($lei, $uri, '--compressed');
+        my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
+        my $html = run_qx($cmd, undef, $opt);
+        return $lei->child_error($?, "@$cmd failed") if $?;
+
+        # we grep with URL below, we don't want Subject/From headers
+        # making us clone random URLs.  This assumes remote instances
+        # prior to public-inbox 1.7.0
+        # 5b96edcb1e0d8252 (www: move mirror instructions to /text/, 2021-08-28)
+        my @html = split(/<hr>/, $html);
+        my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+        if (!@urls && $fallback_manifest) {
+                warn <<EOM;
+W: failed to extract URLs from $uri, trying manifest.js.gz...
+EOM
+                return start_clone_url($self);
+        }
+        my $url = $uri->as_string;
+        chop($url) eq '/' or die "BUG: $uri not canonicalized";
+
+        # since this is for old instances w/o manifest.js.gz, try v1 first
+        return clone_v1($self) if grep(m!\A\Q$url\E/*\z!, @urls);
+        if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) {
+                my %v2_epochs = map {
+                        my ($n) = (m!/([0-9]+)\z!);
+                        $n => [ URI->new($_), '' ]
+                } @v2_urls; # uniq
+                clone_v2_prep($self, \%v2_epochs);
+                delete local $lei->{opt}->{epoch};
+                clone_all($self);
+                return;
+        }
+
+        # filter out common URLs served by WWW (e.g /$MSGID/T/)
+        if (@urls && $url =~ s!/+[^/]+\@[^/]+/.*\z!! &&
+                        grep(m!\A\Q$url\E/*\z!, @urls)) {
+                die <<"";
+E: confused by scraping <$uri>, did you mean <$url>?
+
+        }
+        @urls and die <<"";
+E: confused by scraping <$uri>, got ambiguous results:
+@urls
+
+        die "E: scraping <$uri> revealed nothing\n";
+}
+
+sub clone_cmd {
+        my ($lei, $opt) = @_;
+        my @cmd = (git_exe);
+        $opt->{$_} = $lei->{$_} for (0..2);
+        # we support "-c $key=$val" for arbitrary git config options
+        # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
+        push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
+        push @cmd, qw(clone --mirror);
+        push @cmd, '-q' if $lei->{opt}->{quiet} ||
+                        ($lei->{opt}->{jobs} // 1) > 1;
+        push @cmd, '-v' if $lei->{opt}->{verbose};
+        # XXX any other options to support?
+        # --reference is tricky with multiple epochs, but handled
+        # automatically if using manifest.js.gz
+        @cmd;
+}
+
+sub ft_rename ($$$;$) {
+        my ($ft, $dst, $open_mode, $fh) = @_;
+        my @st = stat($fh // $dst);
+        my $mode = @st ? ($st[2] & 07777) : ($open_mode & ~umask);
+        chmod($mode, $ft);
+        require File::Copy;
+        File::Copy::mv($ft->filename, $dst) or croak "E: mv($ft => $dst): $!";
+        $ft->unlink_on_destroy(0);
+}
+
+sub do_reap ($;$) {
+        my ($self, $jobs) = @_;
+        $jobs //= $self->{-jobs} //= $self->{lei}->{opt}->{jobs} // 1;
+        $jobs = 1 if $jobs < 1;
+        while (keys(%$LIVE) >= $jobs) {
+                my $pid = waitpid(-1, 0) // die "waitpid(-1): $!";
+                if (my $x = delete $LIVE->{$pid}) {
+                        my $cb = shift @$x;
+                        $cb->(@$x) if $cb;
+                } else {
+                        warn "reaped unknown PID=$pid ($?)\n";
+                }
+        }
+}
+
+sub _get_txt_start { # non-fatal
+        my ($self, $endpoint, $fini) = @_;
+        my $uri = URI->new($self->{cur_src} // $self->{src});
+        my $lei = $self->{lei};
+        my $path = $uri->path;
+        chop($path) eq '/' or die "BUG: $uri not canonicalized";
+        $uri->path("$path/$endpoint");
+        my $f = (split(m!/!, $endpoint))[-1];
+        my $ft = File::Temp->new(TEMPLATE => "$f-XXXX", TMPDIR => 1);
+        my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} };
+        my $cmd = $self->{curl}->for_uri($lei, $uri, qw(--compressed -R -o),
+                                        $ft->filename);
+        do_reap($self);
+        $lei->qerr("# @$cmd");
+        return if $self->{dry_run};
+        $self->{"-get_txt.$endpoint"} = [ $ft, $cmd, $uri ];
+        $LIVE->{spawn($cmd, undef, $opt)} =
+                        [ \&_get_txt_done, $self, $endpoint, $fini ];
+}
+
+sub _get_txt_done { # returns true on error (non-fatal), undef on success
+        my ($self, $endpoint) = @_;
+        my ($fh, $cmd, $uri) = @{delete $self->{"-get_txt.$endpoint"}};
+        my $cerr = $?;
+        $? = 0; # don't influence normal lei exit
+        return warn("$uri missing\n") if ($cerr >> 8) == 22;
+        return warn("# @$cmd failed (non-fatal)\n") if $cerr;
+        seek($fh, 0, SEEK_SET);
+        $self->{"mtime.$endpoint"} = (stat($fh))[9];
+        $self->{"txt.$endpoint"} = PublicInbox::IO::read_all $fh, -s _;
+        undef; # success
+}
+
+sub _write_inbox_config {
+        my ($self) = @_;
+        my $buf = delete($self->{'txt._/text/config/raw'}) // return;
+        my $dst = $self->{cur_dst} // $self->{dst};
+        my $f = "$dst/inbox.config.example";
+        my $mtime = delete $self->{'mtime._/text/config/raw'};
+        if (CORE::sysopen(my $fh, $f, O_CREAT|O_EXCL|O_WRONLY)) {
+                print $fh $buf;
+                chmod(0444 & ~umask, $fh);
+                $fh->flush or die "flush($f): $!";
+                if (defined $mtime) {
+                        utime($mtime, $mtime, $fh) or die "utime($f): $!";
+                }
+        } elsif (!$!{EEXIST}) {
+                die "open($f): $!";
+        }
+        my $cfg = PublicInbox::Config->git_config_dump($f,
+                                                { 2 => $self->{lei}->{2} });
+        my $ibx = $self->{ibx} = {}; # for indexing
+        for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) {
+                for (qw(address newsgroup nntpmirror)) {
+                        $ibx->{$_} = $cfg->{"$sec.$_"};
+                }
+        }
+}
+
+sub set_description ($) {
+        my ($self) = @_;
+        my $dst = $self->{cur_dst} // $self->{dst};
+        chomp(my $orig = PublicInbox::IO::try_cat("$dst/description"));
+        my $d = $orig;
+        while (defined($d) && ($d =~ m!^\(\$INBOX_DIR/description missing\)! ||
+                        $d =~ /^Unnamed repository/ || $d !~ /\S/)) {
+                $d = delete($self->{'txt.description'});
+        }
+        $d //= 'mirror of '.($self->{cur_src} // $self->{src});
+        atomic_write($dst, 'description', $d."\n") if $d ne $orig;
+}
+
+sub index_cloned_inbox {
+        my ($self, $iv) = @_;
+        my $lei = $self->{lei};
+
+        # n.b. public-inbox-clone works w/o (SQLite || Xapian)
+        # lei is useless without Xapian + SQLite
+        if ($lei->{cmd} ne 'public-inbox-clone') {
+                require PublicInbox::InboxWritable;
+                require PublicInbox::Admin;
+                my $ibx = delete($self->{ibx}) // {
+                        address => [ 'lei@example.com' ],
+                        version => $iv,
+                };
+                $ibx->{inboxdir} = $self->{cur_dst} // $self->{dst};
+                PublicInbox::Inbox->new($ibx);
+                PublicInbox::InboxWritable->new($ibx);
+                my $opt = {};
+                for my $sw ($lei->index_opt) {
+                        my ($k) = ($sw =~ /\A([\w-]+)/);
+                        $opt->{$k} = $lei->{opt}->{$k};
+                }
+                # force synchronous awaitpid for v2:
+                local $PublicInbox::DS::in_loop = 0;
+                my $cfg = PublicInbox::Config->new(undef, { 2 => $lei->{2} });
+                my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+                local %ENV = (%ENV, %$env) if $env;
+                PublicInbox::Admin::progress_prepare($opt, $lei->{2});
+                PublicInbox::Admin::index_inbox($ibx, undef, $opt);
+        }
+        return if defined $self->{cur_dst}; # one of many repos to clone
+}
+
+sub run_reap {
+        my ($lei, $cmd, $opt) = @_;
+        $lei->qerr("# @$cmd");
+        my $ret = run_wait($cmd, undef, $opt);
+        $? = 0; # don't let it influence normal exit
+        $ret;
+}
+
+sub start_cmd {
+        my ($self, $cmd, $opt, $fini) = @_;
+        do_reap($self);
+        $self->{lei}->qerr("# @$cmd");
+        return if $self->{dry_run};
+        $LIVE->{spawn($cmd, undef, $opt)} = [ \&reap_cmd, $self, $cmd, $fini ]
+}
+
+sub fetch_args ($$) {
+        my ($lei, $opt) = @_;
+        my @cmd; # (git --git-dir=...) to be added by caller
+        $opt->{$_} = $lei->{$_} for (0..2);
+        # we support "-c $key=$val" for arbitrary git config options
+        # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
+        push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
+        push @cmd, 'fetch';
+        push @cmd, '-q' if $lei->{opt}->{quiet} ||
+                        ($lei->{opt}->{jobs} // 1) > 1;
+        push @cmd, '-v' if $lei->{opt}->{verbose};
+        push(@cmd, '-p') if $lei->{opt}->{prune};
+        PublicInbox::Git::git_version() ge v2.29.0 and
+                push(@cmd, '--no-write-fetch-head');
+        @cmd;
+}
+
+sub upr { # feed `git update-ref --stdin -z' verbosely
+        my ($lei, $w, $op, @rest) = @_; # ($ref, $oid) = @rest
+        $lei->qerr("# $op @rest") if $lei->{opt}->{verbose};
+        print $w "$op ", join("\0", @rest, '') or die "print(w): $!";
+}
+
+sub start_update_ref {
+        my ($fgrp) = @_;
+        pipe(my $r, my $w);
+        my $cmd = [ git_exe, "--git-dir=$fgrp->{cur_dst}",
+                qw(update-ref --stdin -z) ];
+        my $pack = on_destroy \&satellite_done, $fgrp;
+        start_cmd($fgrp, $cmd, { 0 => $r, 2 => $fgrp->{lei}->{2} }, $pack);
+        close $r;
+        $fgrp->{dry_run} ? undef : $w;
+}
+
+sub upref_warn { warn "E: close(update-ref --stdin): $! (need git 1.8.5+)\n" }
+
+sub fgrp_update {
+        my ($fgrp) = @_;
+        return if !keep_going($fgrp);
+        my $srcfh = delete $fgrp->{srcfh} or return;
+        my $dstfh = delete $fgrp->{dstfh} or return;
+        seek($srcfh, 0, SEEK_SET);
+        seek($dstfh, 0, SEEK_SET);
+        my %src = map { chomp; split /\0/ } PublicInbox::IO::read_all $srcfh;
+        my %dst = map { chomp; split /\0/ } PublicInbox::IO::read_all $dstfh;
+        $srcfh = $dstfh = undef;
+        my $w = start_update_ref($fgrp) or return;
+        my $lei = $fgrp->{lei};
+        my $ndel;
+        for my $ref (keys %dst) {
+                my $new = delete $src{$ref};
+                my $old = $dst{$ref};
+                if (defined $new) {
+                        $new eq $old or
+                                upr($lei, $w, 'update', $ref, $new, $old);
+                } else {
+                        upr($lei, $w, 'delete', $ref, $old);
+                        ++$ndel;
+                }
+        }
+        # git's ref files backend doesn't allow directory/file conflicts
+        # between `delete' and `create' ops:
+        if ($ndel && scalar(keys %src)) {
+                $fgrp->{-create_refs} = \%src;
+        } else {
+                while (my ($ref, $oid) = each %src) {
+                        upr($lei, $w, 'create', $ref, $oid);
+                }
+        }
+        $w->close or upref_warn();
+}
+
+sub satellite_done {
+        my ($fgrp) = @_;
+        if (my $create = delete $fgrp->{-create_refs}) {
+                my $w = start_update_ref($fgrp) or return;
+                while (my ($ref, $oid) = each %$create) {
+                        upr($fgrp->{lei}, $w, 'create', $ref, $oid);
+                }
+                $w->close or upref_warn();
+        } else {
+                pack_refs($fgrp, $fgrp->{cur_dst});
+                run_puh($fgrp);
+        }
+}
+
+sub pack_refs {
+        my ($self, $git_dir) = @_;
+        my $cmd = [git_exe, "--git-dir=$git_dir", qw(pack-refs --all --prune)];
+        start_cmd($self, $cmd, { 2 => $self->{lei}->{2} });
+}
+
+sub unlink_fetch_head ($) {
+        my ($git_dir) = @_;
+        return if CORE::unlink("$git_dir/FETCH_HEAD") || $!{ENOENT};
+        warn "W: unlink($git_dir/FETCH_HEAD): $!";
+}
+
+sub fgrpv_done {
+        my ($fgrpv) = @_;
+        return if !$LIVE;
+        my $first = $fgrpv->[0] // die 'BUG: no fgrpv->[0]';
+        return if !keep_going($first);
+        unlink_fetch_head($first->{-osdir}) if !$first->{dry_run};
+        pack_refs($first, $first->{-osdir}); # objstore refs always packed
+        for my $fgrp (@$fgrpv) {
+                my $rn = $fgrp->{-remote};
+                my %opt = ( 2 => $fgrp->{lei}->{2} );
+                my $update_ref = on_destroy \&fgrp_update, $fgrp;
+                my $src = [ git_exe, "--git-dir=$fgrp->{-osdir}",
+                        'for-each-ref',
+                        "--format=refs/%(refname:lstrip=3)%00%(objectname)",
+                        "refs/remotes/$rn/" ];
+                open(my $sfh, '+>', undef);
+                $fgrp->{srcfh} = $sfh;
+                start_cmd($fgrp, $src, { %opt, 1 => $sfh }, $update_ref);
+                my $dst = [ git_exe, "--git-dir=$fgrp->{cur_dst}",
+                        'for-each-ref', '--format=%(refname)%00%(objectname)' ];
+                open(my $dfh, '+>', undef);
+                $fgrp->{dstfh} = $dfh;
+                start_cmd($fgrp, $dst, { %opt, 1 => $dfh }, $update_ref);
+        }
+}
+
+sub fgrp_fetch_all {
+        my ($self) = @_;
+        my $todo = $FGRP_TODO;
+        $FGRP_TODO = \'BUG on further use';
+        keys(%$todo) or return;
+
+        # Rely on the fgrptmp remote groups in the config file rather
+        # than listing all remotes since the remote name list may exceed
+        # system argv limits:
+        my $grp = 'fgrptmp';
+
+        my @git = (@{$self->{-torsocks}}, git_exe);
+        my $j = $self->{lei}->{opt}->{jobs};
+        my $opt = {};
+        my @fetch = do {
+                local $self->{lei}->{opt}->{jobs} = 1;
+                (fetch_args($self->{lei}, $opt), qw(--no-tags --multiple));
+        };
+        push(@fetch, "-j$j") if $j;
+        while (my ($osdir, $fgrp_old_new) = each %$todo) {
+                my $f = "$osdir/config";
+                return if !keep_going($self);
+                my ($old, $new) = @$fgrp_old_new;
+                @$old = sort { $b->{-sort} <=> $a->{-sort} } @$old;
+                # $new is ordered by {references}
+                my $cmd = [ git_exe, "--git-dir=$osdir", qw(config -f), $f ];
+
+                # clobber settings from previous run atomically
+                for ("remotes.$grp", 'fetch.hideRefs') {
+                        my $c = [ @$cmd, '--unset-all', $_ ];
+                        $self->{lei}->qerr("# @$c");
+                        next if $self->{dry_run};
+                        run_wait($c, undef, $opt);
+                        die "E: @$c \$?=$?" if ($? && ($? >> 8) != 5);
+                }
+
+                # permanent configs:
+                my $cfg = PublicInbox::Config->git_config_dump($f);
+                for my $fgrp (@$old, @$new) {
+                        my $u = $fgrp->{-uri} // die 'BUG: no {-uri}';
+                        my $rn = $fgrp->{-remote} // die 'BUG: no {-remote}';
+                        for ("url=$u", "fetch=+refs/*:refs/remotes/$rn/*",
+                                        'tagopt=--no-tags') {
+                                my ($k, $v) = split(/=/, $_, 2);
+                                $k = "remote.$rn.$k";
+                                next if ($cfg->{$k} // '') eq $v;
+                                my $c = [@$cmd, $k, $v];
+                                $fgrp->{lei}->qerr("# @$c");
+                                next if $fgrp->{dry_run};
+                                run_die($c, undef, $opt);
+                        }
+                }
+
+                if (!$self->{dry_run}) {
+                        # update the config atomically via O_APPEND while
+                        # respecting git-config locking
+                        sysopen(my $lk, "$f.lock", O_CREAT|O_EXCL|O_WRONLY);
+                        open my $fh, '>>', $f;
+                        $fh->autoflush(1);
+                        my $buf = '';
+                        if (@$old) {
+                                $buf = "[fetch]\n\thideRefs = refs\n";
+                                $buf .= join('', map {
+                                        "\thideRefs = !refs/remotes/" .
+                                                "$_->{-remote}/\n";
+                                } @$old);
+                        }
+                        $buf .= join('', "[remotes]\n",
+                                (map { "\t$grp = $_->{-remote}\n" } @$old),
+                                (map { "\t$grp = $_->{-remote}\n" } @$new));
+                        print $fh $buf or die "print($f): $!";
+                        close $fh;
+                        unlink("$f.lock");
+                }
+                $cmd  = [ @git, "--git-dir=$osdir", @fetch, $grp ];
+                push @$old, @$new;
+                my $end = on_destroy \&fgrpv_done, $old;
+                start_cmd($self, $cmd, $opt, $end);
+        }
+}
+
+# keep this idempotent for future use by public-inbox-fetch
+sub forkgroup_prep {
+        my ($self, $uri) = @_;
+        $self->{-ent} // return;
+        my $os = $self->{-objstore} // return;
+        my $fg = $self->{-ent}->{forkgroup} // return;
+        my $dir = "$os/$fg.git";
+        if (!-d $dir && !$self->{dry_run}) {
+                PublicInbox::Import::init_bare($dir);
+                write_file '+>>', "$dir/config", <<EOM;
+[repack]
+        useDeltaIslands = true
+[pack]
+        island = refs/remotes/([^/]+)/
+EOM
+        }
+        my $key = $self->{-key} // die 'BUG: no -key';
+        my $rn = substr(sha256_hex($key), 0, 16);
+        if (!-d $self->{cur_dst} && !$self->{dry_run}) {
+                PublicInbox::Import::init_bare($self->{cur_dst});
+                write_file '+>>', "$self->{cur_dst}/config", <<EOM;
+; rely on the "$rn" remote in the
+; $fg fork group for fetches
+; only uncomment the following iff you detach from fork groups
+; [remote "origin"]
+;        url = $uri
+;        fetch = +refs/*:refs/*
+;        mirror = true
+EOM
+        }
+        if (!$self->{dry_run}) {
+                my $alt = File::Spec->rel2abs("$dir/objects");
+                my $o = "$self->{cur_dst}/objects";
+                my $l = File::Spec->abs2rel($alt, File::Spec->rel2abs($o));
+                open my $fh, '+>>', my $f = "$o/info/alternates";
+                seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
+                my $seen = grep /\A\Q$l\E\n/, PublicInbox::IO::read_all $fh;
+                print $fh "$l\n" if !$seen;
+                close $fh;
+        }
+        bless {
+                %$self, -osdir => $dir, -remote => $rn, -uri => $uri
+        }, __PACKAGE__;
+}
+
+sub fp_done {
+        my ($self, $cmd, $cb, @arg) = @_;
+        if ($?) {
+                $self->{lei}->err("@$cmd failed (\$?=$?) (non-fatal)");
+                $? = 0; # don't let it influence normal exit
+        }
+        return if !keep_going($self);
+        my $fh = delete $self->{-show_ref} // die 'BUG: no show-ref output';
+        sysseek($fh, 0, SEEK_SET);
+        $self->{-ent} // die 'BUG: no -ent';
+        my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
+        my $B = sha_all(1, $fh)->hexdigest;
+        return $cb->($self, @arg) if $A ne $B;
+        $self->{lei}->qerr("# $self->{-key} up-to-date");
+}
+
+sub cmp_fp_do {
+        my ($self, $cb, @arg) = @_;
+        # $cb is either resume_fetch or fgrp_enqueue
+        $self->{-ent} // return $cb->($self, @arg);
+        my $new = $self->{-ent}->{fingerprint} // return $cb->($self, @arg);
+        my $key = $self->{-key} // die 'BUG: no -key';
+        if (my $cur_ent = $self->{-local_manifest}->{$key}) {
+                # runs go_fetch->DESTROY run if eq
+                return if $cur_ent->{fingerprint} eq $new;
+        }
+        my $dst = $self->{cur_dst} // $self->{dst};
+        my $cmd = [git_exe, "--git-dir=$dst", 'show-ref'];
+        my $opt = { 2 => $self->{lei}->{2} };
+        open($opt->{1}, '+>', undef);
+        $self->{-show_ref} = $opt->{1};
+        do_reap($self);
+        $self->{lei}->qerr("# @$cmd");
+        $LIVE->{spawn($cmd, undef, $opt)} = [ \&fp_done, $self, $cmd,
+                                                $cb, @arg ];
+}
+
+sub resume_fetch {
+        my ($self, $uri, $fini) = @_;
+        return if !keep_going($self);
+        my $dst = $self->{cur_dst} // $self->{dst};
+        my @git = (git_exe, "--git-dir=$dst");
+        my $opt = { 2 => $self->{lei}->{2} };
+        my $rn = 'random'.int(rand(1 << 30));
+        for ("url=$uri", "fetch=+refs/*:refs/*", 'mirror=true') {
+                push @git, '-c', "remote.$rn.$_";
+        }
+        my $cmd = [ @{$self->{-torsocks}}, @git,
+                        fetch_args($self->{lei}, $opt), $rn ];
+        push @$cmd, '-P' if $self->{lei}->{prune}; # --prune-tags implied
+        my $run_puh = on_destroy \&run_puh, $self, $fini;
+        ++$self->{chg}->{nr_chg};
+        start_cmd($self, $cmd, $opt, $run_puh);
+}
+
+sub fgrp_enqueue {
+        my ($fgrp, $end) = @_; # $end calls fgrp_fetch_all
+        return if !keep_going($fgrp);
+        ++$fgrp->{chg}->{nr_chg};
+        my $dst = $FGRP_TODO->{$fgrp->{-osdir}} //= [ [], [] ]; # [ old, new ]
+        push @{$dst->[defined($fgrp->{-sort} ? 0 : 1)]}, $fgrp;
+}
+
+sub clone_v1 {
+        my ($self, $end) = @_;
+        my $lei = $self->{lei};
+        my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+        my $uri = URI->new($self->{cur_src} // $self->{src});
+        my $path = $uri->path;
+        $path =~ s!/*\z!! and $uri->path($path);
+        defined($lei->{opt}->{epoch}) and
+                die "$uri is a v1 inbox, --epoch is not supported\n";
+        $self->{-torsocks} //= $curl->torsocks($lei, $uri) or return;
+        my $dst = $self->{cur_dst} // $self->{dst};
+        my $resume = -d $dst;
+        if ($resume) { # respect read-only cloned w/ --epoch=
+                my @st = stat(_); # for root
+                if (!-w _ || !($st[2] & 0222)) {
+                        warn "# skipping $dst, not writable\n";
+                        return;
+                }
+        }
+        my $fini = on_destroy \&v1_done, $self;
+        if (my $fgrp = forkgroup_prep($self, $uri)) {
+                $fgrp->{-fini} = $fini;
+                if ($resume) {
+                        $fgrp->{-sort} = $fgrp->{-ent}->{modified};
+                        cmp_fp_do($fgrp, \&fgrp_enqueue, $end);
+                } else { # new repo, save for last
+                        fgrp_enqueue($fgrp, $end);
+                }
+        } elsif ($resume) {
+                cmp_fp_do($self, \&resume_fetch, $uri, $fini);
+        } else { # normal clone
+                my $cmd = [ @{$self->{-torsocks}},
+                                clone_cmd($lei, my $opt = {}), "$uri", $dst ];
+                if (defined($self->{-ent})) {
+                        if (defined(my $ref = $self->{-ent}->{reference})) {
+                                -e "$self->{dst}$ref" and
+                                        push @$cmd, '--reference',
+                                                "$self->{dst}$ref";
+                        }
+                }
+                ++$self->{chg}->{nr_chg};
+                start_cmd($self, $cmd, $opt,
+                        on_destroy(\&run_puh, $self, $fini));
+        }
+        if (!$self->{-is_epoch} && $lei->{opt}->{'inbox-config'} =~
+                                /\A(?:always|v1)\z/s &&
+                        !-f "$dst/inbox.config.example") {
+                _get_txt_start($self, '_/text/config/raw', $fini);
+        }
+
+        my $d = $self->{-ent} ? $self->{-ent}->{description} : undef;
+        utf8::encode($self->{'txt.description'} = $d) if defined $d;
+        (!defined($d) && !$end) and
+                _get_txt_start($self, 'description', $fini);
+
+        $end or do_reap($self, 1); # for non-manifest clone
+}
+
+sub parse_epochs ($$) {
+        my ($opt_epochs, $v2_epochs) = @_; # $epochs "LOW..HIGH"
+        $opt_epochs // return; # undef => all epochs
+        my ($lo, $dotdot, $hi, @extra) = split(/(\.\.)/, $opt_epochs);
+        undef($lo) if ($lo // '') eq '';
+        my $re = qr/\A~?[0-9]+\z/;
+        if (@extra || (($lo // '0') !~ $re) ||
+                        (($hi // '0') !~ $re) ||
+                        !(grep(defined, $lo, $hi))) {
+                die <<EOM;
+--epoch=$opt_epochs not in the form of `LOW..HIGH', `LOW..', nor `..HIGH'
+EOM
+        }
+        my @n = sort { $a <=> $b } keys %$v2_epochs;
+        for (grep(defined, $lo, $hi)) {
+                if (/\A[0-9]+\z/) {
+                        $_ > $n[-1] and die
+"`$_' exceeds maximum available epoch ($n[-1])\n";
+                        $_ < $n[0] and die
+"`$_' is lower than minimum available epoch ($n[0])\n";
+                } elsif (/\A~([0-9]+)/) {
+                        my $off = -$1 - 1;
+                        $n[$off] // die "`$_' is out of range\n";
+                        $_ = $n[$off];
+                } else { die "`$_' not understood\n" }
+        }
+        defined($lo) && defined($hi) && $lo > $hi and die
+"low value (`$lo') exceeds high (`$hi')\n";
+        $lo //= $n[0] if $dotdot;
+        $hi //= $n[-1] if $dotdot;
+        $hi //= $lo;
+        my $want = {};
+        for ($lo..$hi) {
+                if (defined $v2_epochs->{$_}) {
+                        $want->{$_} = 1;
+                } else {
+                        warn
+"# epoch $_ is not available (non-fatal, $lo..$hi)\n";
+                }
+        }
+        $want
+}
+
+sub init_placeholder ($$$) {
+        my ($src, $edst, $ent) = @_;
+        PublicInbox::Import::init_bare($edst);
+        my @owner = defined($ent->{owner}) ? (<<EOM) : ();
+[gitweb]
+        owner = $ent->{owner}
+EOM
+        write_file '>>', "$edst/config", <<EOM, @owner;
+[remote "origin"]
+        url = $src
+        fetch = +refs/*:refs/*
+        mirror = true
+
+; This git epoch was created read-only and "public-inbox-fetch"
+; will not fetch updates for it unless write permission is added.
+; Hint: chmod +w $edst
+EOM
+        my %map = (head => 'HEAD', description => undef);
+        while (my ($key, $fn) = each %map) {
+                my $val = $ent->{$key} // next;
+                $fn //= $key;
+                write_file '>', "$edst/$fn", $val;
+        }
+}
+
+sub reap_cmd { # async, called via SIGCHLD
+        my ($self, $cmd) = @_;
+        my $cerr = $?;
+        $? = 0; # don't let it influence normal exit
+        $self->{lei}->child_error($cerr, "@$cmd failed (\$?=$cerr)") if $cerr;
+}
+
+sub up_fp_done {
+        my ($self) = @_;
+        return if !keep_going($self);
+        my $fh = delete $self->{-show_ref_up} // die 'BUG: no show-ref output';
+        sysseek($fh, 0, SEEK_SET);
+        $self->{-ent} // die 'BUG: no -ent';
+        my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
+        my $B = sha_all(1, $fh)->hexdigest;
+        return if $A eq $B;
+        $self->{-ent}->{fingerprint} = $B;
+        push @{$self->{chg}->{fp_mismatch}}, $self->{-key};
+}
+
+sub atomic_write ($$$) {
+        my ($dn, $bn, $raw) = @_;
+        my $ft = File::Temp->new(DIR => $dn, TEMPLATE => "$bn-XXXX");
+        print $ft $raw;
+        $ft->flush or die "flush($ft): $!";
+        ft_rename($ft, "$dn/$bn", 0666);
+}
+
+sub run_next_puh {
+        my ($self) = @_;
+        my $puh = shift @{$self->{-puh_todo}} // return delete($self->{-fini});
+        my $fini = on_destroy \&run_next_puh, $self;
+        my $cmd = [ @$puh, ($self->{cur_dst} // $self->{dst}) ];
+        my $opt = +{ map { $_ => $self->{lei}->{$_} } (0..2) };
+        start_cmd($self, $cmd, undef, $opt, $fini);
+}
+
+sub run_puh {
+        my ($self, $fini) = @_;
+        $self->{-fini} = $fini;
+        @{$self->{-puh_todo}} = @PUH;
+        run_next_puh($self);
+}
+
+# modifies the to-be-written manifest entry, and sets values from it, too
+sub update_ent {
+        my ($self) = @_;
+        my $key = $self->{-key} // die 'BUG: no -key';
+        my $new = $self->{-ent}->{fingerprint};
+        my $cur = $self->{-local_manifest}->{$key}->{fingerprint} // "\0";
+        my $dst = $self->{cur_dst} // $self->{dst};
+        if (defined($new) && $new ne $cur) {
+                my $cmd = [git_exe, "--git-dir=$dst", 'show-ref'];
+                my $opt = { 2 => $self->{lei}->{2} };
+                open($opt->{1}, '+>', undef);
+                $self->{-show_ref_up} = $opt->{1};
+                my $done = on_destroy \&up_fp_done, $self;
+                start_cmd($self, $cmd, $opt, $done);
+        }
+        $new = $self->{-ent}->{head};
+        $cur = $self->{-local_manifest}->{$key}->{head} // "\0";
+        if (defined($new) && $new ne $cur) {
+                # n.b. grokmirror writes raw contents to $dst/HEAD w/o locking
+                my $cmd = [ git_exe, "--git-dir=$dst" ];
+                if ($new =~ s/\Aref: //) {
+                        push @$cmd, qw(symbolic-ref HEAD), $new;
+                } elsif ($new =~ /\A[a-f0-9]{40,}\z/) {
+                        push @$cmd, qw(update-ref --no-deref HEAD), $new;
+                } else {
+                        undef $cmd;
+                        warn "W: $key: {head} => `$new' not understood\n";
+                }
+                start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }) if $cmd;
+        }
+        if (my $symlinks = $self->{-ent}->{symlinks}) {
+                my $top = File::Spec->rel2abs($self->{dst});
+                push @{$self->{-new_symlinks}}, @$symlinks;
+                for my $p (@$symlinks) {
+                        my $ln = "$top/$p";
+                        $ln =~ tr!/!/!s;
+                        my (undef, $dn, $bn) = File::Spec->splitpath($ln);
+                        File::Path::mkpath($dn);
+                        my $tgt = "$top/$key";
+                        $tgt = File::Spec->abs2rel($tgt, $dn);
+                        if (lstat($ln)) {
+                                if (-l _) {
+                                        next if readlink($ln) eq $tgt;
+                                        unlink($ln);
+                                } else {
+                                        push @{$self->{chg}->{badlink}}, $p;
+                                }
+                        }
+                        symlink($tgt, $ln);
+                        ++$self->{chg}->{nr_chg};
+                }
+        }
+        if (defined(my $t = $self->{-ent}->{modified})) {
+                my ($dn, $bn) = ("$dst/info/web", 'last-modified');
+                my $orig = PublicInbox::IO::try_cat("$dn/$bn");
+                $t = strftime('%F %T', gmtime($t))." +0000\n";
+                File::Path::mkpath($dn);
+                atomic_write($dn, $bn, $t) if $orig ne $t;
+        }
+
+        $new = $self->{-ent}->{owner} // return;
+        $cur = $self->{-local_manifest}->{$key}->{owner} // "\0";
+        return if $cur eq $new;
+        utf8::encode($new); # to octets
+        my $cmd = [ git_exe, qw(config -f), "$dst/config",
+                        'gitweb.owner', $new ];
+        start_cmd($self, $cmd, { 2 => $self->{lei}->{2} });
+}
+
+sub v1_done { # called via OnDestroy
+        my ($self) = @_;
+        return if $self->{dry_run} || !keep_going($self);
+        _write_inbox_config($self);
+        my $dst = $self->{cur_dst} // $self->{dst};
+        unlink_fetch_head($dst);
+        update_ent($self) if $self->{-ent};
+        my $o = "$dst/objects";
+        if (CORE::open(my $fh, '<', my $fn = "$o/info/alternates")) {;
+                my $base = File::Spec->rel2abs($o);
+                my @l = <$fh>;
+                my $ft;
+                for (@l) {
+                        next unless m!\A/!;
+                        $_ = File::Spec->abs2rel($_, $base);
+                        $ft //= File::Temp->new(TEMPLATE => '.XXXX',
+                                                DIR => "$o/info");
+                }
+                if ($ft) {
+                        print $ft @l;
+                        $ft->flush or die "flush($ft): $!";
+                        ft_rename($ft, $fn, 0666, $fh);
+                }
+        }
+        eval { set_description($self) };
+        warn $@ if $@;
+        return if ($self->{-is_epoch} ||
+                $self->{lei}->{opt}->{'inbox-config'} ne 'always');
+        write_makefile($dst, 1);
+        index_cloned_inbox($self, 1);
+}
+
+sub v2_done { # called via OnDestroy
+        my ($self) = @_;
+        return if $self->{dry_run} || !keep_going($self);
+        my $dst = $self->{cur_dst} // $self->{dst};
+        require PublicInbox::Lock;
+        my $lk = PublicInbox::Lock->new("$dst/inbox.lock");
+        my $lck = $lk->lock_for_scope;
+        _write_inbox_config($self);
+        require PublicInbox::MultiGit;
+        my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git');
+        $mg->fill_alternates;
+        for my $i ($mg->git_epochs) { $mg->epoch_cfg_set($i) }
+        for my $edst (@{delete($self->{-read_only}) // []}) {
+                my @st = stat($edst) or die "stat($edst): $!";
+                chmod($st[2] & 0555, $edst);
+        }
+        write_makefile($dst, 2);
+        undef $lck; # unlock
+        eval { set_description($self) };
+        warn $@ if $@;
+        index_cloned_inbox($self, 2);
+}
+
+sub clone_v2_prep ($$;$) {
+        my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
+        my $lei = $self->{lei};
+        my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+        my $first_uri = (map { $_->[0] } values %$v2_epochs)[0];
+        $self->{-torsocks} //= $curl->torsocks($lei, $first_uri) or return;
+        my $dst = $self->{cur_dst} // $self->{dst};
+        my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs);
+        my $task = $m ? bless { %$self }, __PACKAGE__ : $self;
+        my (@skip, $desc);
+        my $fini = on_destroy \&v2_done, $task;
+        for my $nr (sort { $a <=> $b } keys %$v2_epochs) {
+                my ($uri, $key) = @{$v2_epochs->{$nr}};
+                my $src = $uri->as_string;
+                my $edst = $dst;
+                $src =~ m!/([0-9]+)(?:\.git)?\z! or die <<"";
+failed to extract epoch number from $src
+
+                $1 + 0 == $nr or die "BUG: <$uri> miskeyed $1 != $nr";
+                $edst .= "/git/$nr.git";
+                my $ent;
+                if ($m) {
+                        $ent = $m->{$key} //
+                                die("BUG: `$key' not in manifest.js.gz");
+                        if (defined(my $d = $ent->{description})) {
+                                $d =~ s/ \[epoch [0-9]+\]\z//s;
+                                $desc = $d;
+                        }
+                }
+                if (!$want || $want->{$nr}) {
+                        my $etask = bless { %$task, -key => $key }, __PACKAGE__;
+                        $etask->{-ent} = $ent; # may have {reference}
+                        $etask->{cur_src} = $src;
+                        $etask->{cur_dst} = $edst;
+                        $etask->{-is_epoch} = $fini;
+                        my $ref = $ent->{reference} // '';
+                        push @{$TODO->{$ref}}, $etask;
+                        $self->{any_want}->{$key} = 1;
+                } else { # create a placeholder so users only need to chmod +w
+                        init_placeholder($src, $edst, $ent);
+                        push @{$task->{-read_only}}, $edst;
+                        push @skip, $key;
+                }
+        }
+        # filter out the epochs we skipped
+        $self->{chg}->{manifest} = 1 if $m && delete(@$m{@skip});
+
+        $self->{dry_run} or File::Path::mkpath($dst);
+
+        if ($lei->{opt}->{'inbox-config'} =~ /\A(?:always|v2)\z/s &&
+                        !-f "$dst/inbox.config.example") {
+                _get_txt_start($task, '_/text/config/raw', $fini);
+        }
+
+        defined($desc) ? ($task->{'txt.description'} = $desc) :
+                _get_txt_start($task, 'description', $fini);
+}
+
+sub decode_manifest ($$$) {
+        my ($fh, $fn, $uri) = @_;
+        my $js;
+        my $gz = PublicInbox::IO::read_all $fh;
+        gunzip(\$gz => \$js, MultiStream => 1) or
+                die "gunzip($uri): $GunzipError\n";
+        my $m = eval { PublicInbox::Config->json->decode($js) };
+        die "$uri: error decoding `$js': $@\n" if $@;
+        ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m);
+        $m;
+}
+
+sub load_current_manifest ($) {
+        my ($self) = @_;
+        my $fn = $self->{-manifest} // return;
+        if (CORE::open(my $fh, '<', $fn)) {
+                decode_manifest($fh, $fn, $fn);
+        } elsif ($!{ENOENT}) { # non-fatal, we can just do it slowly
+                $self->{-initial_clone} or
+                        warn "W: open($fn): $! (non-fatal)\n";
+                undef;
+        } else {
+                die "E: open($fn): $!\n";
+        }
+}
+
+sub multi_inbox ($$$) {
+        my ($self, $path, $m) = @_;
+        my $incl = $self->{lei}->{opt}->{include};
+        my $excl = $self->{lei}->{opt}->{exclude};
+
+        # assuming everything not v2 is v1, for now
+        my @v1 = sort grep(!m!.+/git/[0-9]+\.git\z!, keys %$m);
+        my @v2_epochs = sort grep(m!.+/git/[0-9]+\.git\z!, keys %$m);
+        my $v2 = {};
+
+        for (@v2_epochs) {
+                m!\A(/.+)/git/[0-9]+\.git\z! or die "BUG: $_";
+                push @{$v2->{$1}}, $_;
+        }
+        my $n = scalar(keys %$v2) + scalar(@v1);
+        my @orig = defined($incl // $excl) ? (keys %$v2, @v1) : ();
+        if (defined $incl) {
+                my $re = '(?:'.join('\\z|', map {
+                                glob2re($_) // qr/\A\Q$_\E/
+                        } @$incl).'\\z)';
+                my @gone = delete @$v2{grep(!/$re/, keys %$v2)};
+                delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
+                delete @$m{grep(!/$re/, @v1)} and $self->{chg}->{manifest} = 1;
+                @v1 = grep(/$re/, @v1);
+        }
+        if (defined $excl) {
+                my $re = '(?:'.join('\\z|', map {
+                                glob2re($_) // qr/\A\Q$_\E/
+                        } @$excl).'\\z)';
+                my @gone = delete @$v2{grep(/$re/, keys %$v2)};
+                delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
+                delete @$m{grep(/$re/, @v1)} and $self->{chg}->{manifest} = 1;
+                @v1 = grep(!/$re/, @v1);
+        }
+        my $ret; # { v1 => [ ... ], v2 => { "/$inbox_name" => [ epochs ] }}
+        $ret->{v1} = \@v1 if @v1;
+        $ret->{v2} = $v2 if keys %$v2;
+        $ret //= @orig ? "Nothing to clone, available repositories:\n\t".
+                                join("\n\t", sort @orig)
+                        : "Nothing available to clone\n";
+        my $path_pfx = '';
+
+        # PSGI mount prefixes and manifest.js.gz prefixes don't always align...
+        if (@v2_epochs) {
+                until (grep(m!\A\Q$$path\E/git/[0-9]+\.git\z!,
+                                @v2_epochs) == @v2_epochs) {
+                        $$path =~ s!\A(/[^/]+)/!/! or last;
+                        $path_pfx .= $1;
+                }
+        } elsif (@v1) {
+                while (!defined($m->{$$path}) && $$path =~ s!\A(/[^/]+)/!/!) {
+                        $path_pfx .= $1;
+                }
+        }
+        ($path_pfx, $n, $ret);
+}
+
+sub clone_all {
+        my ($self, $m) = @_;
+        my $todo = $TODO;
+        $TODO = \'BUG on further use';
+        my $end = on_destroy \&fgrp_fetch_all, $self;
+        {
+                my $nodep = delete $todo->{''};
+
+                # do not download unwanted deps
+                my $any_want = delete $self->{any_want};
+                my @unwanted = grep { !$any_want->{$_} } keys %$todo;
+                my @nodep = delete(@$todo{@unwanted});
+                push(@$nodep, @$_) for @nodep;
+
+                # handle no-dependency repos, first
+                for (@$nodep) {
+                        clone_v1($_, $end);
+                        return if !keep_going($self);
+                }
+        }
+        # resolve references, deepest, first:
+        while (scalar keys %$todo) {
+                for my $x (keys %$todo) {
+                        my ($nr, $nxt);
+                        # resolve multi-level references
+                        while ($m && defined($nxt = $m->{$x}->{reference})) {
+                                exists($todo->{$nxt}) or last;
+                                if (++$nr > 1000) {
+                                        $m->{$x}->{reference} = undef;
+                                        $m->{$nxt}->{reference} = undef;
+                                        warn <<EOM
+E: dependency loop detected (`$x' => `$nxt'), breaking
+EOM
+                                }
+                                $x = $nxt;
+                        }
+                        my $y = delete $todo->{$x} // next; # already done
+                        for (@$y) {
+                                clone_v1($_, $end);
+                                return if !keep_going($self);
+                        }
+                        last; # restart %$todo iteration
+                }
+        }
+
+        # $end->DESTROY will call fgrp_fetch_all once all references
+        # in $LIVE are gone, and do_reap will eventually drain $LIVE
+        $end = undef;
+        do_reap($self, 1);
+}
+
+sub dump_manifest ($$) {
+        my ($m, $ft) = @_;
+        # write the smaller manifest if epochs were skipped so
+        # users won't have to delete manifest if they +w an
+        # epoch they no longer want to skip
+        my $json = PublicInbox::Config->json->encode($m);
+        my $mtime = (stat($ft))[9];
+        seek($ft, 0, SEEK_SET);
+        truncate($ft, 0);
+        gzip(\$json => $ft) or die "gzip($ft): $GzipError";
+        $ft->flush or die "flush($ft): $!";
+        utime($mtime, $mtime, "$ft") or die "utime(..., $ft): $!";
+}
+
+sub dump_project_list ($$) {
+        my ($self, $m) = @_;
+        my $f = $self->{'-project-list'};
+        my $old = defined($f) ? PublicInbox::IO::try_cat($f) : '';
+        my %new;
+
+        open my $dh, '<', '.';
+        if (!$self->{dry_run} || -d $self->{dst}) {
+                chdir($self->{dst});
+        }
+        my @local = grep { -e $_ ? ($new{$_} = undef) : 1 } split(/\n/s, $old);
+        chdir($dh);
+
+        $new{substr($_, 1)} = 1 for keys %$m; # drop leading '/'
+        my @list = sort keys %new;
+        my @remote = grep { !defined($new{$_}) } @list;
+        my %lnk = map { substr($_, 1) => undef } @{$self->{-new_symlinks}};
+        @remote = grep { !exists($lnk{$_}) } @remote;
+
+        if (@remote) {
+                warn <<EOM;
+The following local repositories are ignored/gone from $self->{src}:
+EOM
+                warn "\t", $_, "\n" for @remote;
+
+                if ($self->{lei}->{opt}->{purge} && !$self->{dry_run}) {
+                        my $o = {};
+                        $o->{verbose} = 1 if $self->{lei}->{opt}->{verbose};
+                        my $dst = $self->{dst};
+                        File::Path::remove_tree(map { "$dst/$_" } @remote, $o);
+                        my %rm = map { $_ => undef } @remote;
+                        @list = grep { !exists($rm{$_}) } @list;
+                        $self->{lei}->qerr('# purged ');
+                }
+        }
+        if (defined($f) && @local) {
+                warn <<EOM;
+The following repos in $f no longer exist on the filesystem:
+EOM
+                warn "\t", $_, "\n" for @local;
+        }
+        $self->{chg}->{nr_chg} += scalar(@remote) + scalar(@local);
+        return if !defined($f) || $self->{dry_run};
+        my (undef, $dn, $bn) = File::Spec->splitpath($f);
+        my $new = join("\n", @list, '');
+        atomic_write($dn, $bn, $new) if $new ne $old;
+}
+
+# FIXME: this gets confused by single inbox instance w/ global manifest.js.gz
+sub try_manifest {
+        my ($self) = @_;
+        my $uri = URI->new($self->{src});
+        my $lei = $self->{lei};
+        my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+        $self->{-torsocks} //= $curl->torsocks($lei, $uri) or return;
+        my $path = $uri->path;
+        chop($path) eq '/' or die "BUG: $uri not canonicalized";
+        my $rmf = $lei->{opt}->{'remote-manifest'} // '/manifest.js.gz';
+        if ($rmf =~ m!\A[^/:]+://!) {
+                $uri = URI->new($rmf);
+        } else {
+                $rmf = "/$rmf" if index($rmf, '/') != 0;
+                $uri->path($path.$rmf);
+        }
+        my $manifest = $self->{-manifest} // "$self->{dst}/manifest.js.gz";
+        my %opt = (UNLINK => 1, SUFFIX => '.tmp', TMPDIR => 1);
+        if (!$self->{dry_run} && $manifest =~ m!\A(.+?)/[^/]+\z! and -d $1) {
+                $opt{DIR} = $1; # allows fast rename(2) w/o EXDEV
+                delete $opt{TMPDIR};
+        }
+        my $ft = File::Temp->new(TEMPLATE => '.manifest-XXXX', %opt);
+        my $cmd = $curl->for_uri($lei, $uri, qw(-R -o), $ft->filename);
+        push(@$cmd, '-z', $manifest) if -f $manifest;
+        my $mf_url = "$uri";
+        %opt = map { $_ => $lei->{$_} } (0..2);
+        my $cerr = run_reap($lei, $cmd, \%opt);
+        if ($cerr) {
+                return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
+                return $lei->child_error($cerr, "@$cmd failed");
+        }
+
+        # bail out if curl -z/--timecond hit 304 Not Modified, $ft will be empty
+        if (-f $manifest && !-s $ft) {
+                $lei->child_error(127 << 8) if $lei->{opt}->{'exit-code'};
+                return $lei->qerr("# $manifest unchanged");
+        }
+
+        my $m = eval { decode_manifest($ft, $ft, $uri) };
+        if ($@) {
+                warn $@;
+                return try_scrape($self);
+        }
+        local $self->{chg} = {};
+        local $self->{-local_manifest} = load_current_manifest($self);
+        local $self->{-new_symlinks} = [];
+        my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
+        return $lei->child_error(0, $multi) if !ref($multi);
+        my $v2 = delete $multi->{v2};
+        if ($v2) {
+                for my $name (sort keys %$v2) {
+                        my $epochs = delete $v2->{$name};
+                        my %v2_epochs = map {
+                                $uri->path($n > 1 ? $path_pfx.$path.$_
+                                                : $path_pfx.$_);
+                                my ($e) = ("$uri" =~ m!/([0-9]+)\.git\z!);
+                                $e // die "no [0-9]+\.git in `$uri'";
+                                $e => [ $uri->clone, $_ ];
+                        } @$epochs;
+                        ("$uri" =~ m!\A(.+/)git/[0-9]+\.git\z!) or
+                                die "BUG: `$uri' !~ m!/git/[0-9]+.git!";
+                        local $self->{cur_src} = $1;
+                        local $self->{cur_dst} = $self->{dst};
+                        if ($n > 1 && $uri->path =~ m!\A\Q$path_pfx$path\E/(.+)/
+                                                        git/[0-9]+\.git\z!x) {
+                                $self->{cur_dst} .= "/$1";
+                        }
+                        index($self->{cur_dst}, "\n") >= 0 and die <<EOM;
+E: `$self->{cur_dst}' must not contain newline
+EOM
+                        clone_v2_prep($self, \%v2_epochs, $m);
+                        return if !keep_going($self);
+                }
+        }
+        if (my $v1 = delete $multi->{v1}) {
+                my $p = $path_pfx.$path;
+                chop($p) if substr($p, -1, 1) eq '/';
+                $uri->path($p);
+                for my $name (@$v1) {
+                        my $task = bless { %$self }, __PACKAGE__;
+                        $task->{-ent} = $m->{$name} //
+                                        die("BUG: no `$name' in manifest");
+                        $task->{cur_src} = "$uri";
+                        $task->{cur_dst} = $task->{dst};
+                        $task->{-key} = $name;
+                        if ($n > 1) {
+                                $task->{cur_dst} .= $name;
+                                $task->{cur_src} .= $name;
+                        }
+                        index($task->{cur_dst}, "\n") >= 0 and die <<EOM;
+E: `$task->{cur_dst}' must not contain newline
+EOM
+                        $task->{cur_src} .= '/';
+                        my $dep = $task->{-ent}->{reference} // '';
+                        push @{$TODO->{$dep}}, $task; # for clone_all
+                        $self->{any_want}->{$name} = 1;
+                }
+        }
+        delete local $lei->{opt}->{epoch} if defined($v2);
+        clone_all($self, $m);
+        return if !keep_going($self);
+
+        # set by clone_v2_prep/-I/--exclude
+        my $mis = delete $self->{chg}->{fp_mismatch};
+        if ($mis) {
+                my $t = fmt_localtime((stat($ft))[9]);
+                warn <<EOM;
+W: Fingerprints for the following repositories do not match
+W: $mf_url @ $t:
+W: These repositories may have updated since $t:
+EOM
+                warn "\t", $_, "\n" for @$mis;
+                warn <<EOM if !$self->{lei}->{opt}->{prune};
+W: The above fingerprints may never match without --prune
+EOM
+        }
+        if ((delete($self->{chg}->{manifest}) || $mis) && !$self->{dry_run}) {
+                dump_manifest($m => $ft);
+        }
+        my $bad = delete $self->{chg}->{badlink};
+        warn(<<EOM, map { ("\t", $_, "\n") } @$bad) if $bad;
+W: The following exist and have not been converted to symlinks
+EOM
+        dump_project_list($self, $m);
+        ft_rename($ft, $manifest, 0666) if !$self->{dry_run};
+        !$self->{chg}->{nr_chg} && $lei->{opt}->{'exit-code'} and
+                $lei->child_error(127 << 8);
+}
+
+sub start_clone_url {
+        my ($self) = @_;
+        return try_manifest($self) if $self->{src} =~ m!\Ahttps?://!;
+        die "TODO: non-HTTP/HTTPS clone of $self->{src} not supported, yet";
+}
+
+sub do_mirror { # via wq_io_do or public-inbox-clone
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $self->{dry_run} = 1 if $lei->{opt}->{'dry-run'};
+        umask($lei->{client_umask}) if defined $lei->{client_umask};
+        $self->{-initial_clone} = 1 if !-d $self->{dst};
+        local @PUH;
+        if (defined(my $puh = $lei->{opt}->{'post-update-hook'})) {
+                require Text::ParseWords;
+                @PUH = map { [ Text::ParseWords::shellwords($_) ] } @$puh;
+        }
+        eval {
+                my $ic = $lei->{opt}->{'inbox-config'} //= 'always';
+                $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
+--inbox-config must be one of `always', `v2', `v1', or `never'
+
+                # we support these switches with '' (empty string).
+                # defaults match example conf distributed with grokmirror
+                my @pairs = qw(objstore objstore manifest manifest.js.gz
+                                project-list projects.list);
+                while (@pairs) {
+                        my ($k, $default) = splice(@pairs, 0, 2);
+                        my $v = $lei->{opt}->{$k} // next;
+                        $v = $default if $v eq '';
+                        $v = "$self->{dst}/$v" if $v !~ m!\A\.{0,2}/!;
+                        $self->{"-$k"} = $v;
+                }
+
+                local $LIVE = {};
+                local $TODO = {};
+                local $FGRP_TODO = {};
+                my $iv = $lei->{opt}->{'inbox-version'} //
+                        return start_clone_url($self);
+                return clone_v1($self) if $iv == 1;
+                die "bad --inbox-version=$iv\n" if $iv != 2;
+                die <<EOM if $self->{src} !~ m!://!;
+cloning local v2 inboxes not supported
+EOM
+                try_scrape($self, 1);
+        };
+        $lei->fail($@) if $@;
+}
+
+sub start {
+        my ($cls, $lei, $src, $dst) = @_;
+        my $self = bless { src => $src, dst => $dst }, $cls;
+        $lei->request_umask;
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $self->wq_io_do('do_mirror', []);
+        $self->wq_close;
+        $lei->wait_wq_events($op_c, $ops);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child;
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub write_makefile {
+        my ($dir, $ibx_ver) = @_;
+        my $f = "$dir/Makefile";
+        if (CORE::sysopen my $fh, $f, O_CREAT|O_EXCL|O_WRONLY) {
+                print $fh <<EOM;
+# This is a v$ibx_ver public-inbox, see the public-inbox-v$ibx_ver-format(5)
+# manpage for more information on the format.  This Makefile is
+# intended as a familiar wrapper for users unfamiliar with
+# public-inbox-* commands.
+#
+# See the respective manpages for public-inbox-fetch(1),
+# public-inbox-index(1), etc for more information on
+# some of the commands used by this Makefile.
+#
+# This Makefile will not be modified nor read by public-inbox,
+# so you may edit it freely with your own convenience targets
+# and notes.  public-inbox-fetch will recreate it if removed.
+EOM
+                print $fh <<'EOM';
+# the default target:
+help :
+        @echo Common targets:
+        @echo '    make fetch        - fetch from remote git repostorie(s)'
+        @echo '    make update       - fetch and update index '
+        @echo
+        @echo Rarely needed targets:
+        @echo '    make reindex      - may be needed for new features/bugfixes'
+        @echo '    make compact      - rewrite Xapian storage to save space'
+        @echo '    make index        - initial index after clone'
+
+fetch :
+        public-inbox-fetch
+update :
+        @if ! public-inbox-fetch --exit-code; \
+        then \
+                c=$$?; \
+                test $$c -eq 127 && exit 0; \
+                exit $$c; \
+        elif test -f msgmap.sqlite3 || test -f public-inbox/msgmap.sqlite3; \
+        then \
+                public-inbox-index; \
+        else \
+                echo 'public-inbox index not initialized'; \
+                echo 'see public-inbox-index(1) man page'; \
+        fi
+index :
+        public-inbox-index
+reindex :
+        public-inbox-index --reindex
+compact :
+        public-inbox-compact
+
+.PHONY : help fetch update index reindex compact
+EOM
+                close $fh;
+        } else {
+                die "open($f): $!" unless $!{EEXIST};
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/LeiNoteEvent.pm b/lib/PublicInbox/LeiNoteEvent.pm
new file mode 100644
index 00000000..8d900d0c
--- /dev/null
+++ b/lib/PublicInbox/LeiNoteEvent.pm
@@ -0,0 +1,133 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# internal command for dealing with inotify, kqueue vnodes, etc
+# it is a semi-persistent worker
+package PublicInbox::LeiNoteEvent;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::DS;
+use Errno qw(ENOENT);
+
+our $to_flush; # { cfgpath => $lei }
+
+sub flush_lei ($;$) {
+        my ($lei, $manual) = @_;
+        my $lne = delete $lei->{cfg}->{-lei_note_event} // return;
+        $lne->{lei_sock} = $lei->{sock} if $manual;
+        $lne->wq_close; # runs _lei_wq_eof;
+}
+
+# we batch up writes and flush every 5s (matching Linux default
+# writeback behavior) since MUAs can trigger a storm of inotify events
+sub flush_task { # PublicInbox::DS timer callback
+        my $todo = $to_flush // return;
+        $to_flush = undef;
+        for my $lei (values %$todo) { flush_lei($lei) }
+}
+
+sub eml_event ($$$$) {
+        my ($self, $eml, $vmd, $state) = @_;
+        my $sto = $self->{lei}->{sto};
+        if ($state =~ /\Aimport-(?:rw|ro)\z/) {
+                $sto->wq_do('set_eml', $eml, $vmd);
+        } elsif ($state =~ /\Aindex-(?:rw|ro)\z/) {
+                my $xoids = $self->{lei}->ale->xoids_for($eml);
+                $sto->wq_do('index_eml_only', $eml, $vmd, $xoids);
+        } elsif ($state =~ /\Atag-(?:rw|ro)\z/) {
+                my $docids = [];
+                my $c = eval {
+                        $self->{lse}->kw_changed($eml, $vmd->{kw}, $docids);
+                } // 1; # too new, assume changed since still to-be-committed.
+                if (scalar @$docids) { # already in lei/store
+                        $sto->wq_do('set_eml_vmd', undef, $vmd, $docids) if $c;
+                } elsif (my $xoids = $self->{lei}->ale->xoids_for($eml)) {
+                        # it's in an external, only set kw, here
+                        $sto->wq_do('set_xvmd', $xoids, $eml, $vmd);
+                } # else { totally unknown: ignore
+        } else {
+                warn "unknown state: $state (in $self->{lei}->{cfg}->{'-f'})\n";
+        }
+}
+
+sub maildir_event { # via wq_nonblock_do
+        my ($self, $fn, $vmd, $state) = @_;
+        if (my $eml = PublicInbox::InboxWritable::eml_from_path($fn)) {
+                eml_event($self, $eml, $vmd, $state);
+        } elsif ($! == ENOENT) {
+                $self->{lms}->clear_src(@{$vmd->{sync_info}});
+        } # else: eml_from_path already warns
+}
+
+sub _mh_cb { # mh_read_one cb
+        my ($dir, $bn, $kw, $eml, $self, $state) = @_;
+}
+
+sub mh_event { # via wq_nonblock_do
+        my ($self, $folder, $bn, $state) = @_;
+        my $dir = substr($folder, 3);
+        require PublicInbox::MHreader; # if we forked early
+        my $mhr = PublicInbox::MHreader->new($dir, $self->{lei}->{3});
+        $mhr->mh_read_one($bn, \&_mh_cb, $self, $state);
+}
+
+sub lei_note_event {
+        my ($lei, $folder, $new_cur, $bn, $fn, @rest) = @_;
+        die "BUG: unexpected: @rest" if @rest;
+        my $cfg = $lei->_lei_cfg or return; # gone (race)
+        my $sto = $lei->_lei_store or return; # gone
+        return flush_lei($lei, 1) if $folder eq 'done'; # special case
+        my $lms = $lei->lms or return;
+        $lms->lms_write_prepare if $new_cur eq ''; # for ->clear_src below
+        $lei->{opt}->{quiet} = 1;
+        $lms->arg2folder($lei, [ $folder ]);
+        my $state = $cfg->get_1("watch.$folder.state") // 'tag-rw';
+        return if $state eq 'pause';
+        if ($new_cur eq '') {
+                my $id = $folder =~ /\Amaildir:/ ? \$bn : $bn + 0;
+                return $lms->clear_src($folder, $id);
+        }
+        $lms->lms_pause;
+        $lei->ale; # prepare
+        $sto->write_prepare($lei);
+        require PublicInbox::MHreader if $folder =~ /\Amh:/; # optimistic
+        my $self = $cfg->{-lei_note_event} //= do {
+                my $wq = bless { lms => $lms }, __PACKAGE__;
+                # MUAs such as mutt can trigger massive rename() storms so
+                # use some CPU, but don't overwhelm slower storage, either
+                my $jobs = $wq->detect_nproc // 1;
+                $jobs = 4 if $jobs > 4; # same default as V2Writable
+                my ($op_c, $ops) = $lei->workers_start($wq, $jobs);
+                $lei->wait_wq_events($op_c, $ops);
+                PublicInbox::DS::add_uniq_timer('flush_timer', 5, \&flush_task);
+                $to_flush->{$lei->{cfg}->{'-f'}} //= $lei;
+                $wq->prepare_nonblock;
+                $lei->{lne} = $wq;
+        };
+        if ($folder =~ /\Amaildir:/i) {
+                require PublicInbox::MdirReader;
+                my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn)
+                        // return;
+                return if index($fl, 'T') >= 0;
+                my $kw = PublicInbox::MdirReader::flags2kw($fl);
+                my $vmd = { kw => $kw, sync_info => [ $folder, \$bn ] };
+                $self->wq_nonblock_do('maildir_event', $fn, $vmd, $state);
+        } elsif ($folder =~ /\Amh:/) {
+                $self->wq_nonblock_do('mh_event', $folder, $bn, $state);
+        } # else: TODO: imap
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child(1); # persistent, for a while
+        $self->{lms}->lms_write_prepare;
+        $self->{lse} = $self->{lei}->{sto}->search;
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+        $_[0]->wq_eof('lne');
+}
+
+1;
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
new file mode 100644
index 00000000..0529bbe4
--- /dev/null
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -0,0 +1,263 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# per-mitem/smsg iterators for search results
+# "ovv" => "Overview viewer"
+package PublicInbox::LeiOverview;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use POSIX qw(strftime);
+use Fcntl qw(F_GETFL O_APPEND);
+use File::Spec;
+use File::Temp ();
+use PublicInbox::MID qw($MID_EXTRACT);
+use PublicInbox::Address qw(pairs);
+use PublicInbox::Config;
+use PublicInbox::Search qw(get_pct);
+use PublicInbox::LeiDedupe;
+use PublicInbox::LeiToMail;
+
+# cf. https://en.wikipedia.org/wiki/JSON_streaming
+my $JSONL = 'ldjson|ndjson|jsonl'; # 3 names for the same thing
+
+sub iso8601 ($) { strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($_[0])) }
+
+# we open this in the parent process before ->wq_io_do handoff
+sub ovv_out_lk_init ($) {
+        my ($self) = @_;
+        my $tmp = File::Temp->new("lei-ovv.dst.$$.lock-XXXX",
+                                        TMPDIR => 1, UNLINK => 0);
+        $self->{"lk_id.$self.$$"} = $self->{lock_path} = $tmp->filename;
+}
+
+sub ovv_out_lk_cancel ($) {
+        my ($self) = @_;
+        my $lock_path = delete $self->{"lk_id.$self.$$"} or return;
+        unlink($lock_path);
+}
+
+sub detect_fmt ($) {
+        my ($dst) = @_;
+        if ($dst =~ m!\A([:/]+://)!) {
+                die "$1 support not implemented, yet\n";
+        } elsif (!-e $dst || -d _) { # maildir is the default TODO: MH
+                -e "$dst/inbox.lock" ? 'v2' : 'maildir';
+        } elsif (-f _ || -p _) {
+                die "unable to determine mbox family of $dst\n";
+        } else {
+                die "unable to determine format of $dst\n";
+        }
+}
+
+sub new {
+        my ($class, $lei, $ofmt_key) = @_;
+        my $opt = $lei->{opt};
+        my $dst = $opt->{output} // '-';
+        $dst = '/dev/stdout' if $dst eq '-';
+        $ofmt_key //= 'format';
+
+        my $fmt = $opt->{$ofmt_key};
+        $fmt = lc($fmt) if defined $fmt;
+        if ($dst =~ m!\A([a-z0-9\+]+)://!is) {
+                defined($fmt) and die <<"";
+--$ofmt_key=$fmt invalid with URL $dst
+
+                $fmt = lc $1;
+        } elsif ($dst =~ s/\A([a-z0-9]+)://is) { # e.g. Maildir:/home/user/Mail/
+                my $ofmt = lc $1;
+                $fmt //= $ofmt;
+                die <<"" if $fmt ne $ofmt;
+--$ofmt_key=$fmt and --output=$ofmt conflict
+
+        }
+        my $devfd = $lei->path_to_fd($dst) // return;
+        $fmt //= $devfd >= 0 ? 'json' : detect_fmt($dst);
+
+        if (index($dst, '://') < 0) { # not a URL, so assume path
+                 $dst = $lei->canonpath_harder($dst);
+        } # else URL
+
+        my $self = bless { fmt => $fmt, dst => $dst }, $class;
+        $lei->{ovv} = $self;
+        my $json;
+        if ($fmt =~ /\A($JSONL|(?:concat)?json)\z/) {
+                $json = $self->{json} = ref(PublicInbox::Config->json);
+        }
+        if ($devfd >= 0) {
+                my $isatty = $lei->{need_pager} = -t $lei->{$devfd};
+                $opt->{pretty} //= $isatty;
+                if (!$isatty && -f _) {
+                        my $fl = fcntl($lei->{$devfd}, F_GETFL, 0) //
+                                        die("fcntl(/dev/fd/$devfd): $!\n");
+                        ovv_out_lk_init($self) unless ($fl & O_APPEND);
+                } else {
+                        ovv_out_lk_init($self);
+                }
+        } elsif (!$opt->{quiet}) {
+                $lei->{-progress} = 1;
+        }
+        if ($json) {
+                $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei);
+        } else {
+                $lei->{l2m} = PublicInbox::LeiToMail->new($lei);
+                if ($opt->{mua} && $lei->{l2m}->lock_free) {
+                        $lei->{early_mua} = 1;
+                        $opt->{alert} //= [ ':WINCH,:bell' ] if -t $lei->{1};
+                }
+        }
+        die("--shared is only for v2 inbox output\n") if
+                $self->{fmt} ne 'v2' && $lei->{opt}->{shared};
+        $self;
+}
+
+# called once by parent
+sub ovv_begin {
+        my ($self, $lei) = @_;
+        if ($self->{fmt} eq 'json') {
+                $lei->out('[');
+        } # TODO HTML/Atom/...
+}
+
+# called once by parent (via PublicInbox::PktOp  '' => query_done)
+sub ovv_end {
+        my ($self, $lei) = @_;
+        if ($self->{fmt} eq 'json') {
+                # JSON doesn't allow trailing commas, and preventing
+                # trailing commas is a PITA when parallelizing outputs
+                $lei->out("null]\n");
+        } elsif ($self->{fmt} eq 'concatjson') {
+                $lei->out("\n");
+        }
+}
+
+# prepares an smsg for JSON
+sub _unbless_smsg {
+        my ($smsg, $mitem) = @_;
+
+        # TODO: make configurable
+        # num/tid are nonsensical with multi-inbox search,
+        # lines/bytes are not generally useful
+        delete @$smsg{qw(num tid lines bytes)};
+        $smsg->{rt} = iso8601(delete $smsg->{ts}); # JMAP receivedAt
+        $smsg->{dt} = iso8601(delete $smsg->{ds}); # JMAP UTCDate
+        $smsg->{pct} = get_pct($mitem) if $mitem;
+        if (my $r = delete $smsg->{references}) {
+                @{$smsg->{refs}} = ($r =~ m/$MID_EXTRACT/go);
+        }
+        if (my $m = delete($smsg->{mid})) {
+                $smsg->{'m'} = $m;
+        }
+        for my $f (qw(from to cc)) {
+                my $v = delete $smsg->{$f} or next;
+                $smsg->{substr($f, 0, 1)} = pairs($v);
+        }
+        $smsg->{'s'} = delete $smsg->{subject};
+        my $kw = delete($smsg->{kw});
+        scalar { %$smsg, ($kw && scalar(@$kw) ? (kw => $kw) : ()) }; # unbless
+}
+
+sub ovv_atexit_child {
+        my ($self, $lei) = @_;
+        if (my $bref = delete $lei->{ovv_buf}) {
+                my $lk = $self->lock_for_scope;
+                $lei->out($$bref);
+        }
+}
+
+# JSON module ->pretty output wastes too much vertical white space,
+# this (IMHO) provides better use of screen real-estate while not
+# being excessively compact:
+sub _json_pretty {
+        my ($json, $k, $v) = @_;
+        if (ref $v eq 'ARRAY') {
+                if (@$v) {
+                        my $sep = ",\n" . (' ' x (length($k) + 7));
+                        if (ref($v->[0])) { # f/t/c
+                                $v = '[' . join($sep, map {
+                                        my $pair = $json->encode($_);
+                                        $pair =~ s/(null|"),"/$1, "/g;
+                                        $pair;
+                                } @$v) . ']';
+                        } elsif ($k eq 'kw') { # keywords are short, one-line
+                                $v = $json->encode($v);
+                                $v =~ s/","/", "/g;
+                        } else { # refs, labels, ...
+                                $v = '[' . join($sep, map {
+                                        substr($json->encode([$_]), 1, -1);
+                                } @$v) . ']';
+                        }
+                } else {
+                        $v = '[]';
+                }
+        }
+        qq{  "$k": }.$v;
+}
+
+sub ovv_each_smsg_cb { # runs in wq worker usually
+        my ($self, $lei) = @_;
+        my ($json, $dedupe);
+        if (my $pkg = $self->{json}) {
+                $json = $pkg->new;
+                $json->utf8->canonical;
+                $json->ascii(1) if $lei->{opt}->{ascii};
+        }
+        my $l2m = $lei->{l2m};
+        if (!$l2m) {
+                $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
+                $dedupe->prepare_dedupe;
+        }
+        $lei->{ovv_buf} = \(my $buf = '') if !$l2m;
+        if ($l2m) {
+                sub {
+                        my ($smsg, $mitem, $eml) = @_;
+                        $smsg->{pct} = get_pct($mitem) if $mitem;
+                        eval { $l2m->wq_io_do('write_mail', [], $smsg, $eml) };
+                        $lei->fail($@) if $@ && !$!{ECONNRESET} && !$!{EPIPE};
+                }
+        } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
+                my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
+                my $lse = $lei->{lse};
+                sub { # DIY prettiness :P
+                        my ($smsg, $mitem) = @_;
+                        return if $dedupe->is_smsg_dup($smsg);
+                        $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1);
+                        $smsg = _unbless_smsg($smsg, $mitem);
+                        $buf .= "{\n";
+                        $buf .= join(",\n", map {
+                                my $v = $smsg->{$_};
+                                if (ref($v)) {
+                                        _json_pretty($json, $_, $v);
+                                } else {
+                                        $v = $json->encode([$v]);
+                                        qq{  "$_": }.substr($v, 1, -1);
+                                }
+                        } sort keys %$smsg);
+                        $buf .= $EOR;
+                        return if length($buf) < 65536;
+                        my $lk = $self->lock_for_scope;
+                        $lei->out($buf);
+                        $buf = '';
+                }
+        } elsif ($json) {
+                my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL
+                my $lse = $lei->{lse};
+                sub {
+                        my ($smsg, $mitem) = @_;
+                        return if $dedupe->is_smsg_dup($smsg);
+                        $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1);
+                        $buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
+                        return if length($buf) < 65536;
+                        my $lk = $self->lock_for_scope;
+                        $lei->out($buf);
+                        $buf = '';
+                }
+        } else {
+                die "TODO: unhandled case $self->{fmt}"
+        }
+}
+
+no warnings 'once';
+*DESTROY = \&ovv_out_lk_cancel;
+
+1;
diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm
new file mode 100644
index 00000000..68faa016
--- /dev/null
+++ b/lib/PublicInbox/LeiP2q.pm
@@ -0,0 +1,198 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# front-end for the "lei patch-to-query" sub-command
+package PublicInbox::LeiP2q;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::Eml;
+use PublicInbox::Smsg;
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::Git qw(git_unquote);
+use PublicInbox::OnDestroy;
+use URI::Escape qw(uri_escape_utf8);
+my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!;
+
+sub xphrase ($) {
+        my ($s) = @_;
+        return () unless $s =~ /\S/;
+        # cf. xapian-core/queryparser/queryparser.lemony
+        # [\./:\\\@] - is_phrase_generator (implicit phrase search)
+        # FIXME not really sure about these..., we basically want to
+        # extract the longest phrase possible that Xapian can handle
+        map {
+                s/\A\s*//;
+                s/\s+\z//;
+                m![^\./:\\\@\-\w]! ? qq("$_") : $_ ;
+        } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g);
+}
+
+sub add_qterm ($$@) {
+        my ($self, $p, @v) = @_;
+        for (@v) {
+                $self->{qseen}->{"$p\0$_"} //=
+                        push(@{$self->{qterms}->{$p}}, $_);
+        }
+}
+
+sub extract_terms { # eml->each_part callback
+        my ($p, $self) = @_;
+        my $part = $p->[0]; # ignore $depth and @idx;
+        my $ct = $part->content_type || 'text/plain';
+        my ($s, undef) = msg_part_text($part, $ct);
+        defined $s or return;
+        my $in_diff;
+        # TODO: b: nq: q:
+        for (split(/\n/, $s)) {
+                if ($in_diff && s/^ //) { # diff context
+                        add_qterm($self, 'dfctx', xphrase($_));
+                } elsif (/^-- $/) { # email signature begins
+                        $in_diff = undef;
+                } elsif (m!^diff --git $FN $FN!) {
+                        # wait until "---" and "+++" to capture filenames
+                        $in_diff = 1;
+                } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) {
+                        my ($oa, $ob) = ($1, $2);
+                        add_qterm($self, 'dfpre', $oa);
+                        add_qterm($self, 'dfpost', $ob);
+                        # who uses dfblob?
+                } elsif (m!^(?:---|\+{3}) ($FN)!) {
+                        next if $1 eq '/dev/null';
+                        my $fn = (split(m!/!, git_unquote($1.''), 2))[1];
+                        add_qterm($self, 'dfn', xphrase($fn));
+                } elsif ($in_diff && s/^\+//) { # diff added
+                        add_qterm($self, 'dfb', xphrase($_));
+                } elsif ($in_diff && s/^-//) { # diff removed
+                        add_qterm($self, 'dfa', xphrase($_));
+                } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) {
+                        # traditional diff w/o -p
+                } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) {
+                        add_qterm($self, 'dfhh', xphrase($1));
+                } elsif (/^(?:dis)similarity index/ ||
+                                /^(?:old|new) mode/ ||
+                                /^(?:deleted|new) file mode/ ||
+                                /^(?:copy|rename) (?:from|to) / ||
+                                /^(?:dis)?similarity index / ||
+                                /^\\ No newline at end of file/ ||
+                                /^Binary files .* differ/) {
+                } elsif ($_ eq '') {
+                        # possible to be in diff context, some mail may be
+                        # stripped by MUA or even GNU diff(1).  "git apply"
+                        # treats a bare "\n" as diff context, too
+                } else {
+                        $in_diff = undef;
+                }
+        }
+}
+
+my %pfx2smsg = (
+        t => [ qw(to) ],
+        c => [ qw(cc) ],
+        f => [ qw(from) ],
+        tc => [ qw(to cc) ],
+        tcf => [ qw(to cc from) ],
+        a => [ qw(to cc from) ],
+        s => [ qw(subject) ],
+        bs => [ qw(subject) ], # body handled elsewhere
+        d => [ qw(ds) ], # nonsense?
+        dt => [ qw(ds) ], # ditto...
+        rt => [ qw(ts) ], # ditto...
+);
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml) = @_;
+        my $diff_want = $self->{diff_want} // do {
+                my $want = $self->{lei}->{opt}->{want} // [ qw(dfpost7) ];
+                my @want = split(/[, ]+/, "@$want");
+                for (@want) {
+                        /\A(?:(d|dt|rt):)?([0-9]+)(\.(?:day|weeks)s?)?\z/
+                                or next;
+                        my ($pfx, $n, $unit) = ($1, $2, $3);
+                        $n *= 86400 * ($unit =~ /week/i ? 7 : 1);
+                        $_ = [ $pfx, $n ];
+                }
+                $self->{want_order} = \@want;
+                $self->{diff_want} = +{ map { $_ => 1 } @want };
+        };
+        my $smsg = bless {}, 'PublicInbox::Smsg';
+        $smsg->populate($eml);
+        while (my ($pfx, $fields) = each %pfx2smsg) {
+                next unless $diff_want->{$pfx};
+                for my $f (@$fields) {
+                        my $v = $smsg->{$f} // next;
+                        add_qterm($self, $pfx, xphrase($v));
+                }
+        }
+        $eml->each_part(\&extract_terms, $self, 1);
+}
+
+sub emit_query {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        if ($lei->{opt}->{debug}) {
+                my $json = ref(PublicInbox::Config->json)->new;
+                $json->utf8->canonical->pretty;
+                print { $lei->{2} } $json->encode($self->{qterms});
+        }
+        my (@q, %seen);
+        for my $pfx (@{$self->{want_order}}) {
+                if (ref($pfx) eq 'ARRAY') {
+                        my ($p, $t_range) = @$pfx; # TODO
+
+                } elsif ($pfx =~ m!\A(?:OR|XOR|AND|NOT)\z! ||
+                                $pfx =~ m!\A(?:ADJ|NEAR)(?:/[0-9]+)?\z!) {
+                        push @q, $pfx;
+                } else {
+                        my $plusminus = ($pfx =~ s/\A([\+\-])//) ? $1 : '';
+                        my $end = ($pfx =~ s/([0-9\*]+)\z//) ? $1 : '';
+                        my $x = delete($self->{qterms}->{$pfx}) or next;
+                        my $star = $end =~ tr/*//d ? '*' : '';
+                        my $min_len = ($end || 0) + 0;
+
+                        # no wildcards for bool_pfx_external
+                        $star = '' if $pfx =~ /\A(dfpre|dfpost|mid)\z/;
+                        $pfx = "$plusminus$pfx:";
+                        if ($min_len) {
+                                push @q, map {
+                                        my @t = ($pfx.$_.$star);
+                                        while (length > $min_len) {
+                                                chop $_;
+                                                push @t, 'OR', $pfx.$_.$star;
+                                        }
+                                        @t;
+                                } @$x;
+                        } else {
+                                push @q, map {
+                                        my $k = $pfx.$_.$star;
+                                        $seen{$k}++ ? () : $k
+                                } @$x;
+                        }
+                }
+        }
+        if ($lei->{opt}->{uri}) {
+                @q = (join('+', map { uri_escape_utf8($_) } @q));
+        } else {
+                @q = (join(' ', @q));
+        }
+        $lei->out(@q, "\n");
+}
+
+sub lei_p2q { # the "lei patch-to-query" entry point
+        my ($lei, @inputs) = @_;
+        $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin};
+        my $self = bless { missing_ok => 1 }, __PACKAGE__;
+        $self->prepare_inputs($lei, \@inputs) or return;
+        $lei->wq1_start($self);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        PublicInbox::LeiInput::input_only_atfork_child($self);
+        on_destroy \&emit_query, $self;
+}
+
+no warnings 'once';
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiPmdir.pm b/lib/PublicInbox/LeiPmdir.pm
new file mode 100644
index 00000000..d4aa0212
--- /dev/null
+++ b/lib/PublicInbox/LeiPmdir.pm
@@ -0,0 +1,54 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# WQ worker for dealing with parallel Maildir reads;
+# this does NOT use the {shard_info} field of LeiToMail
+# (and we may remove {shard_info})
+# WQ key: {pmd}
+package PublicInbox::LeiPmdir;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+
+sub new {
+        my ($cls, $lei, $ipt) = @_;
+        my $self = bless { -wq_ident => 'lei Maildir worker' }, $cls;
+        my $jobs = $lei->{opt}->{jobs} // '';
+        $jobs =~ /\A[0-9]+,([0-9]+)\z/ and $jobs = $1;
+        my $nproc = $jobs || do {
+                # barely tested with >=4 CPUs, though I suspect I/O latency
+                # of SATA SSD storage will make >=4 processes unnecessary,
+                # here.  NVMe users may wish to use '-j'
+                my $n = $self->detect_nproc;
+                $n = $n > 4 ? 4 : $n;
+        };
+        my ($op_c, $ops) = $lei->workers_start($self, $nproc,
+                undef, { ipt => $ipt }); # LeiInput subclass
+        $op_c->{ops} = $ops; # for PktOp->event_step
+        $self->{lei_sock} = $lei->{sock}; # keep client for pmd_done_wait
+        $lei->{pmd} = $self;
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $ipt = $self->{ipt} // die 'BUG: no self->{ipt}';
+        my $lei = $ipt->{lei} = $self->{lei};
+        delete @$lei{qw(auth net)}; # no network access in this worker
+        $ipt->ipc_atfork_child; # calls _lei_atfork_child;
+}
+
+sub each_mdir_fn { # maildir_each_file callback
+        my ($f, $fl, $self, @args) = @_;
+        $self->wq_io_do('mdir_iter', [], $f, $fl, @args);
+}
+
+sub mdir_iter { # via wq_io_do
+        my ($self, $f, $fl, @args) = @_;
+        $self->{ipt}->pmdir_cb($f, $fl, @args);
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+        $_[0]->wq_eof('pmd');
+}
+
+1;
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
new file mode 100644
index 00000000..eadf811f
--- /dev/null
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -0,0 +1,241 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# handles "lei q" command and provides internals for
+# several other sub-commands (up, lcat, ...)
+package PublicInbox::LeiQuery;
+use v5.12;
+
+sub prep_ext { # externals_each callback
+        my ($lxs, $exclude, $loc) = @_;
+        $lxs->prepare_external($loc) unless $exclude->{$loc};
+}
+
+sub _start_query { # used by "lei q" and "lei up"
+        my ($self) = @_;
+        require PublicInbox::LeiOverview;
+        PublicInbox::LeiOverview->new($self) or return;
+        my $opt = $self->{opt};
+        require PublicInbox::OverIdx; # loads DBI
+        PublicInbox::OverIdx::fork_ok($opt);
+        my ($xj, $mj) = split(/,/, $opt->{jobs} // '');
+        (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) and
+                die "`$xj' search jobs must be >= 1\n";
+        my $lxs = $self->{lxs};
+        $xj ||= $lxs->concurrency($opt); # allow: "--jobs ,$WRITER_ONLY"
+        my $nproc = $lxs->detect_nproc || 1; # don't memoize, schedtool(1) exists
+        $xj = $nproc if $xj > $nproc;
+        $lxs->{-wq_nr_workers} = $xj;
+        (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) and
+                die "`$mj' writer jobs must be >= 1\n";
+        my $l2m = $self->{l2m};
+        # we use \1 (a ref) to distinguish between default vs. user-supplied
+        if ($l2m && grep { $opt->{$_} //= \1 } (qw(mail-sync import-remote
+                                                        import-before))) {
+                $self->_lei_store(1)->write_prepare($self);
+                if ($opt->{'mail-sync'}) {
+                        my $lms = $l2m->{-lms_rw} = $self->lms(1);
+                        $lms->lms_write_prepare->lms_pause; # just create
+                }
+        }
+        $l2m and $l2m->{-wq_nr_workers} //= $mj // do {
+                # keep some CPU for git, and don't overload IMAP destinations
+                my $n = int($nproc * 0.75 + 0.5);
+                $self->{net} && $n > 4 ? 4 : $n;
+        };
+
+        # descending docid order is cheapest, MUA controls sorting order
+        $self->{mset_opt}->{relevance} //= -2 if $l2m || $opt->{threads};
+
+        my $tot = $self->{mset_opt}->{total} //= $self->{opt}->{limit} // 10000;
+        $self->{mset_opt}->{limit} = $tot > 10000 ? 10000 : $tot;
+        $self->{mset_opt}->{offset} //= 0;
+        $self->{mset_opt}->{threads} //= $opt->{threads};
+
+        if ($self->{net}) {
+                require PublicInbox::LeiAuth;
+                $self->{auth} = PublicInbox::LeiAuth->new
+        }
+        $lxs->do_query($self);
+}
+
+sub do_qry { # do_env cb
+        my ($lei) = @_;
+        $lei->{mset_opt}->{q_raw} = $lei->{mset_opt}->{qstr}
+                                                = delete $lei->{stdin_buf};
+        $lei->{lse}->query_approxidate($lei->{lse}->git,
+                                        $lei->{mset_opt}->{qstr});
+        _start_query($lei);
+}
+
+# make the URI||PublicInbox::{Inbox,ExtSearch} a config-file friendly string
+sub cfg_ext ($) {
+        my ($x) = @_;
+        $x->isa('URI') ? "$x" : ($x->{inboxdir} // $x->{topdir});
+}
+
+sub lxs_prepare {
+        my ($self) = @_;
+        require PublicInbox::LeiXSearch;
+        # prepare any number of LeiXSearch || LeiSearch || Inbox || URL
+        my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
+        my $opt = $self->{opt};
+        my @only = @{$opt->{only} // []};
+        # --local is enabled by default unless --only is used
+        # we'll allow "--only $LOCATION --local"
+        my $sto = $self->_lei_store(1);
+        $self->{lse} = $sto->search;
+        if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
+                $lxs->prepare_external($self->{lse});
+        }
+        if (@only) {
+                my $only;
+                for my $loc (@only) {
+                        my @loc = $self->get_externals($loc) or return;
+                        for (@loc) {
+                                my $x = $lxs->prepare_external($_);
+                                push(@$only, cfg_ext($x)) if $x;
+                        }
+                }
+                $opt->{only} = $only if $only;
+        } else {
+                my (@ilocals, @iremotes, $incl);
+                for my $loc (@{$opt->{include} // []}) {
+                        my @loc = $self->get_externals($loc) or return;
+                        for (@loc) {
+                                my $x = $lxs->prepare_external($_);
+                                push(@$incl, cfg_ext($x)) if $x;
+                        }
+                        @ilocals = @{$lxs->{locals} // []};
+                        @iremotes = @{$lxs->{remotes} // []};
+                }
+                $opt->{include} = $incl if $incl;
+                # --external is enabled by default, but allow --no-external
+                if ($opt->{external} //= 1) {
+                        my $ex = $self->canonicalize_excludes($opt->{exclude});
+                        my @excl = keys %$ex;
+                        $opt->{exclude} = \@excl if scalar(@excl);
+                        $self->externals_each(\&prep_ext, $lxs, $ex);
+                        $opt->{remote} //= !($lxs->locals - $opt->{'local'});
+                        $lxs->{locals} = \@ilocals if !$opt->{'local'};
+                        $lxs->{remotes} = \@iremotes if !$opt->{remote};
+                }
+        }
+        ($lxs->locals || $lxs->remotes) ? ($self->{lxs} = $lxs) :
+                die("no local or remote inboxes to search\n");
+}
+
+# the main "lei q SEARCH_TERMS" method
+sub lei_q {
+        my ($self, @argv) = @_;
+        PublicInbox::Config->json; # preload before forking
+        my $lxs = lxs_prepare($self) or return;
+        $self->ale->refresh_externals($lxs, $self);
+        my $opt = $self->{opt};
+        my %mset_opt;
+        $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
+        if (defined(my $sort = $opt->{'sort'})) {
+                if ($sort eq 'relevance') {
+                        $mset_opt{relevance} = 1;
+                } elsif ($sort eq 'docid') {
+                        $mset_opt{relevance} = $mset_opt{asc} ? -1 : -2;
+                } elsif ($sort =~ /\Areceived(?:-?[aA]t)?\z/) {
+                        # the default
+                } else {
+                        die "unrecognized --sort=$sort\n";
+                }
+                $opt->{save} and return
+                        $self->fail('--save and --sort are incompatible');
+        }
+        $self->{mset_opt} = \%mset_opt;
+
+        if ($opt->{stdin}) {
+                return $self->fail(<<'') if @argv;
+no query allowed on command-line with --stdin
+
+                return $self->slurp_stdin(\&do_qry);
+        }
+        chomp(@argv) and $self->qerr("# trailing `\\n' removed");
+        $mset_opt{q_raw} = [ @argv ]; # copy
+        $mset_opt{qstr} =
+                $self->{lse}->query_argv_to_string($self->{lse}->git, \@argv);
+        _start_query($self);
+}
+
+# shell completion helper called by lei__complete
+sub _complete_q {
+        my ($self, @argv) = @_;
+        join('', @argv) =~ /\bL:\S*\z/ and
+                return eval { $self->_lei_store->search->all_terms('L') };
+        my @cur;
+        my $cb = $self->lazy_cb(qw(forget-external _complete_));
+        while (@argv) {
+                if ($argv[-1] =~ /\A(?:-I|(?:--(?:include|exclude|only)))\z/) {
+                        my @c = $cb->($self, @cur);
+                        # try basename match:
+                        if (scalar(@cur) == 1 && index($cur[0], '/') < 0) {
+                                my $all = $self->externals_each;
+                                my %bn;
+                                for my $loc (keys %$all) {
+                                        my $bn = (split(m!/!, $loc))[-1];
+                                        ++$bn{$bn};
+                                }
+                                push @c, grep {
+                                        $bn{$_} == 1 && /\A\Q$cur[0]/
+                                } keys %bn;
+                        }
+                        return @c if @c;
+                }
+                unshift(@cur, pop @argv);
+        }
+        ();
+}
+
+# Stuff we may pass through to curl (as of 7.64.0), see curl manpage for
+# details, so most options which make sense for HTTP/HTTPS (including proxy
+# support for Tor and other methods of getting past weird networks).
+# Most of these are untested by us, some may not make sense for our use case
+# and typos below are likely.
+# n.b. some short options (-$NUMBER) are not supported since they conflict
+# with other "lei q" switches.
+# FIXME: Getopt::Long doesn't easily let us support support options with
+# '.' in them (e.g. --http1.1)
+# TODO: should we depend on "-c http.*" options for things which have
+# analogues in git(1)? that would reduce likelihood of conflicts with
+# our other CLI options
+# Note: some names are renamed to avoid potential conflicts,
+# see %lei2curl in lib/PublicInbox/LeiCurl.pm
+sub curl_opt { qw(
+        curl-config=s@
+        abstract-unix-socket=s anyauth basic cacert=s capath=s
+        cert-status cert-type cert=s ciphers=s
+        connect-timeout=s connect-to=s cookie-jar=s cookie=s crlfile=s
+        digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s
+        dns-servers=s doh-url=s egd-file=s engine=s false-start
+        happy-eyeballs-timeout-ms=s haproxy-protocol header=s@
+        http2-prior-knowledge http2 insecure
+        interface=s ipv4 ipv6 junk-session-cookies
+        key-type=s key=s limit-rate=s local-port=s location-trusted location
+        max-redirs=i max-time=s negotiate netrc-file=s netrc-optional netrc
+        no-alpn no-buffer no-npn no-sessionid noproxy=s ntlm-wb ntlm
+        pass=s pinnedpubkey=s post301 post302 post303 preproxy=s
+        proxy-anyauth proxy-basic proxy-cacert=s proxy-capath=s
+        proxy-cert-type=s proxy-cert=s proxy-ciphers=s proxy-crlfile=s
+        proxy-digest proxy-header=s@ proxy-insecure
+        proxy-key-type=s proxy-key proxy-negotiate proxy-ntlm proxy-pass=s
+        proxy-pinnedpubkey=s proxy-service-name=s proxy-ssl-allow-beast
+        proxy-tls13-ciphers=s proxy-tlsauthtype=s proxy-tlspassword=s
+        proxy-tlsuser=s proxy-tlsv1 proxy-user=s proxy=s
+        proxytunnel=s pubkey=s random-file=s referer=s resolve=s
+        retry-connrefused retry-delay=s retry-max-time=s retry=i
+        sasl-ir service-name=s socks4=s socks4a=s socks5-basic
+        socks5-gssapi-service-name=s socks5-gssapi socks5-hostname=s socks5=s
+        speed-limit speed-type ssl-allow-beast sslv2 sslv3
+        suppress-connect-headers tcp-fastopen tls-max=s
+        tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s
+        tlsv1 trace-ascii=s trace-time trace=s
+        unix-socket=s user-agent=s user=s
+)
+}
+
+1;
diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm
new file mode 100644
index 00000000..66359dd4
--- /dev/null
+++ b/lib/PublicInbox/LeiRediff.pm
@@ -0,0 +1,299 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# The "lei rediff" sub-command, regenerates diffs with new options
+package PublicInbox::LeiRediff;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use PublicInbox::Spawn qw(run_wait popen_wr which);
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff;
+use PublicInbox::LeiBlob;
+use PublicInbox::Git qw(git_quote git_unquote);
+use PublicInbox::Import;
+use PublicInbox::LEI;
+use PublicInbox::SolverGit;
+
+my $MODE = '(100644|120000|100755|160000)';
+
+sub rediff_user_cb { # called by solver when done
+        my ($res, $self) = @_;
+        my $lei = $self->{lei};
+        my $log_buf = delete $lei->{log_buf};
+        $$log_buf =~ s/^/# /sgm;
+        ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf);
+        $lei->qerr($$log_buf);
+        my ($git, $oid, $type, $size, $di) = @$res;
+        my $oid_want = delete $self->{cur_oid_want};
+
+        # don't try to support all the git-show(1) options for non-blob,
+        # this is just a convenience:
+        $type ne 'blob' and return warn(<<EOF);
+# $oid is a $type of $size bytes in:
+# $git->{git_dir} (wanted: $oid_want)
+EOF
+        $self->{blob}->{$oid_want} = $oid;
+        push @{$self->{gits}}, $git if $git->{-tmp};
+}
+
+# returns a full blob for oid_want
+sub solve_1 ($$$) {
+        my ($self, $oid_want, $hints) = @_;
+        return if $oid_want =~ /\A0+\z/;
+        $self->{cur_oid_want} = $oid_want;
+        my $solver = bless {
+                gits => $self->{gits},
+                user_cb => \&rediff_user_cb,
+                uarg => $self,
+                inboxes => [ $self->{lxs}->locals, @{$self->{rmt}} ],
+        }, 'PublicInbox::SolverGit';
+        open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
+        $self->{lei}->{log_buf} = \$log_buf;
+        local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+        $solver->solve($self->{lei}->{env}, $log, $oid_want, $hints);
+        $self->{blob}->{$oid_want}; # full OID
+}
+
+sub _lei_diff_prepare ($$) {
+        my ($lei, $cmd) = @_;
+        my $opt = $lei->{opt};
+        push @$cmd, '--'.($opt->{color} && !$opt->{'no-color'} ? '' : 'no-').
+                        'color';
+        for my $o (@PublicInbox::LEI::diff_opt) {
+                my $c = '';
+                # remove single char short option
+                $o =~ s/\|([a-z0-9])\b//i and $c = $1;
+                if ($o =~ s/=[is]@\z//) {
+                        my $v = $opt->{$o} or next;
+                        push @$cmd, map { $c ? "-$c$_" : "--$o=$_" } @$v;
+                } elsif ($o =~ s/=[is]\z//) {
+                        my $v = $opt->{$o} // next;
+                        push @$cmd, $c ? "-$c$v" : "--$o=$v";
+                } elsif ($o =~ s/:[is]\z//) {
+                        my $v = $opt->{$o} // next;
+                        push @$cmd, $c ? "-$c$v" :
+                                        ($v eq '' ? "--$o" : "--$o=$v");
+                } elsif ($o =~ s/!\z//) {
+                        my $v = $opt->{$o} // next;
+                        push @$cmd, $v ? "--$o" : "--no-$o";
+                } elsif ($opt->{$o}) {
+                        push @$cmd, $c ? "-$c" : "--$o";
+                }
+        }
+        push(@$cmd, "-O$opt->{'order-file'}") if $opt->{'order-file'};
+}
+
+sub diff_ctxq ($$) {
+        my ($self, $ctxq) = @_;
+        return unless $ctxq;
+        my $blob = $self->{blob};
+        my $ta = <<'EOM';
+reset refs/heads/A
+commit refs/heads/A
+author <a@s> 0 +0000
+committer <c@s> 0 +0000
+data 0
+EOM
+        my $tb = $ta;
+        $tb =~ tr!A!B!;
+        my $lei = $self->{lei};
+        while (my ($oid_a, $oid_b, $pa, $pb, $ma, $mb) = splice(@$ctxq, 0, 6)) {
+                my $xa = $blob->{$oid_a} //= solve_1($self, $oid_a,
+                                                        { path_b => $pa });
+                my $xb = $blob->{$oid_b} //= solve_1($self, $oid_b, {
+                                                oid_a => $oid_a,
+                                                path_a => $pa,
+                                                path_b => $pb
+                                        });
+                $ta .= "M $ma $xa ".git_quote($pa)."\n" if $xa;
+                $tb .= "M $mb $xb ".git_quote($pb)."\n" if $xb;
+        }
+        my $rw = $self->{gits}->[-1]; # has all known alternates
+        if (!$rw->{-tmp}) {
+                my $d = "$self->{rdtmp}/for_tree.git";
+                -d $d or PublicInbox::Import::init_bare($d);
+                # always overwrite
+                PublicInbox::IO::write_file '>', "$d/objects/info/alternates",
+                        map { $_->git_path('objects')."\n" } @{$self->{gits}};
+                $rw = PublicInbox::Git->new($d);
+        }
+        my $w = popen_wr($rw->cmd(qw(fast-import
+                                --quiet --done --date-format=raw)),
+                        $lei->{env}, { 2 => $lei->{2} });
+        print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!";
+        $w->close or die "close w fast-import: \$?=$? \$!=$!";
+
+        my $cmd = $rw->cmd('diff');
+        _lei_diff_prepare($lei, $cmd);
+        $lei->qerr("# git @$cmd[2..$#$cmd]");
+        push @$cmd, qw(A B);
+        run_wait($cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} }) and
+                $lei->child_error($?); # for git diff --exit-code
+        undef;
+}
+
+# awaitpid callback
+sub wait_requote { $_[1]->child_error($?) if $? }
+
+sub requote ($$) { # '> ' prefix(es) lei->{1}
+        my ($lei, $pfx) = @_;
+        my $opt = { 1 => $lei->{1}, 2 => $lei->{2} };
+        # $^X (perl) is overkill, but maybe there's a weird system w/o sed
+        my $w = popen_wr([$^X, '-pe', "s/^/$pfx/"], $lei->{env}, $opt,
+                         \&wait_requote, $lei);
+        binmode $w, ':utf8';
+        $w;
+}
+
+sub extract_oids { # Eml each_part callback
+        my ($ary, $self) = @_;
+        my $lei = $self->{lei};
+        my ($p, undef, $idx) = @$ary;
+        $lei->out($p->header_obj->as_string, "\n");
+        my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain');
+        defined $s or return;
+
+        $self->{dqre} && $s =~ s/$self->{dqre}//g && $lei->{opt}->{drq} and
+                local $lei->{1} = requote($lei, $1);
+
+        my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $s);
+        undef $s;
+        my $blobs = $self->{blobs}; # blobs to resolve
+        my $ctxq;
+        while (defined(my $x = shift @top)) {
+                if (scalar(@top) >= 4 &&
+                                $top[1] =~ $PublicInbox::ViewDiff::IS_OID &&
+                                $top[0] =~ $PublicInbox::ViewDiff::IS_OID) {
+                        my ($ma, $mb);
+                        $x =~ /^old mode $MODE/sm and $ma = $1;
+                        $x =~ /^new mode $MODE/sm and $mb = $1;
+                        if (!defined($ma) && $x =~
+                                /^index [a-z0-9]+\.\.[a-z0-9]+ $MODE/sm) {
+                                $ma = $mb = $1;
+                        }
+                        $ma //= '100644';
+                        $mb //= $ma;
+                        my ($oid_a, $oid_b, $pa, $pb) = splice(@top, 0, 4);
+                        $pa eq '/dev/null' or
+                                $pa = (split(m'/', git_unquote($pa), 2))[1];
+                        $pb eq '/dev/null' or
+                                $pb = (split(m'/', git_unquote($pb), 2))[1];
+                        $blobs->{$oid_a} //= undef;
+                        $blobs->{$oid_b} //= undef;
+                        push @$ctxq, $oid_a, $oid_b, $pa, $pb, $ma, $mb;
+                } elsif ($ctxq) {
+                        my @out;
+                        for (split(/^/sm, $x)) {
+                                if (/\A-- \r?\n/s) { # email sig starts
+                                        push @out, $_;
+                                        $ctxq = diff_ctxq($self, $ctxq);
+                                } elsif ($ctxq && (/\A[\+\- ]/ || /\A@@ / ||
+                                        # allow totally blank lines w/o leading
+                                        # SP, git-apply does:
+                                                        /\A\r?\n/s)) {
+                                        next;
+                                } else {
+                                        push @out, $_;
+                                }
+                        }
+                        $self->{lei}->out(@out) if @out;
+                } else {
+                        $ctxq = diff_ctxq($self, $ctxq);
+                        $self->{lei}->out($x);
+                }
+        }
+        $ctxq = diff_ctxq($self, $ctxq);
+}
+
+# ensure dequoted parts are available for rebuilding patches:
+sub dequote_add { # Eml each_part callback
+        my ($ary, $self) = @_;
+        my ($p, undef, $idx) = @$ary;
+        my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain');
+        defined $s or return;
+        if ($s =~ s/$self->{dqre}//g) { # remove '> ' prefix(es)
+                substr($s, 0, 0, "part-dequoted: $idx\n\n");
+                utf8::encode($s);
+                $self->{tmp_sto}->add_eml(PublicInbox::Eml->new(\$s));
+        }
+}
+
+sub input_eml_cb { # callback for all emails
+        my ($self, $eml) = @_;
+        {
+                local $SIG{__WARN__} = sub {
+                        return if "@_" =~ /^no email in From: .*? or Sender:/;
+                        return if PublicInbox::Eml::warn_ignore(@_);
+                        warn @_;
+                };
+                $self->{tmp_sto}->add_eml($eml);
+                $eml->each_part(\&dequote_add, $self) if $self->{dqre};
+                $self->{tmp_sto}->done;
+        }
+        $eml->each_part(\&extract_oids, $self, 1);
+}
+
+sub lei_rediff {
+        my ($lei, @inputs) = @_;
+        ($lei->{opt}->{drq} && $lei->{opt}->{'dequote-only'}) and return
+                $lei->fail('--drq and --dequote-only are mutually exclusive');
+        ($lei->{opt}->{drq} && !$lei->{opt}->{verbose}) and
+                $lei->{opt}->{quiet} //= 1;
+        $lei->_lei_store(1)->write_prepare($lei);
+        $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin};
+        # maybe it's a non-email (code) blob from a coderepo
+        my $git_dirs = $lei->{opt}->{'git-dir'} //= [];
+        if ($lei->{opt}->{cwd} // 1) {
+                my $cgd = PublicInbox::LeiBlob::get_git_dir($lei, '.');
+                unshift(@$git_dirs, $cgd) if defined $cgd;
+        }
+        return $lei->fail('no --git-dir to try') unless @$git_dirs;
+        my $lxs = $lei->lxs_prepare;
+        if ($lxs->remotes) {
+                require PublicInbox::LeiRemote;
+                $lei->{curl} //= which('curl') or return
+                        $lei->fail('curl needed for '.join(', ',$lxs->remotes));
+        }
+        $lei->ale->refresh_externals($lxs, $lei);
+        my $self = bless {
+                -force_eml => 1, # for LeiInput->input_fh
+                lxs => $lxs,
+        }, __PACKAGE__;
+        $self->prepare_inputs($lei, \@inputs) or return;
+        my $isatty = -t $lei->{1};
+        $lei->{opt}->{color} //= $isatty;
+        $lei->start_pager if $isatty;
+        $lei->wq1_start($self);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        PublicInbox::LeiInput::input_only_atfork_child(@_);
+        my $lei = $self->{lei};
+        $lei->{1}->autoflush(1);
+        binmode $lei->{1}, ':utf8';
+        $self->{blobs} = {}; # oidhex => filename
+        $self->{rdtmp} = File::Temp->newdir('lei-rediff-XXXX', TMPDIR => 1);
+        $self->{tmp_sto} = PublicInbox::LeiStore->new(
+                        "$self->{rdtmp}/tmp.store",
+                        { creat => { nproc => 1 }, indexlevel => 'medium' });
+        $self->{tmp_sto}->{priv_eidx}->{parallel} = 0;
+        $self->{rmt} = [ $self->{tmp_sto}->search, map {
+                        PublicInbox::LeiRemote->new($lei, $_)
+                } $self->{lxs}->remotes ];
+        $self->{gits} = [ map {
+                        PublicInbox::Git->new($lei->rel2abs($_))
+                } @{$self->{lei}->{opt}->{'git-dir'}} ];
+        $lei->{env}->{TMPDIR} = $self->{rdtmp}->dirname;
+        if (my $nr = ($lei->{opt}->{drq} || $lei->{opt}->{'dequote-only'})) {
+                my $re = '\s*> ' x $nr;
+                $self->{dqre} = qr/^($re)/ms;
+        }
+        undef;
+}
+
+no warnings 'once';
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+1;
diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
new file mode 100644
index 00000000..dde23274
--- /dev/null
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -0,0 +1,109 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei refresh-mail-sync" drops dangling sync information
+# and attempts to detect moved files
+package PublicInbox::LeiRefreshMailSync;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::LeiImport;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::Import;
+
+sub folder_missing { # may be called by LeiInput
+        my ($self, $folder) = @_;
+        $self->{lms}->forget_folders($folder);
+}
+
+sub prune_mdir { # lms->each_src callback
+        my ($oidbin, $id, $self, $mdir) = @_;
+        my @try = $$id =~ /:2,[a-zA-Z]*\z/ ? qw(cur new) : qw(new cur);
+        for (@try) { return if -f "$mdir/$_/$$id" }
+        # both tries failed
+        $self->{lms}->clear_src("maildir:$mdir", $id);
+}
+
+sub prune_imap { # lms->each_src callback
+        my ($oidbin, $uid, $self, $uids, $url) = @_;
+        return if exists $uids->{$uid};
+        $self->{lms}->clear_src($url, $uid);
+}
+
+# detects missed file moves
+sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
+        my ($self, $f, $fl) = @_;
+        my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+                die "BUG: $f was not from a Maildir?";
+        substr($folder, 0, 0) = 'maildir:'; # add prefix
+        return if scalar($self->{lms}->name_oidbin($folder, $bn));
+        my $eml = eml_from_path($f) // return;
+        my $oidbin = $self->{lei}->git_oid($eml)->digest;
+        $self->{lms}->set_src($oidbin, $folder, \$bn);
+}
+
+sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
+        my ($self, $input, @args) = @_;
+        if ($input =~ /\Amaildir:(.+)/i) {
+                $self->{lms}->each_src($input, \&prune_mdir, $self, $1);
+                $self->{lse} //= $self->{lei}->{sto}->search;
+                # call pmdir_cb (via maildir_each_file -> each_mdir_fn)
+                PublicInbox::LeiInput::input_path_url($self, $input);
+        } elsif ($input =~ m!\Aimaps?://!i) {
+                my $uri = PublicInbox::URIimap->new($input);
+                if (my $mic = $self->{lei}->{net}->mic_for_folder($uri)) {
+                        my $uids = $mic->search('UID 1:*');
+                        $uids = +{ map { $_ => undef } @$uids };
+                        $self->{lms}->each_src($$uri, \&prune_imap, $self,
+                                                $uids, $$uri)
+                } else {
+                        $self->folder_missing($$uri);
+                }
+        } else { die "BUG: $input not supported" }
+        $self->{lei}->sto_barrier_request;
+}
+
+sub lei_refresh_mail_sync {
+        my ($lei, @folders) = @_;
+        my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
+lei/store uninitialized, see lei-import(1)
+EOM
+        my $lms = $lei->lms or return $lei->fail(<<EOM);
+lei mail_sync.sqlite3 uninitialized, see lei-import(1)
+EOM
+        if (defined(my $all = $lei->{opt}->{all})) {
+                $lms->group2folders($lei, $all, \@folders) or return;
+                # TODO: handle NNTP servers which delete messages
+                @folders = grep(!m!\Anntps?://!, @folders);
+        } else {
+                $lms->arg2folder($lei, \@folders); # may die
+        }
+        $lms->lms_pause; # must be done before fork
+        $sto->write_prepare($lei);
+        my $self = bless { missing_ok => 1, lms => $lms }, __PACKAGE__;
+        $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
+        $self->prepare_inputs($lei, \@folders) or return;
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self);
+}
+
+sub ipc_atfork_child { # needed for PublicInbox::LeiPmdir
+        my ($self) = @_;
+        PublicInbox::LeiInput::input_only_atfork_child($self);
+        $self->{lms}->lms_write_prepare;
+        undef;
+}
+
+sub _complete_refresh_mail_sync {
+        my ($lei, @argv) = @_;
+        my $lms = $lei->lms or return ();
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        my @k = $lms->folders($argv[-1] // undef, 1);
+        my @m = map { $match_cb->($_) } @k;
+        @m ? @m : @k
+}
+
+no warnings 'once';
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiReindex.pm b/lib/PublicInbox/LeiReindex.pm
new file mode 100644
index 00000000..3f109f33
--- /dev/null
+++ b/lib/PublicInbox/LeiReindex.pm
@@ -0,0 +1,49 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei reindex" command to reindex everything in lei/store
+package PublicInbox::LeiReindex;
+use v5.12;
+use parent qw(PublicInbox::IPC);
+
+sub reindex_full {
+        my ($lei) = @_;
+        my $sto = $lei->{sto};
+        my $max = $sto->search->over(1)->max;
+        $lei->qerr("# reindexing 1..$max");
+        $sto->wq_do('reindex_art', $_) for (1..$max);
+}
+
+sub reindex_store { # via wq_do
+        my ($self) = @_;
+        my ($lei, $argv) = delete @$self{qw(lei argv)};
+        if (!@$argv) {
+                reindex_full($lei);
+        }
+}
+
+sub lei_reindex {
+        my ($lei, @argv) = @_;
+        my $sto = $lei->_lei_store or return $lei->fail('nothing indexed');
+        $sto->write_prepare($lei);
+        my $self = bless { lei => $lei, argv => \@argv }, __PACKAGE__;
+        my ($op_c, $ops) = $lei->workers_start($self, 1);
+        $lei->{wq1} = $self;
+        $lei->wait_wq_events($op_c, $ops);
+        $self->wq_do('reindex_store');
+        $self->wq_close;
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+        my ($lei) = @_;
+        $lei->{sto}->wq_do('reindex_done');
+        $lei->wq_eof;
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child;
+        $self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
new file mode 100644
index 00000000..d6fc40a4
--- /dev/null
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -0,0 +1,84 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Make remote externals HTTP(S) inboxes behave like
+# PublicInbox::Inbox and PublicInbox::Search/ExtSearch.
+# This exists solely for SolverGit.  It is a high-latency a
+# synchronous API that is not at all fast.
+package PublicInbox::LeiRemote;
+use v5.10.1;
+use strict;
+use IO::Uncompress::Gunzip;
+use PublicInbox::MboxReader;
+use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::LeiCurl;
+use PublicInbox::ContentHash qw(git_sha);
+
+sub new {
+        my ($cls, $lei, $uri) = @_;
+        bless { uri => $uri, lei => $lei }, $cls;
+}
+
+sub isrch { $_[0] } # SolverGit expcets this
+
+sub each_mboxrd_eml { # callback for MboxReader->mboxrd
+        my ($eml, $self) = @_;
+        my $lei = $self->{lei};
+        my $xoids = $lei->{ale}->xoids_for($eml, 1);
+        my $smsg = bless {}, 'PublicInbox::Smsg';
+        if ($lei->{sto} && !$xoids) { # memoize locally
+                my $res = $lei->{sto}->wq_do('add_eml', $eml);
+                $smsg = $res if ref($res) eq ref($smsg);
+        }
+        $smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
+                                : $lei->git_oid($eml)->hexdigest;
+        $smsg->populate($eml);
+        $smsg->{mid} //= '(none)';
+        push @{$self->{smsg}}, $smsg;
+}
+
+sub mset {
+        my ($self, $qstr, undef) = @_; # $opt ($_[2]) ignored
+        my $lei = $self->{lei};
+        my $curl = PublicInbox::LeiCurl->new($lei, $lei->{curl});
+        push @$curl, '-s', '-d', '';
+        my $uri = $self->{uri}->clone;
+        $uri->query_form(q => $qstr, x => 'm', r => 1); # r=1: relevance
+        my $cmd = $curl->for_uri($self->{lei}, $uri);
+        $self->{lei}->qerr("# $cmd");
+        $self->{smsg} = [];
+        my $fh = popen_rd($cmd, undef, { 2 => $lei->{2} });
+        $fh = IO::Uncompress::Gunzip->new($fh, MultiStream=>1, AutoClose=>1);
+        eval { PublicInbox::MboxReader->mboxrd($fh, \&each_mboxrd_eml, $self) };
+        my $err = $@ ? ": $@" : '';
+        my $wait = $self->{lei}->{sto}->wq_do('barrier');
+        $lei->child_error($?, "@$cmd failed$err") if $err || $?;
+        $self; # we are the mset (and $ibx, and $self)
+}
+
+sub size { scalar @{$_[0]->{smsg}} } # size of previous results
+
+sub mset_to_smsg {
+        my ($self, $ibx, $mset) = @_; # all 3 are $self
+        wantarray ? ($self->size, @{$self->{smsg}}) : $self->{smsg};
+}
+
+sub base_url { "$_[0]->{uri}" }
+
+sub smsg_eml {
+        my ($self, $smsg) = @_;
+        my $bref = $self->{lei}->ale->git->cat_file($smsg->{blob}) // do {
+                my $lms = $self->{lei}->lms;
+                ($lms ? $lms->local_blob($smsg->{blob}, 1) : undef) // do {
+                        my $sto = $self->{lei}->{sto} //
+                                        $self->{lei}->_lei_store;
+                        $sto && $sto->{-wq_s1} ?
+                                $sto->wq_do('cat_blob', $smsg->{blob}) : undef;
+                }
+        };
+        return PublicInbox::Eml->new($bref) if $bref;
+        warn("E: $self->{uri} $smsg->{blob} gone <$smsg->{mid}>\n");
+        undef;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiRm.pm b/lib/PublicInbox/LeiRm.pm
new file mode 100644
index 00000000..00b12485
--- /dev/null
+++ b/lib/PublicInbox/LeiRm.pm
@@ -0,0 +1,30 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# implements the "lei rm" command, you can point this at
+# an entire spam mailbox or read a message from stdin
+package PublicInbox::LeiRm;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml) = @_;
+        $self->{lei}->{sto}->wq_do('remove_eml', $eml);
+}
+
+sub lei_rm {
+        my ($lei, @inputs) = @_;
+        $lei->_lei_store(1)->write_prepare($lei);
+        $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin};
+        my $self = bless {}, __PACKAGE__;
+        $self->prepare_inputs($lei, \@inputs) or return;
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self);
+}
+
+no warnings 'once';
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiRmWatch.pm b/lib/PublicInbox/LeiRmWatch.pm
new file mode 100644
index 00000000..19bee3ab
--- /dev/null
+++ b/lib/PublicInbox/LeiRmWatch.pm
@@ -0,0 +1,31 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei rm-watch" command
+package PublicInbox::LeiRmWatch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiInput);
+
+sub lei_rm_watch {
+        my ($lei, @argv) = @_;
+        my $cfg = $lei->_lei_cfg(1);
+        $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
+        my $self = bless { missing_ok => 1 }, __PACKAGE__;
+        $self->prepare_inputs($lei, \@argv) or return;
+        for my $w (@{$self->{inputs}}) {
+                $lei->_config('--remove-section', "watch.$w") or return;
+        }
+        delete $lei->{cfg}; # force reload
+        $lei->refresh_watches;
+}
+
+sub _complete_rm_watch {
+        my ($lei, @argv) = @_;
+        my $cfg = $lei->_lei_cfg or return;
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        my @w = (join("\n", keys %$cfg) =~ m/^watch\.(.+?)\.state$/sgm);
+        map { $match_cb->($_) } @w;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
new file mode 100644
index 00000000..9ae9dcdb
--- /dev/null
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -0,0 +1,304 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# pretends to be like LeiDedupe and also PublicInbox::Inbox
+package PublicInbox::LeiSavedSearch;
+use v5.12;
+use parent qw(PublicInbox::Lock);
+use PublicInbox::Git;
+use PublicInbox::OverIdx;
+use PublicInbox::LeiSearch;
+use PublicInbox::Config;
+use PublicInbox::Spawn qw(run_die);
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::MID qw(mids_for_index);
+use PublicInbox::SHA qw(sha256_hex);
+use File::Temp ();
+use IO::Handle ();
+our $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf|v2):!i; # TODO: put in LeiToMail?
+
+# move this to PublicInbox::Config if other things use it:
+my %cquote = ("\n" => '\\n', "\t" => '\\t', "\b" => '\\b');
+sub cquote_val ($) { # cf. git-config(1)
+        my ($val) = @_;
+        $val =~ s/([\n\t\b])/$cquote{$1}/g;
+        $val =~ s/\"/\\\"/g;
+        $val;
+}
+
+sub ARRAY_FIELDS () { qw(only include exclude) }
+sub BOOL_FIELDS () {
+        qw(external local remote import-remote import-before threads)
+}
+
+sub SINGLE_FIELDS () { qw(limit dedupe output) }
+
+sub lss_dir_for ($$;$) {
+        my ($lei, $dstref, $on_fs) = @_;
+        my $pfx;
+        if ($$dstref =~ m,\Aimaps?://,i) { # already canonicalized
+                require PublicInbox::URIimap;
+                my $uri = PublicInbox::URIimap->new($$dstref)->canonical;
+                $$dstref = $$uri;
+                $pfx = $uri->mailbox;
+        } else {
+                # can't use Cwd::abs_path since dirname($$dstref) may not exist
+                $$dstref = $lei->rel2abs($$dstref);
+                $$dstref =~ tr!/!/!s;
+                $pfx = $$dstref;
+        }
+        ($pfx) = ($pfx =~ m{([^/]+)/*\z}); # basename
+        my $lss_dir = $lei->share_path . '/saved-searches/';
+        my $d = "$lss_dir$pfx-".sha256_hex($$dstref);
+
+        # fall-back to looking up by st_ino + st_dev in case we're in
+        # a symlinked or bind-mounted path
+        if ($on_fs && !-d $d && -e $$dstref) {
+                my @cur = stat(_);
+                my $want = pack('dd', @cur[1,0]); # st_ino + st_dev
+                my ($c, $o, @st);
+                for my $g ("$pfx-*", '*') {
+                        my @maybe = glob("$lss_dir$g/lei.saved-search");
+                        for my $f (@maybe) {
+                                $c = $lei->cfg_dump($f) // next;
+                                $o = $c->{'lei.q.output'} // next;
+                                $o =~ s!$LOCAL_PFX!! or next;
+                                @st = stat($o) or next;
+                                next if pack('dd', @st[1,0]) ne $want;
+                                $f =~ m!\A(.+?)/[^/]+\z! and return $1;
+                        }
+                }
+        }
+        $d;
+}
+
+sub list {
+        my ($lei, $pfx) = @_;
+        my $lss_dir = $lei->share_path.'/saved-searches';
+        return () unless -d $lss_dir;
+        # TODO: persist the cache?  Use another format?
+        my $fh = File::Temp->new(TEMPLATE => 'lss_list-XXXX', TMPDIR => 1) or
+                die "File::Temp->new: $!";
+        print $fh "[include]\n";
+        for my $p (glob("$lss_dir/*/lei.saved-search")) {
+                print $fh "\tpath = ", cquote_val($p), "\n";
+        }
+        $fh->flush or die "flush: $fh";
+        my $cfg = $lei->cfg_dump($fh->filename);
+        my $out = $cfg ? $cfg->get_all('lei.q.output') : [];
+        s!$LOCAL_PFX!! for @$out;;
+        @$out;
+}
+
+sub translate_dedupe ($$) {
+        my ($self, $lei) = @_;
+        my $dd = $lei->{opt}->{dedupe} // 'content';
+        return 1 if $dd eq 'content'; # the default
+        return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid');
+        die("--dedupe=$dd requires --no-save\n");
+}
+
+sub up { # updating existing saved search via "lei up"
+        my ($cls, $lei, $dst) = @_;
+        my $f;
+        my $self = bless { ale => $lei->ale }, $cls;
+        my $dir = $dst;
+        output2lssdir($self, $lei, \$dir, \$f) or
+                return die("--no-save was used with $dst cwd=".
+                                        $lei->rel2abs('.')."\n");
+        $self->{-cfg} = $lei->cfg_dump($f) // return $lei->child_error;
+        $self->{-ovf} = "$dir/over.sqlite3";
+        $self->{'-f'} = $f;
+        $self->{lock_path} = "$self->{-f}.flock";
+        $self;
+}
+
+sub new { # new saved search "lei q --save"
+        my ($cls, $lei) = @_;
+        my $self = bless { ale => $lei->ale }, $cls;
+        require File::Path;
+        my $dst = $lei->{ovv}->{dst};
+
+        # canonicalize away relative paths into the config
+        if ($lei->{ovv}->{fmt} eq 'maildir' &&
+                        $dst =~ m!(?:/*|\A)\.\.(?:/*|\z)! && !-d $dst) {
+                File::Path::make_path($dst);
+                $lei->{ovv}->{dst} = $dst = $lei->abs_path($dst);
+        }
+        my $dir = lss_dir_for($lei, \$dst);
+        File::Path::make_path($dir); # raises on error
+        $self->{-cfg} = {};
+        my $f = $self->{'-f'} = "$dir/lei.saved-search";
+        translate_dedupe($self, $lei) or return;
+        open my $fh, '>', $f or return $lei->fail("open $f: $!");
+        my $sq_dst = PublicInbox::Config::squote_maybe($dst);
+        my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing';
+        if (ref $q) {
+                $q = join("\n", map { "\tq = ".cquote_val($_) } @$q);
+        } else {
+                $q = "\tq = ".cquote_val($q);
+        }
+        $dst = "$lei->{ovv}->{fmt}:$dst" if $dst !~ m!\Aimaps?://!i;
+        $lei->{opt}->{output} = $dst;
+        print $fh <<EOM;
+; to refresh with new results, run: lei up $sq_dst
+; `maxuid' and `lastresult' lines are maintained by "lei up" for optimization
+[lei]
+$q
+[lei "q"]
+EOM
+        for my $k (ARRAY_FIELDS) {
+                my $ary = $lei->{opt}->{$k} // next;
+                for my $x (@$ary) {
+                        print $fh "\t$k = ".cquote_val($x)."\n";
+                }
+        }
+        for my $k (BOOL_FIELDS) {
+                my $val = $lei->{opt}->{$k} // next;
+                print $fh "\t$k = ".($val ? 1 : 0)."\n";
+        }
+        for my $k (SINGLE_FIELDS) {
+                my $val = $lei->{opt}->{$k} // next;
+                print $fh "\t$k = $val\n";
+        }
+        $lei->{opt}->{stdin} and print $fh <<EOM;
+[lei "internal"]
+        rawstr = 1 # stdin was used initially
+EOM
+        close($fh) or return $lei->fail("close $f: $!");
+        $self->{lock_path} = "$self->{-f}.flock";
+        $self->{-ovf} = "$dir/over.sqlite3";
+        $self;
+}
+
+sub description { $_[0]->{qstr} } # for WWW
+
+sub cfg_set { # called by LeiXSearch
+        my ($self, @args) = @_;
+        my $lk = $self->lock_for_scope; # git-config doesn't wait
+        run_die([qw(git config -f), $self->{'-f'}, @args]);
+}
+
+# drop-in for LeiDedupe API
+sub is_dup {
+        my ($self, $eml, $smsg) = @_;
+        my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
+        my $lk;
+        if ($self->{-dedupe_mid}) {
+                $lk //= $self->lock_for_scope_fast;
+                for my $mid (@{mids_for_index($eml)}) {
+                        my ($id, $prv);
+                        return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
+                }
+        }
+        my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest;
+        $lk //= $self->lock_for_scope_fast;
+        return 1 if $oidx->blob_exists($blob);
+        if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
+                for my $docid (values %$xoids) {
+                        $oidx->add_xref3($docid, -1, $blob, '.');
+                }
+                $oidx->commit_lazy;
+                if ($self->{-dedupe_oid}) {
+                        exists $xoids->{$blob} ? 1 : undef;
+                } else {
+                        1;
+                }
+        } else {
+                # n.b. above xoids_for fills out eml->{-lei_fake_mid} if needed
+                unless ($smsg) {
+                        $smsg = bless {}, 'PublicInbox::Smsg';
+                        $smsg->{bytes} = 0;
+                        $smsg->populate($eml);
+                }
+                $smsg->{blob} //= $blob;
+                $oidx->begin_lazy;
+                $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+                $oidx->add_overview($eml, $smsg);
+                $oidx->add_xref3($smsg->{num}, -1, $blob, '.');
+                $oidx->commit_lazy;
+                undef;
+        }
+}
+
+sub prepare_dedupe {
+        my ($self) = @_;
+        $self->{oidx} //= do {
+                my $creat = !-f $self->{-ovf};
+                my $lk = $self->lock_for_scope; # git-config doesn't wait
+                my $oidx = PublicInbox::OverIdx->new($self->{-ovf});
+                $oidx->{-no_fsync} = 1;
+                $oidx->dbh;
+                if ($creat) {
+                        $oidx->{dbh}->do('PRAGMA journal_mode = WAL');
+                        $oidx->eidx_prep; # for xref3
+                }
+                $oidx
+        };
+}
+
+sub over { $_[0]->{oidx} } # for xoids_for
+
+# don't use ale->git directly since is_dup is called inside
+# ale->git->cat_async callbacks
+sub git { $_[0]->{git} //= PublicInbox::Git->new($_[0]->{ale}->git->{git_dir}) }
+
+sub pause_dedupe {
+        my ($self) = @_;
+        my ($git, $oidx) = delete @$self{qw(git oidx)};
+        $git->cleanup if $git;
+        $oidx->commit_lazy if $oidx;
+        delete $self->{lockfh}; # from lock_for_scope_fast;
+}
+
+sub reset_dedupe {
+        my ($self) = @_;
+        prepare_dedupe($self);
+        my $lk = $self->lock_for_scope_fast;
+        for my $t (qw(xref3 over id2num)) {
+                $self->{oidx}->{dbh}->do("DELETE FROM $t");
+        }
+        pause_dedupe($self);
+}
+
+sub mm { undef }
+
+sub altid_map { {} }
+
+sub cloneurl { [] }
+
+# find existing directory containing a `lei.saved-search' file based on
+# $dir_ref which is an output
+sub output2lssdir {
+        my ($self, $lei, $dir_ref, $fn_ref) = @_;
+        my $dst = $$dir_ref; # imap://$MAILBOX, /path/to/maildir, /path/to/mbox
+        my $dir = lss_dir_for($lei, \$dst, 1);
+        my $f = "$dir/lei.saved-search";
+        if (-f $f && -r _) {
+                $self->{-cfg} = $lei->cfg_dump($f) // return;
+                $$dir_ref = $dir;
+                $$fn_ref = $f;
+                return 1;
+        }
+        undef;
+}
+
+# cf. LeiDedupe->has_entries
+sub has_entries {
+        my $oidx = $_[0]->{oidx} // die 'BUG: no {oidx}';
+        my @n = $oidx->{dbh}->selectrow_array('SELECT num FROM over LIMIT 1');
+        scalar(@n) ? 1 : undef;
+}
+
+no warnings 'once';
+*nntp_url = \&cloneurl;
+*base_url = \&PublicInbox::Inbox::base_url;
+*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
+*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
+*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
+*modified = \&PublicInbox::Inbox::modified;
+*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
+*isrch = *search = \&mm; # TODO
+*DESTROY = \&pause_dedupe;
+
+1;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
new file mode 100644
index 00000000..684668c5
--- /dev/null
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -0,0 +1,187 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# read-only counterpart for PublicInbox::LeiStore
+package PublicInbox::LeiSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch); # PublicInbox::Search->reopen
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::ContentHash qw(content_digest content_hash git_sha);
+use PublicInbox::MID qw(mids mids_for_index);
+use PublicInbox::Compat qw(uniqstr);
+use Carp qw(croak);
+
+sub _msg_kw { # retry_reopen callback
+        my ($self, $num) = @_;
+        my $xdb = $self->xdb; # set {nshard} for num2docid;
+        xap_terms('K', $xdb, $self->num2docid($num));
+}
+
+sub msg_keywords { # array or hashref
+        my ($self, $num) = @_;
+        $self->retry_reopen(\&_msg_kw, $num);
+}
+
+sub _oid_kw { # retry_reopen callback
+        my ($self, $nums) = @_;
+        my $xdb = $self->xdb; # set {nshard};
+        my %kw;
+        for my $num (@$nums) { # there should only be one...
+                my $doc = $xdb->get_document($self->num2docid($num));
+                my $x = xap_terms('K', $doc);
+                %kw = (%kw, %$x);
+        }
+        \%kw;
+}
+
+# returns undef if blob is unknown
+sub oidbin_keywords {
+        my ($self, $oidbin) = @_;
+        my @num = $self->over->oidbin_exists($oidbin) or return;
+        $self->retry_reopen(\&_oid_kw, \@num);
+}
+
+sub _xsmsg_vmd { # retry_reopen
+        my ($self, $smsg, $want_label) = @_;
+        my $xdb = $self->xdb; # set {nshard};
+        my (@kw, @L, $doc, $x);
+        @kw = qw(flagged) if delete($smsg->{lei_q_tt_flagged});
+        my @num = $self->over->blob_exists($smsg->{blob});
+        for my $num (@num) { # there should only be one...
+                $doc = $xdb->get_document($self->num2docid($num));
+                push @kw, xap_terms('K', $doc);
+                push @L, xap_terms('L', $doc) if $want_label # JSON/JMAP only
+        }
+        @{$smsg->{kw}} = sort(uniqstr(@kw)) if @kw;
+        @{$smsg->{L}} = uniqstr(@L) if @L;
+}
+
+# lookup keywords+labels for external messages
+sub xsmsg_vmd {
+        my ($self, $smsg, $want_label) = @_;
+        return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw
+        eval { $self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label) };
+        warn "$$ $0 (nshard=$self->{nshard}) $smsg->{blob}: $@" if $@;
+}
+
+# when a message has no Message-IDs at all, this is needed for
+# unsent Draft messages, at least
+sub content_key ($) {
+        my ($eml) = @_;
+        my $dig = content_digest($eml);
+        my $chash = $dig->clone->digest;
+        my $mids = mids_for_index($eml);
+        unless (@$mids) {
+                $eml->{-lei_fake_mid} = $mids->[0] =
+                                PublicInbox::Import::digest2mid($dig, $eml, 0);
+        }
+        ($chash, $mids);
+}
+
+sub _cmp_1st { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $cmp) = @_;
+        # cmp: [chash, xoids, smsg, lms]
+        $bref //= $cmp->[3] ? $cmp->[3]->local_blob($oid, 1) : undef;
+        if ($bref && content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) {
+                $cmp->[1]->{$oid} = $cmp->[2]->{num};
+        }
+}
+
+# returns { OID => num } mapping for $eml matches
+# The `num' hash value only makes sense from LeiSearch itself
+# and is nonsense from the PublicInbox::LeiALE subclass
+sub xoids_for {
+        my ($self, $eml, $min) = @_;
+        my ($chash, $mids) = content_key($eml);
+        my @overs = ($self->over // $self->overs_all);
+        my $git = $self->git;
+        my $xoids = {};
+        # no lms when used via {ale}:
+        my $lms = $self->{-lms_rw} //= lms($self) if defined($self->{topdir});
+        for my $mid (@$mids) {
+                for my $o (@overs) {
+                        my ($id, $prev);
+                        while (my $cur = $o->next_by_mid($mid, \$id, \$prev)) {
+                                # {bytes} may be '' from old bug
+                                $cur->{bytes} = 1 if $cur->{bytes} eq '';
+                                next if $cur->{bytes} == 0 ||
+                                        $xoids->{$cur->{blob}};
+                                $git->cat_async($cur->{blob}, \&_cmp_1st,
+                                                [$chash, $xoids, $cur, $lms]);
+                                if ($min && scalar(keys %$xoids) >= $min) {
+                                        $git->async_wait_all;
+                                        return $xoids;
+                                }
+                        }
+                }
+        }
+        $git->async_wait_all;
+
+        # it could be an 'lei index'-ed file that just got renamed
+        if (scalar(keys %$xoids) < ($min // 1) && defined($self->{topdir})) {
+                my $hex = git_sha(1, $eml)->hexdigest;
+                my @n = $overs[0]->blob_exists($hex);
+                for (@n) { $xoids->{$hex} //= $_ }
+        }
+        scalar(keys %$xoids) ? $xoids : undef;
+}
+
+# returns true if $eml is indexed by lei/store and keywords don't match
+sub kw_changed {
+        my ($self, $eml, $new_kw_sorted, $docids) = @_;
+        my $cur_kw;
+        if ($eml) {
+                my $xoids = xoids_for($self, $eml) // return;
+                $docids //= [];
+                @$docids = sort { $a <=> $b } values %$xoids;
+                if (!@$docids && $self->over) {
+                        my $bin = git_sha(1, $eml)->digest;
+                        @$docids = $self->over->oidbin_exists($bin);
+                }
+        }
+        for my $id (@$docids) {
+                $cur_kw = eval { msg_keywords($self, $id) } and last;
+        }
+        if (!defined($cur_kw) && $@) {
+                $docids = join(', num:', @$docids);
+                croak "E: num:$docids keyword lookup failure: $@";
+        }
+        # RFC 5550 sec 5.9 on the $Forwarded keyword states:
+        # "Once set, the flag SHOULD NOT be cleared"
+        if (exists($cur_kw->{forwarded}) &&
+                        !grep(/\Aforwarded\z/, @$new_kw_sorted)) {
+                delete $cur_kw->{forwarded};
+        }
+        $cur_kw = join("\0", sort keys %$cur_kw);
+        join("\0", @$new_kw_sorted) eq $cur_kw ? 0 : 1;
+}
+
+sub qparse_new {
+        my ($self) = @_;
+        my $qp = $self->SUPER::qparse_new; # PublicInbox::Search
+        $self->{qp_flags} |= PublicInbox::Search::FLAG_PHRASE() |
+                                PublicInbox::Search::FLAG_PURE_NOT();
+        $qp->add_boolean_prefix('kw', 'K');
+        $qp->add_boolean_prefix('L', 'L');
+        $qp
+}
+
+sub lms {
+        my ($self) = @_;
+        require PublicInbox::LeiMailSync;
+        my $f = "$self->{topdir}/mail_sync.sqlite3";
+        -f $f ? PublicInbox::LeiMailSync->new($f) : undef;
+}
+
+# allow SolverGit->resolve_patch to work with "lei index"
+sub smsg_eml {
+        my ($self, $smsg) = @_;
+        PublicInbox::Inbox::smsg_eml($self, $smsg) // do {
+                my $lms = lms($self);
+                my $bref = $lms ? $lms->local_blob($smsg->{blob}, 1) : undef;
+                $bref ? PublicInbox::Eml->new($bref) : undef;
+        };
+}
+
+1;
diff --git a/lib/PublicInbox/LeiSelfSocket.pm b/lib/PublicInbox/LeiSelfSocket.pm
new file mode 100644
index 00000000..0e15bc7c
--- /dev/null
+++ b/lib/PublicInbox/LeiSelfSocket.pm
@@ -0,0 +1,37 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# dummy placeholder socket for internal lei commands.
+# This receives what script/lei receives, but isn't connected
+# to an interactive terminal so I'm not sure what to do with it...
+package PublicInbox::LeiSelfSocket;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use Data::Dumper;
+$Data::Dumper::Useqq = 1; # should've been the Perl default :P
+use PublicInbox::Syscall qw(EPOLLIN);
+use PublicInbox::IPC;
+
+sub new {
+        my ($cls, $r) = @_;
+        my $self = bless {}, $cls;
+        $r->blocking(0);
+        $self->SUPER::new($r, EPOLLIN);
+}
+
+sub event_step {
+        my ($self) = @_;
+        my ($buf, @fds);
+        @fds = $PublicInbox::IPC::recv_cmd->($self->{sock}, $buf, 4096 * 33);
+        if (scalar(@fds) == 1 && !defined($fds[0])) {
+                return if $!{EAGAIN};
+                die "recvmsg: $!" unless $!{ECONNRESET};
+        } else { # just in case open so perl can auto-close them:
+                for (@fds) { open my $fh, '+<&=', $_ };
+        }
+        return $self->close if $buf eq '';
+        warn Dumper({ 'unexpected self msg' => $buf, fds => \@fds });
+        # TODO: figure out what to do with these messages...
+}
+
+1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
new file mode 100644
index 00000000..b2da2bc3
--- /dev/null
+++ b/lib/PublicInbox/LeiStore.pm
@@ -0,0 +1,667 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Local storage (cache/memo) for lei(1), suitable for personal/private
+# mail iff on encrypted device/FS.  Based on v2, but only deduplicates
+# git storage based on git OID (index deduplication is done in ContentHash)
+#
+# for xref3, the following are constant: $eidx_key = '.', $xnum = -1
+#
+# We rely on the synchronous IPC API for this in lei-daemon and
+# multiple lei clients to write to it at once.  This allows the
+# lei/store IPC process to be decoupled from network latency in
+# lei WQ workers.
+package PublicInbox::LeiStore;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock PublicInbox::IPC);
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::Import;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::V2Writable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::MID qw(mids);
+use PublicInbox::LeiSearch;
+use PublicInbox::MDA;
+use PublicInbox::Spawn qw(spawn);
+use PublicInbox::MdirReader;
+use PublicInbox::LeiToMail;
+use PublicInbox::Compat qw(uniqstr);
+use PublicInbox::OnDestroy;
+use File::Temp qw(tmpnam);
+use POSIX ();
+use IO::Handle (); # ->autoflush
+use Sys::Syslog qw(syslog openlog);
+use Errno qw(EEXIST ENOENT);
+use PublicInbox::Syscall qw(rename_noreplace);
+use PublicInbox::LeiStoreErr;
+use PublicInbox::DS qw(add_uniq_timer);
+
+sub new {
+        my (undef, $dir, $opt) = @_;
+        my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
+        my $self = bless { priv_eidx => $eidx }, __PACKAGE__;
+        eidx_init($self)->done if $opt->{creat};
+        $self;
+}
+
+sub git { $_[0]->{priv_eidx}->git } # read-only
+
+sub packing_factor { $PublicInbox::V2Writable::PACKING_FACTOR }
+
+sub rotate_bytes {
+        $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor)
+}
+
+sub git_ident ($) {
+        my ($git) = @_;
+        my $rdr = {};
+        open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+        chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr));
+        $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ and return ($1, $2);
+        my ($user, undef, undef, undef, undef, undef, $gecos) = getpwuid($<);
+        ($user) = (($user // $ENV{USER} // '') =~ /([\w\-\.\+]+)/);
+        $user //= 'lei-user';
+        ($gecos) = (($gecos // '') =~ /([\w\-\.\+ \t]+)/);
+        $gecos //= 'lei user';
+        require Sys::Hostname;
+        my ($host) = (Sys::Hostname::hostname() =~ /([\w\-\.]+)/);
+        $host //= 'localhost';
+        ($gecos, "$user\@$host")
+}
+
+sub importer {
+        my ($self) = @_;
+        my $max;
+        my $im = $self->{im};
+        if ($im) {
+                return $im if $im->{bytes_added} < $self->rotate_bytes;
+
+                delete $self->{im};
+                $im->done;
+                undef $im;
+                $self->barrier;
+                $max = $self->{priv_eidx}->{mg}->git_epochs + 1;
+        }
+        my (undef, $tl) = eidx_init($self); # acquire lock
+        $max //= $self->{priv_eidx}->{mg}->git_epochs;
+        while (1) {
+                my $latest = $self->{priv_eidx}->{mg}->add_epoch($max);
+                my $git = PublicInbox::Git->new($latest);
+                $self->done; # unlock
+                # re-acquire lock, update alternates for new epoch
+                (undef, $tl) = eidx_init($self);
+                my $packed_bytes = $git->packed_bytes;
+                my $unpacked_bytes = $packed_bytes / $self->packing_factor;
+                if ($unpacked_bytes >= $self->rotate_bytes) {
+                        $max++;
+                        next;
+                }
+                my ($n, $e) = git_ident($git);
+                $self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
+                $im->{bytes_added} = int($packed_bytes / $self->packing_factor);
+                $im->{lock_path} = undef;
+                $im->{path_type} = 'v2';
+                return $im;
+        }
+}
+
+sub search {
+        PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir});
+}
+
+sub cat_blob {
+        my ($self, $oid) = @_;
+        $self->{im} ? $self->{im}->cat_blob($oid) : undef;
+}
+
+sub schedule_commit {
+        my ($self, $sec) = @_;
+        add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&barrier, $self);
+}
+
+# follows the stderr file
+sub _tail_err {
+        my ($self) = @_;
+        my $err = $self->{-tmp_err} // return;
+        $err->clearerr; # clear EOF marker
+        my @msg = readline($err);
+        PublicInbox::LeiStoreErr::emit($self->{-err_wr}, @msg) and return;
+        # syslog is the last resort if lei-daemon broke
+        syslog('warning', '%s', $_) for @msg;
+}
+
+sub eidx_init {
+        my ($self) = @_;
+        my $eidx = $self->{priv_eidx};
+        my $tl = wantarray && $self->{-err_wr} ?
+                        on_destroy(\&_tail_err, $self) :
+                        undef;
+        $eidx->idx_init({-private => 1}); # acquires lock
+        wantarray ? ($eidx, $tl) : $eidx;
+}
+
+sub _docids_for ($$) {
+        my ($self, $eml) = @_;
+        my %docids;
+        my $eidx = $self->{priv_eidx};
+        my ($chash, $mids) = PublicInbox::LeiSearch::content_key($eml);
+        my $oidx = $eidx->{oidx};
+        my $im = $self->{im};
+        for my $mid (@$mids) {
+                my ($id, $prev);
+                while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) {
+                        next if $cur->{bytes} == 0; # external-only message
+                        my $oid = $cur->{blob};
+                        my $docid = $cur->{num};
+                        my $bref = $im ? $im->cat_blob($oid) : undef;
+                        $bref //= $eidx->git->cat_file($oid) //
+                                _lms_rw($self)->local_blob($oid, 1) // do {
+                                warn "W: $oid (#$docid) <$mid> not found\n";
+                                next;
+                        };
+                        local $self->{current_info} = $oid;
+                        my $x = PublicInbox::Eml->new($bref);
+                        $docids{$docid} = $docid if content_hash($x) eq $chash;
+                }
+        }
+        sort { $a <=> $b } values %docids;
+}
+
+# n.b. similar to LeiExportKw->export_kw_md, but this is for a single eml
+sub export1_kw_md ($$$$$) {
+        my ($self, $mdir, $bn, $oidbin, $vmdish) = @_; # vmd/vmd_mod
+        my $orig = $bn;
+        my (@try, $unkn, $kw);
+        if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
+                ($kw, $unkn) = PublicInbox::MdirReader::flags2kw($1);
+                if (my $set = $vmdish->{kw}) {
+                        $kw = $set;
+                } elsif (my $add = $vmdish->{'+kw'}) {
+                        @$kw{@$add} = ();
+                } elsif (my $del = $vmdish->{-kw}) {
+                        delete @$kw{@$del};
+                } # else no changes...
+                @try = qw(cur new);
+        } else { # no keywords, yet, could be in new/
+                @try = qw(new cur);
+                $unkn = [];
+                if (my $set = $vmdish->{kw}) {
+                        $kw = $set;
+                } elsif (my $add = $vmdish->{'+kw'}) {
+                        @$kw{@$add} = (); # auto-vivify
+                } else { # ignore $vmdish->{-kw}
+                        $kw = [];
+                }
+        }
+        $kw = [ keys %$kw ] if ref($kw) eq 'HASH';
+        $bn .= ':2,'. PublicInbox::LeiToMail::kw2suffix($kw, @$unkn);
+        return if $orig eq $bn; # no change
+
+        # we use link(2) + unlink(2) since rename(2) may
+        # inadvertently clobber if the "uniquefilename" part wasn't
+        # actually unique.
+        my $dst = "$mdir/cur/$bn";
+        for my $d (@try) {
+                my $src = "$mdir/$d/$orig";
+                if (rename_noreplace($src, $dst)) {
+                        # TODO: verify oidbin?
+                        $self->{lms}->mv_src("maildir:$mdir",
+                                        $oidbin, \$orig, $bn);
+                        return;
+                } elsif ($! == EEXIST) { # lost race with "lei export-kw"?
+                        return;
+                } elsif ($! != ENOENT) {
+                        syslog('warning', "rename_noreplace($src -> $dst): $!");
+                }
+        }
+        for (@try) { return if -e "$mdir/$_/$orig" };
+        $self->{lms}->clear_src("maildir:$mdir", \$orig);
+}
+
+sub sto_export_kw ($$$) {
+        my ($self, $docid, $vmdish) = @_; # vmdish (vmd or vmd_mod)
+        my ($eidx, $tl) = eidx_init($self);
+        my $lms = _lms_rw($self) // return;
+        my $xr3 = $eidx->{oidx}->get_xref3($docid, 1);
+        for my $row (@$xr3) {
+                my (undef, undef, $oidbin) = @$row;
+                my $locs = $lms->locations_for($oidbin) // next;
+                while (my ($loc, $ids) = each %$locs) {
+                        if ($loc =~ s!\Amaildir:!!i) {
+                                for my $id (@$ids) {
+                                        export1_kw_md($self, $loc, $id,
+                                                        $oidbin, $vmdish);
+                                }
+                        }
+                        # TODO: IMAP
+                }
+        }
+}
+
+# vmd = { kw => [ qw(seen ...) ], L => [ qw(inbox ...) ] }
+sub set_eml_vmd {
+        my ($self, $eml, $vmd, $docids) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        $docids //= [ _docids_for($self, $eml) ];
+        for my $docid (@$docids) {
+                $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
+                sto_export_kw($self, $docid, $vmd);
+        }
+        $docids;
+}
+
+sub add_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        my @docids = _docids_for($self, $eml);
+        for my $docid (@docids) {
+                $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd);
+        }
+        \@docids;
+}
+
+sub remove_eml_vmd { # remove just the VMD
+        my ($self, $eml, $vmd) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        my @docids = _docids_for($self, $eml);
+        for my $docid (@docids) {
+                $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd);
+        }
+        \@docids;
+}
+
+sub _lms_rw ($) { # it is important to have eidx processes open before lms
+        my ($self) = @_;
+        $self->{lms} // do {
+                require PublicInbox::LeiMailSync;
+                my ($eidx, $tl) = eidx_init($self);
+                my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3";
+                my $lms = PublicInbox::LeiMailSync->new($f);
+                $lms->lms_write_prepare;
+                $self->{lms} = $lms;
+        };
+}
+
+sub _remove_if_local { # git->cat_async arg
+        my ($bref, $oidhex, $type, $size, $self) = @_;
+        $self->{im}->remove($bref) if $bref;
+}
+
+sub remove_docids ($;@) {
+        my ($self, @docids) = @_;
+        my $eidx = eidx_init($self);
+        for my $docid (@docids) {
+                $eidx->remove_doc($docid);
+                $eidx->{oidx}->{dbh}->do(<<EOF, undef, $docid);
+DELETE FROM xref3 WHERE docid = ?
+EOF
+        }
+}
+
+# remove the entire message from the index, does not touch mail_sync.sqlite3
+sub remove_eml {
+        my ($self, $eml) = @_;
+        my $im = $self->importer; # may create new epoch
+        my ($eidx, $tl) = eidx_init($self);
+        my $oidx = $eidx->{oidx};
+        my @docids = _docids_for($self, $eml);
+        my $git = $eidx->git;
+        for my $docid (@docids) {
+                my $xr3 = $oidx->get_xref3($docid, 1);
+                for my $row (@$xr3) {
+                        my (undef, undef, $oidbin) = @$row;
+                        my $oidhex = unpack('H*', $oidbin);
+                        $git->cat_async($oidhex, \&_remove_if_local, $self);
+                }
+        }
+        $git->async_wait_all;
+        remove_docids($self, @docids);
+        \@docids;
+}
+
+sub oid2docid ($$) {
+        my ($self, $oid) = @_;
+        my $eidx = eidx_init($self);
+        my ($docid, @cull) = $eidx->{oidx}->blob_exists($oid);
+        if (@cull) { # fixup old bugs...
+                warn <<EOF;
+W: $oid indexed as multiple docids: $docid @cull, culling to fixup old bugs
+EOF
+                remove_docids($self, @cull);
+        }
+        $docid;
+}
+
+sub _add_vmd ($$$$) {
+        my ($self, $idx, $docid, $vmd) = @_;
+        $idx->ipc_do('add_vmd', $docid, $vmd);
+        sto_export_kw($self, $docid, $vmd);
+}
+
+sub _docids_and_maybe_kw ($$) {
+        my ($self, $docids) = @_;
+        return $docids unless wantarray;
+        my (@kw, $idx, @tmp);
+        for my $num (@$docids) { # likely only 1, unless ContentHash changes
+                # can't use ->search->msg_keywords on uncommitted docs
+                $idx = $self->{priv_eidx}->idx_shard($num);
+                @tmp = eval { $idx->ipc_do('get_terms', 'K', $num) };
+                $@ ? warn("#$num get_terms: $@") : push(@kw, @tmp);
+        }
+        @kw = sort(uniqstr(@kw)) if @$docids > 1;
+        ($docids, \@kw);
+}
+
+sub _reindex_1 { # git->cat_async callback
+        my ($bref, $hex, $type, $size, $smsg) = @_;
+        my $self = delete $smsg->{-sto};
+        my ($eidx, $tl) = eidx_init($self);
+        $bref //= _lms_rw($self)->local_blob($hex, 1);
+        if ($bref) {
+                my $eml = PublicInbox::Eml->new($bref);
+                $smsg->{-merge_vmd} = 1; # preserve existing keywords
+                $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg);
+        } elsif ($type eq 'missing') {
+                # pre-release/buggy lei may've indexed external-only msgs,
+                # try to correct that, here
+                warn("E: missing $hex, culling (ancient lei artifact?)\n");
+                $smsg->{to} = $smsg->{cc} = $smsg->{from} = '';
+                $smsg->{bytes} = 0;
+                $eidx->{oidx}->update_blob($smsg, '');
+                my $eml = PublicInbox::Eml->new("\r\n\r\n");
+                $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg);
+        } else {
+                warn("E: $type $hex\n");
+        }
+}
+
+sub reindex_art {
+        my ($self, $art) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        my $smsg = $eidx->{oidx}->get_art($art) // return;
+        return if $smsg->{bytes} == 0; # external-only message
+        $smsg->{-sto} = $self;
+        $eidx->git->cat_async($smsg->{blob} // die("no blob (#$art)"),
+                                \&_reindex_1, $smsg);
+}
+
+sub reindex_done {
+        my ($self) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        $eidx->git->async_wait_all;
+        # ->done to be called via sto_barrier_request
+}
+
+sub add_eml {
+        my ($self, $eml, $vmd, $xoids) = @_;
+        my $im = $self->{-fake_im} // $self->importer; # may create new epoch
+        my ($eidx, $tl) = eidx_init($self);
+        my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this
+        my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
+        $smsg->{-eidx_git} = $eidx->git if !$self->{-fake_im};
+        my $im_mark = $im->add($eml, undef, $smsg);
+        if ($vmd && $vmd->{sync_info}) {
+                _lms_rw($self)->set_src($smsg->oidbin, @{$vmd->{sync_info}});
+        }
+        unless ($im_mark) { # duplicate blob returns undef
+                return unless wantarray || $vmd;
+                my @docids = $oidx->blob_exists($smsg->{blob});
+                if ($vmd) {
+                        for my $docid (@docids) {
+                                my $idx = $eidx->idx_shard($docid);
+                                _add_vmd($self, $idx, $docid, $vmd);
+                        }
+                }
+                return _docids_and_maybe_kw $self, \@docids;
+        }
+
+        local $self->{current_info} = $smsg->{blob};
+        my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches
+        if ($xoids) { # fuzzy matches from externals in ale->xoids_for
+                delete $xoids->{$smsg->{blob}}; # added later
+                if (scalar keys %$xoids) {
+                        my %docids = map { $_ => 1 } @$vivify_xvmd;
+                        for my $oid (keys %$xoids) {
+                                my $docid = oid2docid($self, $oid);
+                                $docids{$docid} = $docid if defined($docid);
+                        }
+                        @$vivify_xvmd = sort { $a <=> $b } keys(%docids);
+                }
+        }
+        if (@$vivify_xvmd) { # docids list
+                $xoids //= {};
+                $xoids->{$smsg->{blob}} = 1;
+                for my $docid (@$vivify_xvmd) {
+                        my $cur = $oidx->get_art($docid);
+                        my $idx = $eidx->idx_shard($docid);
+                        if (!$cur || $cur->{bytes} == 0) { # really vivifying
+                                $smsg->{num} = $docid;
+                                $oidx->add_overview($eml, $smsg);
+                                $smsg->{-merge_vmd} = 1;
+                                $idx->index_eml($eml, $smsg);
+                        } else { # lse fuzzy hit off ale
+                                $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
+                        }
+                        for my $oid (keys %$xoids) {
+                                $oidx->add_xref3($docid, -1, $oid, '.');
+                        }
+                        _add_vmd($self, $idx, $docid, $vmd) if $vmd;
+                }
+                _docids_and_maybe_kw $self, $vivify_xvmd;
+        } elsif (my @docids = _docids_for($self, $eml)) {
+                # fuzzy match from within lei/store
+                for my $docid (@docids) {
+                        my $idx = $eidx->idx_shard($docid);
+                        $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
+                        # add_eidx_info for List-Id
+                        $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
+                        _add_vmd($self, $idx, $docid, $vmd) if $vmd;
+                }
+                _docids_and_maybe_kw $self, \@docids;
+        } else { # totally new message, no keywords
+                delete $smsg->{-oidx}; # for IPC-friendliness
+                $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+                $oidx->add_overview($eml, $smsg);
+                $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+                my $idx = $eidx->idx_shard($smsg->{num});
+                $idx->index_eml($eml, $smsg);
+                _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd;
+                wantarray ? ($smsg, []) : $smsg;
+        }
+}
+
+sub set_eml {
+        my ($self, $eml, $vmd, $xoids) = @_;
+        add_eml($self, $eml, $vmd, $xoids) //
+                set_eml_vmd($self, $eml, $vmd);
+}
+
+sub index_eml_only {
+        my ($self, $eml, $vmd, $xoids) = @_;
+        require PublicInbox::FakeImport;
+        local $self->{-fake_im} = PublicInbox::FakeImport->new;
+        set_eml($self, $eml, $vmd, $xoids);
+}
+
+# store {kw} / {L} info for a message which is only in an external
+sub _external_only ($$$) {
+        my ($self, $xoids, $eml) = @_;
+        my $eidx = $self->{priv_eidx};
+        my $oidx = $eidx->{oidx} // die 'BUG: {oidx} missing';
+        my $smsg = bless { blob => '' }, 'PublicInbox::Smsg';
+        $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+        # save space for an externals-only message
+        my $hdr = $eml->header_obj;
+        $smsg->populate($hdr); # sets lines == 0
+        $smsg->{bytes} = 0;
+        delete @$smsg{qw(From Subject)};
+        $smsg->{to} = $smsg->{cc} = $smsg->{from} = '';
+        $oidx->add_overview($hdr, $smsg); # subject+references for threading
+        $smsg->{subject} = '';
+        for my $oid (keys %$xoids) {
+                $oidx->add_xref3($smsg->{num}, -1, $oid, '.');
+        }
+        my $idx = $eidx->idx_shard($smsg->{num});
+        $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg);
+        ($smsg, $idx);
+}
+
+sub update_xvmd {
+        my ($self, $xoids, $eml, $vmd_mod) = @_;
+        my ($eidx, $tl) = eidx_init($self);
+        my $oidx = $eidx->{oidx};
+        my %seen;
+        for my $oid (keys %$xoids) {
+                my $docid = oid2docid($self, $oid) // next;
+                delete $xoids->{$oid};
+                next if $seen{$docid}++;
+                my $idx = $eidx->idx_shard($docid);
+                $idx->ipc_do('update_vmd', $docid, $vmd_mod);
+                sto_export_kw($self, $docid, $vmd_mod);
+        }
+        return unless scalar(keys(%$xoids));
+
+        # see if it was indexed, but with different OID(s)
+        if (my @docids = _docids_for($self, $eml)) {
+                for my $docid (@docids) {
+                        next if $seen{$docid};
+                        for my $oid (keys %$xoids) {
+                                $oidx->add_xref3($docid, -1, $oid, '.');
+                        }
+                        my $idx = $eidx->idx_shard($docid);
+                        $idx->ipc_do('update_vmd', $docid, $vmd_mod);
+                        sto_export_kw($self, $docid, $vmd_mod);
+                }
+                return;
+        }
+        # totally unseen
+        my ($smsg, $idx) = _external_only($self, $xoids, $eml);
+        $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod);
+        sto_export_kw($self, $smsg->{num}, $vmd_mod);
+}
+
+# set or update keywords for external message, called via ipc_do
+sub set_xvmd {
+        my ($self, $xoids, $eml, $vmd) = @_;
+
+        my ($eidx, $tl) = eidx_init($self);
+        my $oidx = $eidx->{oidx};
+        my %seen;
+
+        # see if we can just update existing docs
+        for my $oid (keys %$xoids) {
+                my $docid = oid2docid($self, $oid) // next;
+                delete $xoids->{$oid}; # all done with this oid
+                next if $seen{$docid}++;
+                my $idx = $eidx->idx_shard($docid);
+                $idx->ipc_do('set_vmd', $docid, $vmd);
+                sto_export_kw($self, $docid, $vmd);
+        }
+        return unless scalar(keys(%$xoids));
+
+        # n.b. we don't do _docids_for here, we expect the caller
+        # already checked $lse->kw_changed before calling this sub
+
+        return unless (@{$vmd->{kw} // []}) || (@{$vmd->{L} // []});
+        # totally unseen:
+        my ($smsg, $idx) = _external_only($self, $xoids, $eml);
+        $idx->ipc_do('add_vmd', $smsg->{num}, $vmd);
+        sto_export_kw($self, $smsg->{num}, $vmd);
+}
+
+sub check_done {
+        my ($self) = @_;
+        $self->git->_active ?
+                add_uniq_timer("$self-check_done", 5, \&check_done, $self) :
+                done($self);
+}
+
+sub xchg_stderr {
+        my ($self) = @_;
+        _tail_err($self) if $self->{-err_wr};
+        my $dir = $self->{priv_eidx}->{topdir};
+        return unless -e $dir;
+        delete $self->{-tmp_err};
+        my ($err, $name) = tmpnam();
+        open STDERR, '>>', $name or die "dup2: $!";
+        unlink($name);
+        STDERR->autoflush(1); # shared with shard subprocesses
+        $self->{-tmp_err} = $err; # separate file description for RO access
+        undef;
+}
+
+sub _commit ($$) {
+        my ($self, $cmd) = @_; # cmd is 'done' or 'barrier'
+        my ($errfh, $lei_sock) = @$self{0, 1}; # via sto_barrier_request
+        my @err;
+        if ($self->{im}) {
+                eval { $self->{im}->$cmd };
+                push(@err, "E: import $cmd: $@\n") if $@;
+        }
+        delete $self->{lms};
+        eval { $self->{priv_eidx}->$cmd };
+        push(@err, "E: priv_eidx $cmd: $@\n") if $@;
+        print { $errfh // \*STDERR } @err;
+        send($lei_sock, 'child_error 256', 0) if @err && $lei_sock;
+        xchg_stderr($self);
+        die @err if @err;
+        # $lei_sock goes out-of-scope and script/lei can terminate
+}
+
+sub barrier {
+        my ($self) = @_;
+        _commit $self, 'barrier';
+        add_uniq_timer("$self-check_done", 5, \&check_done, $self);
+        undef;
+}
+
+sub done { _commit $_[0], 'done' }
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child(1) if $lei;
+        xchg_stderr($self);
+        if (my $to_close = delete($self->{to_close})) {
+                close($_) for @$to_close;
+        }
+        openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user');
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub recv_and_run {
+        my ($self, @args) = @_;
+        local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+        $self->SUPER::recv_and_run(@args);
+}
+
+sub _sto_atexit { # awaitpid cb
+        my ($pid) = @_;
+        warn "lei/store PID:$pid died \$?=$?\n" if $?;
+}
+
+sub write_prepare {
+        my ($self, $lei) = @_;
+        $lei // die 'BUG: $lei not passed';
+        unless ($self->{-wq_s1}) {
+                my $dir = $lei->store_path;
+                substr($dir, -length('/lei/store'), 10, '');
+                pipe(my ($r, $w)) or die "pipe: $!";
+                $w->autoflush(1);
+                # Mail we import into lei are private, so headers filtered out
+                # by -mda for public mail are not appropriate
+                local @PublicInbox::MDA::BAD_HEADERS = ();
+                local $SIG{ALRM} = 'IGNORE';
+                $self->wq_workers_start("lei/store $dir", 1, $lei->oldset, {
+                                        lei => $lei,
+                                        -err_wr => $w,
+                                        to_close => [ $r ],
+                                }, \&_sto_atexit);
+                PublicInbox::LeiStoreErr->new($r, $lei);
+        }
+        $lei->{sto} = $self;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiStoreErr.pm b/lib/PublicInbox/LeiStoreErr.pm
new file mode 100644
index 00000000..c8bc72b6
--- /dev/null
+++ b/lib/PublicInbox/LeiStoreErr.pm
@@ -0,0 +1,62 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# forwards stderr from lei/store process to any lei clients using
+# the same store, falls back to syslog if no matching clients exist.
+package PublicInbox::LeiStoreErr;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN);
+use Sys::Syslog qw(openlog syslog closelog);
+use IO::Handle (); # ->blocking
+use Time::HiRes ();
+use autodie qw(open);
+our $err_wr;
+
+# We don't want blocked stderr on clients to block lei/store or lei-daemon.
+# We can't make stderr non-blocking since it can break MUAs or anything
+# lei might spawn.  So we setup a timer to wake us up after a second if
+# printing to a user's stderr hasn't completed, yet.  Unfortunately,
+# EINTR alone isn't enough since Perl auto-restarts writes on signals,
+# so to interrupt writes to clients with blocked stderr, we dup the
+# error output to $err_wr ahead-of-time and close $err_wr in the
+# SIGALRM handler to ensure `print' gets aborted:
+
+sub abort_err_wr { close($err_wr) if $err_wr; undef $err_wr }
+
+sub emit ($@) {
+        my ($efh, @msg) = @_;
+        open(local $err_wr, '>&', $efh); # fdopen(dup(fileno($efh)), "w")
+        local $SIG{ALRM} = \&abort_err_wr;
+        Time::HiRes::alarm(1.0, 0.1);
+        my $ret = print $err_wr @msg;
+        Time::HiRes::alarm(0);
+        $ret;
+}
+
+sub new {
+        my ($cls, $rd, $lei) = @_;
+        my $self = bless { sock => $rd, store_path => $lei->store_path }, $cls;
+        $rd->blocking(0);
+        $self->SUPER::new($rd, EPOLLIN); # level-trigger
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $n = sysread($self->{sock}, my $buf, 8192);
+        return ($!{EAGAIN} ? 0 : $self->close) if !defined($n);
+        return $self->close if !$n;
+        my $printed;
+        for my $lei (grep defined, @PublicInbox::DS::FD_MAP) {
+                my $cb = $lei->can('store_path') // next;
+                next if $cb->($lei) ne $self->{store_path};
+                emit($lei->{2} // next, $buf) and $printed = 1;
+        }
+        if (!$printed) {
+                openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user');
+                for my $l (split(/\n/, $buf)) { syslog('warning', '%s', $l) }
+                closelog(); # don't share across fork
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/LeiSucks.pm b/lib/PublicInbox/LeiSucks.pm
new file mode 100644
index 00000000..ddb3faf7
--- /dev/null
+++ b/lib/PublicInbox/LeiSucks.pm
@@ -0,0 +1,74 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Undocumented hidden command somebody might discover if they're
+# frustrated and need to report a bug.  There's no manpage and
+# it won't show up in tab completions or help.
+package PublicInbox::LeiSucks;
+use strict;
+use v5.10.1;
+use PublicInbox::SHA qw(sha1_hex);
+use Config;
+use POSIX ();
+use PublicInbox::Config;
+use PublicInbox::IPC;
+use PublicInbox::IO qw(read_all);
+
+sub lei_sucks {
+        my ($lei, @argv) = @_;
+        $lei->start_pager if -t $lei->{1};
+        my ($os, undef, $rel, undef, $mac)= POSIX::uname();
+        if ($mac eq 'x86_64' && $Config{ptrsize} == 4) {
+                $mac .= $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ?
+                        ',u=x32' : ',u=x86';
+        }
+        eval { require PublicInbox };
+        my $pi_ver = eval('$PublicInbox::VERSION') // '(???)';
+        my $nproc = PublicInbox::IPC::detect_nproc() // '?';
+        my @out = ("lei $pi_ver\n",
+                "perl $Config{version} / $os $rel / $mac ".
+                "ptrsize=$Config{ptrsize} nproc=$nproc\n");
+        chomp(my $gv = `git --version` || "git missing");
+        $gv =~ s/ version / /;
+        my $json = ref(PublicInbox::Config->json);
+        $json .= ' ' . eval('$'.$json.'::VERSION') if $json;
+        $json ||= '(no JSON)';
+        push @out, "$gv / $json\n";
+        if (eval { require PublicInbox::Over }) {
+                push @out, 'SQLite '.
+                        (eval('$DBD::SQLite::sqlite_version') // '(undef)') .
+                        ', DBI '.(eval('$DBI::VERSION') // '(undef)') .
+                        ', DBD::SQLite '.
+                        (eval('$DBD::SQLite::VERSION') // '(undef)')."\n";
+        } else {
+                push @out, "Unable to load DBI / DBD::SQLite: $@\n";
+        }
+        if (eval { require PublicInbox::Search } &&
+                        PublicInbox::Search::load_xapian()) {
+                push @out, 'Xapian '.
+                        join('.', map {
+                                $PublicInbox::Search::Xap->can($_)->();
+                        } qw(major_version minor_version revision)) .
+                        ", bindings: $PublicInbox::Search::Xap";
+                my $xs_ver = eval '$'."$PublicInbox::Search::Xap".'::VERSION';
+                push @out, $xs_ver ? " $xs_ver\n" : " SWIG\n";
+        } else {
+                push @out, "Xapian not available: $@\n";
+        }
+        push @out, "public-inbox blob OIDs of loaded features:\n";
+        for my $m (grep(m{^PublicInbox/}, sort keys %INC)) {
+                my $f = $INC{$m} // next; # lazy require failed (missing dep)
+                open my $fh, '<', $f or do { warn "open($f): $!"; next };
+                my $size = -s $fh;
+                my $hex = sha1_hex("blob $size\0".read_all($fh, $size));
+                push @out, '  '.$hex.' '.$m."\n";
+        }
+        push @out, <<'EOM';
+Let us know how it sucks!  Please include the above and any other
+relevant information when sending plain-text mail to us at:
+meta@public-inbox.org -- archives: https://public-inbox.org/meta/
+EOM
+        $lei->out(@out);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
new file mode 100644
index 00000000..da8caeb7
--- /dev/null
+++ b/lib/PublicInbox/LeiTag.pm
@@ -0,0 +1,105 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# handles "lei tag" command
+package PublicInbox::LeiTag;
+use v5.12;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+        my ($self, $eml) = @_;
+        if (my $xoids = $self->{lse}->xoids_for($eml) // # tries LeiMailSync
+                        $self->{lei}->{ale}->xoids_for($eml)) {
+                $self->{lei}->{sto}->wq_do('update_xvmd', $xoids, $eml,
+                                                $self->{lei}->{vmd_mod});
+        } else {
+                ++$self->{-nr_unimported};
+        }
+}
+
+sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
+        my ($self, $f) = @_;
+        my $eml = eml_from_path($f) or return;
+        input_eml_cb($self, $eml);
+}
+
+sub lei_tag { # the "lei tag" method
+        my ($lei, @argv) = @_;
+        $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin};
+        my $sto = $lei->_lei_store(1)->write_prepare($lei);
+        my $self = bless {}, __PACKAGE__;
+        $lei->ale; # refresh and prepare
+        $self->prepare_inputs($lei, \@argv) or return;
+        grep(defined, @{$lei->{vmd_mod}}{qw(+kw +L -L -kw)}) or
+                return $lei->fail('no keywords or labels specified');
+        $lei->{-err_type} = 'non-fatal';
+        $lei->wq1_start($self);
+}
+
+sub note_unimported {
+        my ($self) = @_;
+        my $n = $self->{-nr_unimported} or return;
+        $self->{lei}->{pkt_op_p}->pkt_do('incr', -nr_unimported => $n);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        PublicInbox::LeiInput::input_only_atfork_child($self);
+        $self->{lse} = $self->{lei}->{sto}->search;
+        # this goes out-of-scope at worker process exit:
+        on_destroy \&note_unimported, $self;
+}
+
+# Workaround bash word-splitting s to ['kw', ':', 'keyword' ...]
+# Maybe there's a better way to go about this in
+# contrib/completion/lei-completion.bash
+sub _complete_tag_common ($) {
+        my ($argv) = @_;
+        # Workaround bash word-splitting URLs to ['https', ':', '//' ...]
+        # Maybe there's a better way to go about this in
+        # contrib/completion/lei-completion.bash
+        my $re = '';
+        my $cur = pop(@$argv) // '';
+        if (@$argv) {
+                my @x = @$argv;
+                if ($cur eq ':' && @x) {
+                        push @x, $cur;
+                        $cur = '';
+                }
+                while (@x > 2 && $x[0] !~ /\A[+\-](?:kw|L)\z/ &&
+                                        $x[1] ne ':') {
+                        shift @x;
+                }
+                if (@x >= 2) { # qw(kw : $KEYWORD) or qw(kw :)
+                        $re = join('', @x);
+                } else { # just return everything and hope for the best
+                        $re = join('', @$argv);
+                }
+                $re = quotemeta($re);
+        }
+        ($cur, $re);
+}
+
+# FIXME: same problems as _complete_forget_external and similar
+sub _complete_tag {
+        my ($self, @argv) = @_;
+        require PublicInbox::LeiImport;
+        my @in = PublicInbox::LeiImport::_complete_import(@_);
+        my @L = eval { $self->_lei_store->search->all_terms('L') };
+        my @kwL = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW),
+                (map { ("+L:$_", "-L:$_") } @L));
+        my ($cur, $re) = _complete_tag_common(\@argv);
+        my @m = map {
+                # only return the part specified on the CLI
+                # don't duplicate if already 100% completed
+                /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : ();
+        } grep(/$re\Q$cur/, @kwL);
+        (@in, (@m ? @m : @kwL));
+}
+
+no warnings 'once'; # the following works even when LeiAuth is lazy-loaded
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
new file mode 100644
index 00000000..5481b5e4
--- /dev/null
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -0,0 +1,878 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir
+package PublicInbox::LeiToMail;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Eml;
+use PublicInbox::IO;
+use PublicInbox::Git;
+use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Import;
+use IO::Handle; # ->autoflush
+use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
+use PublicInbox::Syscall qw(rename_noreplace);
+use autodie qw(pipe open seek close);
+use Carp qw(croak);
+
+my %kw2char = ( # Maildir characters
+        draft => 'D',
+        flagged => 'F',
+        forwarded => 'P', # passed
+        answered => 'R',
+        seen => 'S',
+);
+
+my %kw2status = (
+        flagged => [ 'X-Status' => 'F' ],
+        answered => [ 'X-Status' => 'A' ],
+        seen => [ 'Status' => 'R' ],
+        draft => [ 'X-Status' => 'T' ],
+);
+
+sub _mbox_hdr_buf ($$$) {
+        my ($eml, $type, $smsg) = @_;
+        $eml->header_set($_) for (qw(Lines Bytes Content-Length));
+
+        my %hdr = (Status => []); # set Status, X-Status
+        for my $k (@{$smsg->{kw} // []}) {
+                if (my $ent = $kw2status{$k}) {
+                        push @{$hdr{$ent->[0]}}, $ent->[1];
+                } else { # X-Label?
+                        warn "# keyword `$k' not supported for mbox\n";
+                }
+        }
+        # When writing to empty mboxes, messages are always 'O'
+        # (not-\Recent in IMAP), it saves MUAs the trouble of
+        # rewriting the mbox if no other changes are made.
+        # We put 'O' at the end (e.g. "Status: RO") to match mutt(1) output.
+        # We only set smsg->{-recent} if augmenting existing stores.
+        my $status = join('', sort(@{$hdr{Status}}));
+        $status .= 'O' unless $smsg->{-recent};
+        $eml->header_set('Status', $status) if $status;
+        if (my $chars = delete $hdr{'X-Status'}) {
+                $eml->header_set('X-Status', join('', sort(@$chars)));
+        }
+        my $buf = delete $eml->{hdr};
+
+        PublicInbox::Eml::strip_from($$buf);
+        my $ident = $smsg->{blob} // 'lei';
+        if (defined(my $pct = $smsg->{pct})) { $ident .= "=$pct" }
+
+        substr($$buf, 0, 0, # prepend From line
+                "From $ident\@$type Thu Jan  1 00:00:00 1970$eml->{crlf}");
+        $buf;
+}
+
+sub atomic_append { # for on-disk destinations (O_APPEND, or O_EXCL)
+        my ($lei, $buf) = @_;
+        if (defined(my $w = syswrite($lei->{1} // return, $$buf))) {
+                return if $w == length($$buf);
+                $buf = "short atomic write: $w != ".length($$buf);
+        } elsif ($!{EPIPE}) {
+                return $lei->note_sigpipe(1);
+        } else {
+                $buf = "atomic write: $!";
+        }
+        $lei->fail($buf);
+}
+
+sub eml2mboxrd ($;$) {
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxrd', $smsg);
+        if (my $bdy = delete $eml->{bdy}) {
+                $$bdy =~ s/^(>*From )/>$1/gm;
+                $$buf .= $eml->{crlf};
+                substr($$bdy, 0, 0, $$buf); # prepend header
+                $buf = $bdy;
+        }
+        $$buf .= $eml->{crlf};
+        $buf;
+}
+
+sub eml2mboxo {
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxo', $smsg);
+        if (my $bdy = delete $eml->{bdy}) {
+                $$bdy =~ s/^From />From /gm;
+                $$buf .= $eml->{crlf};
+                substr($$bdy, 0, 0, $$buf); # prepend header
+                $buf = $bdy;
+        }
+        $$buf .= $eml->{crlf};
+        $buf;
+}
+
+sub _mboxcl_common ($$$) {
+        my ($buf, $bdy, $crlf) = @_;
+        # add Lines: so mutt won't have to add it on MUA close
+        my $lines = $$bdy =~ tr!\n!\n!;
+        $$buf .= 'Content-Length: '.length($$bdy).$crlf.
+                'Lines: '.$lines.$crlf.$crlf;
+        substr($$bdy, 0, 0, $$buf); # prepend header
+        $$bdy .= $crlf;
+        $bdy;
+}
+
+# mboxcl still escapes "From " lines
+sub eml2mboxcl {
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxcl', $smsg);
+        my $bdy = delete($eml->{bdy}) // \(my $empty = '');
+        $$bdy =~ s/^From />From /gm;
+        _mboxcl_common($buf, $bdy, $eml->{crlf});
+}
+
+# mboxcl2 has no "From " escaping
+sub eml2mboxcl2 {
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $smsg);
+        my $bdy = delete($eml->{bdy}) // \(my $empty = '');
+        _mboxcl_common($buf, $bdy, $eml->{crlf});
+}
+
+sub git_to_mail { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $smsg) = @_;
+        $type // return; # called by PublicInbox::Git::close
+        return if $PublicInbox::Git::in_cleanup;
+        my $self = delete $smsg->{l2m} // croak "BUG: no l2m (type=$type)";
+        $self->{lei} // croak "BUG: no {lei} (type=$type)";
+        eval {
+                if ($type eq 'missing' &&
+                          ($bref = $self->{-lms_rw}->local_blob($oid, 1))) {
+                        $type = 'blob';
+                        $size = length($$bref);
+                }
+                $type eq 'blob' or return $self->{lei}->child_error(0,
+                                                "W: $oid is $type (!= blob)");
+                $size or return $self->{lei}->child_error(0,"E: $oid is empty");
+                $smsg->{blob} eq $oid or die "BUG: expected=$smsg->{blob}";
+                $smsg->{bytes} ||= $size;
+                $self->{wcb}->($bref, $smsg);
+        };
+        $self->{lei}->fail("$@ (oid=$oid)") if $@;
+}
+
+sub reap_compress { # awaitpid callback
+        my ($pid, $lei, $cmd, $old_out) = @_;
+        $lei->{1} = $old_out;
+        $lei->fail($?, "@$cmd failed") if $?;
+}
+
+sub _post_augment_mbox { # open a compressor process from top-level lei-daemon
+        my ($self, $lei) = @_;
+        my $zsfx = $self->{zsfx} or return;
+        my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei);
+        my ($r, $w) = @{delete $lei->{zpipe}};
+        my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2}, pgid => 0 };
+        $lei->{1} = PublicInbox::IO::attach_pid($w, spawn($cmd, undef, $rdr),
+                                \&reap_compress, $lei, $cmd, $lei->{1});
+}
+
+# --augment existing output destination, with deduplication
+sub _augment { # MboxReader eml_cb
+        my ($eml, $lei) = @_;
+        # ignore return value, just populate the skv
+        $lei->{dedupe}->is_dup($eml);
+}
+
+sub _mbox_augment_kw_maybe {
+        my ($eml, $lei, $lse, $augment) = @_;
+        my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
+        update_kw_maybe($lei, $lse, $eml, $kw);
+        _augment($eml, $lei) if $augment;
+}
+
+sub _mbox_write_cb ($$) {
+        my ($self, $lei) = @_;
+        my $ovv = $lei->{ovv};
+        my $m = 'eml2'.$ovv->{fmt};
+        my $eml2mbox = $self->can($m) or die "$self->$m missing";
+        $lei->{1} // die "no stdout ($m, $ovv->{dst})"; # redirected earlier
+        $lei->{1}->autoflush(1);
+        my $atomic_append = !defined($ovv->{lock_path});
+        my $dedupe = $lei->{dedupe};
+        $dedupe->prepare_dedupe;
+        my $lse = $lei->{lse}; # may be undef
+        my $set_recent = $dedupe->has_entries;
+        sub { # for git_to_mail
+                my ($buf, $smsg, $eml) = @_;
+                $eml //= PublicInbox::Eml->new($buf);
+                ++$self->{-nr_seen};
+                return if $dedupe->is_dup($eml, $smsg);
+                $lse->xsmsg_vmd($smsg) if $lse;
+                $smsg->{-recent} = 1 if $set_recent;
+                $buf = $eml2mbox->($eml, $smsg);
+                if ($atomic_append) {
+                        atomic_append($lei, $buf);
+                } else {
+                        my $lk = $ovv->lock_for_scope;
+                        $lei->out($$buf);
+                }
+                ++$self->{-nr_write};
+        }
+}
+
+sub update_kw_maybe ($$$$) {
+        my ($lei, $lse, $eml, $kw) = @_;
+        return unless $lse;
+        my $c = $lse->kw_changed($eml, $kw, my $docids = []);
+        my $vmd = { kw => $kw };
+        if (scalar @$docids) { # already in lei/store
+                $lei->{sto}->wq_do('set_eml_vmd', undef, $vmd, $docids) if $c;
+        } elsif (my $xoids = $lei->{ale}->xoids_for($eml)) {
+                # it's in an external, only set kw, here
+                $lei->{sto}->wq_do('set_xvmd', $xoids, $eml, $vmd);
+        } else { # never-before-seen, import the whole thing
+                # XXX this is critical in protecting against accidental
+                # data loss without --augment
+                $lei->{sto}->wq_do('set_eml', $eml, $vmd);
+        }
+}
+
+sub _md_update { # maildir_each_eml cb
+        my ($f, $kw, $eml, $lei, $lse, $unlink) = @_;
+        update_kw_maybe($lei, $lse, $eml, $kw);
+        $unlink ? unlink($f) : _augment($eml, $lei);
+}
+
+# maildir_each_file callback, \&CORE::unlink doesn't work with it
+sub _unlink { unlink($_[0]) }
+
+sub _rand () {
+        state $seq = 0;
+        sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq);
+}
+
+sub kw2suffix ($;@) {
+        my $kw = shift;
+        join('', sort(map { $kw2char{$_} // () } @$kw, @_));
+}
+
+sub _buf2maildir ($$$$) {
+        my ($dst, $buf, $smsg, $dir) = @_;
+        my $kw = $smsg->{kw} // [];
+        my $rand = ''; # chosen by die roll :P
+        my ($tmp, $fh, $base, $ok);
+        my $common = $smsg->{blob} // _rand;
+        if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
+        do {
+                $tmp = $dst.'tmp/'.$rand.$common;
+        } while (!($ok = sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) &&
+                $!{EEXIST} && ($rand = _rand.','));
+        if ($ok && print $fh $$buf and $fh->close) {
+                $dst .= $dir; # 'new/' or 'cur/'
+                $rand = '';
+                do {
+                        $base = $rand.$common.':2,'.kw2suffix($kw);
+                } while (!($ok = rename_noreplace($tmp, $dst.$base)) &&
+                        $!{EEXIST} && ($rand = _rand.','));
+                \$base;
+        } else {
+                my $err = "Error writing $smsg->{blob} to $dst: $!\n";
+                $_[0] = undef; # clobber dst
+                unlink($tmp);
+                die $err;
+        }
+}
+
+sub _maildir_write_cb ($$) {
+        my ($self, $lei) = @_;
+        my $dedupe = $lei->{dedupe};
+        $dedupe->prepare_dedupe if $dedupe;
+        my $dst = $lei->{ovv}->{dst};
+        my $lse = $lei->{lse}; # may be undef
+        my $lms = $self->{-lms_rw};
+        my $out = $lms ? 'maildir:'.$lei->abs_path($dst) : undef;
+        $lms->lms_write_prepare if $lms;
+
+        # Favor cur/ and only write to new/ when augmenting.  This
+        # saves MUAs from having to do a mass rename when the initial
+        # search result set is huge.
+        my $dir = $dedupe && $dedupe->has_entries ? 'new/' : 'cur/';
+        sub { # for git_to_mail
+                my ($bref, $smsg, $eml) = @_;
+                $dst // return $lei->fail; # dst may be undef-ed in last run
+
+                ++$self->{-nr_seen};
+                return if $dedupe && $dedupe->is_dup($eml //
+                                                PublicInbox::Eml->new($$bref),
+                                                $smsg);
+                $lse->xsmsg_vmd($smsg) if $lse;
+                my $n = _buf2maildir($dst, $bref // \($eml->as_string),
+                                        $smsg, $dir);
+                $lms->set_src($smsg->oidbin, $out, $n) if $lms;
+                ++$self->{-nr_write};
+        }
+}
+
+sub _imap_write_cb ($$) {
+        my ($self, $lei) = @_;
+        my $dedupe = $lei->{dedupe};
+        $dedupe->prepare_dedupe if $dedupe;
+        my $append = $lei->{net}->can('imap_append');
+        my $uri = $self->{uri} // die 'BUG: no {uri}';
+        my $mic = $lei->{net}->mic_get($uri) // die <<EOM;
+E: $uri connection failed.
+E: Consider using `--jobs ,1' to limit IMAP connections
+EOM
+        my $folder = $uri->mailbox;
+        $uri->uidvalidity($mic->uidvalidity($folder));
+        my $lse = $lei->{lse}; # may be undef
+        my $lms = $self->{-lms_rw};
+        $lms->lms_write_prepare if $lms;
+        sub { # for git_to_mail
+                my ($bref, $smsg, $eml) = @_;
+                $mic // return $lei->fail; # mic may be undef-ed in last run
+
+                ++$self->{-nr_seen};
+                return if $dedupe && $dedupe->is_dup($eml //
+                                                PublicInbox::Eml->new($$bref),
+                                                $smsg);
+                $lse->xsmsg_vmd($smsg) if $lse;
+                my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) };
+                if (my $err = $@) {
+                        undef $mic;
+                        die $err;
+                }
+                # imap_append returns UID if IMAP server has UIDPLUS extension
+                ($lms && $uid =~ /\A[0-9]+\z/) and
+                        $lms->set_src($smsg->oidbin, $$uri, $uid + 0);
+                ++$self->{-nr_write};
+        }
+}
+
+sub _text_write_cb ($$) {
+        my ($self, $lei) = @_;
+        my $dedupe = $lei->{dedupe};
+        $dedupe->prepare_dedupe if $dedupe;
+        my $lvt = $lei->{lvt};
+        my $ovv = $lei->{ovv};
+        $lei->{1} // die "no stdout ($ovv->{dst})"; # redirected earlier
+        $lei->{1}->autoflush(1);
+        binmode $lei->{1}, ':utf8';
+        my $lse = $lei->{lse}; # may be undef
+        sub { # for git_to_mail
+                my ($bref, $smsg, $eml) = @_;
+                $lse->xsmsg_vmd($smsg) if $lse;
+                $eml //= PublicInbox::Eml->new($bref);
+                return if $dedupe && $dedupe->is_dup($eml, $smsg);
+                my $lk = $ovv->lock_for_scope;
+                $lei->out(${$lvt->eml_to_text($smsg, $eml)}, "\n");
+        }
+}
+
+sub _v2_write_cb ($$) {
+        my ($self, $lei) = @_;
+        my $dedupe = $lei->{dedupe};
+        $dedupe->prepare_dedupe if $dedupe;
+        # only call in worker
+        $PublicInbox::Import::DROP_UNIQUE_UNSUB = $lei->{-drop_unique_unsub};
+        sub { # for git_to_mail
+                my ($bref, $smsg, $eml) = @_;
+                $eml //= PublicInbox::Eml->new($bref);
+                ++$self->{-nr_seen};
+                return if $dedupe && $dedupe->is_dup($eml, $smsg);
+                $lei->{v2w}->add($eml) and ++$self->{-nr_write};
+        }
+}
+
+sub write_cb { # returns a callback for git_to_mail
+        my ($self, $lei) = @_;
+        # _mbox_write_cb, _maildir_write_cb, _imap_write_cb, _v2_write_cb
+        my $m = "_$self->{base_type}_write_cb";
+        $self->$m($lei);
+}
+
+sub new {
+        my ($cls, $lei) = @_;
+        my $fmt = $lei->{ovv}->{fmt};
+        my $dst = $lei->{ovv}->{dst};
+        my $self = bless {}, $cls;
+        my @conflict;
+        if ($fmt eq 'maildir') {
+                require PublicInbox::MdirReader;
+                $self->{base_type} = 'maildir';
+                -e $dst && !-d _ and die
+                                "$dst exists and is not a directory\n";
+                $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+        } elsif ($fmt eq 'mh') {
+                -e $dst && !-d _ and die
+                                "$dst exists and is not a directory\n";
+                $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+        } elsif (substr($fmt, 0, 4) eq 'mbox') {
+                require PublicInbox::MboxReader;
+                $self->can("eml2$fmt") or die <<EOM;
+E: bad mbox format: $fmt (did you mean: mboxrd, mboxo, mboxcl, or mboxcl2?)
+EOM
+                $self->{base_type} = 'mbox';
+                if ($lei->{cmd} eq 'q' &&
+                                (($lei->path_to_fd($dst) // -1) < 0) &&
+                                (-f $dst || !-e _)) {
+                        $lei->{opt}->{save} //= \1;
+                }
+        } elsif ($fmt =~ /\Aimaps?\z/) {
+                require PublicInbox::NetWriter;
+                require PublicInbox::URIimap;
+                # {net} may exist from "lei up" for auth
+                my $net = $lei->{net} // PublicInbox::NetWriter->new;
+                $net->{quiet} = $lei->{opt}->{quiet};
+                my $uri = PublicInbox::URIimap->new($dst)->canonical;
+                $net->add_url($$uri);
+                my $err = $net->errors($lei);
+                return $lei->fail($err) if $err;
+                $uri->mailbox or return $lei->fail("No mailbox: $dst");
+                $self->{uri} = $uri;
+                $dst = $lei->{ovv}->{dst} = $$uri; # canonicalized
+                $lei->{net} = $net;
+                $self->{base_type} = 'imap';
+                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+        } elsif ($fmt eq 'text' || $fmt eq 'reply') {
+                require PublicInbox::LeiViewText;
+                $lei->{lvt} = PublicInbox::LeiViewText->new($lei, $fmt);
+                $self->{base_type} = 'text';
+                $self->{-wq_nr_workers} = 1; # for pager
+                @conflict = qw(mua save);
+        } elsif ($fmt eq 'v2') {
+                die "--dedupe=oid and v2 are incompatible\n" if
+                        ($lei->{opt}->{dedupe}//'') eq 'oid';
+                $self->{base_type} = 'v2';
+                $self->{-wq_nr_workers} = 1; # v2 has shards
+                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+                $dst = $lei->{ovv}->{dst} = $lei->abs_path($dst);
+                @conflict = qw(mua sort);
+        } else {
+                die "bad mail --format=$fmt\n";
+        }
+        if ($self->{base_type} =~ /\A(?:text|mbox)\z/) {
+                (-d $dst || (-e _ && !-w _)) and die
+                        "$dst exists and is not a writable file\n";
+        }
+        $lei->{input_opt} and # lei_convert sets this
+                @conflict = grep { !$lei->{input_opt}->{$_} } @conflict;
+        my @err = map { defined($lei->{opt}->{$_}) ? "--$_" : () } @conflict;
+        die "@err incompatible with $fmt\n" if @err;
+        $self->{dst} = $dst;
+        $lei->{dedupe} = $lei->{lss} // do {
+                my $dd_cls = 'PublicInbox::'.
+                        ($lei->{opt}->{save} ? 'LeiSavedSearch' : 'LeiDedupe');
+                eval "require $dd_cls";
+                die "$dd_cls: $@" if $@;
+                my $dd = $dd_cls->new($lei);
+                $lei->{lss} //= $dd if $dd && $dd->can('cfg_set');
+                $dd;
+        };
+        $self;
+}
+
+sub _pre_augment_maildir {
+        my ($self, $lei) = @_;
+        my $dst = $lei->{ovv}->{dst};
+        require File::Path;
+        File::Path::make_path(map { $dst.$_ } qw(tmp new cur));
+        # for utime, so no opendir
+        open $self->{poke_dh}, '<', "${dst}cur";
+}
+
+sub clobber_dst_prepare ($;$) {
+        my ($lei, $f) = @_;
+        if (my $lms = defined($f) ? $lei->lms : undef) {
+                $lms->lms_write_prepare;
+                $lms->forget_folders($f);
+        }
+        my $dedupe = $lei->{dedupe} or return;
+        $dedupe->reset_dedupe if $dedupe->can('reset_dedupe');
+}
+
+sub _do_augment_maildir {
+        my ($self, $lei) = @_;
+        return if $lei->{cmd} eq 'up';
+        my $dst = $lei->{ovv}->{dst};
+        my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef;
+        my $mdr = PublicInbox::MdirReader->new;
+        if ($lei->{opt}->{augment}) {
+                my $dedupe = $lei->{dedupe};
+                if ($dedupe && $dedupe->prepare_dedupe) {
+                        $mdr->{shard_info} = $self->{shard_info};
+                        $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse);
+                        $dedupe->pause_dedupe;
+                }
+        } elsif ($lse) {
+                clobber_dst_prepare($lei, "maildir:$dst");
+                $mdr->{shard_info} = $self->{shard_info};
+                $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse, 1);
+        } else {# clobber existing Maildir
+                clobber_dst_prepare($lei, "maildir:$dst");
+                $mdr->maildir_each_file($dst, \&_unlink);
+        }
+}
+
+sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb
+        my ($uri, $uid, $kw, $eml, $lei, $lse, $delete_mic) = @_;
+        update_kw_maybe($lei, $lse, $eml, $kw);
+        if ($delete_mic) {
+                $lei->{net}->imap_delete_1($uri, $uid, $delete_mic);
+        } else {
+                _augment($eml, $lei);
+        }
+}
+
+sub _do_augment_imap {
+        my ($self, $lei) = @_;
+        return if $lei->{cmd} eq 'up';
+        my $net = $lei->{net};
+        my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef;
+        if ($lei->{opt}->{augment}) {
+                my $dedupe = $lei->{dedupe};
+                if ($dedupe && $dedupe->prepare_dedupe) {
+                        $net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+                                        $lei, $lse);
+                        $dedupe->pause_dedupe;
+                }
+        } elsif ($lse) {
+                my $delete_mic;
+                clobber_dst_prepare($lei, "$self->{uri}");
+                $net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+                                        $lei, $lse, \$delete_mic);
+                $delete_mic->expunge if $delete_mic;
+        } elsif (!$self->{-wq_worker_nr}) { # undef or 0
+                # clobber existing IMAP folder
+                clobber_dst_prepare($lei, "$self->{uri}");
+                $net->imap_delete_all($self->{uri});
+        }
+}
+
+sub _pre_augment_text {
+        my ($self, $lei) = @_;
+        my $dst = $lei->{ovv}->{dst};
+        my $out;
+        my $devfd = $lei->path_to_fd($dst) // die "bad $dst";
+        if ($devfd >= 0) {
+                $out = $lei->{$devfd};
+        } else { # normal-looking path
+                if (-p $dst) {
+                        open $out, '>', $dst;
+                } elsif (-f _ || !-e _) {
+                        # text allows augment, HTML/Atom won't
+                        my $mode = $lei->{opt}->{augment} ? '>>' : '>';
+                        open $out, $mode, $dst;
+                } else {
+                        die "$dst is not a file or FIFO\n";
+                }
+        }
+        $lei->{ovv}->ovv_out_lk_init if !$lei->{ovv}->{lock_path};
+        $lei->{1} = $out;
+        undef;
+}
+
+sub _pre_augment_mbox {
+        my ($self, $lei) = @_;
+        my $dst = $lei->{ovv}->{dst};
+        my $out;
+        my $devfd = $lei->path_to_fd($dst) // die "bad $dst";
+        if ($devfd >= 0) {
+                $out = $lei->{$devfd};
+        } else { # normal-looking path
+                if (-p $dst) {
+                        open $out, '>', $dst;
+                } elsif (-f _ || !-e _) {
+                        require PublicInbox::MboxLock;
+                        my $m = $lei->{opt}->{'lock'} //
+                                        PublicInbox::MboxLock->defaults;
+                        $self->{mbl} = PublicInbox::MboxLock->acq($dst, 1, $m);
+                        $out = $self->{mbl}->{fh};
+                } else {
+                        die "$dst is not a file or FIFO\n";
+                }
+                $lei->{old_1} = $lei->{1}; # keep for spawning MUA
+        }
+        # Perl does SEEK_END even with O_APPEND :<
+        $self->{seekable} = $out->seek(0, SEEK_SET);
+        if (!$self->{seekable} && !$!{ESPIPE} && !defined($devfd)) {
+                die "seek($dst): $!\n";
+        }
+        if (!$self->{seekable}) {
+                my $imp_before = $lei->{opt}->{'import-before'};
+                die "--import-before specified but $dst is not seekable\n"
+                        if $imp_before && !ref($imp_before);
+                die "--augment specified but $dst is not seekable\n" if
+                        $lei->{opt}->{augment};
+                die "cannot --save with unseekable $dst\n" if
+                        $lei->{dedupe} && $lei->{dedupe}->can('reset_dedupe');
+        }
+        if ($self->{zsfx} = PublicInbox::MboxReader::zsfx($dst)) {
+                pipe(my $r, my $w);
+                $lei->{zpipe} = [ $r, $w ];
+                $lei->{ovv}->{lock_path} and
+                        die 'BUG: unexpected {ovv}->{lock_path}';
+                $lei->{ovv}->ovv_out_lk_init;
+        } elsif (!$self->{seekable} && !$lei->{ovv}->{lock_path}) {
+                $lei->{ovv}->ovv_out_lk_init;
+        }
+        $lei->{1} = $out;
+        undef;
+}
+
+sub finish_output {
+        my ($self, $lei) = @_;
+        my $out = delete $lei->{1} // die 'BUG: no lei->{1}';
+        my $old = delete $lei->{old_1} or return; # path only
+        $lei->{1} = $old;
+        return if $out->close; # reaps gzip|pigz|xz|bzip2
+        my $msg = "E: Error closing $lei->{ovv}->{dst}";
+        $? ? $lei->child_error($?) : ($msg .= " ($!)");
+        die $msg;
+}
+
+sub _do_augment_mbox {
+        my ($self, $lei) = @_;
+        return unless $self->{seekable};
+        my $opt = $lei->{opt};
+        return if $lei->{cmd} eq 'up';
+        my $out = $lei->{1};
+        my ($fmt, $dst) = @{$lei->{ovv}}{qw(fmt dst)};
+        return clobber_dst_prepare($lei) unless -s $out;
+        unless ($opt->{augment} || $opt->{'import-before'}) {
+                truncate($out, 0) or die "truncate($dst): $!";
+                return;
+        }
+        my $rd;
+        if (my $zsfx = $self->{zsfx}) {
+                $rd = PublicInbox::MboxReader::zsfxcat($out, $zsfx, $lei);
+        } else {
+                open($rd, '+>>&', $out);
+        }
+        my $dedupe;
+        if ($opt->{augment}) {
+                $dedupe = $lei->{dedupe};
+                $dedupe->prepare_dedupe if $dedupe;
+        } else {
+                clobber_dst_prepare($lei);
+        }
+        if ($opt->{'import-before'}) { # the default
+                my $lse = $lei->{lse};
+                PublicInbox::MboxReader->$fmt($rd, \&_mbox_augment_kw_maybe,
+                                                $lei, $lse, $opt->{augment});
+                if (!$opt->{augment} and !truncate($out, 0)) {
+                        die "truncate($dst): $!";
+                }
+        } else { # --augment --no-import-before
+                PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
+        }
+        # maybe some systems don't honor O_APPEND, Perl does this:
+        seek($out, 0, SEEK_END);
+        $dedupe->pause_dedupe if $dedupe;
+}
+
+sub _pre_augment_v2 {
+        my ($self, $lei) = @_;
+        my $dir = $self->{dst};
+        require PublicInbox::InboxWritable;
+        my ($ibx, @creat);
+        if (-d $dir) {
+                my $opt = { -min_inbox_version => 2 };
+                require PublicInbox::Admin;
+                my @ibx = PublicInbox::Admin::resolve_inboxes([ $dir ], $opt);
+                $ibx = $ibx[0] or die "$dir is not a v2 inbox\n";
+        } else {
+                $creat[0] = {};
+                $ibx = PublicInbox::Inbox->new({
+                        name => 'lei-result', # XXX configurable
+                        inboxdir => $dir,
+                        version => 2,
+                        address => [ 'lei@example.com' ],
+                });
+        }
+        PublicInbox::InboxWritable->new($ibx, @creat);
+        local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # only for workers
+        PublicInbox::Import::load_config(PublicInbox::Config->new, sub {
+                $lei->x_it(shift);
+                die "E: can't write v2 inbox with broken config\n";
+        });
+        $lei->{-drop_unique_unsub} = $PublicInbox::Import::DROP_UNIQUE_UNSUB;
+        $ibx->init_inbox if @creat;
+        $lei->{v2w} = $ibx->importer;
+        return if !$lei->{opt}->{shared};
+        my $d = "$lei->{ale}->{git}->{git_dir}/objects";
+        open my $fh, '+>>', my $f = "$dir/git/0.git/objects/info/alternates";
+        seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>'
+        my $seen = grep /\A\Q$d\E\n/, PublicInbox::IO::read_all $fh;
+        print $fh "$d\n" if !$seen;
+        close $fh;
+}
+
+sub pre_augment { # fast (1 disk seek), runs in same process as post_augment
+        my ($self, $lei) = @_;
+        # _pre_augment_maildir, _pre_augment_mbox, _pre_augment_v2
+        my $m = $self->can("_pre_augment_$self->{base_type}") or return;
+        $m->($self, $lei);
+}
+
+sub do_augment { # slow, runs in wq worker
+        my ($self, $lei) = @_;
+        # _do_augment_maildir, _do_augment_mbox, or _do_augment_imap
+        my $m = $self->can("_do_augment_$self->{base_type}") or return;
+        $m->($self, $lei);
+}
+
+sub post_augment_call ($$$$) {
+        my ($self, $lei, $m, $post_augment_done) = @_;
+        eval { $m->($self, $lei) };
+        $lei->{post_augment_err} = $@ if $@; # for post_augment_done
+}
+
+# fast (spawn compressor or mkdir), runs in same process as pre_augment
+sub post_augment {
+        my ($self, $lei, $post_augment_done) = @_;
+        $self->{-au_noted}++ and $lei->qerr("# writing to $self->{dst} ...");
+
+        # _post_augment_mbox
+        my $m = $self->can("_post_augment_$self->{base_type}") or return;
+
+        # --import-before is only for lei-(q|lcat), not lei-convert
+        $lei->{opt}->{'import-before'} or
+                return post_augment_call $self, $lei, $m, $post_augment_done;
+
+        # we can't deal with post_augment until import-before commits:
+        require PublicInbox::EOFpipe;
+        my @io = @$lei{qw(2 sock)};
+        pipe(my $r, $io[2]);
+        PublicInbox::EOFpipe->new($r, \&post_augment_call,
+                                $self, $lei, $m, $post_augment_done);
+        $lei->{sto}->wq_io_do('barrier', \@io);
+        # _post_augment_* && post_augment_done run when barrier is complete
+}
+
+# called by every single l2m worker process
+sub do_post_auth {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        # lei_xsearch can start as soon as all l2m workers get here
+        $lei->{pkt_op_p}->pkt_do('incr_start_query') or
+                die "incr_start_query: $!";
+        my $aug;
+        if (lock_free($self)) { # all workers do_augment
+                my $mod = $self->{-wq_nr_workers};
+                my $shard = $self->{-wq_worker_nr};
+                if (my $net = $lei->{net}) {
+                        $net->{shard_info} = [ $mod, $shard ];
+                } else { # Maildir
+                        $self->{shard_info} = [ $mod, $shard ];
+                }
+                $aug = 'incr_post_augment';
+        } elsif ($self->{-wq_worker_nr} == 0) { # 1st worker do_augment
+                $aug = 'do_post_augment';
+        }
+        if ($aug) {
+                local $0 = 'do_augment';
+                eval { do_augment($self, $lei) };
+                $lei->fail($@) if $@;
+                $lei->{pkt_op_p}->pkt_do($aug) or die "pkt_do($aug): $!";
+        }
+        # done augmenting, connect the compressor pipe for each worker
+        if (my $zpipe = delete $lei->{zpipe}) {
+                $lei->{1} = $zpipe->[1];
+                close $zpipe->[0];
+        }
+        my $au_peers = delete $self->{au_peers};
+        if ($au_peers) { # wait for peer l2m to finish augmenting:
+                $au_peers->[1] = undef;
+                sysread($au_peers->[0], my $barrier1, 1);
+        }
+        eval { $self->{wcb} = $self->write_cb($lei) };
+        $lei->fail($@) if $@;
+        if ($au_peers) { # wait for peer l2m to set write_cb
+                $au_peers->[3] = undef;
+                sysread($au_peers->[2], my $barrier2, 1);
+        }
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child;
+        $lei->{auth}->do_auth_atfork($self) if $lei->{auth};
+        $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
+        $self->{git} = $self->{lei}->{ale}->git;
+        $SIG{TERM} = sub { # avoid ->DESTROY ordering problems
+                my $git = delete $self->{git};
+                $git->async_wait_all if $git;
+                exit(15 + 128);
+        };
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub lock_free {
+        $_[0]->{base_type} =~ /\A(?:maildir|imap|jmap)\z/ ? 1 : 0;
+}
+
+# wakes up the MUA when complete so it can refresh messages list
+sub poke_dst {
+        my ($self) = @_;
+        if ($self->{base_type} eq 'maildir') {
+                my $t = time + 1;
+                utime($t, $t, $self->{poke_dh}) or warn "futimes: $!";
+        }
+}
+
+sub write_mail { # via ->wq_io_do
+        my ($self, $smsg, $eml) = @_;
+        if ($eml) {
+                eval { $self->{wcb}->(undef, $smsg, $eml) };
+                $self->{lei}->fail("blob=$smsg->{blob} $@") if $@;
+        } else {
+                $smsg->{l2m} = $self;
+                $self->{git}->cat_async($smsg->{blob}, \&git_to_mail, $smsg);
+        }
+}
+
+sub wq_atexit_child {
+        my ($self) = @_;
+        local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+        my $lei = $self->{lei};
+        $lei->{ale}->git->async_wait_all;
+        my ($nr_w, $nr_s) = delete(@$self{qw(-nr_write -nr_seen)});
+        if (my $v2w = delete $lei->{v2w}) {
+                eval { $v2w->done };
+                $lei->child_error($?, "E: $@ ($v2w->{ibx}->{inboxdir})") if $@;
+        }
+        delete $self->{wcb};
+        (($nr_w //= 0) + ($nr_s //= 0)) or return;
+        return if $lei->{early_mua} || !$lei->{-progress} || !$lei->{pkt_op_p};
+        $lei->{pkt_op_p}->pkt_do('incr', -nr_write => $nr_w, -nr_seen => $nr_s)
+}
+
+# runs on a 1s timer in lei-daemon
+sub augment_inprogress {
+        my ($err, $opt, $dst, $au_noted) = @_;
+        eval {
+                return if $$au_noted++ || !$err || !defined(fileno($err));
+                print $err '# '.($opt->{'import-before'} ?
+                                "importing non-external contents of $dst" : (
+                                ($opt->{dedupe} // 'content') ne 'none') ?
+                                "scanning old contents of $dst for dedupe" :
+                                "removing old contents of $dst")." ...\n";
+        };
+        warn "E: $@ ($dst)" if $@;
+}
+
+# called in top-level lei-daemon when LeiAuth is done
+sub net_merge_all_done {
+        my ($self, $lei) = @_;
+        if ($PublicInbox::DS::in_loop &&
+                        $self->can("_do_augment_$self->{base_type}") &&
+                        !$lei->{opt}->{quiet}) {
+                $self->{-au_noted} = 0;
+                PublicInbox::DS::add_timer(1, \&augment_inprogress,
+                                $lei->{2}, $lei->{opt},
+                                $self->{dst}, \$self->{-au_noted});
+        }
+        $self->wq_broadcast('do_post_auth');
+        $self->wq_close;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm
new file mode 100644
index 00000000..9931f017
--- /dev/null
+++ b/lib/PublicInbox/LeiUp.pm
@@ -0,0 +1,222 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei up" - updates the result of "lei q --save"
+package PublicInbox::LeiUp;
+use v5.12;
+# n.b. we use LeiInput to setup IMAP auth
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::LeiSavedSearch; # OverIdx
+use PublicInbox::DS;
+use PublicInbox::PktOp;
+use PublicInbox::LeiFinmsg;
+use PublicInbox::LEI;
+my $REMOTE_RE = qr!\A(?:imap|http)s?://!i; # http(s) will be for JMAP
+
+sub up1 ($$) {
+        my ($lei, $out) = @_;
+        # precedence note for CLI switches between lei q and up:
+        # `lei q --only' > `lei q --no-(remote|local|external)'
+        # `lei up --no-(remote|local|external)' > `lei.q.only' in saved search
+        my %no = map {
+                my $v = $lei->{opt}->{$_}; # set by CLI
+                (defined($v) && !$v) ? ($_ => 1) : ();
+        } qw(remote local external);
+        my $cli_exclude = delete $lei->{opt}->{exclude};
+        my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return;
+        my $f = $lss->{'-f'};
+        my $mset_opt = $lei->{mset_opt} = { relevance => -2 };
+        my $q = $lss->{-cfg}->get_all('lei.q') //
+                                die("lei.q unset in $f (out=$out)\n");
+        my $lse = $lei->{lse} // die 'BUG: {lse} missing';
+        my $rawstr = $lss->{-cfg}->{'lei.internal.rawstr'} //
+                (scalar(@$q) == 1 && substr($q->[0], -1) eq "\n");
+        if ($rawstr) {
+                die <<EOM if scalar(@$q) > 1;
+$f: lei.q has multiple values (@$q) (out=$out)
+$f: while lei.internal.rawstr is set
+EOM
+                $lse->query_approxidate($lse->git, $mset_opt->{qstr} = $q->[0]);
+        } else {
+                $mset_opt->{qstr} = $lse->query_argv_to_string($lse->git, $q);
+        }
+        # n.b. only a few CLI args are accepted for "up", so //= usually sets
+        for my $k ($lss->ARRAY_FIELDS) {
+                my $v = $lss->{-cfg}->get_all("lei.q.$k") // next;
+                $lei->{opt}->{$k} //= $v;
+        }
+
+        # --no-(local|remote) CLI flags overrided saved `lei.q.only'
+        my $only = $lei->{opt}->{only};
+        @$only = map { $lei->get_externals($_) } @$only if $only;
+        if (scalar keys %no && $only) {
+                @$only = grep(!m!\Ahttps?://!i, @$only) if $no{remote};
+                @$only = grep(m!\Ahttps?://!i, @$only) if $no{'local'};
+        }
+        if ($cli_exclude) {
+                my $ex = $lei->canonicalize_excludes($cli_exclude);
+                @$only = grep { !$ex->{$_} } @$only if $only;
+                push @{$lei->{opt}->{exclude}}, @$cli_exclude;
+        }
+        delete $lei->{opt}->{only} if $no{external} || ($only && !@$only);
+        for my $k ($lss->BOOL_FIELDS, $lss->SINGLE_FIELDS) {
+                my $v = $lss->{-cfg}->get_1("lei.q.$k") // next;
+                $lei->{opt}->{$k} //= $v;
+        }
+        my $o = $lei->{opt}->{output} // '';
+        return die("lei.q.output unset in $f (out=$out)\n") if $o eq '';
+        $lss->translate_dedupe($lei) or return;
+        $lei->{lss} = $lss; # for LeiOverview->new and query_remote_mboxrd
+        my $lxs = $lei->lxs_prepare or return;
+        $lei->ale->refresh_externals($lxs, $lei);
+        $lei->_start_query;
+}
+
+sub redispatch_all ($$) {
+        my ($self, $lei) = @_;
+        my $upq = [ (@{$self->{o_local} // []}, @{$self->{o_remote} // []}) ];
+        return up1($lei, $upq->[0]) if @$upq == 1; # just one, may start MUA
+
+        PublicInbox::OverIdx::fork_ok($lei->{opt});
+        # FIXME: this is also used per-query, see lei->_start_query
+        my $j = $lei->{opt}->{jobs} || do {
+                my $n = $self->detect_nproc // 1;
+                $n > 4 ? 4 : $n;
+        };
+        $j = ($j =~ /\A([0-9]+)/) ? $1 + 0 : 1; # may be --jobs=$x,$m on CLI
+        # re-dispatch into our event loop w/o creating an extra fork-level
+        # $upq will be drained via DESTROY as each query finishes
+        $lei->{fmsg} = PublicInbox::LeiFinmsg->new($lei);
+        my ($op_c, $op_p) = PublicInbox::PktOp->pair;
+        # call lei->dclose when upq is done processing:
+        $op_c->{ops} = { '' => [ $lei->can('dclose'), $lei ] };
+        my @first_batch = splice(@$upq, 0, $j); # initial parallelism
+        $lei->{-upq} = $upq;
+        $lei->event_step_init; # wait for client disconnects
+        for my $out (@first_batch) {
+                PublicInbox::DS::requeue(
+                        PublicInbox::LeiUp1::nxt($lei, $out, $op_p));
+        }
+}
+
+sub filter_lss {
+        my ($self, $lei, $all) = @_;
+        my @outs = PublicInbox::LeiSavedSearch::list($lei);
+        if ($all eq 'local') {
+                $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ];
+        } elsif ($all eq 'remote') {
+                $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ];
+        } elsif ($all eq '') {
+                $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ];
+                $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ];
+        } else {
+                undef;
+        }
+}
+
+sub lei_up {
+        my ($lei, @outs) = @_;
+        my $opt = $lei->{opt};
+        my $self = bless { -mail_sync => 1 }, __PACKAGE__;
+        if (defined(my $all = $opt->{all})) {
+                return $lei->fail("--all and @outs incompatible") if @outs;
+                defined($opt->{mua}) and return
+                        $lei->fail('--all and --mua= are incompatible');
+                filter_lss($self, $lei, $all) // return
+                        $lei->fail("only --all=$all not understood");
+        } elsif ($lei->{lse}) { # redispatched
+                scalar(@outs) == 1 or die "BUG: lse set w/ >1 out[@outs]";
+                return up1($lei, $outs[0]);
+        } else {
+                $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ];
+                $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ];
+        }
+        $lei->{lse} = $lei->_lei_store(1)->write_prepare($lei)->search;
+        ((@{$self->{o_local} // []} + @{$self->{o_remote} // []}) > 1 &&
+                defined($opt->{mua})) and return $lei->fail(<<EOM);
+multiple outputs and --mua= are incompatible
+EOM
+        if ($self->{o_remote}) { # setup lei->{auth}
+                $self->prepare_inputs($lei, $self->{o_remote}) or return;
+        }
+        if ($lei->{auth}) { # start auth worker
+                require PublicInbox::NetWriter;
+                bless $lei->{net}, 'PublicInbox::NetWriter';
+                $lei->wq1_start($self);
+                # net_merge_all_done will fire when auth is done
+        } else {
+                redispatch_all($self, $lei); # see below
+        }
+}
+
+# called in top-level lei-daemon when LeiAuth is done
+sub net_merge_all_done {
+        my ($self, $lei) = @_;
+        $lei->{net} = delete($self->{-net_new}) if $self->{-net_new};
+        $self->wq_close;
+        eval { redispatch_all($self, $lei) };
+        $lei->child_error(0, "E: $@") if $@;
+}
+
+sub _complete_up { # lei__complete hook
+        my ($lei, @argv) = @_;
+        my $match_cb = $lei->complete_url_prepare(\@argv);
+        map { $match_cb->($_) } PublicInbox::LeiSavedSearch::list($lei);
+}
+
+sub _wq_done_wait { # awaitpid cb
+        my ($pid, $wq, $lei) = @_;
+        $lei->child_error($?, 'auth failure') if $?
+}
+
+no warnings 'once';
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+
+package PublicInbox::LeiUp1; # for redispatch_all
+use v5.12;
+
+sub nxt ($$$) {
+        my ($lei, $out, $op_p) = @_;
+        bless { lei => $lei, out => $out, op_p => $op_p }, __PACKAGE__;
+}
+
+sub event_step { # runs via PublicInbox::DS::requeue
+        my ($self) = @_;
+        my $lei = $self->{lei}; # the original, from lei_up
+        my $l = bless { %$lei }, ref($lei); # per-output copy
+        delete($l->{sock}) or return; # client disconnected if {sock} is gone
+        $l->{opt} = { %{$l->{opt}} }; # deep copy
+        delete $l->{opt}->{all};
+        $l->qerr("# updating $self->{out}");
+        my $o = " (output: $self->{out})"; # add to all warnings
+        my $cb = $SIG{__WARN__} // \&CORE::warn;
+        local $SIG{__WARN__} = sub {
+                my @m = @_;
+                push(@m, $o) if !@m || $m[-1] !~ s/\n\z/$o\n/;
+                $cb->(@m);
+        };
+        $l->{-up1} = $self; # for LeiUp1->DESTROY
+        delete @$l{qw(-socks -event_init_done)};
+        my ($op_c, $op_p) = PublicInbox::PktOp->pair;
+        $self->{unref_on_destroy} = $op_c->{sock}; # to cleanup $lei->{-socks}
+        $lei->pkt_ops($op_c->{ops} //= {}); # errors from $l -> script/lei
+        push @{$lei->{-socks}}, $op_c->{sock}; # script/lei signals to $l
+        $l->{sock} = $op_p->{op_p}; # receive signals from op_c->{sock}
+        $op_c = $op_p = undef;
+
+        eval { $l->dispatch('up', $self->{out}) };
+        $lei->child_error(0, $@) if $@ || $l->{failed}; # lei->fail()
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        return if ($PublicInbox::LEI::daemon_pid // -1) != $$;
+        my $lei = $self->{lei}; # the original, from lei_up
+        my $sock = delete $self->{unref_on_destroy};
+        my $s = $lei->{-socks} // [];
+        @$s = grep { $_ != $sock } @$s;
+        my $out = shift(@{$lei->{-upq}}) or return;
+        PublicInbox::DS::requeue(nxt($lei, $out, $self->{op_p}));
+}
+
+1;
diff --git a/lib/PublicInbox/LeiViewText.pm b/lib/PublicInbox/LeiViewText.pm
new file mode 100644
index 00000000..c7d72c71
--- /dev/null
+++ b/lib/PublicInbox/LeiViewText.pm
@@ -0,0 +1,308 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# PublicInbox::Eml to (optionally colorized) text coverter for terminals
+# the non-HTML counterpart to PublicInbox::View
+package PublicInbox::LeiViewText;
+use strict;
+use v5.10.1;
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::MID qw(references);
+use PublicInbox::View;
+use PublicInbox::Hval;
+use PublicInbox::ViewDiff;
+use PublicInbox::Spawn qw(popen_rd);
+use Term::ANSIColor;
+use POSIX ();
+use PublicInbox::Address;
+
+sub _xs {
+        # xhtml_map works since we don't search for HTML ([&<>'"])
+        $_[0] =~ s/([\x7f\x00-\x1f])/$PublicInbox::Hval::xhtml_map{$1}/sge;
+}
+
+my %DEFAULT_COLOR = (
+        # mutt names, loaded from ~/.config/lei/config
+        quoted => 'blue',
+        hdrdefault => 'cyan',
+        status => 'bright_cyan', # smsg stuff
+        attachment => 'bright_red',
+
+        # git names and defaults, falls back to ~/.gitconfig
+        new => 'green',
+        old => 'red',
+        meta => 'bold',
+        frag => 'cyan',
+        func => undef,
+        context => undef,
+);
+
+my $COLOR = qr/(?:bright)?
+                (?:normal|black|red|green|yellow|blue|magenta|cyan|white)/x;
+
+sub my_colored {
+        my ($self, $slot, $buf) = @_;
+        my $val = $self->{"color.$slot"} //=
+                        $self->{-leicfg}->{"color.$slot"} //
+                        $self->{-gitcfg}->{"color.diff.$slot"} //
+                        $self->{-gitcfg}->{"diff.color.$slot"} //
+                        $DEFAULT_COLOR{$slot};
+        $val = $val->[-1] if ref($val) eq 'ARRAY';
+        if (defined $val) {
+                $val = lc $val;
+                # git doesn't use "_", Term::ANSIColor does
+                $val =~ s/\Abright([^_])/bright_$1/ig;
+
+                # git: "green black" => T::A: "green on_black"
+                $val =~ s/($COLOR)(.+?)($COLOR)/$1$2on_$3/;
+
+                # FIXME: convert git #XXXXXX to T::A-compatible colors
+                # for 256-color terminals
+
+                ${$self->{obuf}} .= colored($buf, $val);
+        } else {
+                ${$self->{obuf}} .= $buf;
+        }
+}
+
+sub uncolored { ${$_[0]->{obuf}} .= $_[2] }
+
+sub new {
+        my ($cls, $lei, $fmt) = @_;
+        my $self = bless { %{$lei->{opt}}, -colored => \&uncolored }, $cls;
+        $self->{-quote_reply} = 1 if $fmt eq 'reply';
+        return $self unless $self->{color} //= -t $lei->{1};
+        my @cmd = qw(git config -z --includes -l); # reuse normal git config
+        my $r = popen_rd(\@cmd, undef, { 2 => $lei->{2} });
+        my $cfg = PublicInbox::Config::config_fh_parse($r, "\0", "\n");
+        if (!$r->close) {
+                warn "# @cmd failed, no color (non-fatal \$?=$?)\n";
+                return $self;
+        }
+        $self->{-colored} = \&my_colored;
+        $self->{-gitcfg} = $cfg;
+        $self->{-leicfg} = $lei->{cfg};
+        $self;
+}
+
+sub quote_hdr_buf ($$) {
+        my ($self, $eml) = @_;
+        my $hbuf = '';
+        my $to = $eml->header_raw('Reply-To') //
+                $eml->header_raw('From') //
+                $eml->header_raw('Sender');
+        my $cc = '';
+        for my $f (qw(To Cc)) {
+                for my $v ($eml->header_raw($f)) {
+                        next if $v !~ /\S/;
+                        $cc .= ", $v";
+                        $to //= $v;
+                }
+        }
+        substr($cc, 0, 2, ''); # s/^, //;
+        PublicInbox::View::fold_addresses($to);
+        PublicInbox::View::fold_addresses($cc);
+        _xs($to);
+        _xs($cc);
+        $hbuf .= "To: $to\n" if defined $to && $to =~ /\S/;
+        $hbuf .= "Cc: $cc\n" if $cc =~ /\S/;
+        my $s = $eml->header_str('Subject') // 'your mail';
+        _xs($s);
+        substr($s, 0, 0, 'Re: ') if $s !~ /\bRe:/i;
+        $hbuf .= "Subject: $s\n";
+        if (defined(my $irt = $eml->header_raw('Message-ID'))) {
+                _xs($irt);
+                $hbuf .= "In-Reply-To: $irt\n";
+        }
+        $self->{-colored}->($self, 'hdrdefault', $hbuf);
+        my ($n) = PublicInbox::Address::names($eml->header_str('From') //
+                                        $eml->header_str('Sender') //
+                                        $eml->header_str('Reply-To') //
+                                        'unknown sender');
+        my $d = $eml->header_raw('Date') // 'some unknown date';
+        _xs($d);
+        _xs($n);
+        ${delete $self->{obuf}} . "\nOn $d, $n wrote:\n";
+}
+
+sub hdr_buf ($$) {
+        my ($self, $eml) = @_;
+        my $hbuf = '';
+        for my $f (qw(From To Cc)) {
+                for my $v ($eml->header($f)) {
+                        next if $v !~ /\S/;
+                        PublicInbox::View::fold_addresses($v);
+                        _xs($v);
+                        $hbuf .= "$f: $v\n";
+                }
+        }
+        for my $f (qw(Subject Date Newsgroups Message-ID X-Message-ID)) {
+                for my $v ($eml->header($f)) {
+                        _xs($v);
+                        $hbuf .= "$f: $v\n";
+                }
+        }
+        if (my @irt = $eml->header_raw('In-Reply-To')) {
+                for my $v (@irt) {
+                        _xs($v);
+                        $hbuf .= "In-Reply-To: $v\n";
+                }
+        } else {
+                my $refs = references($eml);
+                if (defined(my $irt = pop @$refs)) {
+                        _xs($irt);
+                        $hbuf .= "In-Reply-To: <$irt>\n";
+                }
+                if (@$refs) {
+                        my $max = $self->{-max_cols};
+                        $hbuf .= 'References: ' .
+                                join("\n\t", map { '<'._xs($_).'>' } @$refs) .
+                                ">\n";
+                }
+        }
+        $self->{-colored}->($self, 'hdrdefault', $hbuf .= "\n");
+}
+
+sub attach_note ($$$$;$) {
+        my ($self, $ct, $p, $fn, $err) = @_;
+        my ($part, $depth, $idx) = @$p;
+        my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
+        my $abuf = $err ? <<EOF : '';
+[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
+EOF
+        $abuf .= "[-- Attachment #$idx: ";
+        _xs($ct);
+        my $size = length($part->body);
+        my $ts = "Type: $ct, Size: $size bytes";
+        my $d = $part->header('Content-Description') // $fn // '';
+        _xs($d);
+        $abuf .= $d eq '' ? "$ts --]\n" : "$d --]\n[-- $ts --]\n";
+        if (my $blob = $self->{-smsg}->{blob}) {
+                $abuf .= "[-- lei blob $blob:$idx --]\n";
+        }
+        $self->{-colored}->($self, 'attachment', $abuf);
+        hdr_buf($self, $part) if $part->{is_submsg};
+}
+
+sub flush_text_diff ($$) {
+        my ($self, $cur) = @_;
+        my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $$cur);
+        undef $$cur; # free memory
+        my $dctx;
+        my $obuf = $self->{obuf};
+        my $colored = $self->{-colored};
+        while (defined(my $x = shift @top)) {
+                if (scalar(@top) >= 4 &&
+                                $top[1] =~ $PublicInbox::ViewDiff::IS_OID &&
+                                $top[0] =~ $PublicInbox::ViewDiff::IS_OID) {
+                        splice(@top, 0, 4);
+                        $dctx = 1;
+                        $colored->($self, 'meta', $x);
+                } elsif ($dctx) {
+                        # Quiet "Complex regular subexpression recursion limit"
+                        # warning.  Perl will truncate matches upon hitting
+                        # that limit, giving us more (and shorter) scalars than
+                        # would be ideal, but otherwise it's harmless.
+                        #
+                        # We could replace the `+' metacharacter with `{1,100}'
+                        # to limit the matches ourselves to 100, but we can
+                        # let Perl do it for us, quietly.
+                        no warnings 'regexp';
+
+                        for my $s (split(/((?:(?:^\+[^\n]*\n)+)|
+                                        (?:(?:^-[^\n]*\n)+)|
+                                        (?:^@@ [^\n]+\n))/xsm, $x)) {
+                                if (!defined($dctx)) {
+                                        ${$self->{obuf}} .= $s;
+                                } elsif ($s =~ s/\A(@@ \S+ \S+ @@\s*)//) {
+                                        $colored->($self, 'frag', $1);
+                                        $colored->($self, 'func', $s);
+                                } elsif ($s =~ /\A\+/) {
+                                        $colored->($self, 'new', $s);
+                                } elsif ($s =~ /\A-- $/sm) { # email sig starts
+                                        $dctx = undef;
+                                        ${$self->{obuf}} .= $s;
+                                } elsif ($s =~ /\A-/) {
+                                        $colored->($self, 'old', $s);
+                                } else {
+                                        $colored->($self, 'context', $s);
+                                }
+                        }
+                } else {
+                        ${$self->{obuf}} .= $x;
+                }
+        }
+}
+
+sub add_text_buf { # callback for Eml->each_part
+        my ($p, $self) = @_;
+        my ($part, $depth, $idx) = @$p;
+        my $ct = $part->content_type || 'text/plain';
+        my $fn = $part->filename;
+        my ($s, $err) = msg_part_text($part, $ct);
+        return attach_note($self, $ct, $p, $fn) unless defined $s;
+        hdr_buf($self, $part) if $part->{is_submsg};
+        $s =~ s/\r+\n/\n/sg;
+        _xs($s);
+        my $diff = ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms);
+        my @sections = PublicInbox::MsgIter::split_quotes($s);
+        undef $s; # free memory
+        if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
+                # badly-encoded message with $err? tell the world about it!
+                attach_note($self, $ct, $p, $fn, $err);
+                ${$self->{obuf}} .= "\n";
+        }
+        my $colored = $self->{-colored};
+        for my $cur (@sections) {
+                if ($cur =~ /\A>/) {
+                        $colored->($self, 'quoted', $cur);
+                } elsif ($diff) {
+                        flush_text_diff($self, \$cur);
+                } else {
+                        ${$self->{obuf}} .= $cur;
+                }
+                undef $cur; # free memory
+        }
+}
+
+# returns a stringref suitable for $lei->out or print
+sub eml_to_text {
+        my ($self, $smsg, $eml) = @_;
+        local $Term::ANSIColor::EACHLINE = "\n";
+        $self->{obuf} = \(my $obuf = '');
+        $self->{-smsg} = $smsg;
+        $self->{-max_cols} = ($self->{columns} //= 80) - 8; # for header wrap
+        my $h = [];
+        if ($self->{-quote_reply}) {
+                my $blob = $smsg->{blob} // 'unknown-blob';
+                my $pct = $smsg->{pct} // 'unknown';
+                my $t = POSIX::asctime(gmtime($smsg->{ts} // $smsg->{ds} // 0));
+                $h->[0] = "From $blob\@$pct $t";
+        } else {
+                for my $f (qw(blob pct)) {
+                        push @$h, "$f:$smsg->{$f}" if defined $smsg->{$f};
+                }
+                @$h = ("# @$h\n") if @$h;
+                for my $f (qw(kw L)) {
+                        my $v = $smsg->{$f} or next;
+                        push @$h, "# $f:".join(',', @$v)."\n" if @$v;
+                }
+        }
+        $h = join('', @$h);
+        $self->{-colored}->($self, 'status', $h);
+        my $quote_hdr;
+        if ($self->{-quote_reply}) {
+                $quote_hdr = ${delete $self->{obuf}};
+                $quote_hdr .= quote_hdr_buf($self, $eml);
+        } else {
+                hdr_buf($self, $eml);
+        }
+        $eml->each_part(\&add_text_buf, $self, 1);
+        if (defined $quote_hdr) {
+                ${$self->{obuf}} =~ s/^/> /sgm;
+                substr(${$self->{obuf}}, 0, 0, $quote_hdr);
+        }
+        delete $self->{obuf};
+}
+
+1;
diff --git a/lib/PublicInbox/LeiWatch.pm b/lib/PublicInbox/LeiWatch.pm
new file mode 100644
index 00000000..b30e5152
--- /dev/null
+++ b/lib/PublicInbox/LeiWatch.pm
@@ -0,0 +1,12 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# represents a Maildir, MH or IMAP "watch" item
+package PublicInbox::LeiWatch;
+use v5.12;
+use parent qw(PublicInbox::IPC);
+
+# "url" may be something like "maildir:/path/to/dir" or "mh:/path/to/dir"
+sub new { bless { url => $_[1] }, $_[0] }
+
+1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
new file mode 100644
index 00000000..43dedd10
--- /dev/null
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -0,0 +1,660 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Combine any combination of PublicInbox::Search,
+# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects
+# into one Xapian DB
+package PublicInbox::LeiXSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch PublicInbox::IPC);
+use PublicInbox::DS qw(now);
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use File::Spec ();
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Spawn qw(popen_rd popen_wr which);
+use PublicInbox::MID qw(mids mid_escape);
+use PublicInbox::Smsg;
+use PublicInbox::Eml;
+use PublicInbox::LEI;
+use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR);
+use PublicInbox::ContentHash qw(git_sha);
+use POSIX qw(strftime);
+use autodie qw(close open read seek truncate);
+use PublicInbox::Syscall qw($F_SETPIPE_SZ);
+use PublicInbox::OnDestroy;
+
+sub new {
+        my ($class) = @_;
+        PublicInbox::Search::load_xapian();
+        bless {
+                qp_flags => $PublicInbox::Search::QP_FLAGS |
+                                PublicInbox::Search::FLAG_PURE_NOT(),
+        }, $class
+}
+
+sub attach_external {
+        my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+        my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
+        my $srch = $ibxish->search //
+                return warn("$desc not indexed for Xapian ($@ $!)\n");
+        my @shards = $srch->xdb_shards_flat or
+                return warn("$desc has no Xapian shards\n");
+
+        if (delete $self->{xdb}) { # XXX: do we need this?
+                # clobber existing {xdb} if amending
+                my $expect = delete $self->{nshard};
+                my $shards = delete $self->{shards_flat};
+                scalar(@$shards) == $expect or die
+                        "BUG: {nshard}$expect != shards=".scalar(@$shards);
+
+                my $prev = {};
+                for my $old_ibxish (@{$self->{shard2ibx}}) {
+                        next if $prev == $old_ibxish;
+                        $prev = $old_ibxish;
+                        my @shards = $old_ibxish->search->xdb_shards_flat;
+                        push @{$self->{shards_flat}}, @shards;
+                }
+                my $nr = scalar(@{$self->{shards_flat}});
+                $nr == $expect or die
+                        "BUG: reloaded $nr shards, expected $expect"
+        }
+        push @{$self->{shards_flat}}, @shards;
+        push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+}
+
+# returns a list of local inboxes (or count in scalar context)
+sub locals { @{$_[0]->{locals} // []} }
+
+sub remotes { @{$_[0]->{remotes} // []} }
+
+# called by PublicInbox::Search::xdb (usually via ->mset)
+sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
+
+sub _mitem_kw { # retry_reopen callback
+        my ($srch, $smsg, $mitem, $flagged) = @_;
+        my $doc = $mitem->get_document;
+        my $kw = xap_terms('K', $doc);
+        $kw->{flagged} = 1 if $flagged;
+        my @L = xap_terms('L', $doc);
+        # we keep the empty {kw} array here to prevent expensive work in
+        # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
+        $smsg->{kw} = [ sort keys %$kw ];
+        $smsg->{L} = \@L if scalar(@L);
+}
+
+sub mitem_kw ($$$;$) {
+        my ($srch, $smsg, $mitem, $flagged) = @_;
+        $srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged);
+}
+
+# like over->get_art
+sub smsg_for {
+        my ($self, $mitem) = @_;
+        # cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
+        my $nshard = $self->{nshard};
+        my $docid = $mitem->get_docid;
+        my $shard = ($docid - 1) % $nshard;
+        my $num = int(($docid - 1) / $nshard) + 1;
+        my $ibx = $self->{shard2ibx}->[$shard];
+        my $smsg = $ibx->over->get_art($num);
+        return if $smsg->{bytes} == 0; # external message
+        if ($ibx->can('msg_keywords')) {
+                mitem_kw($self, $smsg, $mitem);
+        }
+        $smsg;
+}
+
+sub over {}
+
+sub _check_mset_limit ($$$) {
+        my ($lei, $desc, $mset) = @_;
+        return if defined($lei->{opt}->{limit}); # user requested limit
+        my $est = $mset->get_matches_estimated;
+        my $tot = $lei->{mset_opt}->{total};
+        $est > $tot and $lei->qerr(<<"");
+# $desc estimated matches ($est) exceeds default --limit=$tot
+
+}
+
+sub _mset_more ($$) {
+        my ($mset, $mo) = @_;
+        my $size = $mset->size;
+        $size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{total});
+}
+
+# $startq will see `q' in do_post_augment -> start_mua if spawning MUA.
+# Otherwise $startq will EOF when do_augment is done augmenting and allow
+# query_combined_mset and query_thread_mset to proceed.
+sub wait_startq ($) {
+        my ($lei) = @_;
+        read(delete($lei->{startq}) // return, my $buf, 1) or return; # EOF
+        die "BUG: wrote `$buf' to au_done" if $buf ne 'q';
+        $lei->{opt}->{quiet} = 1;
+        delete $lei->{opt}->{verbose};
+        delete $lei->{-progress};
+}
+
+sub mset_progress {
+        my $lei = shift;
+        return if $lei->{early_mua} || !$lei->{-progress};
+        if ($lei->{pkt_op_p}) {
+                $lei->{pkt_op_p}->pkt_do('mset_progress', @_);
+        } else { # single lei-daemon consumer
+                my ($desc, $mset_size, $mset_total_est) = @_;
+                $lei->{-mset_total} += $mset_size if $mset_total_est ne '?';
+                $lei->qerr("# $desc $mset_size/$mset_total_est");
+        }
+}
+
+sub query_one_mset { # for --threads and l2m w/o sort
+        my ($self, $ibxish) = @_;
+        my $lei = $self->{lei};
+        my ($srch, $over) = ($ibxish->search, $ibxish->over);
+        my $dir = $ibxish->{inboxdir} // $ibxish->{topdir};
+        return warn("$dir not indexed by Xapian\n") unless ($srch && $over);
+        bless $srch, 'PublicInbox::LeiSearch'; # for ->qparse_new
+        my $mo = { %{$lei->{mset_opt}} }; # copy
+        local $0 = "$0 1 $mo->{qstr}";
+        my $mset;
+        my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+        my $can_kw = !!$ibxish->can('msg_keywords');
+        my $threads = $lei->{opt}->{threads} // 0;
+        my $fl = $threads > 1 ? 1 : undef;
+        my $mid = $lei->{opt}->{'thread-id'};
+        $mo->{threadid} = $over->mid2tid($mid) if defined $mid;
+        my $lss = $lei->{lss};
+        my $maxk = "external.$dir.maxuid"; # max of previous, so our min
+        my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0;
+        ref($min) and return warn("$maxk=$min has multiple values\n");
+        ($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n");
+        my $first_ids;
+        do {
+                $mset = eval { $srch->mset($mo->{qstr}, $mo) };
+                return $lei->child_error(22 << 8, "E: $@") if $@; # 22 from curl
+                mset_progress($lei, $dir, $mo->{offset} + $mset->size,
+                                $mset->get_matches_estimated);
+                wait_startq($lei); # wait for keyword updates
+                my $ids = $srch->mset_to_artnums($mset, $mo);
+                my $i = 0;
+                if ($threads) {
+                        # copy $ids if $lss since over->expand_thread
+                        # shifts @{$ctx->{ids}}
+                        $first_ids = [ @$ids ] if $lss;
+                        my $ctx = { ids => $ids, min => $min };
+                        my %n2item = map { $ids->[$i++] => $_ } $mset->items;
+                        while ($over->expand_thread($ctx)) { # fills {xids}
+                                for my $n (@{delete $ctx->{xids}}) {
+                                        my $smsg = $over->get_art($n) or next;
+                                        my $mi = delete $n2item{$n};
+                                        next if $smsg->{bytes} == 0;
+                                        if ($mi && $can_kw) {
+                                                mitem_kw($srch, $smsg, $mi, $fl)
+                                        } elsif ($mi && $fl) {
+                                                # call ->xsmsg_vmd, later
+                                                $smsg->{lei_q_tt_flagged} = 1;
+                                        }
+                                        $each_smsg->($smsg, $mi);
+                                }
+                        }
+                } else {
+                        $first_ids = $ids;
+                        my @items = $mset->items; # parallel with @$ids
+                        for my $n (@$ids) {
+                                my $mitem = $items[$i++];
+                                next if $n <= $min;
+                                my $smsg = $over->get_art($n) or next;
+                                next if $smsg->{bytes} == 0;
+                                mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw;
+                                $each_smsg->($smsg, $mitem);
+                        }
+                }
+        } while (_mset_more($mset, $mo));
+        _check_mset_limit($lei, $dir, $mset);
+        if ($lss && scalar(@$first_ids)) {
+                my $max = $first_ids->[0];
+                $lss->cfg_set($maxk, $max);
+                undef $lss;
+        }
+        undef $each_smsg; # may commit
+        $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+sub query_combined_mset { # non-parallel for non-"--threads" users
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        my $mo = { %{$lei->{mset_opt}} };
+        local $0 = "$0 C $mo->{qstr}";
+        my $mset;
+        for my $loc (locals($self)) {
+                attach_external($self, $loc);
+        }
+        my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+        do {
+                $mset = eval { $self->mset($mo->{qstr}, $mo) };
+                return $lei->child_error(22 << 8, "E: $@") if $@; # 22 from curl
+                mset_progress($lei, 'xsearch', $mo->{offset} + $mset->size,
+                                $mset->get_matches_estimated);
+                wait_startq($lei); # wait for keyword updates
+                for my $mitem ($mset->items) {
+                        my $smsg = smsg_for($self, $mitem) or next;
+                        $each_smsg->($smsg, $mitem);
+                }
+        } while (_mset_more($mset, $mo));
+        _check_mset_limit($lei, 'xsearch', $mset);
+        undef $each_smsg; # may commit
+        $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+sub _smsg_fill ($$) {
+        my ($smsg, $eml) = @_;
+        $smsg->populate($eml);
+        $smsg->parse_references($eml, mids($eml));
+        $smsg->{$_} //= '' for qw(from to cc ds subject references mid);
+        delete @$smsg{qw(From Subject -ds -ts)};
+}
+
+sub each_remote_eml { # callback for MboxReader->mboxrd
+        my ($eml, $self, $lei, $each_smsg) = @_;
+        my $xoids = $lei->{ale}->xoids_for($eml, 1);
+        my $smsg = bless {}, 'PublicInbox::Smsg';
+        if ($self->{import_sto} && !$xoids) {
+                my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
+                if (ref($res) eq ref($smsg)) { # totally new message
+                        $smsg = $res;
+                        $self->{-sto_imported} = 1;
+                }
+                $smsg->{kw} = $kw; # short-circuit xsmsg_vmd
+        }
+        $smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
+                                : $lei->git_oid($eml)->hexdigest;
+        _smsg_fill($smsg, $eml);
+        wait_startq($lei);
+        my $nr = ++$lei->{-nr_remote_eml}; # needed for lss->cfg_set
+        if ($lei->{-progress}) {
+                my $now = now();
+                my $next = $lei->{-next_progress} //= ($now + 1);
+                if ($now > $next) {
+                        $lei->{-next_progress} = $now + 1;
+                        mset_progress($lei, $lei->{-current_url}, $nr, '?');
+                }
+        }
+        $each_smsg->($smsg, undef, $eml);
+}
+
+sub fudge_qstr_time ($$$) {
+        my ($lei, $uri, $qstr) = @_;
+        return ($qstr, undef) unless $lei->{lss};
+        my $cfg = $lei->{lss}->{-cfg} // die 'BUG: no lss->{-cfg}';
+        my $cfg_key = "external.$uri.lastresult";
+        my $lr = $cfg->{$cfg_key} or return ($qstr, $cfg_key);
+        if ($lr !~ /\A\-?[0-9]+\z/) {
+                $lei->child_error(0,
+                        "$cfg->{-f}: $cfg_key=$lr not an integer, ignoring");
+                return ($qstr, $cfg_key);
+        }
+        my $rft = $lei->{opt}->{'remote-fudge-time'};
+        if ($rft && $rft !~ /\A-?[0-9]+\z/) {
+                my @t = $lei->{lss}->git->date_parse($rft);
+                my $diff = time - $t[0];
+                $lei->qerr("# $rft => $diff seconds");
+                $rft = $diff;
+        }
+        $lr -= ($rft || (48 * 60 * 60));
+        require PublicInbox::Admin;
+        $lei->qerr("# $uri limiting to ".
+                PublicInbox::Admin::fmt_localtime($lr).' and newer');
+        # this should really be rt: (received-time), but no stable
+        # public-inbox releases support it, yet.
+        my $dt = 'dt:'.strftime('%Y%m%d%H%M%S', gmtime($lr)).'..';
+        if ($qstr =~ /\S/) {
+                substr($qstr, 0, 0, '(');
+                $qstr .= ') AND ';
+        }
+        ($qstr .= $dt, $cfg_key);
+}
+
+sub query_remote_mboxrd {
+        my ($self, $uris) = @_;
+        local $SIG{TERM} = sub { exit(0) }; # for DESTROY (File::Temp, $reap)
+        my $lei = $self->{lei};
+        my $opt = $lei->{opt};
+        my $qstr = $lei->{mset_opt}->{qstr};
+        local $0 = "$0 R $qstr";
+        my @qform = (x => 'm');
+        push(@qform, t => 1) if $opt->{threads};
+        open my $cerr, '+>', undef;
+        my $rdr = { 2 => $cerr };
+        my @lbf_tee;
+        if ($opt->{verbose}) {
+                # spawn a line-buffered tee(1) script, otherwise curl
+                # will write 1 character at-a-time and parallel outputs
+                # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
+                # (n.b. POSIX tee(1) cannot do any buffering)
+                my $o = { 1 => $cerr, 2 => $lei->{2} };
+                delete $rdr->{2};
+                @lbf_tee = ([ $^X, qw(-w -p -e), <<'' ], undef, $o);
+BEGIN { $| = 1; use IO::Handle; STDERR->autoflush(1); }
+print STDERR $_;
+
+        }
+        my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+        push @$curl, '-s', '-d', '';
+        my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+        $self->{import_sto} = $lei->{sto} if $lei->{opt}->{'import-remote'};
+        if (defined(my $mid = $opt->{'thread-id'})) {
+                $mid = mid_escape($mid);
+                for my $uri (@$uris) {
+                        $uri->path($uri->path.$mid.'/');
+                }
+        }
+        for my $uri (@$uris) {
+                $lei->{-current_url} = $uri->as_string;
+                my $start = time;
+                my ($q, $key) = fudge_qstr_time($lei, $uri, $qstr);
+                $uri->query_form(@qform, q => $q);
+                my $cmd = $curl->for_uri($lei, $uri);
+                $lei->qerr("# $cmd");
+                $rdr->{2} //= popen_wr(@lbf_tee) if @lbf_tee;
+                my $fh = popen_rd($cmd, undef, $rdr);
+                $fh = IO::Uncompress::Gunzip->new($fh,
+                                        MultiStream => 1, AutoClose => 1);
+                eval {
+                        PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml,
+                                                $self, $lei, $each_smsg);
+                };
+                my ($exc, $code) = ($@, $?);
+                $lei->sto_barrier_request if delete($self->{-sto_imported});
+                die "E: $exc" if $exc && !$code;
+                my $nr = delete $lei->{-nr_remote_eml} // 0;
+                if (!$code) { # don't update if no results, maybe MTA is down
+                        $lei->{lss}->cfg_set($key, $start) if $key && $nr;
+                        mset_progress($lei, $lei->{-current_url}, $nr, $nr);
+                        next;
+                }
+                delete($rdr->{2})->close if @lbf_tee;
+                seek($cerr, 0, SEEK_SET);
+                read($cerr, my $err, -s $cerr);
+                truncate($cerr, 0);
+                next if (($code >> 8) == 22 && $err =~ /\b404\b/);
+                $uri->query_form(q => $qstr);
+                $lei->child_error($code, "E: <$uri> `$cmd` failed");
+        }
+        undef $each_smsg;
+        $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+sub git { $_[0]->{git} // die 'BUG: git uninitialized' }
+
+sub xsearch_done_wait { # awaitpid cb
+        my ($pid, $wq, $lei) = @_;
+        return if !$?;
+        my $s = $? & 127;
+        return $lei->child_error($?) if $s == 13 || $s == 15;
+        $lei->child_error($?, 'non-fatal error from '.ref($wq)." \$?=$?");
+}
+
+sub query_done { # EOF callback for main daemon
+        my ($lei) = @_;
+        my $l2m = delete $lei->{l2m};
+        delete $lei->{lxs};
+        ($lei->{opt}->{'mail-sync'} && !$lei->{sto}) and
+                warn "BUG: {sto} missing with --mail-sync";
+        $lei->sto_barrier_request;
+        $lei->{ovv}->ovv_end($lei);
+        if ($l2m) { # close() calls LeiToMail reap_compress
+                $l2m->finish_output($lei);
+                if ($l2m->lock_free) {
+                        $l2m->poke_dst;
+                        $lei->poke_mua;
+                } else { # mbox users
+                        delete $l2m->{mbl}; # drop dotlock
+                }
+        }
+        my $nr_w = delete($lei->{-nr_write}) // 0;
+        my $nr_dup = (delete($lei->{-nr_seen}) // 0) - $nr_w;
+        if ($lei->{-progress}) {
+                my $tot = $lei->{-mset_total} // 0;
+                my $x = "$tot matches";
+                $x .= ", $nr_dup duplicates" if $nr_dup;
+                if ($l2m) {
+                        my $m = "# $nr_w written to " .
+                                "$lei->{ovv}->{dst} ($x)";
+                        $nr_w ? $lei->qfin($m) : $lei->qerr($m);
+                } else {
+                        $lei->qerr("# $x");
+                }
+        }
+        $lei->start_mua if $l2m && !$l2m->lock_free;
+        $lei->dclose;
+}
+
+sub post_augment_done { # via on_destroy in top-level lei-daemon
+        my ($lei) = @_;
+        my $err = delete $lei->{post_augment_err};
+        if ($err) {
+                if (my $lxs = delete $lei->{lxs}) {
+                        $lxs->wq_kill(-POSIX::SIGTERM());
+                        $lxs->wq_close;
+                }
+                $lei->fail("$err");
+        }
+        if (!$err && delete $lei->{early_mua}) { # non-augment case
+                eval { $lei->start_mua }; # may trigger wait_startq
+                $lei->fail($@) if $@;
+        }
+        close(delete $lei->{au_done}); # trigger wait_startq if start_mua didn't
+}
+
+sub do_post_augment {
+        my ($lei) = @_;
+        my $l2m = $lei->{l2m} or return; # client disconnected
+        $l2m->post_augment($lei, on_destroy(\&post_augment_done, $lei));
+}
+
+sub incr_post_augment { # called whenever an l2m shard finishes augment
+        my ($lei) = @_;
+        my $l2m = $lei->{l2m} or return; # client disconnected
+        return if ++$lei->{nr_post_augment} != $l2m->{-wq_nr_workers};
+        do_post_augment($lei);
+}
+
+my $MAX_PER_HOST = 4;
+
+sub concurrency {
+        my ($self, $opt) = @_;
+        my $nl = $opt->{threads} ? locals($self) : 1;
+        my $nr = remotes($self);
+        $nr = $MAX_PER_HOST if $nr > $MAX_PER_HOST;
+        $nl + $nr;
+}
+
+sub start_query ($$) { # always runs in main (lei-daemon) process
+        my ($self, $lei) = @_;
+        local $PublicInbox::LEI::current_lei = $lei;
+        if ($lei->{opt}->{threads} ||
+                        defined($lei->{opt}->{'thread-id'}) ||
+                        ($lei->{l2m} && !$lei->{opt}->{'sort'})) {
+                for my $ibxish (locals($self)) {
+                        $self->wq_io_do('query_one_mset', [], $ibxish);
+                }
+        } elsif (locals($self)) {
+                $self->wq_io_do('query_combined_mset', []);
+        }
+        my $i = 0;
+        my $q = [];
+        for my $uri (remotes($self)) {
+                push @{$q->[$i++ % $MAX_PER_HOST]}, $uri;
+        }
+        for my $uris (@$q) {
+                $self->wq_io_do('query_remote_mboxrd', [], $uris);
+        }
+        if ($self->{-do_lcat}) {
+                $self->wq_io_do('lcat_dump', []);
+        }
+        $self->wq_close; # lei_xsearch workers stop when done
+}
+
+sub incr_start_query { # called whenever an l2m shard starts do_post_auth
+        my ($lei, $self) = @_;
+        my $l2m = $lei->{l2m};
+        return if ++$self->{nr_start_query} != $l2m->{-wq_nr_workers};
+        start_query($self, $lei);
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        $self->{lei}->_lei_atfork_child;
+        $self->SUPER::ipc_atfork_child;
+}
+
+sub do_query {
+        my ($self, $lei) = @_;
+        my $l2m = $lei->{l2m};
+        my $qstr = \($lei->{mset_opt}->{qstr});
+        chomp $$qstr;
+        $$qstr =~ s/[ \n\t]+/ /sg; # make URLs and $0 less ugly
+        my $ops = {
+                sigpipe_handler => [ $lei ],
+                fail_handler => [ $lei ],
+                do_post_augment => [ \&do_post_augment, $lei ],
+                incr_post_augment => [ \&incr_post_augment, $lei ],
+                '' => [ \&query_done, $lei ],
+                mset_progress => [ \&mset_progress, $lei ],
+                incr => [ $lei ],
+                x_it => [ $lei ],
+                child_error => [ $lei ],
+                incr_start_query => [ \&incr_start_query, $lei, $self ],
+        };
+        $lei->{auth}->op_merge($ops, $l2m, $lei) if $l2m && $lei->{auth};
+        my $end = $lei->pkt_op_pair;
+        $lei->{1}->autoflush(1);
+        $lei->start_pager if delete $lei->{need_pager};
+        $lei->{ovv}->ovv_begin($lei);
+        die 'BUG: xdb|over open' if $lei->{lse}->{xdb} || $lei->{lse}->{over};
+        if ($l2m) {
+                $l2m->pre_augment($lei);
+                if ($lei->{opt}->{augment} && delete $lei->{early_mua}) {
+                        $lei->start_mua;
+                }
+                if ($l2m->{-wq_nr_workers} > 1 &&
+                                $l2m->{base_type} =~ /\A(?:maildir|mbox)\z/) {
+                        # setup two barriers to coordinate ->has_entries
+                        # between l2m workers
+                        pipe(my ($a_r, $a_w)) or die "pipe: $!";
+                        fcntl($a_r, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ;
+                        pipe(my ($b_r, $b_w)) or die "pipe: $!";
+                        fcntl($b_r, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ;
+                        $l2m->{au_peers} = [ $a_r, $a_w, $b_r, $b_w ];
+                }
+                $l2m->wq_workers_start('lei2mail', undef,
+                                        $lei->oldset, { lei => $lei },
+                                        \&xsearch_done_wait, $lei);
+                pipe($lei->{startq}, $lei->{au_done}) or die "pipe: $!";
+                fcntl($lei->{startq}, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ;
+                delete $l2m->{au_peers};
+                close(delete $l2m->{-wq_s2}); # share wq_s1 with lei_xsearch
+        }
+        $self->wq_workers_start('lei_xsearch', undef,
+                                $lei->oldset, { lei => $lei },
+                                \&xsearch_done_wait, $lei);
+        my $op_c = delete $lei->{pkt_op_c};
+        delete $lei->{pkt_op_p};
+        @$end = ();
+        $self->{-do_lcat} = !!(delete $lei->{lcat_todo});
+        if ($l2m) {
+                $l2m->net_merge_all_done($lei) unless $lei->{auth};
+        } else {
+                start_query($self, $lei);
+        }
+        $lei->event_step_init; # wait for shutdowns
+        $lei->wait_wq_events($op_c, $ops);
+}
+
+sub add_uri {
+        my ($self, $uri) = @_;
+        if (my $curl = $self->{curl} //= which('curl') // 0) {
+                require PublicInbox::MboxReader;
+                require IO::Uncompress::Gunzip;
+                require PublicInbox::LeiCurl;
+                push @{$self->{remotes}}, $uri;
+                $uri;
+        } else {
+                warn "curl missing, ignoring $uri\n";
+                undef;
+        }
+}
+
+# returns URI or PublicInbox::Inbox-like object
+sub prepare_external {
+        my ($self, $loc, $boost) = @_; # n.b. already ordered by boost
+        if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object
+                return add_uri($self, $loc) if $loc->can('scheme');
+                # fall-through on Inbox-like objects
+        } elsif ($loc =~ m!\Ahttps?://!) {
+                require URI;
+                return add_uri($self, URI->new($loc));
+        } elsif (-f "$loc/ei.lock" && -d "$loc/ALL.git/objects") {
+                require PublicInbox::ExtSearch;
+                die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0;
+                $loc = PublicInbox::ExtSearch->new($loc);
+        } elsif ((-f "$loc/inbox.lock" && -d "$loc/all.git/objects") ||
+                        (-d "$loc/public-inbox" && -d "$loc/objects")) {
+                die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0;
+                require PublicInbox::Inbox; # v2, v1
+                $loc = bless { inboxdir => $loc }, 'PublicInbox::Inbox';
+        } elsif (!-e $loc) {
+                warn "W: $loc gone, perhaps run: lei forget-external $loc\n";
+                return undef;
+        } else {
+                warn "W: $loc ignored, unable to determine external type\n";
+                return undef;
+        }
+        push @{$self->{locals}}, $loc;
+        $loc;
+}
+
+sub _lcat_i { # LeiMailSync->each_src iterator callback
+        my ($oidbin, $id, $each_smsg) = @_;
+        $each_smsg->({blob => unpack('H*', $oidbin), pct => 100});
+}
+
+sub _lcat2smsg { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $smsg) = @_;
+        if ($bref) {
+                my $eml = PublicInbox::Eml->new($bref);
+                my $json_dump = delete $smsg->{-json_dump};
+                bless $smsg, 'PublicInbox::Smsg';
+                _smsg_fill($smsg, $eml);
+                $json_dump->($smsg, undef, $eml);
+        }
+}
+
+sub lcat_dump { # via wq_io_do
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+        my $git = $lei->{ale}->git;
+        if (!$lei->{l2m}) {
+                my $json_dump = $each_smsg;
+                $each_smsg = sub {
+                        my ($smsg) = @_;
+                        $smsg->{-json_dump} = $json_dump;
+                        $git->cat_async($smsg->{blob}, \&_lcat2smsg, $smsg);
+                };
+        }
+        my $lms;
+        for my $ent (@{$lei->{lcat_todo}}) {
+                if (ref $ent eq 'HASH') { # { fid => $fid ,.. }
+                        $lms //= $lei->{lse}->lms;
+                        $lms->each_src($ent, \&_lcat_i, $each_smsg);
+                } else { # oidhex
+                        $each_smsg->({ blob => $ent, pct => 100 });
+                }
+        }
+        $git->async_wait_all;
+        undef $each_smsg; # may commit
+        $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+1;
diff --git a/lib/PublicInbox/Limiter.pm b/lib/PublicInbox/Limiter.pm
new file mode 100644
index 00000000..a8d08fc3
--- /dev/null
+++ b/lib/PublicInbox/Limiter.pm
@@ -0,0 +1,50 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::Limiter;
+use v5.12;
+use PublicInbox::Spawn;
+
+sub new {
+        my ($class, $max) = @_;
+        bless {
+                # 32 is same as the git-daemon connection limit
+                max => $max || 32,
+                running => 0,
+                run_queue => [],
+                # RLIMIT_CPU => undef,
+                # RLIMIT_DATA => undef,
+                # RLIMIT_CORE => undef,
+        }, $class;
+}
+
+sub setup_rlimit {
+        my ($self, $name, $cfg) = @_;
+        for my $rlim (@PublicInbox::Spawn::RLIMITS) {
+                my $k = lc($rlim);
+                $k =~ tr/_//d;
+                $k = "publicinboxlimiter.$name.$k";
+                my $v = $cfg->{$k} // next;
+                my @rlimit = split(/\s*,\s*/, $v);
+                if (scalar(@rlimit) == 1) {
+                        push @rlimit, $rlimit[0];
+                } elsif (scalar(@rlimit) != 2) {
+                        warn "could not parse $k: $v\n";
+                }
+                my $inf = $v =~ /\binfinity\b/i ?
+                        $PublicInbox::Spawn::RLIMITS{RLIM_INFINITY} // eval {
+                                require BSD::Resource;
+                                BSD::Resource::RLIM_INFINITY();
+                        } // do {
+                                warn "BSD::Resource missing for $rlim";
+                                next;
+                        } : undef;
+                for my $i (0..$#rlimit) {
+                        next if $rlimit[$i] ne 'INFINITY';
+                        $rlimit[$i] = $inf;
+                }
+                $self->{$rlim} = \@rlimit;
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index a02eafc4..306a57e7 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # two-step linkification.
@@ -11,8 +11,8 @@
 # Maybe this could be done more efficiently...
 package PublicInbox::Linkify;
 use strict;
-use warnings;
-use Digest::SHA qw/sha1_hex/;
+use v5.10.1;
+use PublicInbox::SHA qw(sha1_hex);
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MID qw($MID_EXTRACT);
 
@@ -68,23 +68,22 @@ sub linkify_1 {
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
-
+                $key =~ tr/0-9/A-J/; # no digits for YAML highlight
                 $_[0]->{$key} = $url;
-                $beg . 'PI-LINK-'. $key . $end;
+                $beg . 'LINKIFY' . $key . $end;
         ^geo;
         $_[1];
 }
 
 sub linkify_2 {
-        # Added "PI-LINK-" prefix to avoid false-positives on git commits
-        $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+        # Added "LINKIFY" prefix to avoid false-positives on git commits
+        $_[1] =~ s!\bLINKIFY([a-fA-J]{40})\b!
                 my $key = $1;
                 my $url = $_[0]->{$key};
                 if (defined $url) {
                         "<a\nhref=\"$url\">$url</a>";
-                } else {
-                        # false positive or somebody tried to mess with us
-                        $key;
+                } else { # false positive or somebody tried to mess with us
+                        'LINKIFY'.$key;
                 }
         !ge;
         $_[1];
@@ -102,20 +101,20 @@ sub linkify_mids {
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($html . $SALT);
+                $key =~ tr/0-9/A-J/;
                 my $repl = qq(&lt;<a\nhref="$pfx/$href/">$html</a>&gt;);
                 $repl .= qq{ (<a\nhref="$pfx/$href/raw">raw</a>)} if $raw;
                 $self->{$key} = $repl;
-                'PI-LINK-'. $key;
+                'LINKIFY'.$key;
                 !ge;
         $$str = ascii_html($$str);
-        $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+        $$str =~ s!\bLINKIFY([a-fA-J]{40})\b!
                 my $key = $1;
                 my $repl = $_[0]->{$key};
                 if (defined $repl) {
                         $repl;
-                } else {
-                        # false positive or somebody tried to mess with us
-                        $key;
+                } else { # false positive or somebody tried to mess with us
+                        'LINKIFY'.$key;
                 }
         !ge;
 }
diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm
index 2e0fc248..c83901b2 100644
--- a/lib/PublicInbox/Listener.pm
+++ b/lib/PublicInbox/Listener.pm
@@ -1,14 +1,15 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used by -nntpd for listen sockets
 package PublicInbox::Listener;
-use strict;
+use v5.12;
 use parent 'PublicInbox::DS';
 use Socket qw(SOL_SOCKET SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY);
 use IO::Handle;
-use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE EPOLLET);
-use Errno qw(EAGAIN ECONNABORTED EPERM);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE);
+use Errno qw(EAGAIN ECONNABORTED);
+our $MULTI_ACCEPT = 0;
 
 # Warn on transient errors, mostly resource limitations.
 # EINTR would indicate the failure to set NonBlocking in systemd or similar
@@ -16,42 +17,35 @@ my %ERR_WARN = map {;
         eval("Errno::$_()") => $_
 } qw(EMFILE ENFILE ENOBUFS ENOMEM EINTR);
 
-sub new ($$$) {
-        my ($class, $s, $cb) = @_;
+sub new {
+        my ($class, $s, $cb, $multi_accept) = @_;
         setsockopt($s, SOL_SOCKET, SO_KEEPALIVE, 1);
         setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); # ignore errors on non-TCP
-        listen($s, 1024);
+        listen($s, 2**31 - 1); # kernel will clamp
         my $self = bless { post_accept => $cb }, $class;
-        $self->SUPER::new($s, EPOLLIN|EPOLLET|EPOLLEXCLUSIVE);
+        $self->{multi_accept} = $multi_accept //= $MULTI_ACCEPT;
+        $self->SUPER::new($s, EPOLLIN|EPOLLEXCLUSIVE);
 }
 
 sub event_step {
         my ($self) = @_;
         my $sock = $self->{sock} or return;
-
-        # no loop here, we want to fairly distribute clients
-        # between multiple processes sharing the same socket
-        # XXX our event loop needs better granularity for
-        # a single accept() here to be, umm..., acceptable
-        # on high-traffic sites.
-        if (my $addr = accept(my $c, $sock)) {
-                IO::Handle::blocking($c, 0); # no accept4 :<
-                eval { $self->{post_accept}->($c, $addr, $sock) };
-                warn "E: $@\n" if $@;
-                $self->requeue;
-        } elsif ($! == EAGAIN || $! == ECONNABORTED || $! == EPERM) {
-                # EAGAIN is common and likely
-                # ECONNABORTED is common with bad connections
-                # EPERM happens if firewall rules prevent a connection
-                # on Linux (and everything that emulates Linux).
-                # Firewall rules are sometimes intentional, so we don't
-                # warn on EPERM to avoid being too noisy...
-                return;
-        } elsif (my $sym = $ERR_WARN{int($!)}) {
-                warn "W: accept(): $! ($sym)\n";
-        } else {
-                warn "BUG?: accept(): $!\n";
-        }
+        my $n = $self->{multi_accept};
+        do {
+                if (my $addr = accept(my $c, $sock)) {
+                        IO::Handle::blocking($c, 0); # no accept4 :<
+                        eval { $self->{post_accept}->($c, $addr, $sock) };
+                        warn "E: $@\n" if $@;
+                } elsif ($! == EAGAIN || $! == ECONNABORTED) {
+                        # EAGAIN is common and likely
+                        # ECONNABORTED is common with bad connections
+                        return;
+                } elsif (my $sym = $ERR_WARN{int($!)}) {
+                        return warn "W: accept(): $! ($sym)\n";
+                } else {
+                        return warn "BUG?: accept(): $!\n";
+                }
+        } while ($n--);
 }
 
 1;
diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm
index b2c8227f..7162d80e 100644
--- a/lib/PublicInbox/Lock.pm
+++ b/lib/PublicInbox/Lock.pm
@@ -1,35 +1,66 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Base class for per-inbox locking
+# Base class for per-inbox locking, subclassed by several
+# only uses {lock_path} and {lockfh} fields
 package PublicInbox::Lock;
-use strict;
-use warnings;
-use Fcntl qw(:flock :DEFAULT);
+use v5.12;
+use Fcntl qw(LOCK_UN LOCK_EX O_RDWR O_CREAT);
 use Carp qw(croak);
+use PublicInbox::OnDestroy;
+use Errno qw(EINTR);
+use autodie qw(close sysopen syswrite);
+
+sub xflock ($$) {
+        until (flock($_[0], $_[1])) { return if $! != EINTR }
+        1;
+}
+
+sub new { bless { lock_path => $_[1] }, $_[0] }
 
 # we only acquire the flock if creating or reindexing;
 # PublicInbox::Import already has the lock on its own.
 sub lock_acquire {
         my ($self) = @_;
-        my $lock_path = $self->{lock_path};
-        croak 'already locked '.($lock_path // '(undef)') if $self->{lockfh};
-        return unless defined($lock_path);
-        sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or
-                croak "failed to open $lock_path: $!\n";
-        flock($lockfh, LOCK_EX) or croak "lock $lock_path failed: $!\n";
-        $self->{lockfh} = $lockfh;
+        my $fn = $self->{lock_path};
+        croak 'already locked '.($fn // '(undef)') if $self->{lockfh};
+        $fn // return;
+        sysopen(my $fh, $fn, O_RDWR|O_CREAT);
+        xflock($fh, LOCK_EX) or croak "LOCK_EX $fn: $!";
+        $self->{lockfh} = $fh;
 }
 
 sub lock_release {
         my ($self, $wake) = @_;
-        defined(my $lock_path = $self->{lock_path}) or return;
-        my $lockfh = delete $self->{lockfh} or croak "not locked: $lock_path";
+        my $fn = $self->{lock_path} // return;
+        my $fh = delete $self->{lockfh} or croak "not locked: $fn";
+        syswrite($fh, '.') if $wake;
+        xflock($fh, LOCK_UN) or croak "LOCK_UN $fn: $!";
+        close $fh; # may detect errors
+}
 
-        syswrite($lockfh, '.') if $wake;
+# caller must use return value
+sub lock_for_scope {
+        my ($self) = @_;
+        lock_acquire($self) or return; # lock_path not set
+        on_destroy \&lock_release, $self;
+}
 
-        flock($lockfh, LOCK_UN) or croak "unlock $lock_path failed: $!\n";
-        close $lockfh or croak "close $lock_path failed: $!\n";
+sub lock_acquire_fast {
+        my $fh = $_[0]->{lockfh} or return lock_acquire($_[0]);
+        xflock($fh, LOCK_EX) or croak "LOCK_EX $_[0]->{lock_path}: $!";
+}
+
+sub lock_release_fast {
+        xflock($_[0]->{lockfh} // return, LOCK_UN) or
+                croak "LOCK_UN $_[0]->{lock_path}: $!"
+}
+
+# caller must use return value
+sub lock_for_scope_fast {
+        my ($self) = @_;
+        lock_acquire_fast($self) or return; # lock_path not set
+        on_destroy \&lock_release_fast, $self;
 }
 
 1;
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index fa4a2ad8..f82194a3 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # For the -mda script (mail delivery agent)
@@ -83,7 +83,7 @@ sub set_list_headers {
 }
 
 sub inboxes_for_list_id ($$) {
-        my ($klass, $config, $simple) = @_;
+        my ($klass, $pi_cfg, $simple) = @_;
 
         # newer Email::Simple allows header_raw, as does Email::MIME:
         my @list_ids = $simple->can('header_raw') ?
@@ -92,7 +92,7 @@ sub inboxes_for_list_id ($$) {
         my @dests;
         for my $list_id (@list_ids) {
                 $list_id =~ /<[ \t]*(.+)?[ \t]*>/ or next;
-                if (my $ibx = $config->lookup_list_id($1)) {
+                if (my $ibx = $pi_cfg->lookup_list_id($1)) {
                         push @dests, $ibx;
                 }
         }
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
new file mode 100644
index 00000000..16e505a2
--- /dev/null
+++ b/lib/PublicInbox/MHreader.pm
@@ -0,0 +1,104 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# MH reader, based on Lib/mailbox.py in cpython source
+package PublicInbox::MHreader;
+use v5.12;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+use PublicInbox::IO qw(try_cat);
+use PublicInbox::MdirSort;
+use Carp qw(carp);
+use autodie qw(chdir closedir opendir);
+
+my %FL2OFF = ( # mh_sequences key => our keyword
+        replied => 0,
+        flagged => 1,
+        unseen => 2, # negate
+);
+my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
+
+sub new {
+        my ($cls, $dir, $cwdfh) = @_;
+        if (substr($dir, -1) ne '/') { # TODO: do this earlier
+                carp "W: appending `/' to `$dir' (fix caller)\n";
+                $dir .= '/';
+        }
+        bless { dir => $dir, cwdfh => $cwdfh }, $cls;
+}
+
+sub read_mh_sequences ($) { # caller must chdir($self->{dir})
+        my ($self) = @_;
+        my ($fl, $off, @n);
+        my @seq = ('', '', '');
+        for (split /\n+/s, try_cat('.mh_sequences')) {
+                ($fl, @n) = split /[: \t]+/;
+                $off = $FL2OFF{$fl} // do { warn <<EOM;
+W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
+EOM
+                        next;
+                };
+                @n = grep /\A[0-9]+\z/s, @n; # don't stat, yet
+                if (@n) {
+                        @n = sort { $b <=> $a } @n; # to avoid resize
+                        my $buf = '';
+                        vec($buf, $_, 1) = 1 for @n;
+                        $seq[$off] = $buf;
+                }
+        }
+        \@seq;
+}
+
+sub mh_each_file {
+        my ($self, $efcb, @arg) = @_;
+        opendir(my $dh, my $dir = $self->{dir});
+        my $restore = on_destroy \&chdir, $self->{cwdfh};
+        chdir($dh);
+        my $sort = $self->{sort};
+        if (defined $sort && "@$sort" ne 'none') {
+                my @sort = map {
+                        my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
+                        # sorting by name alphabetically makes no sense for MH:
+                        for my $k (@tmp) {
+                                s/\A(\-|\+|)(?:name|)\z/$1sequence/;
+                        }
+                        @tmp;
+                } @$sort;
+                my @n = grep /\A[0-9]+\z/s, readdir $dh;
+                mdir_sort \@n, \@sort;
+                $efcb->($dir, $_, $self, @arg) for @n;
+        } else {
+                while (readdir $dh) { # perl v5.12+ to set $_ on readdir
+                        $efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
+                }
+        }
+        closedir $dh; # may die
+}
+
+sub kw_for ($$) {
+        my ($self, $n) = @_;
+        my $seq = $self->{mh_seq} //= read_mh_sequences($self);
+        my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
+        vec($seq->[2], $n, 1) or push @kw, 'seen';
+        \@kw;
+}
+
+sub _file2eml { # mh_each_file / mh_read_one cb
+        my ($dir, $n, $self, $ucb, @arg) = @_;
+        my $eml = eml_from_path($n);
+        $ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
+}
+
+sub mh_each_eml {
+        my ($self, $ucb, @arg) = @_;
+        mh_each_file($self, \&_file2eml, $ucb, @arg);
+}
+
+sub mh_read_one {
+        my ($self, $n, $ucb, @arg) = @_;
+        my $restore = on_destroy \&chdir, $self->{cwdfh};
+        chdir(my $dir = $self->{dir});
+        _file2eml($dir, $n, $self, $ucb, @arg);
+}
+
+1;
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 5aeffb8c..36c05855 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -1,15 +1,15 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Various Message-ID-related functions.
 package PublicInbox::MID;
 use strict;
-use warnings;
-use base qw/Exporter/;
+use v5.10.1; # TODO: check unicode_strings compat for v5.12
+use parent qw(Exporter);
 our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC
-        mids references mids_for_index $MID_EXTRACT);
+        mids references mids_for_index mids_in $MID_EXTRACT);
 use URI::Escape qw(uri_escape_utf8);
-use Digest::SHA qw/sha1_hex/;
+use PublicInbox::SHA qw(sha1_hex);
 require PublicInbox::Address;
 use constant {
         ID_MAX => 40, # SHA-1 hex length for HTML id anchors
@@ -73,14 +73,17 @@ sub mids ($) {
         uniq_mids(extract_mids(@mids));
 }
 
+# for Resent-Message-ID and maybe others
+sub mids_in ($@) {
+        my ($eml, @headers) = @_;
+        uniq_mids(extract_mids(map { ($eml->header_raw($_)) } @headers));
+}
+
 # we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
 # to placate some clients, and we want to ensure NNTP-only clients can
 # import and index without relying on HTTP endpoints
 sub mids_for_index ($) {
-        my ($hdr) = @_;
-        my @mids = $hdr->header_raw('Message-ID');
-        my @alts = $hdr->header_raw('X-Alt-Message-ID');
-        uniq_mids(extract_mids(@mids, @alts));
+        mids_in($_[0], qw(Message-ID X-Alt-Message-ID));
 }
 
 # last References should be IRT, but some mail clients do things
@@ -89,8 +92,7 @@ sub references ($) {
         my ($hdr) = @_;
         my @mids;
         foreach my $f (qw(References In-Reply-To)) {
-                my @v = $hdr->header_raw($f);
-                foreach my $v (@v) {
+                for my $v ($hdr->header_raw($f)) {
                         push(@mids, ($v =~ /$MID_EXTRACT/g));
                 }
         }
@@ -101,8 +103,7 @@ sub references ($) {
         my %addr = ( y => 1, n => 1 );
 
         foreach my $f (qw(To From Cc)) {
-                my @v = $hdr->header_raw($f);
-                foreach my $v (@v) {
+                for my $v ($hdr->header_raw($f)) {
                         $addr{$_} = 1 for (PublicInbox::Address::emails($v));
                 }
         }
@@ -114,17 +115,17 @@ sub uniq_mids ($;$) {
         my @ret;
         $seen ||= {};
         foreach my $mid (@$mids) {
-                $mid =~ tr/\n\t\r//d;
+                $mid =~ tr/\n\t\r\0//d;
                 if (length($mid) > MAX_MID_SIZE) {
                         warn "Message-ID: <$mid> too long, truncating\n";
                         $mid = substr($mid, 0, MAX_MID_SIZE);
                 }
-                push(@ret, $mid) unless $seen->{$mid}++;
+                $seen->{$mid} //= push(@ret, $mid);
         }
         \@ret;
 }
 
-# RFC3986, section 3.3:
+# RFC3986, section 3.3 (pathnames only):
 sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\'\(\)\*\+,;=:@' }
 sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) }
 
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
new file mode 100644
index 00000000..125360fe
--- /dev/null
+++ b/lib/PublicInbox/MailDiff.pm
@@ -0,0 +1,137 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::MailDiff;
+use v5.12;
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use PublicInbox::ContentHash qw(content_digest);
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::GitAsyncCat;
+use PublicInbox::ContentDigestDbg;
+use PublicInbox::Qspawn;
+use PublicInbox::IO qw(write_file);
+use autodie qw(close mkdir);
+
+sub write_part { # Eml->each_part callback
+        my ($ary, $self) = @_;
+        my ($part, $depth, $idx) = @$ary;
+        if ($idx ne '1' || $self->{-raw_hdr}) { # lei mail-diff --raw-header
+                write_file '>', "$self->{curdir}/$idx.hdr", ${$part->{hdr}};
+        }
+        my $ct = $part->content_type || 'text/plain';
+        my ($s, $err) = msg_part_text($part, $ct);
+        my $sfx = defined($s) ? 'txt' : 'bin';
+        $s //= $part->body;
+        $s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
+        $s =~ s/\s*\z//s;
+        write_file '>:utf8', "$self->{curdir}/$idx.$sfx", $s, "\n";
+}
+
+# public
+sub dump_eml ($$$) {
+        my ($self, $dir, $eml) = @_;
+        local $self->{curdir} = $dir;
+        mkdir $dir;
+        $eml->each_part(\&write_part, $self);
+        my $fh = write_file '>', "$dir/content_digest";
+        my $dig = PublicInbox::ContentDigestDbg->new($fh);
+        content_digest($eml, $dig);
+        say $fh "\n", $dig->hexdigest;
+        close $fh;
+}
+
+# public
+sub prep_a ($$) {
+        my ($self, $eml) = @_;
+        $self->{tmp} = File::Temp->newdir('mail-diff-XXXX', TMPDIR => 1);
+        dump_eml($self, "$self->{tmp}/a", $eml);
+}
+
+# WWW-specific stuff below (TODO: split out for non-lei)
+
+sub next_smsg ($) {
+        my ($self) = @_;
+        my $ctx = $self->{ctx};
+        my $over = $ctx->{ibx}->over;
+        $self->{smsg} = $over ? $over->next_by_mid(@{$self->{next_arg}})
+                        : $ctx->gone('over');
+        if (!$self->{smsg}) {
+                $ctx->write('</pre>', $ctx->_html_end);
+                return $ctx->close;
+        }
+        PublicInbox::DS::requeue($self) if $ctx->{env}->{'pi-httpd.async'};
+}
+
+sub emit_msg_diff {
+        my ($bref, $self) = @_; # bref is `git diff' output
+        require PublicInbox::Hval;
+        PublicInbox::Hval::utf8_maybe($$bref);
+
+        # will be escaped to `&#8226;' in HTML
+        $self->{ctx}->{ibx}->{obfuscate} and
+                PublicInbox::Hval::obfuscate_addrs($self->{ctx}->{ibx},
+                                                $$bref, "\x{2022}");
+        print { $self->{ctx}->{zfh} } '</pre><hr><pre>' if $self->{nr} > 1;
+        flush_diff($self->{ctx}, $bref);
+        next_smsg($self);
+}
+
+sub do_diff {
+        my ($self, $eml) = @_;
+        my $n = 'N'.(++$self->{nr});
+        my $dir = "$self->{tmp}/$n";
+        $self->dump_eml($dir, $eml);
+        my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+        my $opt = { -C => "$self->{tmp}", quiet => 1 };
+        my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
+        $qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
+}
+
+sub diff_msg_i {
+        my ($self, $eml) = @_;
+        if ($eml) {
+                if ($self->{tmp}) { # 2nd..last message
+                        do_diff($self, $eml);
+                } else { # first message:
+                        prep_a($self, $eml);
+                        next_smsg($self);
+                }
+        } else {
+                warn "W: $self->{smsg}->{blob} missing\n";
+                next_smsg($self);
+        }
+}
+
+sub diff_msg_i_async {
+        my ($bref, $oid, $type, $size, $self) = @_;
+        diff_msg_i($self, $bref ? PublicInbox::Eml->new($bref) : undef);
+}
+
+sub event_step {
+        my ($self) = @_;
+        eval {
+                my $ctx = $self->{ctx};
+                if ($ctx->{env}->{'pi-httpd.async'}) {
+                        ibx_async_cat($ctx->{ibx}, $self->{smsg}->{blob},
+                                        \&diff_msg_i_async, $self);
+                } else {
+                        diff_msg_i($self, $ctx->{ibx}->smsg_eml($self->{smsg}));
+                }
+        };
+        if ($@) {
+                warn "E: $@";
+                delete $self->{smsg};
+                $self->{ctx}->close;
+        }
+}
+
+sub begin_mail_diff {
+        my ($self) = @_;
+        if ($self->{ctx}->{env}->{'pi-httpd.async'}) {
+                PublicInbox::DS::requeue($self);
+        } else {
+                event_step($self) while $self->{smsg};
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index f98d9d01..be5d5f2a 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -1,105 +1,47 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# generates manifest.js.gz for grokmirror(1)
+# generates manifest.js.gz for grokmirror(1) via PublicInbox::WWW
+# This doesn't parse manifest.js.gz (that happens in LeiMirror)
 package PublicInbox::ManifestJsGz;
-use strict;
-use v5.10.1;
+use v5.12;
 use parent qw(PublicInbox::WwwListing);
-use Digest::SHA ();
-use File::Spec ();
-use bytes (); # length
-use PublicInbox::Inbox;
-use PublicInbox::Git;
+use PublicInbox::Config;
 use IO::Compress::Gzip qw(gzip);
 use HTTP::Date qw(time2str);
-*try_cat = \&PublicInbox::Inbox::try_cat;
 
-our $json;
-for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
-        eval "require $mod" or next;
-        # ->ascii encodes non-ASCII to "\uXXXX"
-        $json = $mod->new->ascii(1) and last;
-}
+my $json = PublicInbox::Config::json();
 
-# called by WwwListing
-sub url_regexp {
+sub url_filter {
         my ($ctx) = @_;
         # grokmirror uses relative paths, so it's domain-dependent
-        # SUPER calls PublicInbox::WwwListing::url_regexp
-        $ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
+        # SUPER calls PublicInbox::WwwListing::url_filter
+        $ctx->SUPER::url_filter('publicInbox.grokManifest', 'match=domain');
 }
 
-sub fingerprint ($) {
-        my ($git) = @_;
-        # TODO: convert to qspawn for fairness when there's
-        # thousands of repos
-        my ($fh, $pid) = $git->popen('show-ref');
-        my $dig = Digest::SHA->new(1);
-        while (read($fh, my $buf, 65536)) {
-                $dig->add($buf);
-        }
-        close $fh;
-        waitpid($pid, 0);
-        return if $?; # empty, uninitialized git repo
-        $dig->hexdigest;
+sub inject_entry ($$$;$) {
+        my ($ctx, $url_path, $ent, $git_dir) = @_;
+        $ctx->{-abs2urlpath}->{$git_dir // delete $ent->{git_dir}} = $url_path;
+        my $modified = $ent->{modified};
+        $ctx->{-mtime} = $modified if $modified > ($ctx->{-mtime} // 0);
+        $ctx->{manifest}->{$url_path} = $ent;
 }
 
-sub manifest_add ($$;$$) {
+sub manifest_add ($$;$$) { # slow path w/o extindex "all" (or per-inbox)
         my ($ctx, $ibx, $epoch, $default_desc) = @_;
         my $url_path = "/$ibx->{name}";
-        my $git_dir = $ibx->{inboxdir};
+        my $git;
         if (defined $epoch) {
-                $git_dir .= "/git/$epoch.git";
                 $url_path .= "/git/$epoch.git";
+                $git = $ibx->git_epoch($epoch) or return;
+        } else {
+                $git = $ibx->git;
         }
-        return unless -d $git_dir;
-        my $git = PublicInbox::Git->new($git_dir);
-        my $fingerprint = fingerprint($git) or return; # no empty repos
-
-        chomp(my $owner = $git->qx('config', 'gitweb.owner'));
-        chomp(my $desc = try_cat("$git_dir/description"));
-        utf8::decode($owner);
-        utf8::decode($desc);
-        $owner = undef if $owner eq '';
-        $desc = 'Unnamed repository' if $desc eq '';
-
-        # templates/hooks--update.sample and git-multimail in git.git
-        # only match "Unnamed repository", not the full contents of
-        # templates/this--description in git.git
-        if ($desc =~ /\AUnnamed repository/) {
-                $desc = "$default_desc [epoch $epoch]" if defined($epoch);
-        }
-
-        my $reference;
-        chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
-        if ($alt) {
-                # n.b.: GitPython doesn't seem to handle comments or C-quoted
-                # strings like native git does; and we don't for now, either.
-                my @alt = split(/\n+/, $alt);
-
-                # grokmirror only supports 1 alternate for "reference",
-                if (scalar(@alt) == 1) {
-                        my $objdir = "$git_dir/objects";
-                        $reference = File::Spec->rel2abs($alt[0], $objdir);
-                        $reference =~ s!/[^/]+/?\z!!; # basename
-                }
-        }
-        $ctx->{-abs2urlpath}->{$git_dir} = $url_path;
-        my $modified = $git->modified;
-        if ($modified > ($ctx->{-mtime} // 0)) {
-                $ctx->{-mtime} = $modified;
-        }
-        $ctx->{manifest}->{$url_path} = {
-                owner => $owner,
-                reference => $reference,
-                description => $desc,
-                modified => $modified,
-                fingerprint => $fingerprint,
-        };
+        my $ent = $git->manifest_entry($epoch, $default_desc) or return;
+        inject_entry($ctx, $url_path, $ent, $git->{git_dir});
 }
 
-sub ibx_entry {
+sub slow_manifest_add ($$) {
         my ($ctx, $ibx) = @_;
         eval {
                 if (defined(my $max = $ibx->max_git_epoch)) {
@@ -114,9 +56,47 @@ sub ibx_entry {
         warn "E: $@" if $@;
 }
 
-sub hide_key { 'manifest' }
+sub eidx_manifest_add ($$$) {
+        my ($ctx, $ALL, $ibx) = @_;
+        if (my $data = $ALL->misc->inbox_data($ibx)) {
+                $data = $json->decode($data);
+                delete $data->{''}; # private
+                while (my ($url_path, $ent) = each %$data) {
+                        inject_entry($ctx, $url_path, $ent);
+                }
+        } else {
+                warn "E: `${\$ibx->eidx_key}' not indexed by $ALL->{topdir}\n";
+                # do not use slow path for global manifest since
+                # it can become catastrophically slow.  per-inbox manifest
+                # is not too bad with dozens of epochs, so never fail that:
+                slow_manifest_add($ctx, $ibx) if $ibx == $ctx->{ibx};
+        }
+}
+
+sub response {
+        my ($class, $ctx) = @_;
+        bless $ctx, $class;
+        my ($re, undef) = $ctx->url_filter;
+        $re // return psgi_triple($ctx);
+        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+                                        $ctx->can('list_match_i'), $re, $ctx);
+        sub {
+                $ctx->{-wcb} = $_[0]; # HTTP server callback
+                ($ctx->{www}->{pi_cfg}->ALL ||
+                                        !$ctx->{env}->{'pi-httpd.async'}) ?
+                                $iter->each_section : $iter->event_step;
+        }
+}
+
+sub ibx_entry {
+        my ($ctx, $ibx) = @_;
+        my $ALL = $ctx->{www}->{pi_cfg}->ALL;
+        $ALL ? eidx_manifest_add($ctx, $ALL, $ibx) :
+                slow_manifest_add($ctx, $ibx);
+}
+
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
 
-# overrides WwwListing->psgi_triple
 sub psgi_triple {
         my ($ctx) = @_;
         my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {};
@@ -129,7 +109,13 @@ sub psgi_triple {
         gzip(\$manifest => \(my $out));
         [ 200, [ qw(Content-Type application/gzip),
                  'Last-Modified', time2str($ctx->{-mtime}),
-                 'Content-Length', bytes::length($out) ], [ $out ] ]
+                 'Content-Length', length($out) ], [ $out ] ]
+}
+
+sub per_inbox {
+        my ($ctx) = @_;
+        ibx_entry($ctx, $ctx->{ibx});
+        psgi_triple($ctx);
 }
 
 1;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 47025891..17893a09 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -1,10 +1,10 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Streaming interface for mboxrd HTTP responses
 # See PublicInbox::GzipFilter for details.
 package PublicInbox::Mbox;
-use strict;
+use v5.12;
 use parent 'PublicInbox::GzipFilter';
 use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
@@ -17,15 +17,12 @@ use PublicInbox::Eml;
 sub getline {
         my ($ctx) = @_; # ctx
         my $smsg = $ctx->{smsg} or return;
-        my $ibx = $ctx->{-inbox};
-        my $eml = $ibx->smsg_eml($smsg) or return;
-        my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
-        $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
-        if ($n) {
-                $ctx->translate(msg_body($eml));
+        my $ibx = $ctx->{ibx};
+        my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
+        if (($ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}))) {
+                $ctx->translate(msg_hdr($ctx, $eml), msg_body($eml));
         } else { # last message
-                $ctx->zmore(msg_body($eml));
-                $ctx->zflush;
+                $ctx->zflush(msg_hdr($ctx, $eml), msg_body($eml));
         }
 }
 
@@ -34,8 +31,8 @@ sub async_next {
         my ($http) = @_; # PublicInbox::HTTP
         my $ctx = $http->{forward} or return; # client aborted
         eval {
-                my $smsg = $ctx->{smsg} or return $ctx->close;
-                $ctx->smsg_blob($smsg);
+                my $smsg = $ctx->{smsg} // return $ctx->close;
+                $ctx->smsg_blob($smsg) if $smsg;
         };
         warn "E: $@" if $@;
 }
@@ -44,19 +41,19 @@ sub async_eml { # for async_blob_cb
         my ($ctx, $eml) = @_;
         my $smsg = delete $ctx->{smsg};
         # next message
-        $ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});
-
-        $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
-        $ctx->{http_out}->write($ctx->translate(msg_body($eml)));
+        $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}});
+        local $ctx->{eml} = $eml; # for mbox_hdr
+        $ctx->write(msg_hdr($ctx, $eml), msg_body($eml));
 }
 
-sub res_hdr ($$) {
-        my ($ctx, $subject) = @_;
-        my $fn = $subject // '';
+sub mbox_hdr ($) {
+        my ($ctx) = @_;
+        my $eml = $ctx->{eml} //= $ctx->{ibx}->smsg_eml($ctx->{smsg});
+        my $fn = $eml->header_str('Subject') // '';
         $fn =~ s/^re:\s+//i;
         $fn = to_filename($fn) // 'no-subject';
         my @hdr = ('Content-Type');
-        if ($ctx->{-inbox}->{obfuscate}) {
+        if ($ctx->{ibx}->{obfuscate}) {
                 # obfuscation is stupid, but maybe scrapers are, too...
                 push @hdr, 'application/mbox';
                 $fn .= '.mbox';
@@ -64,69 +61,44 @@ sub res_hdr ($$) {
                 push @hdr, 'text/plain';
                 $fn .= '.txt';
         }
+        my $cs = $ctx->{eml}->ct->{attributes}->{charset} // 'UTF-8';
+        $cs = 'UTF-8' if $cs =~ /[^a-zA-Z0-9\-\_]/; # avoid header injection
+        $hdr[-1] .= "; charset=$cs";
         push @hdr, 'Content-Disposition', "inline; filename=$fn";
-        \@hdr;
+        [ 200, \@hdr ];
 }
 
 # for rare cases where v1 inboxes aren't indexed w/ ->over at all
 sub no_over_raw ($) {
         my ($ctx) = @_;
-        my $mref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return;
-        my $eml = PublicInbox::Eml->new($mref);
-        [ 200, res_hdr($ctx, $eml->header_str('Subject')),
-                [ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
+        my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return;
+        my $eml = $ctx->{eml} = PublicInbox::Eml->new($mref);
+        [ @{mbox_hdr($ctx)}, [ msg_hdr($ctx, $eml) . msg_body($eml) ] ]
 }
 
 # /$INBOX/$MESSAGE_ID/raw
 sub emit_raw {
         my ($ctx) = @_;
-        $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-        my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
+        my $over = $ctx->{ibx}->over or return no_over_raw($ctx);
         my ($id, $prev);
         my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
         my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
-        my $res_hdr = res_hdr($ctx, $smsg->{subject});
         bless $ctx, __PACKAGE__;
-        $ctx->psgi_response(200, $res_hdr);
+        $ctx->psgi_response(\&mbox_hdr);
 }
 
-sub msg_hdr ($$;$) {
-        my ($ctx, $eml, $mid) = @_;
-        my $header_obj = $eml->header_obj;
+sub msg_hdr ($$) {
+        my ($ctx, $eml) = @_;
 
-        # drop potentially confusing headers, ssoma already should've dropped
-        # Lines and Content-Length
-        foreach my $d (qw(Lines Bytes Content-Length Status)) {
-                $header_obj->header_set($d);
+        # drop potentially confusing headers, various importers should've
+        # already dropped these, but we can't trust stuff we've cloned
+        for my $d (qw(Lines Bytes Content-Length Status)) {
+                $eml->header_set($d);
         }
-        my $ibx = $ctx->{-inbox};
-        my $base = $ctx->{base_url};
-        $mid = $ctx->{mid} unless defined $mid;
-        $mid = mid_escape($mid);
-        my @append = (
-                'Archived-At', "<$base$mid/>",
-                'List-Archive', "<$base>",
-                'List-Post', "<mailto:$ibx->{-primary_address}>",
-        );
-        my $crlf = $header_obj->crlf;
-        my $buf = $header_obj->as_string;
-        # fixup old bug from import (pre-a0c07cba0e5d8b6a)
-        $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-        $buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf;
-
-        for (my $i = 0; $i < @append; $i += 2) {
-                my $k = $append[$i];
-                my $v = $append[$i + 1];
-                my @v = $header_obj->header_raw($k);
-                foreach (@v) {
-                        if ($v eq $_) {
-                                $v = undef;
-                                last;
-                        }
-                }
-                $buf .= "$k: $v$crlf" if defined $v;
-        }
-        $buf .= $crlf;
+        my $crlf = $eml->crlf;
+        my $buf = $eml->header_obj->as_string;
+        PublicInbox::Eml::strip_from($buf);
+        "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf . $crlf;
 }
 
 sub msg_body ($) {
@@ -147,8 +119,9 @@ sub thread_cb {
                         return $smsg;
                 }
                 # refill result set
-                $ctx->{msgs} = $msgs = $ctx->{over}->get_thread($ctx->{mid},
-                                                                $ctx->{prev});
+                my $over = $ctx->{ibx}->over or return $ctx->gone('over');
+                $ctx->{msgs} = $msgs = $over->get_thread($ctx->{mid},
+                                                        $ctx->{prev});
                 return unless @$msgs;
                 $ctx->{prev} = $msgs->[-1];
         }
@@ -159,7 +132,6 @@ sub thread_mbox {
         my $msgs = $ctx->{msgs} = $over->get_thread($ctx->{mid}, {});
         return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
         $ctx->{prev} = $msgs->[-1];
-        $ctx->{over} = $over; # bump refcnt
         require PublicInbox::MboxGz;
         PublicInbox::MboxGz::mbox_gz($ctx, \&thread_cb, $msgs->[0]->{subject});
 }
@@ -178,97 +150,124 @@ sub emit_range {
 
 sub all_ids_cb {
         my ($ctx) = @_;
+        my $over = $ctx->{ibx}->over or return $ctx->gone('over');
         my $ids = $ctx->{ids};
         do {
                 while ((my $num = shift @$ids)) {
-                        my $smsg = $ctx->{over}->get_art($num) or next;
+                        my $smsg = $over->get_art($num) or next;
                         return $smsg;
                 }
-                $ctx->{ids} = $ids = $ctx->{mm}->ids_after(\($ctx->{prev}));
+                $ctx->{ids} = $ids = $over->ids_after(\($ctx->{prev}));
         } while (@$ids);
+        undef;
 }
 
 sub mbox_all_ids {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
         my $prev = 0;
-        my $mm = $ctx->{mm} = $ibx->mm;
-        my $ids = $mm->ids_after(\$prev) or return
-                [404, [qw(Content-Type text/plain)], ["No results found\n"]];
-        $ctx->{over} = $ibx->over or
+        my $over = $ctx->{ibx}->over or
                 return PublicInbox::WWW::need($ctx, 'Overview');
+        my $ids = $over->ids_after(\$prev) or return
+                [404, [qw(Content-Type text/plain)], ["No results found\n"]];
         $ctx->{ids} = $ids;
         $ctx->{prev} = $prev;
+        $ctx->{-low_prio} = 1;
         require PublicInbox::MboxGz;
         PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
 }
 
-sub results_cb {
-        my ($ctx) = @_;
-        my $over = $ctx->{-inbox}->over or return;
-        while (1) {
-                while (defined(my $num = shift(@{$ctx->{ids}}))) {
-                        my $smsg = $over->get_art($num) or next;
-                        return $smsg;
-                }
-                # refill result set
-                my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
-                my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
-                my $size = $mset->size or return;
-                $ctx->{qopts}->{offset} += $size;
-                $ctx->{ids} = $srch->mset_to_artnums($mset);
+my $refill_ids_cb = sub { # async_mset cb
+        my ($ctx, $http, $mset, $err) = @_;
+        $http = undef unless $ctx->{-really_async};
+        if ($err) {
+                warn "E: $err";
+                $ctx->close if $http; # our async httpd
+                return;
         }
-}
-
-sub results_thread_cb {
-        my ($ctx) = @_;
+        # refill result set, deprioritize since there's many results
+        my $size = $mset->size or do {
+                $ctx->close if $http;
+                $ctx->{-mbox_done} = 1;
+                return;
+        };
+        $ctx->{qopts}->{offset} += $size;
+        $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts});
+        $ctx->{-low_prio} = 1; # true
+        return if !$http;
+        eval {
+                my $smsg = results_cb($ctx) // return $ctx->close;
+                return if !$smsg; # '' wait for async_mset
+                $ctx->smsg_blob($ctx->{smsg} = $smsg);
+        };
+        warn "E: $@" if $@;
+};
 
-        my $over = $ctx->{-inbox}->over or return;
+sub results_cb { # async_next or MboxGz->getline cb
+        my ($ctx, $http) = @_;
+        my $over = $ctx->{ibx}->over or return $ctx->gone('over');
         while (1) {
-                while (defined(my $num = shift(@{$ctx->{xids}}))) {
+                my $ids = $ctx->{xids} // $ctx->{ids};
+                while (defined(my $num = shift(@$ids))) {
                         my $smsg = $over->get_art($num) or next;
                         return $smsg;
                 }
-
-                # refills ctx->{xids}
-                next if $over->expand_thread($ctx);
-
-                # refill result set
-                my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
-                my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
-                my $size = $mset->size or return;
-                $ctx->{qopts}->{offset} += $size;
-                $ctx->{ids} = $srch->mset_to_artnums($mset);
+                next if $ctx->{xids} && $over->expand_thread($ctx);
+                return '' if $ctx->{srch}->async_mset(@$ctx{qw(query qopts)},
+                                                $refill_ids_cb, $ctx, $http);
+                return if $ctx->{-mbox_done};
         }
+}
 
+sub mbox_qry_cb { # async_mset cb
+        my ($ctx, $q, $mset, $err) = @_;
+        my $wcb = delete $ctx->{wcb};
+        if ($err) {
+                warn "E: $err";
+                return $wcb->([500, [qw(Content-Type text/plain)],
+                                [ "Internal server error\n" ]])
+        }
+        $ctx->{qopts}->{offset} = $mset->size or
+                        return $wcb->([404, [qw(Content-Type text/plain)],
+                                        ["No results found\n"]]);
+        $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts});
+        my $fn;
+        if ($q->{t} && $ctx->{srch}->has_threadid) {
+                $ctx->{xids} = []; # triggers over->expand_thread
+                $fn = "results-thread-$ctx->{query}";
+        } else {
+                $fn = "results-$ctx->{query}";
+        }
+        require PublicInbox::MboxGz;
+        my $res = PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn);
+        ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
 }
 
 sub mbox_all {
         my ($ctx, $q) = @_;
-        my $q_string = $q->{'q'};
-        return mbox_all_ids($ctx) if $q_string !~ /\S/;
-        my $srch = $ctx->{-inbox}->search or
+        my $qstr = $q->{'q'};
+        return mbox_all_ids($ctx) if $qstr !~ /\S/;
+        my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
-        my $over = $ctx->{-inbox}->over or
-                return PublicInbox::WWW::need($ctx, 'Overview');
+        my $opt = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
 
-        my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
-        $qopts->{thread} = 1 if $q->{t};
-        my $mset = $srch->mset($q_string, $qopts);
-        $qopts->{offset} = $mset->size or
-                        return [404, [qw(Content-Type text/plain)],
-                                ["No results found\n"]];
-        $ctx->{query} = $q_string;
-        $ctx->{ids} = $srch->mset_to_artnums($mset);
-        require PublicInbox::MboxGz;
-        my $fn;
-        if ($q->{t} && $srch->has_threadid) {
-                $fn = 'results-thread-'.$q_string;
-                PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn);
-        } else {
-                $fn = 'results-'.$q_string;
-                PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn);
+        # {threadid} limits results to a given thread
+        # {threads} collapses results from messages in the same thread,
+        # allowing us to use ->expand_thread w/o duplicates in our own code
+        if (defined($ctx->{mid})) {
+                my $over = ($ctx->{ibx}->{isrch} ?
+                                $ctx->{ibx}->{isrch}->{es}->over :
+                                $ctx->{ibx}->over) or
+                        return PublicInbox::WWW::need($ctx, 'Overview');
+                $opt->{threadid} = $over->mid2tid($ctx->{mid});
         }
+        $opt->{threads} = 1 if $q->{t};
+        $srch->query_approxidate($ctx->{ibx}->git, $qstr);
+        $ctx->{query} = $qstr;
+        sub { # called by PSGI server
+                $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+                $srch->async_mset($qstr, $opt, \&mbox_qry_cb, $ctx, $q) and
+                        $ctx->{-really_async} = 1;
+        };
 }
 
 1;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 913be6e4..90e69c09 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -1,7 +1,7 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::MboxGz;
-use strict;
+use v5.12;
 use parent 'PublicInbox::GzipFilter';
 use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
@@ -13,8 +13,8 @@ sub async_next ($) {
         my ($http) = @_; # PublicInbox::HTTP
         my $ctx = $http->{forward} or return;
         eval {
-                $ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close;
-                $ctx->smsg_blob($ctx->{smsg});
+                my $smsg = $ctx->{cb}->($ctx, $http) // return $ctx->close;
+                $smsg and $ctx->smsg_blob($ctx->{smsg} = $smsg);
         };
         warn "E: $@" if $@;
 }
@@ -22,7 +22,6 @@ sub async_next ($) {
 sub mbox_gz {
         my ($self, $cb, $fn) = @_;
         $self->{cb} = $cb;
-        $self->{base_url} = $self->{-inbox}->base_url($self->{env});
         $self->{gz} = PublicInbox::GzipFilter::gzip_or_die();
         $fn = to_filename($fn // '') // 'no-subject';
         # http://www.iana.org/assignments/media-types/application/gzip
@@ -37,9 +36,8 @@ sub getline {
         my ($self) = @_;
         my $cb = $self->{cb} or return;
         while (my $smsg = $cb->($self)) {
-                my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
-                $self->zmore(msg_hdr($self, $eml, $smsg->{mid}));
-                return $self->translate(msg_body($eml));
+                my $eml = $self->{ibx}->smsg_eml($smsg) or next;
+                return $self->translate(msg_hdr($self, $eml), msg_body($eml));
         }
         # signal that we're done and can return undef next call:
         delete $self->{cb};
diff --git a/lib/PublicInbox/MboxLock.pm b/lib/PublicInbox/MboxLock.pm
new file mode 100644
index 00000000..5e373873
--- /dev/null
+++ b/lib/PublicInbox/MboxLock.pm
@@ -0,0 +1,134 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Various mbox locking methods
+package PublicInbox::MboxLock;
+use v5.12;
+use PublicInbox::OnDestroy ();
+use Fcntl qw(:flock F_SETLK F_SETLKW F_RDLCK F_WRLCK
+                        O_CREAT O_EXCL O_WRONLY SEEK_SET);
+use Carp qw(croak);
+use PublicInbox::DS qw(now); # ugh...
+use autodie qw(chdir opendir unlink);
+
+our $TMPL = do {
+        if ($^O eq 'linux') {
+                \'s @32'
+        } elsif ($^O =~ /bsd/ || $^O eq 'dragonfly') {
+                \'@20 s @256' # n.b. @32 may be enough...
+        } else {
+                 eval { require File::FcntlLock; 1 }
+        }
+};
+
+# This order matches Debian policy on Linux systems.
+# See policy/ch-customized-programs.rst in
+# https://salsa.debian.org/dbnpolicy/policy.git
+sub defaults { [ qw(fcntl dotlock) ] }
+
+sub acq_fcntl {
+        my ($self) = @_;
+        my $op = $self->{nb} ? F_SETLK : F_SETLKW;
+        my $t = $self->{rw} ? F_WRLCK : F_RDLCK;
+        my $end = now + $self->{timeout};
+        $TMPL or die <<EOF;
+"struct flock" layout not available on $^O, install File::FcntlLock?
+EOF
+        do {
+                if (ref $TMPL) {
+                        return if fcntl($self->{fh}, $op, pack($$TMPL, $t));
+                } else {
+                        my $fl = File::FcntlLock->new;
+                        $fl->l_type($t);
+                        $fl->l_whence(SEEK_SET);
+                        $fl->l_start(0);
+                        $fl->l_len(0);
+                        return if $fl->lock($self->{fh}, $op);
+                }
+                select(undef, undef, undef, $self->{delay});
+        } while (now < $end);
+        die "fcntl lock timeout $self->{f}: $!\n";
+}
+
+sub acq_dotlock {
+        my ($self) = @_;
+        my $dot_lock = "$self->{f}.lock";
+        my ($pfx, $base) = ($self->{f} =~ m!(\A.*?/)?([^/]+)\z!);
+        $pfx //= '';
+        my $pid = $$;
+        my $end = now + $self->{timeout};
+        do {
+                my $tmp = "$pfx.$base-".sprintf('%x,%x,%x',
+                                        rand(0xffffffff), $pid, time);
+                if (sysopen(my $fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) {
+                        if (link($tmp, $dot_lock)) {
+                                unlink($tmp);
+                                $self->{".lock$pid"} = $dot_lock;
+                                substr($dot_lock, 0, 1) eq '/' or
+                                        opendir($self->{dh}, '.');
+                                return;
+                        }
+                        unlink($tmp);
+                        select(undef, undef, undef, $self->{delay});
+                } else {
+                        croak "open $tmp (for $dot_lock): $!" if !$!{EXIST};
+                }
+        } while (now < $end);
+        die "dotlock timeout $dot_lock\n";
+}
+
+sub acq_flock {
+        my ($self) = @_;
+        my $op = $self->{rw} ? LOCK_EX : LOCK_SH;
+        $op |= LOCK_NB if $self->{nb};
+        my $end = now + $self->{timeout};
+        do {
+                return if flock($self->{fh}, $op);
+                if ($!{EWOULDBLOCK}) {
+                        select(undef, undef, undef, $self->{delay});
+                } elsif (!$!{EINTR}) {
+                        croak "flock($self->{f} ($self->{fh}): $!";
+                }
+        } while (now < $end);
+        die "flock timeout $self->{f}: $!\n";
+}
+
+sub acq {
+        my ($cls, $f, $rw, $methods) = @_;
+        my $self = bless { f => $f, rw => $rw }, $cls;
+        my $ok = open $self->{fh}, $rw ? '+>>' : '<', $f;
+        croak "open($f): $!" if !$ok && ($rw || !$!{ENOENT});
+        my $m = "@$methods";
+        if ($m ne 'none') {
+                my @m = map {
+                        if (/\A(timeout|delay)=([0-9\.]+)s?\z/) {
+                                $self->{$1} = $2 + 0;
+                                ();
+                        } else {
+                                $cls->can("acq_$_") // $_
+                        }
+                } split(/[, ]/, $m);
+                my @bad = grep { !ref } @m;
+                croak "Unsupported lock methods: @bad\n" if @bad;
+                croak "No lock methods supplied with $m\n" if !@m;
+                $self->{nb} = $#m || defined($self->{timeout});
+                $self->{delay} //= 0.1;
+                $self->{timeout} //= 5;
+                $_->($self) for @m;
+        }
+        $self;
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        my $f = $self->{".lock$$"} or return;
+        my $od;
+        if (my $dh = delete $self->{dh}) {
+                opendir my $c, '.';
+                $od = PublicInbox::OnDestroy::all \&chdir, $c;
+                chdir($dh);
+        }
+        CORE::unlink($f) or die "unlink($f): $! (lock stolen?)";
+}
+
+1;
diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm
new file mode 100644
index 00000000..3d78ca23
--- /dev/null
+++ b/lib/PublicInbox/MboxReader.pm
@@ -0,0 +1,188 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# reader for mbox variants we support (and also sets up commands for writing)
+package PublicInbox::MboxReader;
+use strict;
+use v5.10.1; # check regexps before v5.12
+use Data::Dumper;
+$Data::Dumper::Useqq = 1; # should've been the default, for bad data
+
+my $from_strict =
+        qr/^From \S+ +\S+ \S+ +\S+ [^\n:]+:[^\n:]+:[^\n:]+ [^\n:]+\n/sm;
+
+# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/
+my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft');
+# O (old/non-recent), and D (deleted) aren't in JMAP,
+# so probably won't be supported by us.
+sub mbox_keywords {
+        my $eml = $_[-1];
+        my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}";
+        my %kw;
+        $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge;
+        [ sort(keys %kw) ];
+}
+
+sub _mbox_from {
+        my ($mbfh, $from_re, $eml_cb, @arg) = @_;
+        my $buf = '';
+        my @raw;
+        while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) {
+                if ($r == 0) { # close here to check for "curl --fail"
+                        $mbfh->close or die "error closing mbox: \$?=$? $!";
+                        @raw = ($buf);
+                } else {
+                        @raw = split(/$from_strict/mos, $buf, -1);
+                        next if scalar(@raw) == 0;
+                        $buf = pop(@raw); # last bit may be incomplete
+                }
+                @raw = grep /[^ \t\r\n]/s, @raw; # skip empty messages
+                while (defined(my $raw = shift @raw)) {
+                        $raw =~ s/^\r?\n\z//ms;
+                        $raw =~ s/$from_re/$1/gms;
+                        my $eml = PublicInbox::Eml->new(\$raw);
+                        $eml_cb->($eml, @arg) if $eml->raw_size;
+                }
+                return if $r == 0; # EOF
+        }
+        die "error reading mboxo/mboxrd handle: $!";
+}
+
+sub mboxrd {
+        my (undef, $mbfh, $eml_cb, @arg) = @_;
+        _mbox_from($mbfh, qr/^>(>*From )/ms, $eml_cb, @arg);
+}
+
+sub mboxo {
+        my (undef, $mbfh, $eml_cb, @arg) = @_;
+        _mbox_from($mbfh, qr/^>(From )/ms, $eml_cb, @arg);
+}
+
+sub _cl_body {
+        my ($mbfh, $bref, $cl) = @_;
+        my $body = substr($$bref, 0, $cl, '');
+        my $need = $cl - length($body);
+        if ($need > 0) {
+                $mbfh or die "E: needed $need bytes after EOF";
+                defined(my $r = read($mbfh, $body, $need, length($body))) or
+                        die "E: read error: $!\n";
+                $r == $need or die "E: read $r of $need bytes\n";
+        }
+        \$body;
+}
+
+sub _extract_hdr {
+        my ($ref) = @_;
+        if (index($$ref, "\r\n") < 0 && (my $pos = index($$ref, "\n\n")) >= 0) {
+                # likely on *nix
+                \substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref
+        } elsif ($$ref =~ /\r?\n\r?\n/s) {
+                \substr($$ref, 0, $+[0], ''); # sv_chop on $$ref
+        } else {
+                undef
+        }
+}
+
+sub _mbox_cl ($$$;@) {
+        my ($mbfh, $uxs_from, $eml_cb, @arg) = @_;
+        my $buf = '';
+        while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) {
+                if ($r == 0) { # detect "curl --fail"
+                        $mbfh->close or
+                                die "error closing mboxcl/mboxcl2: \$?=$? $!";
+                        undef $mbfh;
+                }
+                while (my $hdr = _extract_hdr(\$buf)) {
+                        PublicInbox::Eml::strip_from($$hdr) or
+                                die "E: no 'From ' line in:\n", Dumper($hdr);
+                        my $eml = PublicInbox::Eml->new($hdr);
+                        next unless $eml->raw_size;
+                        my @cl = $eml->header_raw('Content-Length');
+                        my $n = scalar(@cl);
+                        $n == 0 and die "E: Content-Length missing in:\n",
+                                        Dumper($eml->as_string);
+                        $n == 1 or die "E: multiple ($n) Content-Length in:\n",
+                                        Dumper($eml->as_string);
+                        $cl[0] =~ /\A[0-9]+\z/ or die
+                                "E: Content-Length `$cl[0]' invalid\n",
+                                        Dumper($eml->as_string);
+                        if (($eml->{bdy} = _cl_body($mbfh, \$buf, $cl[0]))) {
+                                $uxs_from and
+                                        ${$eml->{bdy}} =~ s/^>From /From /sgm;
+                        }
+                        $eml_cb->($eml, @arg);
+                }
+                if ($r == 0) {
+                        $buf =~ /[^ \r\n\t]/ and
+                                warn "W: leftover at end of mboxcl/mboxcl2:\n",
+                                        Dumper(\$buf);
+                        return;
+                }
+        }
+        die "error reading mboxcl/mboxcl2 handle: $!";
+}
+
+sub mboxcl {
+        my (undef, $mbfh, $eml_cb, @arg) = @_;
+        _mbox_cl($mbfh, 1, $eml_cb, @arg);
+}
+
+sub mboxcl2 {
+        my (undef, $mbfh, $eml_cb, @arg) = @_;
+        _mbox_cl($mbfh, undef, $eml_cb, @arg);
+}
+
+sub new { bless \(my $x), __PACKAGE__ }
+
+sub reads {
+        my $ifmt = $_[-1];
+        $ifmt =~ /\Ambox(?:rd|cl|cl2|o)\z/ ? __PACKAGE__->can($ifmt) : undef
+}
+
+# all of these support -c for stdout and -d for decompression,
+# mutt is commonly distributed with hooks for gz, bz2 and xz, at least
+# { foo => '' } means "--foo" is passed to the command-line
+my %zsfx2cmd = (
+        gz => [ qw(GZIP pigz gzip), { rsyncable => '' } ],
+        bz2 => [ 'bzip2', {} ],
+        xz => [ 'xz', {} ],
+        # don't add new entries here unless MUA support is widely available
+);
+
+sub zsfx ($) {
+        my ($pathname) = @_;
+        my $allow = join('|', keys %zsfx2cmd);
+        $pathname =~ /\.($allow)\z/ ? $1 : undef;
+}
+
+sub zsfx2cmd ($$$) {
+        my ($zsfx, $decompress, $lei) = @_;
+        my $x = $zsfx2cmd{$zsfx} // die "BUG: no support for suffix=.$zsfx";
+        my @info = @$x;
+        my $cmd_opt = ref($info[-1]) ? pop(@info) : undef;
+        my @cmd = (undef, $decompress ? qw(-dc) : qw(-c));
+        require PublicInbox::Spawn;
+        for my $exe (@info) {
+                # I think respecting client's ENV{GZIP} is OK, not sure
+                # about ENV overrides for other, less-common compressors
+                if ($exe eq uc($exe)) {
+                        $exe = $lei->{env}->{$exe} or next;
+                }
+                $cmd[0] = PublicInbox::Spawn::which($exe) and last;
+        }
+        $cmd[0] // die join(' or ', @info)." missing for .$zsfx";
+
+        # only for --rsyncable.  TODO: support compression level?
+        for my $key (keys %$cmd_opt) {
+                push @cmd, '--'.$key if $lei->{opt}->{$key};
+        }
+        \@cmd;
+}
+
+sub zsfxcat ($$$) {
+        my ($in, $zsfx, $lei) = @_;
+        my $cmd = zsfx2cmd($zsfx, 1, $lei);
+        PublicInbox::Spawn::popen_rd($cmd, undef, { 0 => $in, 2 => $lei->{2} });
+}
+
+1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
new file mode 100644
index 00000000..2981b058
--- /dev/null
+++ b/lib/PublicInbox/MdirReader.pm
@@ -0,0 +1,108 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Maildirs only (PublicInbox::MHreader exists, now)
+# ref: https://cr.yp.to/proto/maildir.html
+#        https://wiki2.dovecot.org/MailboxFormat/Maildir
+package PublicInbox::MdirReader;
+use strict;
+use v5.10.1;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::SHA qw(sha256_hex);
+
+# returns Maildir flags from a basename ('' for no flags, undef for invalid)
+sub maildir_basename_flags {
+        my (@f) = split(/:/, $_[0], -1);
+        return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
+        $f[1] // return ''; # "new"
+        $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
+}
+
+# same as above, but for full path name
+sub maildir_path_flags {
+        my ($f) = @_;
+        my $i = rindex($f, '/');
+        $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
+}
+
+sub shard_ok ($$$) {
+        my ($bn, $mod, $shard) = @_;
+        # can't get dirent.d_ino w/ pure Perl readdir, so we extract
+        # the OID if it looks like one instead of doing stat(2)
+        my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn);
+        my $recno = hex(substr($hex, 0, 8));
+        ($recno % $mod) == $shard;
+}
+
+sub maildir_each_file {
+        my ($self, $dir, $cb, @arg) = @_;
+        $dir .= '/' unless substr($dir, -1) eq '/';
+        my ($mod, $shard) = @{$self->{shard_info} // []};
+        for my $d (qw(new/ cur/)) {
+                my $pfx = $dir.$d;
+                opendir my $dh, $pfx or next;
+                while (defined(my $bn = readdir($dh))) {
+                        my $fl = maildir_basename_flags($bn) // next;
+                        next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                        next if index($fl, 'T') >= 0; # no Trashed messages
+                        $cb->($pfx.$bn, $fl, @arg);
+                }
+        }
+}
+
+my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
+        R => 'answered', S => 'seen');
+
+sub maildir_each_eml {
+        my ($self, $dir, $cb, @arg) = @_;
+        $dir .= '/' unless substr($dir, -1) eq '/';
+        my ($mod, $shard) = @{$self->{shard_info} // []};
+        my $pfx = $dir . 'new/';
+        if (opendir(my $dh, $pfx)) {
+                while (defined(my $bn = readdir($dh))) {
+                        next if substr($bn, 0, 1) eq '.';
+                        my @f = split(/:/, $bn, -1);
+
+                        # mbsync and offlineimap both use "2," in "new/"
+                        next if ($f[1] // '2,') ne '2,' || defined($f[2]);
+
+                        next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                        my $f = $pfx.$bn;
+                        my $eml = eml_from_path($f) or next;
+                        $cb->($f, [], $eml, @arg);
+                }
+        }
+        $pfx = $dir . 'cur/';
+        opendir my $dh, $pfx or return;
+        while (defined(my $bn = readdir($dh))) {
+                my $fl = maildir_basename_flags($bn) // next;
+                next if index($fl, 'T') >= 0;
+                next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                my $f = $pfx.$bn;
+                my $eml = eml_from_path($f) or next;
+                my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
+                $cb->($f, \@kw, $eml, @arg);
+        }
+}
+
+sub new { bless {}, __PACKAGE__ }
+
+sub flags2kw ($) {
+        if (wantarray) {
+                my @unknown;
+                my %kw;
+                for (split(//, $_[0])) {
+                        my $k = $c2kw{$_};
+                        if (defined($k)) {
+                                $kw{$k} = 1;
+                        } else {
+                                push @unknown, $_;
+                        }
+                }
+                (\%kw, \@unknown);
+        } else {
+                [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/MdirSort.pm b/lib/PublicInbox/MdirSort.pm
new file mode 100644
index 00000000..6bd9fb6c
--- /dev/null
+++ b/lib/PublicInbox/MdirSort.pm
@@ -0,0 +1,46 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used for sorting MH (and (TODO) Maildir) names
+# TODO: consider sort(1) to parallelize sorting of gigantic directories
+package PublicInbox::MdirSort;
+use v5.12;
+use Time::HiRes ();
+use parent qw(Exporter);
+use Fcntl qw(S_ISREG);
+our @EXPORT = qw(mdir_sort);
+my %ST = (sequence => 0, size => 1, atime => 2, mtime => 3, ctime => 4);
+
+sub mdir_sort ($$;$) {
+        my ($ent, $sort, $max) = @_;
+        my @st;
+        my @ent = map {
+                @st = Time::HiRes::stat $_;
+                # name, size, {a,m,c}time
+                S_ISREG($st[2]) ? [ $_, @st[7..10] ] : ();
+        } @$ent;
+        @ent = grep { $_->[1] <= $max } @ent if $max;
+        use sort 'stable';
+        for my $s (@$sort) {
+                if ($s =~ /\A(\-|\+|)name\z/) {
+                        if ($1 eq '-') {
+                                @ent = sort { $b->[0] cmp $a->[0] } @ent;
+                        } else {
+                                @ent = sort { $a->[0] cmp $b->[0] } @ent;
+                        }
+                } elsif ($s =~ /\A(\-|\+|)
+                                (sequence|size|ctime|mtime|atime)\z/x) {
+                        my $key = $ST{$2};
+                        if ($1 eq '-') {
+                                @ent = sort { $b->[$key] <=> $a->[$key] } @ent;
+                        } else {
+                                @ent = sort { $a->[$key] <=> $b->[$key] } @ent;
+                        }
+                } else {
+                        die "E: unrecognized sort parameter: `$s'";
+                }
+        }
+        @$ent = map { $_->[0] } @ent;
+}
+
+1;
diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm
new file mode 100644
index 00000000..6708527d
--- /dev/null
+++ b/lib/PublicInbox/MiscIdx.pm
@@ -0,0 +1,161 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# like PublicInbox::SearchIdx, but for searching for non-mail messages.
+# Things indexed include:
+# * inboxes themselves
+# * epoch information
+# * (maybe) git code repository information (not commits)
+# Expect ~100K-1M documents with no parallelism opportunities,
+# so no sharding, here.
+#
+# See MiscSearch for read-only counterpart
+package PublicInbox::MiscIdx;
+use strict;
+use v5.10.1;
+use PublicInbox::InboxWritable;
+use PublicInbox::Search; # for SWIG Xapian and Search::Xapian compat
+use PublicInbox::SearchIdx qw(index_text term_generator add_val);
+use Carp qw(croak);
+use File::Path ();
+use PublicInbox::MiscSearch;
+use PublicInbox::Config;
+use PublicInbox::Syscall;
+my $json;
+
+sub new {
+        my ($class, $eidx) = @_;
+        PublicInbox::SearchIdx::load_xapian_writable();
+        my $mi_dir = "$eidx->{xpfx}/misc";
+        File::Path::mkpath($mi_dir);
+        PublicInbox::Syscall::nodatacow_dir($mi_dir);
+        my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
+        $flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
+        $flags |= $PublicInbox::SearchIdx::DB_DANGEROUS if $eidx->{-dangerous};
+        $json //= PublicInbox::Config::json();
+        bless {
+                mi_dir => $mi_dir,
+                flags => $flags,
+                indexlevel => 'full', # small DB, no point in medium?
+        }, $class;
+}
+
+sub _begin_txn ($) {
+        my ($self) = @_;
+        my $wdb = $PublicInbox::Search::X{WritableDatabase};
+        my $xdb = eval { $wdb->new($self->{mi_dir}, $self->{flags}) };
+        croak "Failed opening $self->{mi_dir}: $@" if $@;
+        $xdb->begin_transaction;
+        $xdb;
+}
+
+sub commit_txn {
+        my ($self) = @_;
+        my $xdb = delete $self->{xdb} or return;
+        $xdb->commit_transaction;
+}
+
+sub create_xdb {
+        my ($self) = @_;
+        $self->{xdb} //= _begin_txn($self);
+        commit_txn($self);
+}
+
+sub remove_eidx_key {
+        my ($self, $eidx_key) = @_;
+        my $xdb = $self->{xdb} //= _begin_txn($self);
+        my $head = $xdb->postlist_begin('Q'.$eidx_key);
+        my $tail = $xdb->postlist_end('Q'.$eidx_key);
+        my @docids; # only one, unless we had bugs
+        for (; $head != $tail; $head++) {
+                push @docids, $head->get_docid;
+        }
+        for my $docid (@docids) {
+                $xdb->delete_document($docid);
+                warn "# remove inbox docid #$docid ($eidx_key)\n";
+        }
+}
+
+# adds or updates according to $eidx_key
+sub index_ibx {
+        my ($self, $ibx) = @_;
+        my $eidx_key = $ibx->eidx_key;
+        my $xdb = $self->{xdb} //= _begin_txn($self);
+        # Q = uniQue in Xapian terminology
+        my $head = $xdb->postlist_begin('Q'.$eidx_key);
+        my $tail = $xdb->postlist_end('Q'.$eidx_key);
+        my ($docid, @drop);
+        for (; $head != $tail; $head++) {
+                if (defined $docid) {
+                        my $i = $head->get_docid;
+                        push @drop, $i;
+                        warn <<EOF;
+W: multiple inboxes keyed to `$eidx_key', deleting #$i
+EOF
+                } else {
+                        $docid = $head->get_docid;
+                }
+        }
+        $xdb->delete_document($_) for @drop; # just in case
+
+        my $doc = $PublicInbox::Search::X{Document}->new;
+        term_generator($self)->set_document($doc);
+
+        # allow sorting by modified and uidvalidity (created at)
+        add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
+        add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
+
+        $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
+        $doc->add_boolean_term('T'.'inbox'); # Type
+
+        # force reread from disk, {description} could be loaded from {misc}
+        delete @$ibx{qw(-art_min -art_max description)};
+        if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
+                $doc->add_boolean_term('T'.'newsgroup'); # additional Type
+                my $n = $ibx->art_min;
+                add_val($doc, $PublicInbox::MiscSearch::ART_MIN, $n) if $n;
+                $n = $ibx->art_max;
+                add_val($doc, $PublicInbox::MiscSearch::ART_MAX, $n) if $n;
+        }
+
+        my $desc = $ibx->description;
+
+        # description = S/Subject (or title)
+        # address = A/Author
+        index_text($self, $desc, 1, 'S');
+        index_text($self, $ibx->{name}, 1, 'XNAME');
+        my %map = (
+                address => 'A',
+                listid => 'XLISTID',
+                infourl => 'XINFOURL',
+                url => 'XURL'
+        );
+        while (my ($f, $pfx) = each %map) {
+                for my $v (@{$ibx->{$f} // []}) {
+                        index_text($self, $v, 1, $pfx);
+                }
+        }
+        my $data = {};
+        if (defined(my $max = $ibx->max_git_epoch)) { # v2
+                my $pfx = "/$ibx->{name}/git/";
+                for my $epoch (0..$max) {
+                        my $git = $ibx->git_epoch($epoch) or return;
+                        if (my $ent = $git->manifest_entry($epoch, $desc)) {
+                                $data->{"$pfx$epoch.git"} = $ent;
+                                $ent->{git_dir} = $git->{git_dir};
+                        }
+                        $git->cleanup; # ->modified starts cat-file --batch
+                }
+        } elsif (my $ent = $ibx->git->manifest_entry) { # v1
+                $ent->{git_dir} = $ibx->{inboxdir};
+                $data->{"/$ibx->{name}"} = $ent;
+        }
+        $doc->set_data($json->encode($data));
+        if (defined $docid) {
+                $xdb->replace_document($docid, $doc);
+        } else {
+                $xdb->add_document($doc);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
new file mode 100644
index 00000000..5fb47d03
--- /dev/null
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -0,0 +1,154 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# read-only counterpart to MiscIdx
+package PublicInbox::MiscSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
+my $json;
+
+# Xapian value columns:
+our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
+our $ART_MIN = 2; # NNTP article number
+our $ART_MAX = 3; # NNTP article number
+
+# avoid conflicting with message Search::prob_prefix for UI/UX reasons
+my %PROB_PREFIX = (
+        description => 'S', # $INBOX_DIR/description
+        address => 'A',
+        listid => 'XLISTID',
+        url => 'XURL',
+        infourl => 'XINFOURL',
+        name => 'XNAME',
+        '' => 'S A XLISTID XNAME XURL XINFOURL'
+);
+
+sub new {
+        my ($class, $dir) = @_;
+        PublicInbox::Search::load_xapian();
+        $json //= PublicInbox::Config::json();
+        bless {
+                xdb => $PublicInbox::Search::X{Database}->new($dir)
+        }, $class;
+}
+
+# read-only
+sub mi_qp_new ($) {
+        my ($self) = @_;
+        my $xdb = $self->{xdb};
+        my $qp = $PublicInbox::Search::X{QueryParser}->new;
+        $qp->set_default_op(PublicInbox::Search::OP_AND());
+        $qp->set_database($xdb);
+        $qp->set_stemmer(PublicInbox::Search::stemmer($self));
+        $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
+        my $cb = $qp->can('set_max_wildcard_expansion') //
+                $qp->can('set_max_expansion'); # Xapian 1.5.0+
+        $cb->($qp, 100);
+        $cb = $qp->can('add_valuerangeprocessor') //
+                $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
+        while (my ($name, $prefix) = each %PROB_PREFIX) {
+                $qp->add_prefix($name, $_) for split(/ /, $prefix);
+        }
+        $qp->add_boolean_prefix('type', 'T');
+        $qp;
+}
+
+sub misc_enquire_once { # retry_reopen callback
+        my ($self, $qr, $opt) = @_;
+        my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+        $eq->set_query($qr);
+        my $desc = !$opt->{asc};
+        my $rel = $opt->{relevance} // 0;
+        if ($rel == -1) { # ORDER BY docid
+                $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+                $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+        } elsif ($rel) {
+                $eq->set_sort_by_relevance_then_value($MODIFIED, $desc);
+        } else {
+                $eq->set_sort_by_value_then_relevance($MODIFIED, $desc);
+        }
+        $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
+}
+
+sub mset {
+        my ($self, $qs, $opt) = @_;
+        $opt ||= {};
+        reopen($self);
+        my $qp = $self->{qp} //= mi_qp_new($self);
+        $qs = 'type:inbox' if $qs eq '';
+        my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
+        $opt->{relevance} = 1 unless exists $opt->{relevance};
+        retry_reopen($self, \&misc_enquire_once, $qr, $opt);
+}
+
+sub ibx_data_once {
+        my ($self, $ibx) = @_;
+        my $xdb = $self->{xdb};
+        my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+        my $head = $xdb->postlist_begin($term);
+        my $tail = $xdb->postlist_end($term);
+        return if $head == $tail;
+        my $doc = $xdb->get_document($head->get_docid);
+        $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+        $ibx->{-modified} = int_val($doc, $MODIFIED);
+        $ibx->{-art_min} = int_val($doc, $ART_MIN);
+        $ibx->{-art_max} = int_val($doc, $ART_MAX);
+        $doc->get_data;
+}
+
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+        my ($doc) = $_[-1];
+        my $d;
+        my $data = $json->decode($doc->get_data);
+        for (values %$data) {
+                $d = $_->{description} // next;
+                $d =~ s/ \[epoch [0-9]+\]\z// or next;
+                last;
+        }
+        {
+                uidvalidity => int_val($doc, $UIDVALIDITY),
+                -modified => int_val($doc, $MODIFIED),
+                -art_min => int_val($doc, $ART_MIN), # may be undef
+                -art_max => int_val($doc, $ART_MAX), # may be undef
+                # extract description from manifest.js.gz epoch description
+                description => $d
+        };
+}
+
+sub inbox_data {
+        my ($self, $ibx) = @_;
+        retry_reopen($self, \&ibx_data_once, $ibx);
+}
+
+sub ibx_cache_load {
+        my ($doc, $cache) = @_;
+        my ($eidx_key) = xap_terms('Q', $doc);
+        return unless defined($eidx_key); # expired
+        $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
+}
+
+sub _nntpd_cache_load { # retry_reopen callback
+        my ($self) = @_;
+        my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 };
+        my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
+        my $cache = {};
+        for my $it ($mset->items) {
+                ibx_cache_load($it->get_document, $cache);
+        }
+        $cache
+}
+
+# returns { newsgroup => $cache_entry } mapping, $cache_entry contains
+# anything which may trigger seeks at startup, currently: description,
+# -modified, and uidvalidity.
+sub nntpd_cache_load {
+        my ($self) = @_;
+        retry_reopen($self, \&_nntpd_cache_load);
+}
+
+no warnings 'once';
+*reopen = \&PublicInbox::Search::reopen;
+
+1;
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index bb1dfead..dd28417b 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # read-only utilities for Email::MIME
@@ -84,18 +84,30 @@ sub msg_part_text ($$) {
                 # If forcing charset=UTF-8 failed,
                 # caller will warn further down...
                 $s = $part->body if $@;
+        } elsif ($err && $ct =~ m!\bapplication/octet-stream\b!i) {
+                # Some unconfigured/poorly-configured MUAs will set
+                # application/octet-stream even for all text attachments.
+                # Try to see if it's printable text that we can index
+                # and display:
+                $s = $part->body;
+                utf8::decode($s);
+                undef($s =~ /[^\p{XPosixPrint}\s]/s ? $s : $err);
         }
         ($s, $err);
 }
 
 # returns an array of quoted or unquoted sections
 sub split_quotes {
+        # some editors don't put trailing newlines at the end,
+        # make sure split_quotes can work:
+        $_[0] .= "\n" if substr($_[0], -1) ne "\n";
+
         # Quiet "Complex regular subexpression recursion limit" warning
         # in case an inconsiderate sender quotes 32K of text at once.
         # The warning from Perl is harmless for us since our callers can
         # tolerate less-than-ideal matches which work within Perl limits.
         no warnings 'regexp';
-        split(/((?:^>[^\n]*\n)+)/sm, shift);
+        split(/((?:^>[^\n]*\n)+)/sm, $_[0]);
 }
 
 1;
diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm
index 8596f01c..bbc9a007 100644
--- a/lib/PublicInbox/MsgTime.pm
+++ b/lib/PublicInbox/MsgTime.pm
@@ -1,11 +1,11 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Various date/time-related functions
 package PublicInbox::MsgTime;
+use v5.10.1; # unicode_strings in 5.12 may not work...
 use strict;
-use warnings;
-use base qw(Exporter);
+use parent qw(Exporter);
 our @EXPORT_OK = qw(msg_timestamp msg_datestamp);
 use Time::Local qw(timegm);
 my @MoY = qw(january february march april may june
@@ -125,10 +125,7 @@ sub str2date_zone ($) {
         # but we want to keep "git fsck" happy.
         # "-1200" is the furthest westermost zone offset,
         # but git fast-import is liberal so we use "-1400"
-        if ($zone >= 1400 || $zone <= -1400) {
-                warn "bogus TZ offset: $zone, ignoring and assuming +0000\n";
-                $zone = '+0000';
-        }
+        $zone = '+0000' if $zone >= 1400 || $zone <= -1400;
         [$ts, $zone];
 }
 
@@ -138,50 +135,38 @@ sub time_response ($) {
 }
 
 sub msg_received_at ($) {
-        my ($hdr) = @_; # PublicInbox::Eml
-        my @recvd = $hdr->header_raw('Received');
-        my ($ts);
-        foreach my $r (@recvd) {
+        my ($eml) = @_;
+        my $ts;
+        for my $r ($eml->header_raw('Received')) {
                 $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+
                         [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+)
-                        \s+([\+\-][0-9]+))/sx or next;
+                        \s+(?:[\+\-][0-9]+))/sx or next;
                 $ts = eval { str2date_zone($1) } and return $ts;
-                my $mid = $hdr->header_raw('Message-ID');
-                warn "no date in $mid Received: $r\n";
         }
         undef;
 }
 
 sub msg_date_only ($) {
-        my ($hdr) = @_; # PublicInbox::Eml
-        my @date = $hdr->header_raw('Date');
-        my ($ts);
-        foreach my $d (@date) {
+        my ($eml) = @_;
+        my $ts;
+        for my $d ($eml->header_raw('Date')) {
                 $ts = eval { str2date_zone($d) } and return $ts;
-                if ($@) {
-                        my $mid = $hdr->header_raw('Message-ID');
-                        warn "bad Date: $d in $mid: $@\n";
-                }
         }
         undef;
 }
 
 # Favors Received header for sorting globally
 sub msg_timestamp ($;$) {
-        my ($hdr, $fallback) = @_; # PublicInbox::Eml
-        my $ret;
-        $ret = msg_received_at($hdr) and return time_response($ret);
-        $ret = msg_date_only($hdr) and return time_response($ret);
-        time_response([ $fallback // time, '+0000' ]);
+        my ($eml, $fallback) = @_;
+        time_response(msg_received_at($eml) // msg_date_only($eml) //
+                        [ $fallback // time, '+0000' ]);
 }
 
 # Favors the Date: header for display and sorting within a thread
 sub msg_datestamp ($;$) {
-        my ($hdr, $fallback) = @_; # PublicInbox::Eml
-        my $ret;
-        $ret = msg_date_only($hdr) and return time_response($ret);
-        $ret = msg_received_at($hdr) and return time_response($ret);
-        time_response([ $fallback // time, '+0000' ]);
+        my ($eml, $fallback) = @_; # PublicInbox::Eml
+        time_response(msg_date_only($eml) // msg_received_at($eml) //
+                        [ $fallback // time, '+0000' ]);
 }
 
 1;
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index f15875e3..cb4bb295 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # bidirectional Message-ID <-> Article Number mapping for the NNTP
@@ -13,20 +13,17 @@ use v5.10.1;
 use DBI;
 use DBD::SQLite;
 use PublicInbox::Over;
-use PublicInbox::Spawn;
-
-sub new {
-        my ($class, $git_dir, $writable) = @_;
-        my $d = "$git_dir/public-inbox";
-        if ($writable && !-d $d && !mkdir $d) {
-                my $err = $!;
-                -d $d or die "$d not created: $err";
-        }
-        new_file($class, "$d/msgmap.sqlite3", $writable);
-}
+use Scalar::Util qw(blessed);
 
 sub new_file {
-        my ($class, $f, $rw) = @_;
+        my ($class, $ibx, $rw) = @_;
+        my $f;
+        if (blessed($ibx)) {
+                $f = $ibx->mm_file;
+                $rw = 2 if $rw && $ibx->{-no_fsync};
+        } else {
+                $f = $ibx;
+        }
         return if !$rw && !-r $f;
 
         my $self = bless { filename => $f }, $class;
@@ -34,10 +31,16 @@ sub new_file {
         if ($rw) {
                 $dbh->begin_work;
                 create_tables($dbh);
-                $self->created_at(time) unless $self->created_at;
+                unless ($self->created_at) {
+                        my $t;
 
-                my $max = $self->max // 0;
-                $self->num_highwater($max);
+                        if (blessed($ibx) &&
+                                -f "$ibx->{inboxdir}/inbox.config.example") {
+                                $t = (stat(_))[9]; # mtime set by "curl -R"
+                        }
+                        $self->created_at($t // time);
+                }
+                $self->num_highwater(max($self));
                 $dbh->commit;
         }
         $self;
@@ -47,9 +50,10 @@ sub new_file {
 sub tmp_clone {
         my ($self, $dir) = @_;
         require File::Temp;
-        my $tmp = "mm_tmp-$$-XXXXXX";
+        my $tmp = "mm_tmp-$$-XXXX";
         my ($fh, $fn) = File::Temp::tempfile($tmp, EXLOCK => 0, DIR => $dir);
-        PublicInbox::Spawn::nodatacow_fd(fileno($fh));
+        require PublicInbox::Syscall;
+        PublicInbox::Syscall::nodatacow_fh($fh);
         $self->{dbh}->sqlite_backup_to_file($fn);
         $tmp = ref($self)->new_file($fn, 2);
         $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
@@ -62,18 +66,15 @@ sub meta_accessor {
         my ($self, $key, $value) = @_;
 
         my $sql = 'SELECT val FROM meta WHERE key = ? LIMIT 1';
-        my $dbh = $self->{dbh};
-        my $prev;
-        defined $value or return $dbh->selectrow_array($sql, undef, $key);
-
-        $prev = $dbh->selectrow_array($sql, undef, $key);
+        my $prev = $self->{dbh}->selectrow_array($sql, undef, $key);
+        $value // return $prev;
 
         if (defined $prev) {
                 $sql = 'UPDATE meta SET val = ? WHERE key = ?';
-                $dbh->do($sql, undef, $value, $key);
+                $self->{dbh}->do($sql, undef, $value, $key);
         } else {
                 $sql = 'INSERT INTO meta (key,val) VALUES (?,?)';
-                $dbh->do($sql, undef, $key, $value);
+                $self->{dbh}->do($sql, undef, $key, $value);
         }
         $prev;
 }
@@ -99,44 +100,40 @@ sub created_at {
 
 sub num_highwater {
         my ($self, $num) = @_;
-        my $high = $self->{num_highwater} ||=
-            $self->meta_accessor('num_highwater');
+        my $high = $self->meta_accessor('num_highwater');
         if (defined($num) && (!defined($high) || ($num > $high))) {
-                $self->{num_highwater} = $num;
+                $high = $num;
                 $self->meta_accessor('num_highwater', $num);
         }
-        $self->{num_highwater};
+        $high
 }
 
 sub mid_insert {
         my ($self, $mid) = @_;
-        my $dbh = $self->{dbh};
-        my $sth = $dbh->prepare_cached(<<'');
+        my $sth = $self->{dbh}->prepare_cached(<<'');
 INSERT INTO msgmap (mid) VALUES (?)
 
         return unless eval { $sth->execute($mid) };
-        my $num = $dbh->last_insert_id(undef, undef, 'msgmap', 'num');
+        my $num = $self->{dbh}->last_insert_id(undef, undef, 'msgmap', 'num');
         $self->num_highwater($num) if defined($num);
         $num;
 }
 
 sub mid_for {
         my ($self, $num) = @_;
-        my $dbh = $self->{dbh};
-        my $sth = $self->{mid_for} ||=
-                $dbh->prepare('SELECT mid FROM msgmap WHERE num = ? LIMIT 1');
-        $sth->bind_param(1, $num);
-        $sth->execute;
+        my $sth = $self->{dbh}->prepare_cached(<<"", undef, 1);
+SELECT mid FROM msgmap WHERE num = ? LIMIT 1
+
+        $sth->execute($num);
         $sth->fetchrow_array;
 }
 
 sub num_for {
         my ($self, $mid) = @_;
-        my $dbh = $self->{dbh};
-        my $sth = $self->{num_for} ||=
-                $dbh->prepare('SELECT num FROM msgmap WHERE mid = ? LIMIT 1');
-        $sth->bind_param(1, $mid);
-        $sth->execute;
+        my $sth = $self->{dbh}->prepare_cached(<<"", undef, 1);
+SELECT num FROM msgmap WHERE mid = ? LIMIT 1
+
+        $sth->execute($mid);
         $sth->fetchrow_array;
 }
 
@@ -144,32 +141,30 @@ sub max {
         my $sth = $_[0]->{dbh}->prepare_cached('SELECT MAX(num) FROM msgmap',
                                                 undef, 1);
         $sth->execute;
-        $sth->fetchrow_array;
+        $sth->fetchrow_array // 0;
 }
 
-sub minmax {
-        # breaking MIN and MAX into separate queries speeds up from 250ms
-        # to around 700us with 2.7million messages.
+sub min {
         my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap',
                                                 undef, 1);
         $sth->execute;
-        ($sth->fetchrow_array, max($_[0]));
+        $sth->fetchrow_array // 0;
+}
+
+sub minmax {
+        # breaking MIN and MAX into separate queries speeds up from 250ms
+        # to around 700us with 2.7million messages.
+        (min($_[0]), max($_[0]));
 }
 
 sub mid_delete {
         my ($self, $mid) = @_;
-        my $dbh = $self->{dbh};
-        my $sth = $dbh->prepare('DELETE FROM msgmap WHERE mid = ?');
-        $sth->bind_param(1, $mid);
-        $sth->execute;
+        $self->{dbh}->do('DELETE FROM msgmap WHERE mid = ?', undef, $mid);
 }
 
 sub num_delete {
         my ($self, $num) = @_;
-        my $dbh = $self->{dbh};
-        my $sth = $dbh->prepare('DELETE FROM msgmap WHERE num = ?');
-        $sth->bind_param(1, $num);
-        $sth->execute;
+        $self->{dbh}->do('DELETE FROM msgmap WHERE num = ?', undef, $num);
 }
 
 sub create_tables {
@@ -190,23 +185,11 @@ CREATE TABLE IF NOT EXISTS meta (
 
 }
 
-# used by NNTP.pm
-sub ids_after {
-        my ($self, $num) = @_;
-        my $ids = $self->{dbh}->selectcol_arrayref(<<'', undef, $$num);
-SELECT num FROM msgmap WHERE num > ?
-ORDER BY num ASC LIMIT 1000
-
-        $$num = $ids->[-1] if @$ids;
-        $ids;
-}
-
 sub msg_range {
         my ($self, $beg, $end, $cols) = @_;
         $cols //= 'num,mid';
-        my $dbh = $self->{dbh};
         my $attr = { Columns => [] };
-        my $mids = $dbh->selectall_arrayref(<<"", $attr, $$beg, $end);
+        my $mids = $self->{dbh}->selectall_arrayref(<<"", $attr, $$beg, $end);
 SELECT $cols FROM msgmap WHERE num >= ? AND num <= ?
 ORDER BY num ASC LIMIT 1000
 
@@ -218,10 +201,9 @@ ORDER BY num ASC LIMIT 1000
 # see scripts/xhdr-num2mid or PublicInbox::Filter::RubyLang for usage
 sub mid_set {
         my ($self, $num, $mid) = @_;
-        my $sth = $self->{mid_set} ||= do {
-                $self->{dbh}->prepare(
-                        'INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)');
-        };
+        my $sth = $self->{dbh}->prepare_cached(<<"");
+INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)
+
         my $result = $sth->execute($num, $mid);
         $self->num_highwater($num) if (defined($result) && $result == 1);
         $result;
@@ -283,21 +265,10 @@ sub skip_artnum {
 
 sub check_inodes {
         my ($self) = @_;
-        # no filename if in-:memory:
-        my $f = $self->{dbh}->sqlite_db_filename // return;
-        if (my @st = stat($f)) { # did st_dev, st_ino change?
-                my $st = pack('dd', $st[0], $st[1]);
-                if ($st ne ($self->{st} // $st)) {
-                        my $tmp = eval { ref($self)->new_file($f) };
-                        if ($@) {
-                                warn "E: DBI->connect($f): $@\n";
-                        } else {
-                                %$self = %$tmp;
-                        }
-                }
-        } else {
-                warn "W: stat $f: $!\n";
-        }
+        $self->{dbh} // return;
+        my $rw = !$self->{dbh}->{ReadOnly};
+        PublicInbox::Over::check_inodes($self);
+        $self->{dbh} //= PublicInbox::Over::dbh_new($self, !$rw);
 }
 
 1;
diff --git a/lib/PublicInbox/MultiGit.pm b/lib/PublicInbox/MultiGit.pm
new file mode 100644
index 00000000..b7691806
--- /dev/null
+++ b/lib/PublicInbox/MultiGit.pm
@@ -0,0 +1,145 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common git alternates + all.git||ALL.git management code
+package PublicInbox::MultiGit;
+use strict;
+use v5.10.1;
+use PublicInbox::Spawn qw(run_die run_qx);
+use PublicInbox::Import;
+use File::Temp 0.19;
+use List::Util qw(max);
+use PublicInbox::IO qw(read_all);
+use autodie qw(chmod close rename);
+
+sub new {
+        my ($cls, $topdir, $all, $epfx) = @_;
+        bless {
+                topdir => $topdir, # inboxdir || extindex.*.topdir
+                all => $all, # all.git or ALL.git
+                epfx => $epfx, # "git" (inbox) or "local" (lei/store)
+        }, $cls;
+}
+
+sub read_alternates {
+        my ($self, $moderef, $prune) = @_;
+        my $objpfx = "$self->{topdir}/$self->{all}/objects/";
+        my $f = "${objpfx}info/alternates";
+        my %alt; # line => score
+        my %seen; # $st_dev\0$st_ino => count
+        my $other = 0;
+        if (open(my $fh, '<', $f)) {
+                my $is_edir = defined($self->{epfx}) ?
+                        qr!\A\Q../../$self->{epfx}\E/([0-9]+)\.git/objects\z! :
+                        undef;
+                $$moderef = (stat($fh))[2] & 07777;
+                for my $rel (split(/^/m, read_all($fh, -s _))) {
+                        chomp(my $dir = $rel);
+                        my $score;
+                        if (defined($is_edir) && $dir =~ $is_edir) {
+                                $score = $1 + 0;
+                                substr($dir, 0, 0) = $objpfx;
+                        } else { # absolute paths, if any (extindex)
+                                $score = --$other;
+                        }
+                        if (my @st = stat($dir)) {
+                                next if $seen{"$st[0]\0$st[1]"}++;
+                                $alt{$rel} = $score;
+                        } else {
+                                warn "W: stat($dir) failed: $! ($f)";
+                                if ($prune) {
+                                        ++$$prune;
+                                } else {
+                                        $alt{$rel} = $score;
+                                }
+                        }
+                }
+        } elsif (!$!{ENOENT}) {
+                die "E: open($f): $!";
+        }
+        (\%alt, \%seen);
+}
+
+sub epoch_dir { "$_[0]->{topdir}/$_[0]->{epfx}" }
+
+sub write_alternates {
+        my ($self, $mode, $alt, @new) = @_;
+        my $all_dir = "$self->{topdir}/$self->{all}";
+        PublicInbox::Import::init_bare($all_dir);
+        my $out = join('', sort { $alt->{$b} <=> $alt->{$a} } keys %$alt);
+        my $info_dir = "$all_dir/objects/info";
+        my $fh = File::Temp->new(TEMPLATE => 'alt-XXXX', DIR => $info_dir);
+        print $fh $out, @new;
+        chmod($mode, $fh);
+        close $fh;
+        rename($fh->filename, "$info_dir/alternates");
+        $fh->unlink_on_destroy(0);
+}
+
+# returns true if new epochs exist
+sub merge_epochs {
+        my ($self, $alt, $seen) = @_;
+        my $epoch_dir = epoch_dir($self);
+        if (opendir my $dh, $epoch_dir) {
+                my $has_new;
+                for my $bn (grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+                        my $rel = "../../$self->{epfx}/$bn/objects\n";
+                        next if exists($alt->{$rel});
+                        if (my @st = stat("$epoch_dir/$bn/objects")) {
+                                next if $seen->{"$st[0]\0$st[1]"}++;
+                                $alt->{$rel} = substr($bn, 0, -4) + 0;
+                                $has_new = 1;
+                        } else {
+                                warn "E: stat($epoch_dir/$bn/objects): $!";
+                        }
+                }
+                $has_new;
+        } else {
+                $!{ENOENT} ? undef : die "opendir($epoch_dir): $!";
+        }
+}
+
+sub fill_alternates {
+        my ($self) = @_;
+        my ($alt, $seen) = read_alternates($self, \(my $mode = 0644));
+        merge_epochs($self, $alt, $seen) and
+                write_alternates($self, $mode, $alt);
+}
+
+sub epoch_cfg_set {
+        my ($self, $epoch_nr) = @_;
+        my $f = epoch_dir($self)."/$epoch_nr.git/config";
+        my $v = "../../$self->{all}/config";
+        if (-r $f) {
+                chomp(my $x = run_qx([qw(git config -f), $f, 'include.path']));
+                return if $x eq $v;
+        }
+        run_die([qw(git config -f), $f, 'include.path', $v ]);
+}
+
+sub add_epoch {
+        my ($self, $epoch_nr) = @_;
+        my $git_dir = epoch_dir($self)."/$epoch_nr.git";
+        my $f = "$git_dir/config";
+        my $existing = -f $f;
+        PublicInbox::Import::init_bare($git_dir);
+        epoch_cfg_set($self, $epoch_nr) unless $existing;
+        fill_alternates($self);
+        $git_dir;
+}
+
+sub git_epochs  {
+        my ($self) = @_;
+        if (opendir(my $dh, epoch_dir($self))) {
+                my @epochs = map {
+                        substr($_, 0, -4) + 0; # drop ".git" suffix
+                } grep(/\A[0-9]+\.git\z/, readdir($dh));
+                wantarray ? sort { $b <=> $a } @epochs : (max(@epochs) // 0);
+        } elsif ($!{ENOENT}) {
+                wantarray ? () : 0;
+        } else {
+                die(epoch_dir($self).": $!");
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/NDC_PP.pm b/lib/PublicInbox/NDC_PP.pm
deleted file mode 100644
index 10a7ee2a..00000000
--- a/lib/PublicInbox/NDC_PP.pm
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# Pure-perl class for Linux non-Inline::C users to disable COW for btrfs
-package PublicInbox::NDC_PP;
-use strict;
-use v5.10.1;
-
-sub nodatacow_dir ($) {
-        my ($path) = @_;
-        open my $mh, '<', '/proc/self/mounts' or return;
-        for (grep(/ btrfs /, <$mh>)) {
-                my (undef, $mnt_path, $type) = split(/ /);
-                next if $type ne 'btrfs'; # in case of false-positive from grep
-
-                # weird chars are escaped as octal
-                $mnt_path =~ s/\\(0[0-9]{2})/chr(oct($1))/egs;
-                $mnt_path .= '/' unless $mnt_path =~ m!/\z!;
-                if (index($path, $mnt_path) == 0) {
-                        # error goes to stderr, but non-fatal for us
-                        system('chattr', '+C', $path);
-                        last;
-                }
-        }
-}
-
-sub nodatacow_fd ($) {
-        my ($fd) = @_;
-        return if $^O ne 'linux';
-        defined(my $path = readlink("/proc/self/fd/$fd")) or return;
-        nodatacow_dir($path);
-}
-
-1;
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 88fe2bb0..603cf094 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -1,39 +1,40 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Each instance of this represents a NNTP client socket
 # fields:
 # nntpd: PublicInbox::NNTPD ref
 # article: per-session current article number
-# ng: PublicInbox::Inbox ref
+# ibx: PublicInbox::Inbox ref
 # long_cb: long_response private data
 package PublicInbox::NNTP;
 use strict;
+use v5.10.1;
 use parent qw(PublicInbox::DS);
 use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
 use PublicInbox::Eml;
 use POSIX qw(strftime);
 use PublicInbox::DS qw(now);
-use Digest::SHA qw(sha1_hex);
+use PublicInbox::SHA qw(sha1_hex);
 use Time::Local qw(timegm timelocal);
 use PublicInbox::GitAsyncCat;
+use PublicInbox::Address;
+
 use constant {
         LINE_MAX => 512, # RFC 977 section 2.3
-        r501 => '501 command syntax error',
-        r502 => '502 Command unavailable',
-        r221 => '221 Header follows',
-        r224 => '224 Overview information follows (multi-line)',
-        r225 =>        '225 Headers follow (multi-line)',
-        r430 => '430 No article with that message-id',
+        r501 => "501 command syntax error\r\n",
+        r502 => "502 Command unavailable\r\n",
+        r221 => "221 Header follows\r\n",
+        r225 =>        "225 Headers follow (multi-line)\r\n",
+        r430 => "430 No article with that message-id\r\n",
 };
-use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
 use Errno qw(EAGAIN);
 my $ONE_MSGID = qr/\A$MID_EXTRACT\z/;
 my @OVERVIEW = qw(Subject From Date Message-ID References);
 my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') .
-                "Xref:full\r\n";
+                "Xref:full\r\n.\r\n";
 my $LIST_HEADERS = join("\r\n", @OVERVIEW,
-                        qw(:bytes :lines Xref To Cc)) . "\r\n";
+                        qw(:bytes :lines Xref To Cc)) . "\r\n.\r\n";
 my $CAPABILITIES = <<"";
 101 Capability list:\r
 VERSION 2\r
@@ -44,32 +45,17 @@ HDR\r
 OVER\r
 COMPRESS DEFLATE\r
 
-sub greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) };
-
-sub new ($$$) {
-        my ($class, $sock, $nntpd) = @_;
-        my $self = bless { nntpd => $nntpd }, $class;
-        my $ev = EPOLLIN;
-        my $wbuf;
-        if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
-                return CORE::close($sock) if $! != EAGAIN;
-                $ev = PublicInbox::TLS::epollbit();
-                $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ];
-        }
-        $self->SUPER::new($sock, $ev | EPOLLONESHOT);
-        if ($wbuf) {
-                $self->{wbuf} = $wbuf;
-        } else {
-                greet($self);
-        }
-        $self->update_idle_time;
-        $self;
+sub do_greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) };
+
+sub new {
+        my ($cls, $sock, $nntpd) = @_;
+        (bless { nntpd => $nntpd }, $cls)->greet($sock)
 }
 
 sub args_ok ($$) {
         my ($cb, $argc) = @_;
         my $tot = prototype $cb;
-        my ($nreq, undef) = split(';', $tot);
+        my ($nreq, undef) = split(/;/, $tot);
         $nreq = ($nreq =~ tr/$//) - 1;
         $tot = ($tot =~ tr/$//) - 1;
         ($argc <= $tot && $argc >= $nreq);
@@ -80,20 +66,17 @@ sub process_line ($$) {
         my ($self, $l) = @_;
         my ($req, @args) = split(/[ \t]+/, $l);
         return 1 unless defined($req); # skip blank line
-        $req = $self->can('cmd_'.lc($req));
-        return res($self, '500 command not recognized') unless $req;
-        return res($self, r501) unless args_ok($req, scalar @args);
-
+        $req = $self->can('cmd_'.lc($req)) //
+                return $self->write(\"500 command not recognized\r\n");
+        return $self->write(\r501) unless args_ok($req, scalar @args);
         my $res = eval { $req->($self, @args) };
         my $err = $@;
         if ($err && $self->{sock}) {
-                local $/ = "\n";
-                chomp($l);
-                err($self, 'error from: %s (%s)', $l, $err);
-                $res = '503 program fault - command not performed';
+                $l =~ s/\r?\n//s;
+                warn("error from: $l ($err)\n");
+                $res = \"503 program fault - command not performed\r\n";
         }
-        return 0 unless defined $res;
-        res($self, $res);
+        defined($res) ? $self->write($res) : 0;
 }
 
 # The keyword argument is not used (rfc3977 5.2.2)
@@ -101,65 +84,86 @@ sub cmd_capabilities ($;$) {
         my ($self, undef) = @_;
         my $res = $CAPABILITIES;
         if (!$self->{sock}->can('accept_SSL') &&
-                        $self->{nntpd}->{accept_tls}) {
+                        $self->{nntpd}->{ssl_ctx_opt}) {
                 $res .= "STARTTLS\r\n";
         }
-        $res .= '.';
+        $res .= ".\r\n";
 }
 
 sub cmd_mode ($$) {
         my ($self, $arg) = @_;
-        $arg = uc $arg;
-        return r501 unless $arg eq 'READER';
-        '201 Posting prohibited';
+        uc($arg) eq 'READER' ? \"201 Posting prohibited\r\n" : \r501;
 }
 
-sub cmd_slave ($) { '202 slave status noted' }
+sub cmd_slave ($) { \"202 slave status noted\r\n" }
 
 sub cmd_xgtitle ($;$) {
         my ($self, $wildmat) = @_;
-        more($self, '282 list of groups and descriptions follows');
+        $self->msg_more("282 list of groups and descriptions follows\r\n");
         list_newsgroups($self, $wildmat);
-        '.'
 }
 
-sub list_overview_fmt ($) {
-        my ($self) = @_;
-        $self->msg_more($OVERVIEW_FMT);
+sub list_overview_fmt ($) { $OVERVIEW_FMT }
+
+sub list_headers ($;$) { $LIST_HEADERS }
+
+sub names2ibx ($;$) {
+        my ($self, $names) = @_;
+        my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+        if ($names) { # modify arrayref in-place
+                $_ = $groups->{$_} for @$names;
+                $names; # now an arrayref of ibx
+        } else {
+                my @ret = map { $groups->{$_} } @{$self->{nntpd}->{groupnames}};
+                \@ret;
+        }
 }
 
-sub list_headers ($;$) {
-        my ($self) = @_;
-        $self->msg_more($LIST_HEADERS);
+sub list_active_i { # "LIST ACTIVE" and also just "LIST" (no args)
+        my ($self, $ibxs) = @_;
+        my @window = splice(@$ibxs, 0, 1000);
+        emit_group_lines($self, \@window);
+        scalar @$ibxs; # continue if there's more
 }
 
-sub list_active ($;$) {
+sub list_active ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                group_line($self, $ng);
-        }
+        my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}});
+        $self->long_response(\&list_active_i, names2ibx($self, \@names));
 }
 
-sub list_active_times ($;$) {
+sub list_active_times_i {
+        my ($self, $ibxs) = @_;
+        my @window = splice(@$ibxs, 0, 1000);
+        $self->msg_more(join('', map {
+                my $c = eval { $_->uidvalidity } // time;
+                "$_->{newsgroup} $c <$_->{-primary_address}>\r\n";
+        } @window));
+        scalar @$ibxs; # continue if there's more
+}
+
+sub list_active_times ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                my $c = eval { $ng->mm->created_at } || time;
-                more($self, "$ng->{newsgroup} $c $ng->{-primary_address}");
-        }
+        my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}});
+        $self->long_response(\&list_active_times_i, names2ibx($self, \@names));
 }
 
-sub list_newsgroups ($;$) {
+sub list_newsgroups_i {
+        my ($self, $ibxs) = @_;
+        my @window = splice(@$ibxs, 0, 1000);
+        $self->msg_more(join('', map {
+                "$_->{newsgroup} ".$_->description."\r\n"
+        } @window));
+        scalar @$ibxs; # continue if there's more
+}
+
+sub list_newsgroups ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                my $d = $ng->description;
-                more($self, "$ng->{newsgroup} $d");
-        }
+        my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}});
+        $self->long_response(\&list_newsgroups_i, names2ibx($self, \@names));
 }
 
 # LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported
@@ -168,33 +172,31 @@ sub cmd_list ($;$$) {
         if (scalar @args) {
                 my $arg = shift @args;
                 $arg =~ tr/A-Z./a-z_/;
+                my $ret = $arg eq 'active';
                 $arg = "list_$arg";
                 $arg = $self->can($arg);
                 return r501 unless $arg && args_ok($arg, scalar @args);
-                more($self, '215 information follows');
+                $self->msg_more("215 information follows\r\n");
                 $arg->($self, @args);
         } else {
-                more($self, '215 list of newsgroups follows');
-                foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                        group_line($self, $ng);
-                }
+                $self->msg_more("215 list of newsgroups follows\r\n");
+                $self->long_response(\&list_active_i, names2ibx($self));
         }
-        '.'
 }
 
 sub listgroup_range_i {
         my ($self, $beg, $end) = @_;
-        my $r = $self->{ng}->mm->msg_range($beg, $end, 'num');
+        my $r = $self->{ibx}->mm(1)->msg_range($beg, $end, 'num');
         scalar(@$r) or return;
-        more($self, join("\r\n", map { $_->[0] } @$r));
+        $self->msg_more(join('', map { "$_->[0]\r\n" } @$r));
         1;
 }
 
 sub listgroup_all_i {
         my ($self, $num) = @_;
-        my $ary = $self->{ng}->mm->ids_after($num);
+        my $ary = $self->{ibx}->over(1)->ids_after($num);
         scalar(@$ary) or return;
-        more($self, join("\r\n", @$ary));
+        $self->msg_more(join("\r\n", @$ary, ''));
         1;
 }
 
@@ -202,16 +204,16 @@ sub cmd_listgroup ($;$$) {
         my ($self, $group, $range) = @_;
         if (defined $group) {
                 my $res = cmd_group($self, $group);
-                return $res if ($res !~ /\A211 /);
-                more($self, $res);
+                return $res if ref($res); # error if const strref
+                $self->msg_more($res);
         }
-        $self->{ng} or return '412 no newsgroup selected';
+        $self->{ibx} or return \"412 no newsgroup selected\r\n";
         if (defined $range) {
                 my $r = get_range($self, $range);
                 return $r unless ref $r;
-                long_response($self, \&listgroup_range_i, @$r);
+                $self->long_response(\&listgroup_range_i, @$r);
         } else { # grab every article number
-                long_response($self, \&listgroup_all_i, \(my $num = 0));
+                $self->long_response(\&listgroup_all_i, \(my $num = 0));
         }
 }
 
@@ -223,7 +225,7 @@ sub parse_time ($$;$) {
                 $gmt = 1;
         }
         my ($YYYY, $MM, $DD);
-        if (bytes::length($date) == 8) { # RFC 3977 allows YYYYMMDD
+        if (length($date) == 8) { # RFC 3977 allows YYYYMMDD
                 ($YYYY, $MM, $DD) = unpack('A4A2A2', $date);
         } else { # legacy clients send YYMMDD
                 my $YY;
@@ -241,10 +243,27 @@ sub parse_time ($$;$) {
         }
 }
 
-sub group_line ($$) {
-        my ($self, $ng) = @_;
-        my ($min, $max) = $ng->mm->minmax;
-        more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max;
+sub emit_group_lines {
+        my ($self, $ibxs) = @_;
+        my ($min, $max);
+        my $ALL = $self->{nntpd}->{pi_cfg}->ALL;
+        my $misc = $ALL->misc if $ALL;
+        my $buf = '';
+        for my $ibx (@$ibxs) {
+                $misc ? $misc->inbox_data($ibx) :
+                        delete(@$ibx{qw(-art_min -art_max)});
+                ($min, $max) = ($ibx->art_min, $ibx->art_max);
+                $buf .= "$ibx->{newsgroup} $max $min n\r\n";
+        }
+        $self->msg_more($buf);
+}
+
+sub newgroups_i {
+        my ($self, $ts, $ibxs) = @_;
+        my @window = splice(@$ibxs, 0, 1000);
+        @window = grep { (eval { $_->uidvalidity } // 0) > $ts } @window;
+        emit_group_lines($self, \@window);
+        scalar @$ibxs; # any more?
 }
 
 sub cmd_newgroups ($$$;$$) {
@@ -253,13 +272,8 @@ sub cmd_newgroups ($$$;$$) {
         return r501 if $@;
 
         # TODO dists
-        more($self, '231 list of new newsgroups follows');
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                my $c = eval { $ng->mm->created_at } || 0;
-                next unless $c > $ts;
-                group_line($self, $ng);
-        }
-        '.'
+        $self->msg_more("231 list of new newsgroups follows\r\n");
+        $self->long_response(\&newgroups_i, $ts, names2ibx($self));
 }
 
 sub wildmat2re (;$) {
@@ -294,77 +308,69 @@ sub ngpat2re (;$) {
 }
 
 sub newnews_i {
-        my ($self, $overs, $ts, $prev) = @_;
-        my $over = $overs->[0];
-        my $msgs = $over->query_ts($ts, $$prev);
-        if (scalar @$msgs) {
-                more($self, '<' .
-                        join(">\r\n<", map { $_->{mid} } @$msgs ).
-                        '>');
-                $$prev = $msgs->[-1]->{num};
-        } else {
-                shift @$overs;
-                if (@$overs) { # continue onto next newsgroup
-                        $$prev = 0;
-                        return 1;
-                } else { # break out of the long response.
-                        return;
+        my ($self, $ibxs, $ts, $prev) = @_;
+        if (my $over = $ibxs->[0]->over) {
+                my $msgs = $over->query_ts($ts, $$prev);
+                if (scalar @$msgs) {
+                        $self->msg_more(join('', map {
+                                                "<$_->{mid}>\r\n";
+                                        } @$msgs));
+                        $$prev = $msgs->[-1]->{num};
+                        return 1; # continue on current group
                 }
         }
+        shift @$ibxs;
+        if (@$ibxs) { # continue onto next newsgroup
+                $$prev = 0;
+                1;
+        } else { # all done, break out of the long_response
+                undef;
+        }
 }
 
 sub cmd_newnews ($$$$;$$) {
         my ($self, $newsgroups, $date, $time, $gmt, $dists) = @_;
         my $ts = eval { parse_time($date, $time, $gmt) };
         return r501 if $@;
-        more($self, '230 list of new articles by message-id follows');
-        my ($keep, $skip) = split('!', $newsgroups, 2);
+        $self->msg_more("230 list of new articles by message-id follows\r\n");
+        my ($keep, $skip) = split(/!/, $newsgroups, 2);
         ngpat2re($keep);
         ngpat2re($skip);
-        my @overs;
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $keep or next;
-                $ng->{newsgroup} =~ $skip and next;
-                my $over = $ng->over or next;
-                push @overs, $over;
-        };
-        return '.' unless @overs;
-
+        my @names = grep(/$keep/, @{$self->{nntpd}->{groupnames}});
+        @names = grep(!/$skip/, @names);
+        return \".\r\n" unless scalar(@names);
         my $prev = 0;
-        long_response($self, \&newnews_i, \@overs, $ts, \$prev);
+        $self->long_response(\&newnews_i, names2ibx($self, \@names),
+                                $ts, \$prev);
 }
 
 sub cmd_group ($$) {
         my ($self, $group) = @_;
-        my $no_such = '411 no such news group';
         my $nntpd = $self->{nntpd};
-        my $ng = $nntpd->{groups}->{$group} or return $no_such;
+        my $ibx = $nntpd->{pi_cfg}->{-by_newsgroup}->{$group} or
+                return \"411 no such news group\r\n";
         $nntpd->idler_start;
 
-        $self->{ng} = $ng;
-        my ($min, $max) = $ng->mm->minmax;
-        $min ||= 0;
-        $max ||= 0;
+        $self->{ibx} = $ibx;
+        my ($min, $max) = $ibx->mm(1)->minmax;
         $self->{article} = $min;
         my $est_size = $max - $min;
-        "211 $est_size $min $max $group";
+        "211 $est_size $min $max $group\r\n";
 }
 
 sub article_adj ($$) {
         my ($self, $off) = @_;
-        my $ng = $self->{ng} or return '412 no newsgroup selected';
-
-        my $n = $self->{article};
-        defined $n or return '420 no current article has been selected';
+        my $ibx = $self->{ibx} // return \"412 no newsgroup selected\r\n";
+        my $n = $self->{article} //
+                return \"420 no current article has been selected\r\n";
 
         $n += $off;
-        my $mid = $ng->mm->mid_for($n);
-        unless ($mid) {
+        my $mid = $ibx->mm(1)->mid_for($n) // do {
                 $n = $off > 0 ? 'next' : 'previous';
-                return "421 no $n article in this group";
-        }
+                return "421 no $n article in this group\r\n";
+        };
         $self->{article} = $n;
-        "223 $n <$mid> article retrieved - request text separately";
+        "223 $n <$mid> article retrieved - request text separately\r\n";
 }
 
 sub cmd_next ($) { article_adj($_[0], 1) }
@@ -374,39 +380,53 @@ sub cmd_last ($) { article_adj($_[0], -1) }
 # the single-point-of-failure a single server provides.
 sub cmd_post ($) {
         my ($self) = @_;
-        my $ng = $self->{ng};
-        $ng ? "440 mailto:$ng->{-primary_address} to post"
-                : '440 posting not allowed'
+        my $ibx = $self->{ibx};
+        $ibx ? "440 mailto:$ibx->{-primary_address} to post\r\n"
+                : \"440 posting not allowed\r\n"
 }
 
 sub cmd_quit ($) {
         my ($self) = @_;
-        res($self, '205 closing connection - goodbye!');
+        $self->write(\"205 closing connection - goodbye!\r\n");
         $self->shutdn;
         undef;
 }
 
-sub header_append ($$$) {
-        my ($hdr, $k, $v) = @_;
-        my @v = $hdr->header_raw($k);
-        foreach (@v) {
-                return if $v eq $_;
+sub xref_by_tc ($$$) {
+        my ($xref, $pi_cfg, $smsg) = @_;
+        my $by_addr = $pi_cfg->{-by_addr};
+        my $mid = $smsg->{mid};
+        for my $f (qw(to cc)) {
+                my @ibxs = map {
+                        $by_addr->{lc($_)} // ()
+                } (PublicInbox::Address::emails($smsg->{$f} // ''));
+                for my $ibx (@ibxs) {
+                        $xref->{$ibx->{newsgroup}} //=
+                                                $ibx->mm(1)->num_for($mid);
+                }
         }
-        $hdr->header_set($k, @v, $v);
 }
 
-sub xref ($$$$) {
-        my ($self, $ng, $n, $mid) = @_;
-        my $ret = $self->{nntpd}->{servername} . " $ng->{newsgroup}:$n";
-
-        # num_for is pretty cheap and sometimes we'll lookup the existence
-        # of an article without getting even the OVER info.  In other words,
-        # I'm not sure if its worth optimizing by scanning To:/Cc: and
-        # PublicInbox::ExtMsg on the PSGI end is just as expensive
-        foreach my $other (@{$self->{nntpd}->{grouplist}}) {
-                next if $ng eq $other;
-                my $num = eval { $other->mm->num_for($mid) } or next;
-                $ret .= " $other->{newsgroup}:$num";
+sub xref ($$$) {
+        my ($self, $cur_ibx, $smsg) = @_;
+        my $nntpd = $self->{nntpd};
+        my $cur_ng = $cur_ibx->{newsgroup};
+        my $xref;
+        if (my $ALL = $nntpd->{pi_cfg}->ALL) {
+                $xref = $ALL->nntp_xref_for($cur_ibx, $smsg);
+                xref_by_tc($xref, $nntpd->{pi_cfg}, $smsg);
+        } else { # slow path
+                $xref = { $cur_ng => $smsg->{num} };
+                my $mid = $smsg->{mid};
+                for my $ibx (values %{$nntpd->{pi_cfg}->{-by_newsgroup}}) {
+                        $xref->{$ibx->{newsgroup}} //=
+                                                 $ibx->mm(1)->num_for($mid);
+                }
+        }
+        my $ret = "$nntpd->{servername} $cur_ng:".delete($xref->{$cur_ng});
+        for my $ng (sort keys %$xref) {
+                my $num = $xref->{$ng} // next;
+                $ret .= " $ng:$num";
         }
         $ret;
 }
@@ -415,10 +435,6 @@ sub set_nntp_headers ($$) {
         my ($hdr, $smsg) = @_;
         my ($mid) = $smsg->{mid};
 
-        # why? leafnode requires a Path: header for some inexplicable
-        # reason.  We'll fake the shortest one possible.
-        $hdr->header_set('Path', 'y');
-
         # leafnode (and maybe other NNTP clients) have trouble dealing
         # with v2 messages which have multiple Message-IDs (either due
         # to our own content-based dedupe or buggy git-send-email versions).
@@ -432,68 +448,58 @@ sub set_nntp_headers ($$) {
                 $hdr->header_set('X-Alt-Message-ID', @alt);
         }
 
-        # clobber some
+        # clobber some existing headers
         my $ibx = $smsg->{-ibx};
-        my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid);
+        my $xref = xref($smsg->{nntp}, $ibx, $smsg);
         $hdr->header_set('Xref', $xref);
-        $xref =~ s/:[0-9]+//g;
-        $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]);
-        header_append($hdr, 'List-Post', "<mailto:$ibx->{-primary_address}>");
-        if (my $url = $ibx->base_url) {
-                $mid = mid_escape($mid);
-                header_append($hdr, 'Archived-At', "<$url$mid/>");
-                header_append($hdr, 'List-Archive', "<$url>");
-        }
+
+        # RFC 5536 3.1.4
+        my ($server_name, $newsgroups) = split(/ /, $xref, 2);
+        $newsgroups =~ s/:[0-9]+\b//g; # drop NNTP article numbers
+        $newsgroups =~ tr/ /,/;
+        $hdr->header_set('Newsgroups', $newsgroups);
+
+        # *something* here is required for leafnode, try to follow
+        # RFC 5536 3.1.5...
+        $hdr->header_set('Path', $server_name . '!not-for-mail');
 }
 
 sub art_lookup ($$$) {
         my ($self, $art, $code) = @_;
-        my $ng = $self->{ng};
-        my ($n, $mid);
+        my ($ibx, $n);
         my $err;
         if (defined $art) {
                 if ($art =~ /\A[0-9]+\z/) {
-                        $err = '423 no such article number in this group';
+                        $err = \"423 no such article number in this group\r\n";
                         $n = int($art);
-                        goto find_mid;
+                        goto find_ibx;
                 } elsif ($art =~ $ONE_MSGID) {
-                        $mid = $1;
-                        $err = r430;
-                        $n = $ng->mm->num_for($mid) if $ng;
-                        goto found if defined $n;
-                        foreach my $g (values %{$self->{nntpd}->{groups}}) {
-                                $n = $g->mm->num_for($mid);
-                                if (defined $n) {
-                                        $ng = $g;
-                                        goto found;
-                                }
-                        }
-                        return $err;
+                        ($ibx, $n) = mid_lookup($self, $1);
+                        goto found if $ibx;
+                        return \r430;
                 } else {
-                        return r501;
+                        return \r501;
                 }
         } else {
-                $err = '420 no current article has been selected';
-                $n = $self->{article};
-                defined $n or return $err;
-find_mid:
-                $ng or return '412 no newsgroup has been selected';
-                $mid = $ng->mm->mid_for($n);
-                defined $mid or return $err;
+                $err = \"420 no current article has been selected\r\n";
+                $n = $self->{article} // return $err;
+find_ibx:
+                $ibx = $self->{ibx} or
+                        return \"412 no newsgroup has been selected\r\n";
         }
 found:
-        my $smsg = $ng->over->get_art($n) or return $err;
-        $smsg->{-ibx} = $ng;
+        my $smsg = $ibx->over(1)->get_art($n) or return $err;
+        $smsg->{-ibx} = $ibx;
         if ($code == 223) { # STAT
                 set_art($self, $n);
                 "223 $n <$smsg->{mid}> article retrieved - " .
-                        "request text separately";
+                        "request text separately\r\n";
         } else { # HEAD | BODY | ARTICLE
                 $smsg->{nntp} = $self;
                 $smsg->{nntp_code} = $code;
                 set_art($self, $art);
                 # this dereferences to `undef'
-                ${git_async_cat($ng->git, $smsg->{blob}, \&blob_cb, $smsg)};
+                ${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)};
         }
 }
 
@@ -503,7 +509,7 @@ sub msg_body_write ($$) {
         # these can momentarily double the memory consumption :<
         $$msg =~ s/^\./../smg;
         $$msg =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
-        $$msg .= "\r\n" unless $$msg =~ /\r\n\z/s;
+        $$msg .= "\r\n" unless substr($$msg, -2, 2) eq "\r\n";
         $self->msg_more($$msg);
 }
 
@@ -517,8 +523,7 @@ sub msg_hdr_write ($$) {
         set_nntp_headers($eml, $smsg);
 
         my $hdr = $eml->{hdr} // \(my $x = '');
-        # fixup old bug from import (pre-a0c07cba0e5d8b6a)
-        $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+        PublicInbox::Eml::strip_from($$hdr);
         $$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
         # for leafnode compatibility, we need to ensure Message-ID headers
@@ -527,11 +532,15 @@ sub msg_hdr_write ($$) {
         $smsg->{nntp}->msg_more($$hdr);
 }
 
-sub blob_cb { # called by git->cat_async via git_async_cat
+sub blob_cb { # called by git->cat_async via ibx_async_cat
         my ($bref, $oid, $type, $size, $smsg) = @_;
         my $self = $smsg->{nntp};
         my $code = $smsg->{nntp_code};
-        if (!defined($oid)) {
+        if (!defined($type)) {
+                warn "E: git aborted on $oid / $smsg->{blob} ".
+                        $self->{-ibx}->{inboxdir};
+                return $self->close;
+        } elsif ($type ne 'blob') {
                 # it's possible to have TOCTOU if an admin runs
                 # public-inbox-(edit|purge), just move onto the next message
                 warn "E: $smsg->{blob} missing in $smsg->{-ibx}->{inboxdir}\n";
@@ -543,21 +552,21 @@ sub blob_cb { # called by git->cat_async via git_async_cat
         my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - ";
         my $eml = PublicInbox::Eml->new($bref);
         if ($code == 220) {
-                more($self, $r .= 'head and body follow');
+                $self->msg_more($r .= "head and body follow\r\n");
                 msg_hdr_write($eml, $smsg);
                 $self->msg_more("\r\n");
                 msg_body_write($self, $bref);
         } elsif ($code == 221) {
-                more($self, $r .= 'head follows');
+                $self->msg_more($r .= "head follows\r\n");
                 msg_hdr_write($eml, $smsg);
         } elsif ($code == 222) {
-                more($self, $r .= 'body follows');
+                $self->msg_more($r .= "body follows\r\n");
                 msg_body_write($self, $bref);
         } else {
                 $self->close;
                 die "BUG: bad code: $r";
         }
-        $self->write(\".\r\n"); # flushes (includes ->zflush)
+        $self->write(\".\r\n"); # flushes (includes ->dflush)
         $self->requeue;
 }
 
@@ -581,22 +590,20 @@ sub cmd_stat ($;$) {
         art_lookup($self, $art, 223); # art may be msgid
 }
 
-sub cmd_ihave ($) { '435 article not wanted - do not send it' }
+sub cmd_ihave ($) { \"435 article not wanted - do not send it\r\n" }
 
-sub cmd_date ($) { '111 '.strftime('%Y%m%d%H%M%S', gmtime(time)) }
+sub cmd_date ($) { '111 '.strftime('%Y%m%d%H%M%S', gmtime(time))."\r\n" }
 
-sub cmd_help ($) {
-        my ($self) = @_;
-        more($self, '100 help text follows');
-        '.'
-}
+sub cmd_help ($) { \"100 help text follows\r\n.\r\n" }
 
+# returns a ref on success
 sub get_range ($$) {
         my ($self, $range) = @_;
-        my $ng = $self->{ng} or return '412 no news group has been selected';
-        defined $range or return '420 No article(s) selected';
+        my $ibx = $self->{ibx} //
+                return "412 no news group has been selected\r\n";
+        $range // return "420 No article(s) selected\r\n";
         my ($beg, $end);
-        my ($min, $max) = $ng->mm->minmax;
+        my ($min, $max) = $ibx->mm(1)->minmax;
         if ($range =~ /\A([0-9]+)\z/) {
                 $beg = $end = $1;
         } elsif ($range =~ /\A([0-9]+)-\z/) {
@@ -608,67 +615,16 @@ sub get_range ($$) {
         }
         $beg = $min if ($beg < $min);
         $end = $max if ($end > $max);
-        return '420 No article(s) selected' if ($beg > $end);
-        [ \$beg, $end ];
+        $beg > $end ? "420 No article(s) selected\r\n" : [ \$beg, $end ];
 }
 
-sub long_step {
-        my ($self) = @_;
-        # wbuf is unset or empty, here; {long} may add to it
-        my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
-        my $more = eval { $cb->($self, @args) };
-        if ($@ || !$self->{sock}) { # something bad happened...
-                delete $self->{long_cb};
-                my $elapsed = now() - $t0;
-                if ($@) {
-                        err($self,
-                            "%s during long response[$fd] - %0.6f",
-                            $@, $elapsed);
-                }
-                out($self, " deferred[$fd] aborted - %0.6f", $elapsed);
-                $self->close;
-        } elsif ($more) { # $self->{wbuf}:
-                $self->update_idle_time;
-
-                # COMPRESS users all share the same DEFLATE context.
-                # Flush it here to ensure clients don't see
-                # each other's data
-                $self->zflush;
-
-                # no recursion, schedule another call ASAP, but only after
-                # all pending writes are done.  autovivify wbuf:
-                my $new_size = push(@{$self->{wbuf}}, \&long_step);
-
-                # wbuf may be populated by $cb, no need to rearm if so:
-                $self->requeue if $new_size == 1;
-        } else { # all done!
-                delete $self->{long_cb};
-                res($self, '.');
-                my $elapsed = now() - $t0;
-                my $fd = fileno($self->{sock});
-                out($self, " deferred[$fd] done - %0.6f", $elapsed);
-                my $wbuf = $self->{wbuf}; # do NOT autovivify
-                $self->requeue unless $wbuf && @$wbuf;
-        }
-}
-
-sub long_response ($$;@) {
-        my ($self, $cb, @args) = @_; # cb returns true if more, false if done
-
-        my $sock = $self->{sock} or return;
-        # make sure we disable reading during a long response,
-        # clients should not be sending us stuff and making us do more
-        # work while we are stream a response to them
-        $self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
-        long_step($self); # kick off!
-        undef;
-}
+sub long_response_done { $_[0]->write(\".\r\n") } # overrides superclass
 
 sub hdr_msgid_range_i {
         my ($self, $beg, $end) = @_;
-        my $r = $self->{ng}->mm->msg_range($beg, $end);
+        my $r = $self->{ibx}->mm(1)->msg_range($beg, $end);
         @$r or return;
-        more($self, join("\r\n", map { "$_->[0] <$_->[1]>" } @$r));
+        $self->msg_more(join('', map { "$_->[0] <$_->[1]>\r\n" } @$r));
         1;
 }
 
@@ -676,42 +632,68 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull.
         my ($self, $xhdr, $range) = @_;
 
         if (defined $range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 return r430 unless $n;
-                hdr_mid_response($self, $xhdr, $ng, $n, $range, $range);
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range, $range);
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
                 return $r unless ref $r;
-                more($self, $xhdr ? r221 : r225);
-                long_response($self, \&hdr_msgid_range_i, @$r);
+                $self->msg_more($xhdr ? r221 : r225);
+                $self->long_response(\&hdr_msgid_range_i, @$r);
         }
 }
 
 sub mid_lookup ($$) {
         my ($self, $mid) = @_;
-        my $self_ng = $self->{ng};
-        if ($self_ng) {
-                my $n = $self_ng->mm->num_for($mid);
-                return ($self_ng, $n) if defined $n;
-        }
-        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
-                next if defined $self_ng && $ng eq $self_ng;
-                my $n = $ng->mm->num_for($mid);
-                return ($ng, $n) if defined $n;
+        my $cur_ibx = $self->{ibx};
+        if ($cur_ibx) {
+                my $n = $cur_ibx->mm(1)->num_for($mid);
+                return ($cur_ibx, $n) if defined $n;
+        }
+        my $pi_cfg = $self->{nntpd}->{pi_cfg};
+        if (my $ALL = $pi_cfg->ALL) {
+                my ($id, $prev);
+                while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+                        my $xr3 = $ALL->over->get_xref3($smsg->{num});
+                        if (my @x = grep(/:$smsg->{blob}\z/, @$xr3)) {
+                                my ($ngname, $xnum) = split(/:/, $x[0]);
+                                my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+                                return ($ibx, $xnum) if $ibx;
+                                # fall through to trying all xref3s
+                        } else {
+                                warn <<EOF;
+W: xref3 missing for <$mid> ($smsg->{blob}) in $ALL->{topdir}, -extindex bug?
+EOF
+                        }
+                        # try all xref3s
+                        for my $x (@$xr3) {
+                                my ($ngname, $xnum) = split(/:/, $x);
+                                my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+                                return ($ibx, $xnum) if $ibx;
+                                warn "W: `$ngname' does not exist for #$xnum\n";
+                        }
+                }
+                # no warning here, $mid is just invalid
+        } else { # slow path for non-ALL users
+                for my $ibx (values %{$pi_cfg->{-by_newsgroup}}) {
+                        next if defined $cur_ibx && $ibx eq $cur_ibx;
+                        my $n = $ibx->mm(1)->num_for($mid);
+                        return ($ibx, $n) if defined $n;
+                }
         }
         (undef, undef);
 }
 
 sub xref_range_i {
         my ($self, $beg, $end) = @_;
-        my $ng = $self->{ng};
-        my $r = $ng->mm->msg_range($beg, $end);
-        @$r or return;
-        more($self, join("\r\n", map {
-                my $num = $_->[0];
-                "$num ".xref($self, $ng, $num, $_->[1]);
-        } @$r));
+        my $ibx = $self->{ibx};
+        my $msgs = $ibx->over(1)->query_xover($$beg, $end);
+        scalar(@$msgs) or return;
+        $$beg = $msgs->[-1]->{num} + 1;
+        $self->msg_more(join('', map {
+                "$_->{num} ".xref($self, $ibx, $_) . "\r\n";
+        } @$msgs));
         1;
 }
 
@@ -720,30 +702,30 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin
 
         if (defined $range && $range =~ $ONE_MSGID) {
                 my $mid = $1;
-                my ($ng, $n) = mid_lookup($self, $mid);
+                my ($ibx, $n) = mid_lookup($self, $mid);
                 return r430 unless $n;
-                hdr_mid_response($self, $xhdr, $ng, $n, $range,
-                                xref($self, $ng, $n, $mid));
+                my $smsg = $ibx->over(1)->get_art($n) or return;
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range,
+                                xref($self, $ibx, $smsg));
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
                 return $r unless ref $r;
-                more($self, $xhdr ? r221 : r225);
-                long_response($self, \&xref_range_i, @$r);
+                $self->msg_more($xhdr ? r221 : r225);
+                $self->long_response(\&xref_range_i, @$r);
         }
 }
 
 sub over_header_for {
-        my ($over, $num, $field) = @_;
-        my $smsg = $over->get_art($num) or return;
+        my ($ibx, $num, $field) = @_;
+        my $smsg = $ibx->over(1)->get_art($num) or return;
         return PublicInbox::Smsg::date($smsg) if $field eq 'date';
         $smsg->{$field};
 }
 
 sub smsg_range_i {
         my ($self, $beg, $end, $field) = @_;
-        my $over = $self->{ng}->over;
-        my $msgs = $over->query_xover($$beg, $end);
+        my $msgs = $self->{ibx}->over(1)->query_xover($$beg, $end);
         scalar(@$msgs) or return;
         my $tmp = '';
 
@@ -765,16 +747,16 @@ sub smsg_range_i {
 sub hdr_smsg ($$$$) {
         my ($self, $xhdr, $field, $range) = @_;
         if (defined $range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 return r430 unless defined $n;
-                my $v = over_header_for($ng->over, $n, $field);
-                hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
+                my $v = over_header_for($ibx, $n, $field);
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range, $v);
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
                 return $r unless ref $r;
-                more($self, $xhdr ? r221 : r225);
-                long_response($self, \&smsg_range_i, @$r, $field);
+                $self->msg_more($xhdr ? r221 : r225);
+                $self->long_response(\&smsg_range_i, @$r, $field);
         }
 }
 
@@ -791,7 +773,7 @@ sub do_hdr ($$$;$) {
         } elsif ($sub =~ /\A:(bytes|lines)\z/) {
                 hdr_smsg($self, $xhdr, $1, $range);
         } else {
-                $xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header";
+                $xhdr ? (r221.".\r\n") : "503 HDR not permitted on $header\r\n";
         }
 }
 
@@ -808,57 +790,50 @@ sub cmd_xhdr ($$;$) {
 }
 
 sub hdr_mid_prefix ($$$$$) {
-        my ($self, $xhdr, $ng, $n, $mid) = @_;
+        my ($self, $xhdr, $ibx, $n, $mid) = @_;
         return $mid if $xhdr;
 
         # HDR for RFC 3977 users
-        if (my $self_ng = $self->{ng}) {
-                ($self_ng eq $ng) ? $n : '0';
+        if (my $cur_ibx = $self->{ibx}) {
+                ($cur_ibx eq $ibx) ? $n : '0';
         } else {
                 '0';
         }
 }
 
 sub hdr_mid_response ($$$$$$) {
-        my ($self, $xhdr, $ng, $n, $mid, $v) = @_;
-        my $res = '';
-        if ($xhdr) {
-                $res .= r221 . "\r\n";
-                $res .= "$mid $v\r\n";
-        } else {
-                $res .= r225 . "\r\n";
-                my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
-                $res .= "$pfx $v\r\n";
-        }
-        res($self, $res .= '.');
+        my ($self, $xhdr, $ibx, $n, $mid, $v) = @_;
+        $self->write(($xhdr ? r221.$mid :
+                   r225.hdr_mid_prefix($self, $xhdr, $ibx, $n, $mid)) .
+                " $v\r\n.\r\n");
         undef;
 }
 
 sub xrover_i {
         my ($self, $beg, $end) = @_;
-        my $h = over_header_for($self->{ng}->over, $$beg, 'references');
-        more($self, "$$beg $h") if defined($h);
+        my $h = over_header_for($self->{ibx}, $$beg, 'references');
+        $self->msg_more("$$beg $h\r\n") if defined($h);
         $$beg++ < $end;
 }
 
 sub cmd_xrover ($;$) {
         my ($self, $range) = @_;
-        my $ng = $self->{ng} or return '412 no newsgroup selected';
+        my $ibx = $self->{ibx} or return \"412 no newsgroup selected\r\n";
         (defined $range && $range =~ /[<>]/) and
-                return '420 No article(s) selected'; # no message IDs
+                return \"420 No article(s) selected\r\n"; # no message IDs
 
         $range = $self->{article} unless defined $range;
         my $r = get_range($self, $range);
         return $r unless ref $r;
-        more($self, '224 Overview information follows');
-        long_response($self, \&xrover_i, @$r);
+        $self->msg_more("224 Overview information follows\r\n");
+        $self->long_response(\&xrover_i, @$r);
 }
 
-sub over_line ($$$$) {
-        my ($self, $ng, $num, $smsg) = @_;
+sub over_line ($$$) {
+        my ($self, $ibx, $smsg) = @_;
         # n.b. field access and procedural calls can be
         # 10%-15% faster than OO method calls:
-        my $s = join("\t", $num,
+        my $s = join("\t", $smsg->{num},
                 $smsg->{subject},
                 $smsg->{from},
                 PublicInbox::Smsg::date($smsg),
@@ -866,24 +841,29 @@ sub over_line ($$$$) {
                 $smsg->{references},
                 $smsg->{bytes},
                 $smsg->{lines},
-                "Xref: " . xref($self, $ng, $num, $smsg->{mid}));
+                "Xref: " . xref($self, $ibx, $smsg));
         utf8::encode($s);
-        $s
+        $s .= "\r\n";
 }
 
 sub cmd_over ($;$) {
         my ($self, $range) = @_;
         if ($range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 defined $n or return r430;
-                my $smsg = $ng->over->get_art($n) or return r430;
-                more($self, '224 Overview information follows (multi-line)');
+                my $smsg = $ibx->over(1)->get_art($n) or return r430;
+                $self->msg_more(
+                        "224 Overview information follows (multi-line)\r\n");
 
                 # Only set article number column if it's the current group
-                my $self_ng = $self->{ng};
-                $n = 0 if (!$self_ng || $self_ng ne $ng);
-                more($self, over_line($self, $ng, $n, $smsg));
-                '.';
+                # (RFC 3977 8.3.2)
+                my $cur_ibx = $self->{ibx};
+                if (!$cur_ibx || $cur_ibx ne $ibx) {
+                        # set {-orig_num} for nntp_xref_for
+                        $smsg->{-orig_num} = $smsg->{num};
+                        $smsg->{num} = 0;
+                }
+                over_line($self, $ibx, $smsg).".\r\n";
         } else {
                 cmd_xover($self, $range);
         }
@@ -891,13 +871,13 @@ sub cmd_over ($;$) {
 
 sub xover_i {
         my ($self, $beg, $end) = @_;
-        my $ng = $self->{ng};
-        my $msgs = $ng->over->query_xover($$beg, $end);
+        my $ibx = $self->{ibx};
+        my $msgs = $ibx->over(1)->query_xover($$beg, $end);
         my $nr = scalar @$msgs or return;
 
         # OVERVIEW.FMT
-        more($self, join("\r\n", map {
-                over_line($self, $ng, $_->{num}, $_);
+        $self->msg_more(join('', map {
+                over_line($self, $ibx, $_);
                 } @$msgs));
         $$beg = $msgs->[-1]->{num} + 1;
 }
@@ -908,21 +888,20 @@ sub cmd_xover ($;$) {
         my $r = get_range($self, $range);
         return $r unless ref $r;
         my ($beg, $end) = @$r;
-        more($self, "224 Overview information follows for $$beg to $end");
-        long_response($self, \&xover_i, @$r);
+        $self->msg_more(
+                "224 Overview information follows for $$beg to $end\r\n");
+        $self->long_response(\&xover_i, @$r);
 }
 
-sub compressed { undef }
-
 sub cmd_starttls ($) {
         my ($self) = @_;
-        my $sock = $self->{sock} or return;
         # RFC 4642 2.2.1
-        return r502 if ($sock->can('accept_SSL') || $self->compressed);
-        my $opt = $self->{nntpd}->{accept_tls} or
-                return '580 can not initiate TLS negotiation';
-        res($self, '382 Continue with TLS negotiation');
-        $self->{sock} = IO::Socket::SSL->start_SSL($sock, %$opt);
+        (($self->{sock} // return)->can('stop_SSL') || $self->compressed) and
+                return r502;
+        $self->{nntpd}->{ssl_ctx_opt} or
+                return \"580 can not initiate TLS negotiation\r\n";
+        $self->write(\"382 Continue with TLS negotiation\r\n");
+        PublicInbox::TLS::start($self->{sock}, $self->{nntpd});
         $self->requeue if PublicInbox::DS::accept_tls_step($self);
         undef;
 }
@@ -930,43 +909,42 @@ sub cmd_starttls ($) {
 # RFC 8054
 sub cmd_compress ($$) {
         my ($self, $alg) = @_;
-        return '503 Only DEFLATE is supported' if uc($alg) ne 'DEFLATE';
+        return "503 Only DEFLATE is supported\r\n" if uc($alg) ne 'DEFLATE';
         return r502 if $self->compressed;
-        PublicInbox::NNTPdeflate->enable($self);
+        PublicInbox::NNTPdeflate->enable($self) or return
+                                \"403 Unable to activate compression\r\n";
+        PublicInbox::DS::write($self, \"206 Compression active\r\n");
         $self->requeue;
         undef
 }
 
-sub zflush {} # overridden by NNTPdeflate
-
 sub cmd_xpath ($$) {
         my ($self, $mid) = @_;
         return r501 unless $mid =~ $ONE_MSGID;
         $mid = $1;
         my @paths;
-        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
-                my $n = $ng->mm->num_for($mid);
-                push @paths, "$ng->{newsgroup}/$n" if defined $n;
+        my $pi_cfg = $self->{nntpd}->{pi_cfg};
+        my $groups = $pi_cfg->{-by_newsgroup};
+        if (my $ALL = $pi_cfg->ALL) {
+                my ($id, $prev, %seen);
+                while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+                        my $xr3 = $ALL->over->get_xref3($smsg->{num});
+                        for my $x (@$xr3) {
+                                my ($ngname, $n) = split(/:/, $x);
+                                $x = "$ngname/$n";
+                                if ($groups->{$ngname} && !$seen{$x}++) {
+                                        push(@paths, $x);
+                                }
+                        }
+                }
+        } else { # slow path, no point in using long_response
+                for my $ibx (values %$groups) {
+                        my $n = $ibx->mm(1)->num_for($mid) // next;
+                        push @paths, "$ibx->{newsgroup}/$n";
+                }
         }
-        return '430 no such article on server' unless @paths;
-        '223 '.join(' ', @paths);
-}
-
-sub res ($$) { do_write($_[0], $_[1] . "\r\n") }
-
-sub more ($$) { $_[0]->msg_more($_[1] . "\r\n") }
-
-sub do_write ($$) {
-        my $self = $_[0];
-        my $done = $self->write(\($_[1]));
-        return 0 unless $self->{sock};
-
-        $done;
-}
-
-sub err ($$;@) {
-        my ($self, $fmt, @args) = @_;
-        printf { $self->{nntpd}->{err} } $fmt."\n", @args;
+        return \"430 no such article on server\r\n" unless @paths;
+        '223 '.join(' ', sort(@paths))."\r\n";
 }
 
 sub out ($$;@) {
@@ -977,10 +955,9 @@ sub out ($$;@) {
 # callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
 sub event_step {
         my ($self) = @_;
-
+        local $SIG{__WARN__} = $self->{nntpd}->{warn_cb};
         return unless $self->flush_write && $self->{sock} && !$self->{long_cb};
 
-        $self->update_idle_time;
         # only read more requests if we've drained the write buffer,
         # otherwise we can be buffering infinitely w/o backpressure
 
@@ -1002,17 +979,19 @@ sub event_step {
         out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0);
         return $self->close if $r < 0;
         $self->rbuf_idle($rbuf);
-        $self->update_idle_time;
 
         # maybe there's more pipelined data, or we'll have
         # to register it for socket-readiness notifications
         $self->requeue unless $pending;
 }
 
-# for graceful shutdown in PublicInbox::Daemon:
-sub busy {
-        my ($self, $now) = @_;
-        ($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now));
+sub busy { # for graceful shutdown in PublicInbox::Daemon:
+        my ($self) = @_;
+        defined($self->{rbuf}) || defined($self->{wbuf})
 }
 
+package PublicInbox::NNTPdeflate;
+use PublicInbox::DSdeflate;
+our @ISA = qw(PublicInbox::DSdeflate PublicInbox::NNTP);
+
 1;
diff --git a/lib/PublicInbox/NNTPD.pm b/lib/PublicInbox/NNTPD.pm
index 6b762d89..4401a29b 100644
--- a/lib/PublicInbox/NNTPD.pm
+++ b/lib/PublicInbox/NNTPD.pm
@@ -1,74 +1,64 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # represents an NNTPD (currently a singleton),
 # see script/public-inbox-nntpd for how it is used
 package PublicInbox::NNTPD;
 use strict;
-use warnings;
+use v5.10.1;
 use Sys::Hostname;
 use PublicInbox::Config;
 use PublicInbox::InboxIdle;
+use PublicInbox::NNTP;
 
 sub new {
         my ($class) = @_;
-        my $pi_config = PublicInbox::Config->new;
-        my $name = $pi_config->{'publicinbox.nntpserver'};
-        if (!defined($name) or $name eq '') {
-                $name = hostname;
-        } elsif (ref($name) eq 'ARRAY') {
-                $name = $name->[0];
-        }
-
         bless {
-                groups => {},
                 err => \*STDERR,
                 out => \*STDOUT,
-                grouplist => [],
-                pi_config => $pi_config,
-                servername => $name,
-                greet => \"201 $name ready - post via email\r\n",
-                # accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
+                # pi_cfg => $pi_cfg,
+                # ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... }
                 # idler => PublicInbox::InboxIdle
         }, $class;
 }
 
 sub refresh_groups {
         my ($self, $sig) = @_;
-        my $pi_config = $sig ? PublicInbox::Config->new : $self->{pi_config};
-        my $new = {};
-        my @list;
-        $pi_config->each_inbox(sub {
-                my ($ng) = @_;
-                my $ngname = $ng->{newsgroup} or return;
-                if (ref $ngname) {
-                        warn 'multiple newsgroups not supported: '.
-                                join(', ', @$ngname). "\n";
-                # Newsgroup name needs to be compatible with RFC 3977
-                # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
-                # Leave out a few chars likely to cause problems or conflicts:
-                # '|', '<', '>', ';', '#', '$', '&',
-                } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
-                        warn "newsgroup name invalid: `$ngname'\n";
-                } elsif ($ng->nntp_usable) {
-                        # Only valid if msgmap and search works
-                        $new->{$ngname} = $ng;
-                        push @list, $ng;
-
+        my $pi_cfg = PublicInbox::Config->new;
+        my $name = $pi_cfg->{'publicinbox.nntpserver'};
+        if (!defined($name) or $name eq '') {
+                $name = hostname;
+        } elsif (ref($name) eq 'ARRAY') {
+                $name = $name->[0];
+        }
+        if ($name ne ($self->{servername} // '')) {
+                $self->{servername} = $name;
+                $self->{greet} = \"201 $name ready - post via email\r\n";
+        }
+        my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox
+        my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
+        $pi_cfg->each_inbox(sub {
+                my ($ibx) = @_;
+                my $ngname = $ibx->{newsgroup} // return;
+                my $ce = $cache->{$ngname};
+                if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) {
+                        # only valid if msgmap and over works
                         # preload to avoid fragmentation:
-                        $ng->description;
-                        $ng->base_url;
+                        $ibx->description;
+                } else {
+                        delete $groups->{$ngname};
+                        # Note: don't be tempted to delete more for memory
+                        # savings just yet: NNTP, IMAP, and WWW may all
+                        # run in the same process someday.
                 }
         });
-        @list =        sort { $a->{newsgroup} cmp $b->{newsgroup} } @list;
-        $self->{grouplist} = \@list;
-        $self->{pi_config} = $pi_config;
+        @{$self->{groupnames}} = sort(keys %$groups);
         # this will destroy old groups that got deleted
-        %{$self->{groups}} = %$new;
+        $self->{pi_cfg} = $pi_cfg;
 }
 
 sub idler_start {
-        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
 }
 
 1;
diff --git a/lib/PublicInbox/NetNNTPSocks.pm b/lib/PublicInbox/NetNNTPSocks.pm
new file mode 100644
index 00000000..d27efba1
--- /dev/null
+++ b/lib/PublicInbox/NetNNTPSocks.pm
@@ -0,0 +1,36 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# wrap Net::NNTP client with SOCKS support.  Convoluted, but AFAIK this
+# is the only way to get SOCKS working with Net::NNTP w/o LD_PRELOAD.
+package PublicInbox::NetNNTPSocks;
+use v5.12;
+use Net::NNTP;
+our %OPT; # used to pass options between ->new_socks and our ->new
+our @ISA = qw(IO::Socket::Socks);
+
+# use this instead of Net::NNTP->new if using Proxy*
+sub new_socks {
+        my (undef, %opt) = @_;
+        require IO::Socket::Socks;
+        local @Net::NNTP::ISA = (qw(Net::Cmd), __PACKAGE__);
+        local %OPT = map {;
+                defined($opt{$_}) ? ($_ => $opt{$_}) : ()
+        } qw(ProxyAddr ProxyPort SocksVersion SocksDebug SocksResolve);
+        no warnings 'uninitialized'; # needed for $SOCKS_ERROR
+        my $ret = Net::NNTP->new(%opt); # calls PublicInbox::NetNNTPSocks::new
+        return $ret if $ret || $!{EINTR};
+        $ret // die "errors: \$!=$! SOCKS=",
+                                eval('$IO::Socket::Socks::SOCKS_ERROR // ""'),
+                                ', SSL=',
+                                (eval('IO::Socket::SSL->errstr')  // ''), "\n";
+}
+
+# called by Net::NNTP->new
+sub new {
+        my ($self, %opt) = @_;
+        @OPT{qw(ConnectAddr ConnectPort)} = @opt{qw(PeerAddr PeerPort)};
+        $self->SUPER::new(%OPT);
+}
+
+1;
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
new file mode 100644
index 00000000..ec18818b
--- /dev/null
+++ b/lib/PublicInbox/NetReader.pm
@@ -0,0 +1,896 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common reader code for IMAP and NNTP (and maybe JMAP)
+package PublicInbox::NetReader;
+use v5.12;
+use parent qw(Exporter PublicInbox::IPC);
+use PublicInbox::Eml;
+use PublicInbox::Config;
+our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft);
+$IMAPflags2kw{'$Forwarded'} = 'forwarded';  # RFC 5550
+
+our @EXPORT = qw(uri_section imap_uri nntp_uri);
+
+sub ndump {
+        require Data::Dumper;
+        Data::Dumper->new([ $_[-1] ])->Useqq(1)->Terse(1)->Dump;
+}
+
+# returns the git config section name, e.g [imap "imaps://user@example.com"]
+# without the mailbox, so we can share connections between different inboxes
+sub uri_section ($) {
+        my ($uri) = @_;
+        $uri->scheme . '://' . $uri->authority;
+}
+
+sub socks_args ($) {
+        my ($val) = @_;
+        return if ($val // '') eq '';
+        if ($val =~ m!\Asocks5h:// (?: \[ ([^\]]+) \] | ([^:/]+) )
+                                        (?::([0-9]+))?/*\z!ix) {
+                my ($h, $p) = ($1 // $2, $3 + 0);
+                $h = '127.0.0.1' if $h eq '0';
+                eval { require IO::Socket::Socks } or die <<EOM;
+IO::Socket::Socks missing for socks5h://$h:$p
+EOM
+                # for IO::Socket::Socks
+                return { ProxyAddr => $h, ProxyPort => $p };
+        }
+        die "$val not understood (only socks5h:// is supported)\n";
+}
+
+# gives an arrayref suitable for the Mail::IMAPClient Ssl or Starttls arg
+sub mic_tls_opt ($$) {
+        my ($o, $hostname) = @_;
+        require IO::Socket::SSL;
+        $o = {} if !ref($o);
+        $o->{SSL_hostname} //= $hostname;
+        [ map { ($_, $o->{$_}) } keys %$o ];
+}
+
+sub set_ssl_verify_mode ($$) {
+        my ($o, $bool) = @_;
+        require IO::Socket::SSL;
+        $o->{SSL_verify_mode} = $bool ? IO::Socket::SSL::SSL_VERIFY_PEER() :
+                                        IO::Socket::SSL::SSL_VERIFY_NONE();
+}
+
+sub mic_new ($$$$) {
+        my ($self, $mic_arg, $sec, $uri) = @_;
+        my %mic_arg = (%$mic_arg, Keepalive => 1);
+        my $sa = $self->{cfg_opt}->{$sec}->{-proxy_cfg} || $self->{-proxy_cli};
+        my ($mic, $s, $t);
+        if ($sa) {
+                # this `require' needed for worker[1..Inf], since socks_args
+                # only got called in worker[0]
+                require IO::Socket::Socks;
+                my %opt = (%$sa, Keepalive => 1);
+                $opt{SocksDebug} = 1 if $mic_arg{Debug};
+                $opt{ConnectAddr} = delete $mic_arg{Server};
+                $opt{ConnectPort} = delete $mic_arg{Port};
+                do {
+                        $! = 0;
+                        $s = IO::Socket::Socks->new(%opt);
+                } until ($s || !$!{EINTR} || $self->{quit});
+                return if $self->{quit};
+                $s or die "E: <$uri> ".eval('$IO::Socket::Socks::SOCKS_ERROR');
+                $mic_arg{Socket} = $s;
+                if (my $o = delete $mic_arg{Ssl}) { # for imaps://
+                        $o = mic_tls_opt($o, $opt{ConnectAddr});
+                        do {
+                                $! = 0;
+                                $t = IO::Socket::SSL->start_SSL($s, @$o);
+                        } until ($t || !$!{EINTR} || $self->{quit});
+                        return if $self->{quit};
+                        $t or die "E: <$uri> ".(IO::Socket::SSL->errstr // '');
+                        $mic_arg{Socket} = $t;
+                } elsif ($o = $mic_arg{Starttls}) {
+                        # Mail::IMAPClient will use this:
+                        $mic_arg{Starttls} = mic_tls_opt($o, $opt{ConnectAddr});
+                }
+        } elsif ($mic_arg{Ssl} || $mic_arg{Starttls}) {
+                for my $f (qw(Ssl Starttls)) {
+                        my $o = $mic_arg{$f} or next;
+                        $mic_arg{$f} = mic_tls_opt($o, $mic_arg{Server});
+                }
+        }
+        do {
+                $! = 0;
+                $mic = PublicInbox::IMAPClient->new(%mic_arg);
+        } until ($mic || !$!{EINTR} || $self->{quit});
+        $mic;
+}
+
+sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback
+
+sub onion_hint ($$) {
+        my ($lei, $uri) = @_;
+        $uri->host =~ /\.onion\z/i or return "\n";
+        my $t = $uri->isa('PublicInbox::URIimap') ? 'imap' : 'nntp';
+        my $url = PublicInbox::Config::squote_maybe(uri_section($uri));
+        my $scheme = $uri->scheme;
+        my $set_cfg = 'lei config';
+        if (!$lei) { # public-inbox-watch
+                my $f = PublicInbox::Config::squote_maybe(
+                                $ENV{PI_CONFIG} || '~/.public-inbox/config');
+                $set_cfg = "git config -f $f";
+        }
+        my $dq = substr($url, 0, 1) eq "'" ? '"' : '';
+        <<EOM
+
+Assuming you have Tor configured and running locally on port 9050,
+try configuring a socks5h:// proxy:
+
+        url=$url
+        $set_cfg $t.$dq\$url$dq.proxy socks5h://127.0.0.1:9050
+
+git 2.26+ users may instead rely on `*' to match all .onion URLs:
+
+        $set_cfg '$t.$scheme://*.onion.proxy' socks5h://127.0.0.1:9050
+
+...before retrying your current command
+EOM
+}
+
+# Net::NNTP doesn't support CAPABILITIES, yet; and both IMAP+NNTP
+# servers may have multiple listen sockets.
+sub try_starttls ($) {
+        my ($host) = @_;
+        return if $host =~ /\.onion\z/si;
+        return if $host =~ /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/s;
+        return if $host eq '::1';
+        1;
+}
+
+# mic_for may prompt the user and store auth info, prepares mic_get
+sub mic_for ($$$$) { # mic = Mail::IMAPClient
+        my ($self, $uri, $mic_common, $lei) = @_;
+        require PublicInbox::GitCredential;
+        my $cred = bless {
+                url => "$uri",
+                protocol => $uri->scheme,
+                host => $uri->host,
+                username => $uri->user,
+                password => $uri->password,
+        }, 'PublicInbox::GitCredential';
+        my $sec = uri_section($uri);
+        my $common = $mic_common->{$sec} // {};
+        # IMAPClient and Net::Netrc both mishandles `0', so we pass `127.0.0.1'
+        my $host = $cred->{host};
+        $host = '127.0.0.1' if $host eq '0';
+        my $mic_arg = {
+                Port => $uri->port,
+                Server => $host,
+                %$common, # may set Starttls, Compress, Debug ....
+        };
+        require PublicInbox::IMAPClient;
+        my $mic = mic_new($self, $mic_arg, $sec, $uri);
+        return if $self->{quit};
+        ($mic && $mic->IsConnected) or
+                die "E: <$uri> new: $@".onion_hint($lei, $uri);
+
+        # default to using STARTTLS if it's available, but allow
+        # it to be disabled since I usually connect to localhost
+        if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) &&
+                        $mic->has_capability('STARTTLS') &&
+                        try_starttls($host) &&
+                        $mic->can('starttls')) {
+                $mic->starttls or die "E: <$uri> STARTTLS: $@\n";
+        }
+
+        # do we even need credentials?
+        if (!defined($cred->{username}) &&
+                        $mic->has_capability('AUTH=ANONYMOUS')) {
+                $cred = undef;
+        }
+        if ($cred) {
+                my $p = $cred->{password} // $cred->check_netrc($lei);
+                $cred->fill($lei) unless defined($p); # may prompt user here
+                $mic->User($mic_arg->{User} = $cred->{username});
+                $mic->Password($mic_arg->{Password} = $cred->{password});
+        } else { # AUTH=ANONYMOUS
+                $mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS');
+                $mic_arg->{Authcallback} = 'auth_anon_cb';
+                $mic->Authcallback(\&auth_anon_cb);
+        }
+        my $err;
+        if ($mic->login && $mic->IsAuthenticated) {
+                # success! keep IMAPClient->new arg in case we get disconnected
+                $self->{net_arg}->{$sec} = $mic_arg;
+                if ($cred) {
+                        $uri->user($cred->{username}) if !defined($uri->user);
+                } elsif ($mic_arg->{Authmechanism} eq 'ANONYMOUS') {
+                        $uri->auth('ANONYMOUS') if !defined($uri->auth);
+                }
+        } else {
+                $err = "E: <$uri> LOGIN: $@\n";
+                if ($cred && defined($cred->{password})) {
+                        $err =~ s/\Q$cred->{password}\E/*******/g;
+                }
+                $mic = undef;
+        }
+        $cred->run($mic ? 'approve' : 'reject') if $cred && $cred->{filled};
+        if ($err) {
+                $lei ? $lei->fail($err) : warn($err);
+        }
+        $mic;
+}
+
+sub nn_new ($$$$) {
+        my ($self, $nn_arg, $nntp_cfg, $uri) = @_;
+        my $nn;
+        my ($Net_NNTP, $new) = qw(Net::NNTP new);
+        if (defined $nn_arg->{ProxyAddr}) {
+                require PublicInbox::NetNNTPSocks;
+                ($Net_NNTP, $new) = qw(PublicInbox::NetNNTPSocks new_socks);
+                $nn_arg->{SocksDebug} = 1 if $nn_arg->{Debug};
+        }
+        do {
+                $! = 0;
+                $nn = $Net_NNTP->$new(%$nn_arg);
+        } until ($nn || !$!{EINTR} || $self->{quit});
+        $nn // return;
+        setsockopt($nn, Socket::SOL_SOCKET(), Socket::SO_KEEPALIVE(), 1);
+
+        # default to using STARTTLS if it's available, but allow
+        # it to be disabled for localhost/VPN users
+        if (!$nn_arg->{SSL} && $nn->can('starttls')) {
+                if (!defined($nntp_cfg->{starttls}) &&
+                                try_starttls($nn_arg->{Host})) {
+                        # soft fail by default
+                        $nn->starttls or warn <<"";
+W: <$uri> STARTTLS tried and failed (not requested): ${\(ndump($nn->message))}
+
+                } elsif ($nntp_cfg->{starttls}) {
+                        # hard fail if explicitly configured
+                        $nn->starttls or die <<"";
+E: <$uri> STARTTLS requested and failed: ${\(ndump($nn->message))}
+
+                }
+        } elsif ($nntp_cfg->{starttls}) {
+                $nn->can('starttls') or
+                        die "E: <$uri> Net::NNTP too old for STARTTLS\n";
+                $nn->starttls or die <<"";
+E: <$uri> STARTTLS requested and failed: ${\(ndump($nn->message))}
+
+        }
+        $nn;
+}
+
+sub nn_for ($$$$) { # nn = Net::NNTP
+        my ($self, $uri, $nn_common, $lei) = @_;
+        my $sec = uri_section($uri);
+        my $nntp_cfg = $self->{cfg_opt}->{$sec} //= {};
+        my $host = $uri->host;
+        # Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
+        $host = '127.0.0.1' if $host eq '0';
+        my $cred;
+        my ($u, $p);
+        if (defined(my $ui = $uri->userinfo)) {
+                require PublicInbox::GitCredential;
+                $cred = bless {
+                        url => $sec,
+                        protocol => $uri->scheme,
+                        host => $host,
+                }, 'PublicInbox::GitCredential';
+                ($u, $p) = split(/:/, $ui, 2);
+                ($cred->{username}, $cred->{password}) = ($u, $p);
+                $p //= $cred->check_netrc($lei);
+        }
+        my $common = $nn_common->{$sec} // {};
+        my $nn_arg = {
+                Port => $uri->port,
+                Host => $host,
+                %$common, # may Debug ....
+        };
+        $nn_arg->{SSL} = 1 if $uri->secure; # snews == nntps
+        my $sa = $self->{-proxy_cli};
+        %$nn_arg = (%$nn_arg, %$sa) if $sa;
+        my $nn = nn_new($self, $nn_arg, $nntp_cfg, $uri);
+        return if $self->{quit};
+        $nn // die "E: <$uri> new: $@".onion_hint($lei, $uri);
+        if ($cred) {
+                $p //= do {
+                        $cred->fill($lei); # may prompt user here
+                        $cred->{password};
+                };
+                if ($nn->authinfo($u, $p)) {
+                        push @{$nntp_cfg->{-postconn}}, [ 'authinfo', $u, $p ];
+                } else {
+                        warn <<EOM;
+E: <$uri> AUTHINFO $u XXXX: ${\(ndump($nn->message))}
+EOM
+                        $nn = undef;
+                }
+        }
+        if ($nn && $nntp_cfg->{compress}) {
+                # https://rt.cpan.org/Ticket/Display.html?id=129967
+                if ($nn->can('compress')) {
+                        if ($nn->compress) {
+                                push @{$nntp_cfg->{-postconn}}, [ 'compress' ];
+                        } else {
+                                warn <<EOM;
+W: <$uri> COMPRESS: ${\(ndump($nn->message))}
+EOM
+                        }
+                } else {
+                        delete $nntp_cfg->{compress};
+                        warn <<"";
+W: <$uri> COMPRESS not supported by Net::NNTP
+W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
+
+                }
+        }
+
+        $self->{net_arg}->{$sec} = $nn_arg;
+        $cred->run($nn ? 'approve' : 'reject') if $cred && $cred->{filled};
+        $nn;
+}
+
+sub imap_uri {
+        my ($url, $ls_ok) = @_;
+        require PublicInbox::URIimap;
+        my $uri = PublicInbox::URIimap->new($url);
+        $uri && ($ls_ok || $uri->mailbox) ? $uri->canonical : undef;
+}
+
+my %IS_NNTP = (news => 1, snews => 1, nntp => 1, nntps => 1);
+sub nntp_uri {
+        my ($url, $ls_ok) = @_;
+        require PublicInbox::URInntps;
+        my $uri = PublicInbox::URInntps->new($url);
+        $uri && $IS_NNTP{$uri->scheme} && ($ls_ok || $uri->group) ?
+                $uri->canonical : undef;
+}
+
+sub cfg_intvl ($$$) {
+        my ($cfg, $key, $url) = @_;
+        my $v = $cfg->urlmatch($key, $url) // return;
+        $v =~ /\A[0-9]+(?:\.[0-9]+)?\z/s and return $v + 0;
+        if (ref($v) eq 'ARRAY') {
+                $v = join(', ', @$v);
+                warn "W: $key has multiple values: $v\nW: $key ignored\n";
+        } else {
+                warn "W: $key=$v is not a numeric value in seconds\n";
+        }
+}
+
+# flesh out common IMAP-specific data structures
+sub imap_common_init ($;$) {
+        my ($self, $lei) = @_;
+        return unless $self->{imap_order};
+        $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
+        eval { require PublicInbox::IMAPClient } or
+                die "Mail::IMAPClient is required for IMAP:\n$@\n";
+        ($lei || eval { require PublicInbox::IMAPTracker }) or
+                die "DBD::SQLite is required for IMAP\n:$@\n";
+        require PublicInbox::URIimap;
+        my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
+        my $mic_common = {}; # scheme://authority => Mail:IMAPClient arg
+        for my $uri (@{$self->{imap_order}}) {
+                my $sec = uri_section($uri);
+
+                # knobs directly for Mail::IMAPClient->new
+                for my $k (qw(Starttls Debug Compress)) {
+                        my $v = $cfg->urlmatch('--bool', "imap.$k", $$uri);
+                        $mic_common->{$sec}->{$k} = $v if defined $v;
+                }
+                my $to = cfg_intvl($cfg, 'imap.timeout', $$uri);
+                $mic_common->{$sec}->{Timeout} = $to if $to;
+                $mic_common->{$sec}->{Ssl} = 1 if $uri->scheme eq 'imaps';
+
+                # knobs we use ourselves:
+                my $sa = socks_args($cfg->urlmatch('imap.Proxy', $$uri));
+                $self->{cfg_opt}->{$sec}->{-proxy_cfg} = $sa if $sa;
+                for my $k (qw(pollInterval idleInterval)) {
+                        $to = cfg_intvl($cfg, "imap.$k", $$uri) // next;
+                        $self->{cfg_opt}->{$sec}->{$k} = $to;
+                }
+                my $k = 'imap.fetchBatchSize';
+                if (defined(my $bs = $cfg->urlmatch($k, $$uri))) {
+                        ($bs =~ /\A([0-9]+)\z/ && $bs > 0) ?
+                                ($self->{cfg_opt}->{$sec}->{batch_size} = $bs) :
+                                warn("$k=$bs is not a positive integer\n");
+                }
+                my $v = $cfg->urlmatch(qw(--bool imap.sslVerify), $$uri);
+                if (defined $v) {
+                        my $cur = $mic_common->{$sec} //= {};
+                        $cur->{Starttls} //= 1 if !$cur->{Ssl};
+                        for my $f (grep { $cur->{$_} } qw(Ssl Starttls)) {
+                                set_ssl_verify_mode($cur->{$f} = {}, $v);
+                        }
+                }
+        }
+        # make sure we can connect and cache the credentials in memory
+        my $mics = {}; # schema://authority => IMAPClient obj
+        for my $orig_uri (@{$self->{imap_order}}) {
+                my $sec = uri_section($orig_uri);
+                my $uri = PublicInbox::URIimap->new("$sec/");
+                my $mic = $mics->{$sec} //=
+                                mic_for($self, $uri, $mic_common, $lei);
+                return if $self->{quit};
+                $mic // die "Unable to continue\n";
+                next unless $self->isa('PublicInbox::NetWriter');
+                next if $self->{-skip_creat};
+                my $dst = $orig_uri->mailbox // next;
+                next if $mic->exists($dst); # already exists
+                $mic->create($dst) or die "CREATE $dst failed <$orig_uri>: $@";
+        }
+        $mics;
+}
+
+# flesh out common NNTP-specific data structures
+sub nntp_common_init ($;$) {
+        my ($self, $lei) = @_;
+        return unless $self->{nntp_order};
+        $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
+        eval { require Net::NNTP } or
+                die "Net::NNTP is required for NNTP:\n$@\n";
+        ($lei || eval { require PublicInbox::IMAPTracker }) or
+                die "DBD::SQLite is required for NNTP\n:$@\n";
+        my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
+        my $nn_common = {}; # scheme://authority => Net::NNTP->new arg
+        for my $uri (@{$self->{nntp_order}}) {
+                my $sec = uri_section($uri);
+                my $args = $nn_common->{$sec} //= {};
+
+                # Debug and Timeout are passed to Net::NNTP->new
+                my $v = $cfg->urlmatch(qw(--bool nntp.Debug), $$uri);
+                $args->{Debug} = $v if defined $v;
+                my $to = cfg_intvl($cfg, 'nntp.Timeout', $$uri);
+                $args->{Timeout} = $to if $to;
+                my $sa = socks_args($cfg->urlmatch('nntp.Proxy', $$uri));
+                %$args = (%$args, %$sa) if $sa;
+
+                # Net::NNTP post-connect commands
+                for my $k (qw(starttls compress)) {
+                        $v = $cfg->urlmatch('--bool', "nntp.$k", $$uri);
+                        $self->{cfg_opt}->{$sec}->{$k} = $v if defined $v;
+                }
+                $v = $cfg->urlmatch(qw(--bool nntp.sslVerify), $$uri);
+                set_ssl_verify_mode($args, $v) if defined $v;
+
+                # -watch internal option
+                for my $k (qw(pollInterval)) {
+                        $to = cfg_intvl($cfg, "nntp.$k", $$uri) // next;
+                        $self->{cfg_opt}->{$sec}->{$k} = $to;
+                }
+        }
+        # make sure we can connect and cache the credentials in memory
+        my %nn; # schema://authority => Net::NNTP object
+        for my $uri (@{$self->{nntp_order}}) {
+                my $sec = uri_section($uri);
+                $nn{$sec} //= nn_for($self, $uri, $nn_common, $lei);
+        }
+        \%nn; # for optional {nn_cached}
+}
+
+sub add_url {
+        my ($self, $arg, $ls_ok) = @_;
+        my $uri;
+        if ($uri = imap_uri($arg, $ls_ok)) {
+                $_[1] = $$uri; # canonicalized
+                push @{$self->{imap_order}}, $uri;
+        } elsif ($uri = nntp_uri($arg, $ls_ok)) {
+                $_[1] = $$uri; # canonicalized
+                push @{$self->{nntp_order}}, $uri;
+        } else {
+                push @{$self->{unsupported_url}}, $arg;
+        }
+}
+
+sub errors {
+        my ($self, $lei) = @_;
+        if (my $u = $self->{unsupported_url}) {
+                return "Unsupported URL(s): @$u";
+        }
+        if ($self->{imap_order}) {
+                eval { require PublicInbox::IMAPClient } or
+                        die "Mail::IMAPClient is required for IMAP:\n$@\n";
+        }
+        if ($self->{nntp_order}) {
+                eval { require Net::NNTP } or
+                        die "Net::NNTP is required for NNTP:\n$@\n";
+        }
+        my $sa = socks_args($lei ? $lei->{opt}->{proxy} : undef);
+        $self->{-proxy_cli} = $sa if $sa;
+        undef;
+}
+
+sub flags2kw ($$$$) {
+        my ($self, $uri, $uid, $flags) = @_;
+        my $kw = [];
+        for my $f (split(/ /, $flags)) {
+                if (my $k = $IMAPflags2kw{$f}) {
+                        push @$kw, $k;
+                } elsif ($f eq "\\Recent") { # not in JMAP
+                } elsif ($f eq "\\Deleted") { # not in JMAP
+                        return;
+                } elsif ($self->{verbose}) {
+                        warn "# unknown IMAP flag $f <$uri/;UID=$uid>\n";
+                }
+        }
+        @$kw = sort @$kw; # for LeiSearch->kw_changed and UI/UX purposes
+        $kw;
+}
+
+sub _imap_do_msg ($$$$$) {
+        my ($self, $uri, $uid, $raw, $flags) = @_;
+        # our target audience expects LF-only, save storage
+        $$raw =~ s/\r\n/\n/sg;
+        my $kw = defined($flags) ?
+                (flags2kw($self, $uri, $uid, $flags) // return) : undef;
+        my ($eml_cb, @args) = @{$self->{eml_each}};
+        $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
+}
+
+sub run_commit_cb ($) {
+        my ($self) = @_;
+        my $cmt_cb_args = $self->{on_commit} or return;
+        my ($cb, @args) = @$cmt_cb_args;
+        $cb->(@args);
+}
+
+sub itrk_last ($$;$$) {
+        my ($self, $uri, $r_uidval, $mic) = @_;
+        return (undef, undef, $r_uidval) unless $self->{incremental};
+        my ($itrk, $l_uid, $l_uidval);
+        if (defined(my $lms = $self->{-lms_rw})) { # LeiMailSync or 0
+                $uri->uidvalidity($r_uidval) if defined $r_uidval;
+                if ($mic) {
+                        my $auth = $mic->Authmechanism // '';
+                        $uri->auth($auth) if $auth eq 'ANONYMOUS';
+                        my $user = $mic->User;
+                        $uri->user($user) if defined($user);
+                }
+                my $x;
+                $l_uid = ($lms && ($x = $lms->location_stats($$uri))) ?
+                                $x->{'uid.max'} : undef;
+                # itrk remains undef, lei/store worker writes to
+                # mail_sync.sqlite3
+        } else {
+                $itrk = PublicInbox::IMAPTracker->new($$uri);
+                ($l_uidval, $l_uid) = $itrk->get_last($$uri);
+        }
+        ($itrk, $l_uid, $l_uidval //= $r_uidval);
+}
+
+# import flags of already-seen messages
+sub each_old_flags ($$$$) {
+        my ($self, $mic, $uri, $l_uid) = @_;
+        $l_uid ||= 1;
+        my $sec = uri_section($uri);
+        my $bs = ($self->{cfg_opt}->{$sec}->{batch_size} // 1) * 10000;
+        my ($eml_cb, @args) = @{$self->{eml_each}};
+        $self->{quiet} or warn "# $uri syncing flags 1:$l_uid\n";
+        for (my $n = 1; $n <= $l_uid; $n += $bs) {
+                my $end = $n + $bs;
+                $end = $l_uid if $end > $l_uid;
+                my $r = $mic->fetch_hash("$n:$end", 'FLAGS');
+                if (!$r) {
+                        return if $!{EINTR} && $self->{quit};
+                        return "E: $uri UID FETCH $n:$end error: $!";
+                }
+                while (my ($uid, $per_uid) = each %$r) {
+                        my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS})
+                                // next;
+                        # LeiImport->input_net_cb
+                        $eml_cb->($uri, $uid, $kw, undef, @args);
+                }
+        }
+}
+
+# returns true if PERMANENTFLAGS indicates FLAGS of already imported
+# messages are meaningful
+sub perm_fl_ok ($) {
+        my ($perm_fl) = @_;
+        return if !defined($perm_fl);
+        for my $f (split(/[ \t]+/, $perm_fl)) {
+                return 1 if $IMAPflags2kw{$f};
+        }
+        undef;
+}
+
+# may be overridden in NetWriter or Watch
+sub folder_select { $_[0]->{each_old} ? 'select' : 'examine' }
+
+sub _imap_fetch_bodies ($$$$) {
+        my ($self, $mic, $uri, $uids) = @_;
+        my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
+        my $key = $req;
+        $key =~ s/\.PEEK//;
+        my $sec = uri_section($uri);
+        my $mbx = $uri->mailbox;
+        my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1;
+        my ($last_uid, $err);
+        my $use_fl = $self->{-use_fl};
+
+        while (scalar @$uids) {
+                my @batch = splice(@$uids, 0, $bs);
+                my $batch = join(',', @batch);
+                local $0 = "UID:$batch $mbx $sec";
+                my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
+                unless ($r) { # network error?
+                        last if $!{EINTR} && $self->{quit};
+                        $err = "E: $uri UID FETCH $batch error: $!";
+                        last;
+                }
+                for my $uid (@batch) {
+                        # messages get deleted, so holes appear
+                        my $per_uid = delete $r->{$uid} // next;
+                        my $raw = delete($per_uid->{$key}) // next;
+                        my $fl = $use_fl ? $per_uid->{FLAGS} : undef;
+                        _imap_do_msg($self, $uri, $uid, \$raw, $fl);
+                        $last_uid = $uid;
+                        last if $self->{quit};
+                }
+                last if $self->{quit};
+        }
+        ($last_uid, $err);
+}
+
+sub _imap_fetch_all ($$$) {
+        my ($self, $mic, $orig_uri) = @_;
+        my $sec = uri_section($orig_uri);
+        my $mbx = $orig_uri->mailbox;
+        $mic->Clear(1); # trim results history
+
+        # we need to check for mailbox writability to see if we care about
+        # FLAGS from already-imported messages.
+        my $cmd = $self->folder_select;
+        $mic->$cmd($mbx) or return "E: \U$cmd\E $mbx ($sec) failed: $!";
+
+        my ($r_uidval, $r_uidnext, $perm_fl);
+        for ($mic->Results) {
+                /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and $perm_fl = $1;
+                /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1;
+                /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1;
+        }
+        $r_uidval //= $mic->uidvalidity($mbx) //
+                return "E: $orig_uri cannot get UIDVALIDITY";
+        $r_uidnext //= $mic->uidnext($mbx) //
+                return "E: $orig_uri cannot get UIDNEXT";
+        my $expect = $orig_uri->uidvalidity // $r_uidval;
+        return <<EOF if $expect != $r_uidval;
+E: $orig_uri UIDVALIDITY mismatch (got $r_uidval)
+EOF
+
+        my $uri = $orig_uri->clone;
+        my $single_uid = $uri->uid;
+        my ($itrk, $l_uid, $l_uidval) = itrk_last($self, $uri, $r_uidval, $mic);
+        if (defined($single_uid)) {
+                $itrk = $l_uid = undef;
+                $uri->uid(undef); # for eml_cb
+        }
+        return <<EOF if $l_uidval != $r_uidval;
+E: $uri UIDVALIDITY mismatch
+E: local=$l_uidval != remote=$r_uidval
+EOF
+        $uri->uidvalidity($r_uidval);
+        $l_uid //= 0;
+        my $r_uid = $r_uidnext - 1;
+        return <<EOF if $l_uid > $r_uid;
+E: $uri local UID exceeds remote ($l_uid > $r_uid)
+E: $uri strangely, UIDVALIDLITY matches ($l_uidval)
+EOF
+        $mic->Uid(1); # the default, we hope
+        my $err;
+        my $use_fl = perm_fl_ok($perm_fl);
+        local $self->{-use_fl} = $use_fl;
+        if (!defined($single_uid) && $self->{each_old} && $use_fl) {
+                $err = each_old_flags($self, $mic, $uri, $l_uid);
+                return $err if $err;
+        }
+        return if $l_uid >= $r_uid; # nothing to do
+        $l_uid ||= 1;
+        my ($mod, $shard) = @{$self->{shard_info} // []};
+        unless ($self->{quiet}) {
+                my $m = $mod ? " [(UID % $mod) == $shard]" : '';
+                warn "# $uri fetching UID $l_uid:$r_uid$m\n";
+        }
+        my $fetch_cb = \&_imap_fetch_bodies;
+        do {
+                # I wish "UID FETCH $START:*" could work, but:
+                # 1) servers do not need to return results in any order
+                # 2) Mail::IMAPClient doesn't offer a streaming API
+                my $uids;
+                if (defined $single_uid) {
+                        $uids = [ $single_uid ];
+                } elsif (!($uids = $mic->search("UID $l_uid:*"))) {
+                        return if $!{EINTR} && $self->{quit};
+                        return "E: $uri UID SEARCH $l_uid:* error: $!";
+                }
+                return if scalar(@$uids) == 0;
+
+                # RFC 3501 doesn't seem to indicate order of UID SEARCH
+                # responses, so sort it ourselves.  Order matters so
+                # IMAPTracker can store the newest UID.
+                @$uids = sort { $a <=> $b } @$uids;
+
+                # Did we actually get new messages?
+                return if $uids->[0] < $l_uid;
+
+                $l_uid = $uids->[-1] + 1; # for next search
+                @$uids = grep { ($_ % $mod) == $shard } @$uids if $mod;
+                (my $last_uid, $err) = $fetch_cb->($self, $mic, $uri, $uids);
+                run_commit_cb($self);
+                $itrk->update_last($r_uidval, $last_uid) if $itrk;
+        } until ($err || $self->{quit} || defined($single_uid));
+        $err;
+}
+
+# uses cached auth info prepared by mic_for
+sub mic_get {
+        my ($self, $uri) = @_;
+        my $sec = uri_section($uri);
+        # see if caller saved result of imap_common_init
+        my $cached = $self->{mics_cached};
+        if ($cached) {
+                my $mic = $cached->{$sec};
+                return $mic if $mic && $mic->IsConnected;
+                delete $cached->{$sec};
+        }
+        my $mic_arg = $self->{net_arg}->{$sec} or
+                        die "BUG: no Mail::IMAPClient->new arg for $sec";
+        if (defined(my $cb_name = $mic_arg->{Authcallback})) {
+                if (ref($cb_name) ne 'CODE') {
+                        $mic_arg->{Authcallback} = $self->can($cb_name);
+                }
+        }
+        my $mic = mic_new($self, $mic_arg, $sec, $uri);
+        $cached //= {}; # invalid placeholder if no cache enabled
+        if ($mic && $mic->IsConnected) {
+                $cached->{$sec} = $mic;
+        } else {
+                warn 'IMAP LastError: ',$mic->LastError, "\n" if $mic;
+                warn "IMAP errno: $!\n" if $!;
+                undef;
+        }
+}
+
+sub imap_each {
+        my ($self, $url, $eml_cb, @args) = @_;
+        my $uri = ref($url) ? $url : PublicInbox::URIimap->new($url);
+        my $sec = uri_section($uri);
+        local $0 = $uri->mailbox." $sec";
+        my $mic = mic_get($self, $uri);
+        my $err;
+        if ($mic) {
+                local $self->{eml_each} = [ $eml_cb, @args ];
+                $err = _imap_fetch_all($self, $mic, $uri);
+        } else {
+                $err = "E: <$uri> not connected: $!";
+        }
+        die $err if $err && $self->{-can_die};
+        warn $err if $err;
+        $mic;
+}
+
+# may used cached auth info prepared by nn_for once
+sub nn_get {
+        my ($self, $uri) = @_;
+        my $sec = uri_section($uri);
+        # see if caller saved result of nntp_common_init
+        my $cached = $self->{nn_cached} // {};
+        my $nn;
+        $nn = delete($cached->{$sec}) and return $nn;
+        my $nn_arg = $self->{net_arg}->{$sec} or
+                        die "BUG: no Net::NNTP->new arg for $sec";
+        my $nntp_cfg = $self->{cfg_opt}->{$sec};
+        $nn = nn_new($self, $nn_arg, $nntp_cfg, $uri) or return;
+        if (my $postconn = $nntp_cfg->{-postconn}) {
+                for my $m_arg (@$postconn) {
+                        my ($method, @args) = @$m_arg;
+                        $nn->$method(@args) and next;
+                        die "E: <$uri> $method failed\n";
+                        return;
+                }
+        }
+        $nn;
+}
+
+sub _nntp_fetch_all ($$$) {
+        my ($self, $nn, $uri) = @_;
+        my ($group, $num_a, $num_b) = $uri->group;
+        my $sec = uri_section($uri);
+        my ($nr, $beg, $end) = $nn->group($group);
+        unless (defined($nr)) {
+                my $msg = ndump($nn->message);
+                return "E: GROUP $group <$sec> $msg";
+        }
+        (defined($num_a) && defined($num_b) && $num_a > $num_b) and
+                return "E: $uri: backwards range: $num_a > $num_b";
+        if (defined($num_a)) { # no article numbers in mail_sync.sqlite3
+                $uri = $uri->clone;
+                $uri->group($group);
+        }
+        # IMAPTracker is also used for tracking NNTP, UID == article number
+        # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
+        # expensive.  So we assume newsgroups don't change:
+        my ($itrk, $l_art) = itrk_last($self, $uri);
+
+        if (defined($l_art) && !defined($num_a)) {
+                return if $l_art >= $end; # nothing to do
+                $beg = $l_art + 1;
+        }
+        # allow users to specify articles to refetch
+        # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt
+        # nntp://example.com/inbox.foo/$num_a-$num_b
+        $beg = $num_a if defined($num_a) && $num_a > $beg && $num_a <= $end;
+        $end = $num_b if defined($num_b) && $num_b >= $beg && $num_b < $end;
+        $end = $beg if defined($num_a) && !defined($num_b);
+        my ($err, $last_art, $kw); # kw stays undef, no keywords in NNTP
+        warn "# $uri fetching ARTICLE $beg..$end\n" if !$self->{quiet};
+        my $n = $self->{max_batch};
+        for my $art ($beg..$end) {
+                last if $self->{quit};
+                local $0 = "#$art $group $sec";
+                if (--$n < 0) {
+                        run_commit_cb($self);
+                        $itrk->update_last(0, $last_art) if $itrk;
+                        $n = $self->{max_batch};
+                }
+                my $raw = $nn->article($art);
+                unless (defined($raw)) {
+                        my $msg = ndump($nn->message);
+                        if ($nn->code == 421) { # pseudo response from Net::Cmd
+                                $err = "E: $msg";
+                                last;
+                        } else { # probably just a deleted message (spam)
+                                warn "W: $msg";
+                                next;
+                        }
+                }
+                $raw = join('', @$raw);
+                $raw =~ s/\r\n/\n/sg;
+                my ($eml_cb, @args) = @{$self->{eml_each}};
+                $eml_cb->($uri, $art, $kw, PublicInbox::Eml->new(\$raw), @args);
+                $last_art = $art;
+        }
+        run_commit_cb($self);
+        $itrk->update_last(0, $last_art) if $itrk;
+        $err;
+}
+
+sub nntp_each {
+        my ($self, $url, $eml_cb, @args) = @_;
+        my $uri = ref($url) ? $url : PublicInbox::URInntps->new($url);
+        my $sec = uri_section($uri);
+        local $0 = $uri->group ." $sec";
+        my $nn = nn_get($self, $uri);
+        return if $self->{quit};
+        my $err;
+        if ($nn) {
+                local $self->{eml_each} = [ $eml_cb, @args ];
+                $err = _nntp_fetch_all($self, $nn, $uri);
+        } else {
+                $err = "E: <$uri> not connected: $!";
+        }
+        die $err if $err && $self->{-can_die};
+        warn $err if $err;
+        $nn;
+}
+
+sub new { bless {}, shift };
+
+# updates $uri with UIDVALIDITY
+sub mic_for_folder {
+        my ($self, $uri) = @_;
+        my $mic = $self->mic_get($uri) or die "E: not connected: $@";
+        my $m = $self->isa('PublicInbox::NetWriter') ? 'select' : 'examine';
+        $mic->$m($uri->mailbox) or return;
+        my $uidval;
+        for ($mic->Results) {
+                /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ or next;
+                $uidval = $1;
+                last;
+        }
+        $uidval //= $mic->uidvalidity($uri->mailbox) or
+                die "E: failed to get uidvalidity from <$uri>: $@";
+        $uri->uidvalidity($uidval);
+        $mic;
+}
+
+
+1;
diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm
new file mode 100644
index 00000000..7917ef89
--- /dev/null
+++ b/lib/PublicInbox/NetWriter.pm
@@ -0,0 +1,67 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common writer code for IMAP (and later, JMAP)
+package PublicInbox::NetWriter;
+use v5.12;
+use parent qw(PublicInbox::NetReader);
+use PublicInbox::Smsg;
+use PublicInbox::MsgTime qw(msg_timestamp);
+
+my %IMAPkw2flags;
+@IMAPkw2flags{values %PublicInbox::NetReader::IMAPflags2kw} =
+                                keys %PublicInbox::NetReader::IMAPflags2kw;
+
+sub kw2flags ($) { join(' ', map { $IMAPkw2flags{$_} } @{$_[0]}) }
+
+sub imap_append {
+        my ($mic, $folder, $bref, $smsg, $eml) = @_;
+        $bref //= \($eml->as_string);
+        $smsg //= bless {}, 'PublicInbox::Smsg';
+        bless($smsg, 'PublicInbox::Smsg') if ref($smsg) eq 'HASH';
+        $smsg->{ts} //= msg_timestamp($eml // PublicInbox::Eml->new($$bref));
+        my $f = kw2flags($smsg->{kw});
+        $mic->append_string($folder, $$bref, $f, $smsg->internaldate) or
+                die "APPEND $folder: $@";
+}
+
+sub folder_select { 'select' } # for PublicInbox::NetReader
+
+sub imap_delete_all {
+        my ($self, $uri) = @_;
+        my $mic = $self->mic_for_folder($uri) or return;
+        my $sec = $self->can('uri_section')->($uri);
+        local $0 = $uri->mailbox." $sec";
+        if ($mic->delete_message('1:*')) {
+                $mic->expunge;
+        }
+}
+
+sub imap_delete_1 {
+        my ($self, $uri, $uid, $delete_mic) = @_;
+        $$delete_mic //= $self->mic_for_folder($uri) or return;
+        $$delete_mic->delete_message($uid);
+}
+
+sub imap_add_kw {
+        my ($self, $mic, $uid, $kw) = @_;
+        $mic->store($uid, '+FLAGS.SILENT', '('.kw2flags($kw).')');
+        $mic; # caller must ->expunge
+}
+
+sub imap_set_kw {
+        my ($self, $mic, $uid, $kw) = @_;
+        $mic->store($uid, 'FLAGS.SILENT', '('.kw2flags($kw).')');
+        $mic; # caller must ->expunge
+}
+
+sub can_store_flags {
+        my ($self, $mic) = @_;
+        for ($mic->Results) {
+                /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and
+                        return $self->can('perm_fl_ok')->($1);
+        }
+        undef;
+}
+
+1;
diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm
index 6bed0103..d13731ae 100644
--- a/lib/PublicInbox/NewsWWW.pm
+++ b/lib/PublicInbox/NewsWWW.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Plack app redirector for mapping /$NEWSGROUP requests to
@@ -13,9 +13,8 @@ use PublicInbox::MID qw(mid_escape);
 use PublicInbox::Hval qw(prurl);
 
 sub new {
-        my ($class, $pi_config) = @_;
-        $pi_config ||= PublicInbox::Config->new;
-        bless { pi_config => $pi_config }, $class;
+        my ($class, $pi_cfg) = @_;
+        bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
 }
 
 sub redirect ($$) {
@@ -46,9 +45,11 @@ sub call {
         # some links may have the article number in them:
         # /inbox.foo.bar/123456
         my (undef, @parts) = split(m!/!, $env->{PATH_INFO});
+        @parts or return
+                [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
         my ($ng, $article) = @parts;
-        my $pi_config = $self->{pi_config};
-        if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
+        my $pi_cfg = $self->{pi_cfg};
+        if (my $ibx = $pi_cfg->lookup_newsgroup($ng)) {
                 my $url = prurl($env, $ibx->{url});
                 my $code = 301;
                 if (defined $article && $article =~ /\A[0-9]+\z/) {
@@ -63,7 +64,6 @@ sub call {
                 return redirect($code, $url);
         }
 
-        my $res;
         my @try = (join('/', @parts));
 
         # trailing slash is in the rest of our WWW, so maybe some users
@@ -72,13 +72,31 @@ sub call {
                 pop @parts;
                 push @try, join('/', @parts);
         }
-
-        foreach my $mid (@try) {
-                my $arg = [ $mid ];
-                $pi_config->each_inbox(\&try_inbox, $arg);
-                defined($res = $arg->[1]) and last;
+        my $ALL = $pi_cfg->ALL;
+        if (my $over = $ALL ? $ALL->over : undef) {
+                my $by_eidx_key = $pi_cfg->{-by_eidx_key};
+                for my $mid (@try) {
+                        my ($id, $prev);
+                        while (my $x = $over->next_by_mid($mid, \$id, \$prev)) {
+                                my $xr3 = $over->get_xref3($x->{num});
+                                for (@$xr3) {
+                                        s/:[0-9]+:$x->{blob}\z// or next;
+                                        my $ibx = $by_eidx_key->{$_} // next;
+                                        my $url = $ALL->base_url($env) //
+                                                        $ibx->base_url // next;
+                                        $url .= mid_escape($mid) . '/';
+                                        return redirect(302, $url);
+                                }
+                        }
+                }
+        } else { # slow path, scan every inbox
+                for my $mid (@try) {
+                        my $arg = [ $mid ]; # [1] => result
+                        $pi_cfg->each_inbox(\&try_inbox, $arg);
+                        return $arg->[1] if $arg->[1];
+                }
         }
-        $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
+        [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
 }
 
 1;
diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm
new file mode 100644
index 00000000..4301edff
--- /dev/null
+++ b/lib/PublicInbox/OnDestroy.pm
@@ -0,0 +1,31 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::OnDestroy;
+use v5.12;
+use parent qw(Exporter);
+use autodie qw(fork);
+our @EXPORT = qw(on_destroy);
+our $fork_gen = 0;
+
+# either parent or child is expected to exit or exec shortly after this:
+sub fork_tmp () {
+        my $pid = fork;
+        ++$fork_gen if $pid == 0;
+        $pid;
+}
+
+# all children
+sub all (@) { bless [ undef, @_ ], __PACKAGE__ }
+
+# same process
+sub on_destroy (@) { bless [ $fork_gen, @_ ], __PACKAGE__ }
+
+sub cancel { @{$_[0]} = () }
+
+sub DESTROY {
+        my ($fgen, $cb, @args) = @{$_[0]};
+        $cb->(@args) if ($cb && ($fgen // $fork_gen) == $fork_gen);
+}
+
+1;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 08112386..3b7d49f5 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
@@ -7,20 +7,22 @@
 package PublicInbox::Over;
 use strict;
 use v5.10.1;
-use DBI;
+use DBI qw(:sql_types); # SQL_BLOB
 use DBD::SQLite;
 use PublicInbox::Smsg;
 use Compress::Zlib qw(uncompress);
 use constant DEFAULT_LIMIT => 1000;
+use List::Util (); # for max
 
 sub dbh_new {
         my ($self, $rw) = @_;
         my $f = delete $self->{filename};
-        if (!-f $f) { # SQLite defaults mode to 0644, we want 0666
+        if (!-s $f) { # SQLite defaults mode to 0644, we want 0666
                 if ($rw) {
-                        require PublicInbox::Spawn;
+                        require PublicInbox::Syscall;
+                        my ($dir) = ($f =~ m!(.+)/[^/]+\z!);
+                        PublicInbox::Syscall::nodatacow_dir($dir);
                         open my $fh, '+>>', $f or die "failed to open $f: $!";
-                        PublicInbox::Spawn::nodatacow_fd(fileno($fh));
                 } else {
                         $self->{filename} = $f; # die on stat() below:
                 }
@@ -80,7 +82,13 @@ sub dbh_close {
         }
 }
 
-sub dbh ($) { $_[0]->{dbh} //= $_[0]->dbh_new } # dbh_new may be subclassed
+sub dbh ($) {
+        my ($self) = @_;
+        $self->{dbh} // do {
+                my $dbh = $self->dbh_new; # dbh_new may be subclassed
+                $self->{dbh} = $dbh;
+        }
+}
 
 sub load_from_row ($;$) {
         my ($smsg, $cull) = @_;
@@ -106,8 +114,8 @@ sub do_get {
 }
 
 sub query_xover {
-        my ($self, $beg, $end) = @_;
-        do_get($self, <<'', {}, $beg, $end);
+        my ($self, $beg, $end, $opt) = @_;
+        do_get($self, <<'', $opt, $beg, $end);
 SELECT num,ts,ds,ddd FROM over WHERE num >= ? AND num <= ?
 ORDER BY num ASC
 
@@ -192,15 +200,22 @@ ORDER BY $sort_col DESC
                 # TODO separate strict and loose matches here once --reindex
                 # is fixed to preserve `tid' properly
                 push @$msgs, @$loose;
+
+                # we wanted to retrieve the latest loose messages; but preserve
+                # chronological ordering for threading /$INBOX/$MSGID/[tT]/
+                $sort_col eq 'ds' and
+                        @$msgs = sort { $a->{ds} <=> $b->{ds} } @$msgs;
         }
         ($nr, $msgs);
 }
 
 # strict `tid' matches, only, for thread-expanded mbox.gz search results
-# and future CLI interface
+# and lei
 # returns true if we have IDs, undef if not
 sub expand_thread {
         my ($self, $ctx) = @_;
+        # previous maxuid for LeiSavedSearch is our min:
+        my $lss_min = $ctx->{min} // 0;
         my $dbh = dbh($self);
         do {
                 defined(my $num = $ctx->{ids}->[0]) or return;
@@ -213,7 +228,7 @@ SELECT num FROM over WHERE tid = ? AND num > ?
 ORDER BY num ASC LIMIT 1000
 
                         my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
-                                                        $ctx->{prev} // 0);
+                                List::Util::max($ctx->{prev} // 0, $lss_min));
                         if (scalar(@$xids)) {
                                 $ctx->{prev} = $xids->[-1];
                                 $ctx->{xids} = $xids;
@@ -252,21 +267,67 @@ SELECT ts,ds,ddd FROM over WHERE $s
 sub get_art {
         my ($self, $num) = @_;
         # caching $sth ourselves is faster than prepare_cached
-        my $sth = $self->{-get_art} //= dbh($self)->prepare(<<'');
+        my $sth = $self->{-get_art} // do {
+                my $sth = dbh($self)->prepare(<<'');
 SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1
 
+                $self->{-get_art} = $sth;
+        };
         $sth->execute($num);
         my $smsg = $sth->fetchrow_hashref;
         $smsg ? load_from_row($smsg) : undef;
 }
 
+sub get_xref3 {
+        my ($self, $num, $raw) = @_;
+        my $dbh = dbh($self);
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id,xnum ASC
+
+        $sth->execute($num);
+        my $rows = $sth->fetchall_arrayref;
+        return $rows if $raw;
+        my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ?
+
+        for my $r (@$rows) {
+                $eidx_key_sth->execute($r->[0]);
+                my $eidx_key = $eidx_key_sth->fetchrow_array;
+                $eidx_key //= "missing://ibx_id=$r->[0]";
+                $r = "$eidx_key:$r->[1]:".unpack('H*', $r->[2]);
+        }
+        $rows;
+}
+
+sub mid2tid {
+        my ($self, $mid) = @_;
+        my $dbh = dbh($self);
+
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT id FROM msgid WHERE mid = ? LIMIT 1
+
+        $sth->execute($mid);
+        my $id = $sth->fetchrow_array or return;
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT num FROM id2num WHERE id = ? AND num > ?
+ORDER BY num ASC LIMIT 1
+
+        $sth->execute($id, 0);
+        my $num = $sth->fetchrow_array or return;
+        $sth = $dbh->prepare(<<'');
+SELECT tid FROM over WHERE num = ? LIMIT 1
+
+        $sth->execute($num);
+        $sth->fetchrow_array;
+}
+
 sub next_by_mid {
         my ($self, $mid, $id, $prev) = @_;
         my $dbh = dbh($self);
 
         unless (defined $$id) {
                 my $sth = $dbh->prepare_cached(<<'', undef, 1);
-        SELECT id FROM msgid WHERE mid = ? LIMIT 1
+SELECT id FROM msgid WHERE mid = ? LIMIT 1
 
                 $sth->execute($mid);
                 $$id = $sth->fetchrow_array;
@@ -328,4 +389,37 @@ sub check_inodes {
         }
 }
 
+sub oidbin_exists {
+        my ($self, $oidbin) = @_;
+        if (wantarray) {
+                my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
+SELECT docid FROM xref3 WHERE oidbin = ? ORDER BY docid ASC
+
+                $sth->bind_param(1, $oidbin, SQL_BLOB);
+                $sth->execute;
+                my $tmp = $sth->fetchall_arrayref;
+                map { $_->[0] } @$tmp;
+        } else {
+                my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE oidbin = ?
+
+                $sth->bind_param(1, $oidbin, SQL_BLOB);
+                $sth->execute;
+                $sth->fetchrow_array;
+        }
+}
+
+sub blob_exists { oidbin_exists($_[0], pack('H*', $_[1])) }
+
+# used by NNTP.pm
+sub ids_after {
+        my ($self, $num) = @_;
+        my $ids = dbh($self)->selectcol_arrayref(<<'', undef, $$num);
+SELECT num FROM over WHERE num > ?
+ORDER BY num ASC LIMIT 1000
+
+        $$num = $ids->[-1] if @$ids;
+        $ids;
+}
+
 1;
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index db4b7738..4f8533f7 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
@@ -17,6 +17,7 @@ use PublicInbox::MID qw/id_compress mids_for_index references/;
 use PublicInbox::Smsg qw(subject_normalized);
 use Compress::Zlib qw(compress);
 use Carp qw(croak);
+use bytes (); # length
 
 sub dbh_new {
         my ($self) = @_;
@@ -79,6 +80,11 @@ SELECT $id_col FROM $tbl WHERE $val_col = ? LIMIT 1
         }
 }
 
+sub ibx_id {
+        my ($self, $eidx_key) = @_;
+        id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key);
+}
+
 sub sid {
         my ($self, $path) = @_;
         return unless defined $path && $path ne '';
@@ -153,7 +159,8 @@ SELECT $cols FROM over WHERE over.num = ? LIMIT 1
 
                 foreach (@$nums) {
                         $sth->execute($_->[0]);
-                        my $smsg = $sth->fetchrow_hashref;
+                        # $cb may delete rows and invalidate nums
+                        my $smsg = $sth->fetchrow_hashref // next;
                         $smsg = PublicInbox::Over::load_from_row($smsg);
                         $cb->($self, $smsg, @arg) or return;
                 }
@@ -170,8 +177,14 @@ sub _resolve_mid_to_tid {
                 $$tid = $cur_tid;
         } else { # rethreading, queue up dead ghosts
                 $$tid = next_tid($self);
-                my $num = $smsg->{num};
-                push(@{$self->{-ghosts_to_delete}}, $num) if $num < 0;
+                my $n = $smsg->{num};
+                if ($n > 0) {
+                        $self->{dbh}->prepare_cached(<<'')->execute($$tid, $n);
+UPDATE over SET tid = ? WHERE num = ?
+
+                } elsif ($n < 0) {
+                        push(@{$self->{-ghosts_to_delete}}, $n);
+                }
         }
         1;
 }
@@ -187,7 +200,7 @@ sub resolve_mid_to_tid {
         $tid // do { # create a new ghost
                 my $id = mid2id($self, $mid);
                 my $num = next_ghost_num($self);
-                $num < 0 or die "ghost num is non-negative: $num\n";
+                $num < 0 or croak "BUG: ghost num is non-negative: $num\n";
                 $tid = next_tid($self);
                 my $dbh = $self->{dbh};
                 $dbh->prepare_cached(<<'')->execute($num, $tid);
@@ -232,57 +245,49 @@ sub link_refs {
         $tid;
 }
 
-sub parse_references ($$$) {
-        my ($smsg, $hdr, $mids) = @_;
-        my $refs = references($hdr);
-        push(@$refs, @$mids) if scalar(@$mids) > 1;
-        return $refs if scalar(@$refs) == 0;
-
-        # prevent circular references here:
-        my %seen = ( $smsg->{mid} => 1 );
-        my @keep;
-        foreach my $ref (@$refs) {
-                if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
-                        warn "References: <$ref> too long, ignoring\n";
-                        next;
-                }
-                push(@keep, $ref) unless $seen{$ref}++;
-        }
-        $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
-        \@keep;
-}
-
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
+# normalize subjects somewhat, they used to be ASCII-only but now
+# we use \w for UTF-8 support.  We may still drop it entirely and
+# rely on Xapian for subject matches...
 sub subject_path ($) {
         my ($subj) = @_;
         $subj = subject_normalized($subj);
-        $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+        $subj =~ s![^\w\.~/\-]+!_!g;
         lc($subj);
 }
 
+sub ddd_for ($) {
+        my ($smsg) = @_;
+        my $dd = $smsg->to_doc_data;
+        utf8::encode($dd);
+        compress($dd);
+}
+
 sub add_overview {
         my ($self, $eml, $smsg) = @_;
-        $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
+        my $raw = $eml->body_raw;
+        $smsg->{lines} = $raw =~ tr!\n!\n!;
+        $smsg->{bytes} //= bytes::length $raw;
+        undef $raw;
         my $mids = mids_for_index($eml);
-        my $refs = parse_references($smsg, $eml, $mids);
+        my $refs = $smsg->parse_references($eml, $mids);
+        $mids->[0] //= do {
+                $smsg->{mid} //= '';
+                $eml->{-lei_fake_mid};
+        };
         my $subj = $smsg->{subject};
         my $xpath;
         if ($subj ne '') {
                 $xpath = subject_path($subj);
                 $xpath = id_compress($xpath);
         }
-        my $dd = $smsg->to_doc_data;
-        utf8::encode($dd);
-        $dd = compress($dd);
-        add_over($self, $smsg, $mids, $refs, $xpath, $dd);
+        add_over($self, $smsg, $mids, $refs, $xpath, ddd_for($smsg));
 }
 
 sub _add_over {
         my ($self, $smsg, $mid, $refs, $old_tid, $v) = @_;
         my $cur_tid = $smsg->{tid};
         my $n = $smsg->{num};
-        die "num must not be zero for $mid" if !$n;
+        croak "BUG: num must not be zero for $mid" if !$n;
         my $cur_valid = $cur_tid > $self->{min_tid};
 
         if ($n > 0) { # regular mail
@@ -294,7 +299,7 @@ sub _add_over {
                 }
         } elsif ($n < 0) { # ghost
                 $$old_tid //= $cur_valid ? $cur_tid : next_tid($self);
-                link_refs($self, $refs, $$old_tid);
+                $$old_tid = link_refs($self, $refs, $$old_tid);
                 delete_by_num($self, $n);
                 $$v++;
         }
@@ -379,13 +384,12 @@ sub create_tables {
 
         $dbh->do(<<'');
 CREATE TABLE IF NOT EXISTS over (
-        num INTEGER NOT NULL, /* NNTP article number == IMAP UID */
+        num INTEGER PRIMARY KEY NOT NULL, /* NNTP article number == IMAP UID */
         tid INTEGER NOT NULL, /* THREADID (IMAP REFERENCES threading, JMAP) */
         sid INTEGER, /* Subject ID (IMAP ORDEREDSUBJECT "threading") */
         ts INTEGER, /* IMAP INTERNALDATE (Received: header, git commit time) */
         ds INTEGER, /* RFC-2822 sent Date: header, git author time */
-        ddd VARBINARY, /* doc-data-deflated (->to_doc_data, ->load_from_data) */
-        UNIQUE (num)
+        ddd VARBINARY /* doc-data-deflated (->to_doc_data, ->load_from_data) */
 )
 
         $dbh->do('CREATE INDEX IF NOT EXISTS idx_tid ON over (tid)');
@@ -434,6 +438,7 @@ sub commit_lazy {
         my ($self) = @_;
         delete $self->{txn} or return;
         $self->{dbh}->commit;
+        eval { $self->{dbh}->do('PRAGMA optimize') };
 }
 
 sub begin_lazy {
@@ -453,16 +458,20 @@ sub rollback_lazy {
 
 sub dbh_close {
         my ($self) = @_;
-        die "in transaction" if $self->{txn};
+        Carp::confess('BUG: in transaction') if $self->{txn};
         $self->SUPER::dbh_close;
 }
 
 sub create {
         my ($self) = @_;
-        unless (-r $self->{filename}) {
+        my $fn = $self->{filename} // do {
+                croak('BUG: no {filename}') unless $self->{dbh};
+                return;
+        };
+        unless (-r $fn) {
                 require File::Path;
-                require File::Basename;
-                File::Path::mkpath(File::Basename::dirname($self->{filename}));
+                my ($dir) = ($fn =~ m!(.*?/)[^/]+\z!);
+                File::Path::mkpath($dir);
         }
         # create the DB:
         PublicInbox::Over::dbh($self);
@@ -504,12 +513,178 @@ EOF
                                 next;
                         }
                         $pr->(<<EOM) if $pr;
-I: ghost $r->{num} <$mid> THREADID=$r->{tid} culled
+# ghost $r->{num} <$mid> THREADID=$r->{tid} culled
 EOM
                 }
                 delete_by_num($self, $r->{num});
         }
-        $pr->("I: rethread culled $total ghosts\n") if $pr && $total;
+        $pr->("# rethread culled $total ghosts\n") if $pr && $total;
+}
+
+# used for cross-inbox search
+sub eidx_prep ($) {
+        my ($self) = @_;
+        $self->{-eidx_prep} // do {
+                my $dbh = $self->dbh;
+                $dbh->do(<<'');
+INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid')
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS inboxes (
+        ibx_id INTEGER PRIMARY KEY AUTOINCREMENT,
+        eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */
+        UNIQUE (eidx_key)
+)
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS xref3 (
+        docid INTEGER NOT NULL, /* <=> over.num */
+        ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */
+        xnum INTEGER NOT NULL, /* NNTP article number in ibx */
+        oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */
+        UNIQUE (docid, ibx_id, xnum, oidbin)
+)
+
+        $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)');
+
+        # performance critical, this is not UNIQUE since we may need to
+        # tolerate some old bugs from indexing mirrors.  n.b. we used
+        # to index oidbin here, but leaving it out speeds up reindexing
+        # and "XHDR Xref <$MSGID>" isn't any slower w/o oidbin
+        $dbh->do('CREATE INDEX IF NOT EXISTS idx_reindex ON '.
+                'xref3 (xnum,ibx_id)');
+
+        $dbh->do('CREATE INDEX IF NOT EXISTS idx_oidbin ON xref3 (oidbin)');
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS eidx_meta (
+        key VARCHAR(255) PRIMARY KEY,
+        val VARCHAR(255) NOT NULL
+)
+
+                # A queue of current docids which need reindexing.
+                # eidxq persists across aborted -extindex invocations
+                # Currently used for "-extindex --reindex" for Xapian
+                # data, but may be used in more places down the line.
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS eidxq (docid INTEGER PRIMARY KEY NOT NULL)
+
+                $self->{-eidx_prep} = 1;
+        };
+}
+
+sub eidx_meta { # requires transaction
+        my ($self, $key, $val) = @_;
+
+        my $sql = 'SELECT val FROM eidx_meta WHERE key = ? LIMIT 1';
+        my $dbh = $self->{dbh};
+        defined($val) or return $dbh->selectrow_array($sql, undef, $key);
+
+        my $prev = $dbh->selectrow_array($sql, undef, $key);
+        if (defined $prev) {
+                $sql = 'UPDATE eidx_meta SET val = ? WHERE key = ?';
+                $dbh->do($sql, undef, $val, $key);
+        } else {
+                $sql = 'INSERT INTO eidx_meta (key,val) VALUES (?,?)';
+                $dbh->do($sql, undef, $key, $val);
+        }
+        $prev;
+}
+
+sub eidx_max {
+        my ($self) = @_;
+        get_counter($self->{dbh}, 'eidx_docid');
+}
+
+sub add_xref3 {
+        my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_;
+        begin_lazy($self);
+        my $ibx_id = ibx_id($self, $eidx_key);
+        my $oidbin = pack('H*', $oidhex);
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?)
+
+        $sth->bind_param(1, $docid);
+        $sth->bind_param(2, $ibx_id);
+        $sth->bind_param(3, $xnum);
+        $sth->bind_param(4, $oidbin, SQL_BLOB);
+        $sth->execute;
+}
+
+# for when an xref3 goes missing, this does NOT update {ts}
+sub update_blob {
+        my ($self, $smsg, $oidhex) = @_;
+        my $sth = $self->{dbh}->prepare(<<'');
+UPDATE over SET ddd = ? WHERE num = ?
+
+        $smsg->{blob} = $oidhex;
+        $sth->bind_param(1, ddd_for($smsg), SQL_BLOB);
+        $sth->bind_param(2, $smsg->{num});
+        $sth->execute;
+}
+
+sub merge_xref3 { # used for "-extindex --dedupe"
+        my ($self, $keep_docid, $drop_docid, $oidbin) = @_;
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+UPDATE OR IGNORE xref3 SET docid = ? WHERE docid = ? AND oidbin = ?
+
+        $sth->bind_param(1, $keep_docid);
+        $sth->bind_param(2, $drop_docid);
+        $sth->bind_param(3, $oidbin, SQL_BLOB);
+        $sth->execute;
+
+        # drop anything that conflicted
+        $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND oidbin = ?
+
+        $sth->bind_param(1, $drop_docid);
+        $sth->bind_param(2, $oidbin, SQL_BLOB);
+        $sth->execute;
+}
+
+sub eidxq_add {
+        my ($self, $docid) = @_;
+        $self->dbh->prepare_cached(<<'')->execute($docid);
+INSERT OR IGNORE INTO eidxq (docid) VALUES (?)
+
+}
+
+sub eidxq_del {
+        my ($self, $docid) = @_;
+        $self->dbh->prepare_cached(<<'')->execute($docid);
+DELETE FROM eidxq WHERE docid = ?
+
+}
+
+# returns true if we're vivifying a message for lei/store that was
+# previously external-metadata only
+sub vivify_xvmd {
+        my ($self, $smsg) = @_;
+        my @docids = $self->blob_exists($smsg->{blob});
+        my @vivify_xvmd;
+        for my $id (@docids) {
+                if (my $cur = $self->get_art($id)) {
+                        # already indexed if bytes > 0
+                        return if $cur->{bytes} > 0;
+                        push @vivify_xvmd, $id;
+                } else {
+                        warn "W: $smsg->{blob} #$id gone (bug?)\n";
+                }
+        }
+        $smsg->{-vivify_xvmd} = \@vivify_xvmd;
+}
+
+sub fork_ok {
+        state $fork_ok = eval("v$DBD::SQLite::sqlite_version") ge v3.8.3;
+        return 1 if $fork_ok;
+        my ($opt) = @_;
+        my @j = split(/,/, $opt->{jobs} // '');
+        state $warned;
+        grep { $_ > 1 } @j and $warned //= warn(<<EOM);
+DBD::SQLite version is v$DBD::SQLite::sqlite_version, need >= v3.8.3 for --jobs > 1
+EOM
+        $opt->{jobs} = '1,1';
+        undef;
 }
 
 1;
diff --git a/lib/PublicInbox/POP3.pm b/lib/PublicInbox/POP3.pm
new file mode 100644
index 00000000..06772069
--- /dev/null
+++ b/lib/PublicInbox/POP3.pm
@@ -0,0 +1,428 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Each instance of this represents a POP3 client connected to
+# public-inbox-{netd,pop3d}.  Much of this was taken from IMAP.pm and NNTP.pm
+#
+# POP3 is one mailbox per-user, so the "USER" command is like the
+# format of -imapd and is mapped to $NEWSGROUP.$SLICE (large inboxes
+# are sliced into 50K mailboxes in both POP3 and IMAP to avoid overloading
+# clients)
+#
+# Unlike IMAP, the "$NEWSGROUP" mailbox (without $SLICE) is a rolling
+# window of the latest messages.  We can do this for POP3 since the
+# typical POP3 session is short-lived while long-lived IMAP sessions
+# would cause slices to grow on the server side without bounds.
+#
+# Like IMAP, POP3 also has per-session message sequence numbers (MSN),
+# which require mapping to UIDs.  The offset of an entry into our
+# per-client cache is: (MSN-1)
+#
+# fields:
+# - uuid - 16-byte (binary) UUID representation (before successful login)
+# - cache - one-dimentional arrayref of (UID, bytesize, oidhex)
+# - nr_dele - number of deleted messages
+# - expire - string of packed unsigned short offsets
+# - user_id - user-ID mapped to UUID (on successful login + lock)
+# - txn_max_uid - for storing max deleted UID persistently
+# - ibx - PublicInbox::Inbox object
+# - slice - unsigned integer slice number (0..Inf), -1 => latest
+# - salt - pre-auth for APOP
+# - uid_dele - maximum deleted from previous session at login (NNTP ARTICLE)
+# - uid_base - base UID for mailbox slice (0-based) (same as IMAP)
+package PublicInbox::POP3;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::GitAsyncCat;
+use PublicInbox::DS qw(now);
+use Errno qw(EAGAIN);
+use Digest::MD5 qw(md5);
+use PublicInbox::IMAP; # for UID slice stuff
+
+use constant {
+        LINE_MAX => 512, # XXX unsure
+        UID_SLICE => PublicInbox::IMAP::UID_SLICE,
+};
+
+# XXX FIXME: duplicated stuff from NNTP.pm and IMAP.pm
+
+sub out ($$;@) {
+        my ($self, $fmt, @args) = @_;
+        printf { $self->{pop3d}->{out} } $fmt."\n", @args;
+}
+
+sub do_greet {
+        my ($self) = @_;
+        my $s = $self->{salt} = sprintf('%x.%x', int(rand(0x7fffffff)), time);
+        $self->write("+OK POP3 server ready <$s\@public-inbox>\r\n");
+}
+
+sub new {
+        my ($cls, $sock, $pop3d) = @_;
+        (bless { pop3d => $pop3d }, $cls)->greet($sock)
+}
+
+# POP user is $UUID1@$NEWSGROUP[.$SLICE][?QUERY_ARGS]
+sub cmd_user ($$) {
+        my ($self, $mailbox) = @_;
+        $self->{salt} // return \"-ERR already authed\r\n";
+        $mailbox =~ s/\A([a-f0-9\-]+)\@//i or
+                return \"-ERR no UUID@ in mailbox name\r\n";
+        my $user = $1;
+        $user =~ tr/-//d; # most have dashes, some (dbus-uuidgen) don't
+        $user =~ m!\A[a-f0-9]{32}\z!i or return \"-ERR user has no UUID\r\n";
+
+        my %l;
+        if ($mailbox =~ s/\?(.*)\z//) { # query args
+                for (split(/&+/, $1)) {
+                        /\A(initial_limit|limit)=([0-9]+)\z/ and $l{$1} = $2;
+                }
+                $self->{limits} = \%l;
+        }
+        my $slice = $mailbox =~ s/\.([0-9]+)\z// ? $1 + 0 : undef;
+
+        my $ibx = $self->{pop3d}->{pi_cfg}->lookup_newsgroup($mailbox) //
+                return \"-ERR $mailbox does not exist\r\n";
+        my $uidmax = $self->{uidmax} = $ibx->mm(1)->num_highwater // 0;
+        if (defined $slice) {
+                my $max = int($uidmax / UID_SLICE);
+                my $tip = "$mailbox.$max";
+                return \"-ERR $mailbox.$slice does not exist ($tip does)\r\n"
+                        if $slice > $max;
+                $self->{slice} = $slice;
+        } else { # latest messages:
+                $self->{slice} = -1;
+        }
+        $self->{ibx} = $ibx;
+        $self->{uuid} = pack('H*', $user); # deleted by _login_ok
+        $slice //= '(latest)';
+        \"+OK $ibx->{newsgroup} slice=$slice selected\r\n";
+}
+
+sub _login_ok ($) {
+        my ($self) = @_;
+        $self->{pop3d}->lock_mailbox($self) or
+                return \"-ERR [IN-USE] unable to lock maildrop\r\n";
+
+        my $l = delete $self->{limits};
+        $l = defined($self->{uid_dele}) ? $l->{limit}
+                                        : ($l->{initial_limit} // $l->{limit});
+        my $uidmax = delete $self->{uidmax};
+        if ($self->{slice} >= 0) {
+                $self->{uid_base} = $self->{slice} * UID_SLICE;
+                if (defined $l) { # n.b: the last slice is not full:
+                        my $max = int($uidmax/UID_SLICE) == $self->{slice} ?
+                                        ($uidmax % UID_SLICE) : UID_SLICE;
+                        my $off = $max - $l;
+                        $self->{uid_base} += $off if $off > 0;
+                }
+        } else { # latest $l messages, or 1k if unspecified
+                my $base = $uidmax - ($l // 1000);
+                $self->{uid_base} = $base < 0 ? 0 : $base;
+        }
+        $self->{uid_max} = $self->{ibx}->over(1)->max;
+        \"+OK logged in\r\n";
+}
+
+sub cmd_apop {
+        my ($self, $mailbox, $hex) = @_;
+        my $res = cmd_user($self, $mailbox); # sets {uuid}
+        return $res if substr($$res, 0, 1) eq '-';
+        my $s = delete($self->{salt}) // die 'BUG: salt missing';
+        return _login_ok($self) if md5("<$s\@public-inbox>anonymous") eq
+                                pack('H*', $hex);
+        $self->{salt} = $s;
+        \"-ERR APOP password mismatch\r\n";
+}
+
+sub cmd_pass {
+        my ($self, $pass) = @_;
+        $self->{ibx} // return \"-ERR mailbox unspecified\r\n";
+        my $s = delete($self->{salt}) // return \"-ERR already authed\r\n";
+        return _login_ok($self) if $pass eq 'anonymous';
+        $self->{salt} = $s;
+        \"-ERR password is not `anonymous'\r\n";
+}
+
+sub cmd_stls {
+        my ($self) = @_;
+        ($self->{sock} // return)->can('stop_SSL') and
+                return \"-ERR TLS already enabled\r\n";
+        $self->{pop3d}->{ssl_ctx_opt} or
+                return \"-ERR can't start TLS negotiation\r\n";
+        $self->write(\"+OK begin TLS negotiation now\r\n");
+        PublicInbox::TLS::start($self->{sock}, $self->{pop3d});
+        $self->requeue if PublicInbox::DS::accept_tls_step($self);
+        undef;
+}
+
+sub need_txn ($) {
+        exists($_[0]->{salt}) ? \"-ERR not in TRANSACTION\r\n" : undef;
+}
+
+sub _stat_cache ($) {
+        my ($self) = @_;
+        my ($beg, $end) = (($self->{uid_dele} // -1) + 1, $self->{uid_max});
+        PublicInbox::IMAP::uid_clamp($self, \$beg, \$end);
+        my (@cache, $m);
+        my $sth = $self->{ibx}->over(1)->dbh->prepare_cached(<<'', undef, 1);
+SELECT num,ddd FROM over WHERE num >= ? AND num <= ?
+ORDER BY num ASC
+
+        $sth->execute($beg, $end);
+        my $tot = 0;
+        while (defined($m = $sth->fetchall_arrayref({}, 1000))) {
+                for my $x (@$m) {
+                        PublicInbox::Over::load_from_row($x);
+                        push(@cache, $x->{num}, $x->{bytes} + 0, $x->{blob});
+                        undef $x; # saves ~1.5M memory w/ 50k messages
+                        $tot += $cache[-2];
+                }
+        }
+        $self->{total_bytes} = $tot;
+        $self->{cache} = \@cache;
+}
+
+sub cmd_stat {
+        my ($self) = @_;
+        my $err; $err = need_txn($self) and return $err;
+        my $cache = $self->{cache} // _stat_cache($self);
+        my $nr = @$cache / 3 - ($self->{nr_dele} // 0);
+        "+OK $nr $self->{total_bytes}\r\n";
+}
+
+# for LIST and UIDL
+sub _list {
+        my ($desc, $idx, $self, $msn) = @_;
+        my $err; $err = need_txn($self) and return $err;
+        my $cache = $self->{cache} // _stat_cache($self);
+        if (defined $msn) {
+                my $base_off = ($msn - 1) * 3;
+                my $val = $cache->[$base_off + $idx] //
+                                return \"-ERR no such message\r\n";
+                "+OK $desc listing follows\r\n$msn $val\r\n.\r\n";
+        } else { # always +OK, even if no messages
+                my $res = "+OK $desc listing follows\r\n";
+                my $msn = 0;
+                for (my $i = 0; $i < scalar(@$cache); $i += 3) {
+                        ++$msn;
+                        defined($cache->[$i]) and
+                                $res .= "$msn $cache->[$i + $idx]\r\n";
+                }
+                $res .= ".\r\n";
+        }
+}
+
+sub cmd_list { _list('scan', 1, @_) }
+sub cmd_uidl { _list('unique-id', 2, @_) }
+
+sub mark_dele ($$) {
+        my ($self, $off) = @_;
+        my $base_off = $off * 3;
+        my $cache = $self->{cache};
+        my $uid = $cache->[$base_off] // return; # already deleted
+
+        my $old = $self->{txn_max_uid} //= $uid;
+        $self->{txn_max_uid} = $uid if $uid > $old;
+
+        $self->{total_bytes} -= $cache->[$base_off + 1];
+        $cache->[$base_off] = undef; # clobber UID
+        $cache->[$base_off + 1] = undef; # clobber bytes
+        $cache->[$base_off + 2] = undef; # clobber oidhex
+        ++$self->{nr_dele};
+}
+
+sub retr_cb { # called by git->cat_async via ibx_async_cat
+        my ($bref, $oid, $type, $size, $args) = @_;
+        my ($self, $off, $top_nr) = @$args;
+        my $hex = $self->{cache}->[$off * 3 + 2] //
+                die "BUG: no hex (oid=$oid)";
+        if (!defined($type)) {
+                warn "E: git aborted on $oid / $hex $self->{ibx}->{inboxdir}";
+                return $self->close;
+        } elsif ($type ne 'blob') {
+                # it's possible to have TOCTOU if an admin runs
+                # public-inbox-(edit|purge), just move onto the next message
+                warn "E: $hex missing in $self->{ibx}->{inboxdir}\n";
+                $self->write(\"-ERR no such message\r\n");
+                return $self->requeue;
+        } elsif ($hex ne $oid) {
+                $self->close;
+                die "BUG: $hex != $oid";
+        }
+        PublicInbox::IMAP::to_crlf_full($bref);
+        if (defined $top_nr) {
+                my ($hdr, $bdy) = split(/\r\n\r\n/, $$bref, 2);
+                $bref = \$hdr;
+                $hdr .= "\r\n\r\n";
+                my @tmp = split(/^/m, $bdy);
+                $hdr .= join('', splice(@tmp, 0, $top_nr));
+        } elsif (exists $self->{expire}) {
+                $self->{expire} .= pack('S', $off);
+        }
+        $$bref =~ s/^\./../gms;
+        $$bref .= substr($$bref, -2, 2) eq "\r\n" ? ".\r\n" : "\r\n.\r\n";
+        $self->msg_more("+OK message follows\r\n");
+        $self->write($bref);
+        $self->requeue;
+}
+
+sub cmd_retr {
+        my ($self, $msn, $top_nr) = @_;
+        return \"-ERR lines must be a non-negative number\r\n" if
+                        (defined($top_nr) && $top_nr !~ /\A[0-9]+\z/);
+        my $err; $err = need_txn($self) and return $err;
+        my $cache = $self->{cache} // _stat_cache($self);
+        my $off = $msn - 1;
+        my $hex = $cache->[$off * 3 + 2] // return \"-ERR no such message\r\n";
+        ${ibx_async_cat($self->{ibx}, $hex, \&retr_cb,
+                        [ $self, $off, $top_nr ])};
+}
+
+sub cmd_noop { $_[0]->write(\"+OK\r\n") }
+
+sub cmd_rset {
+        my ($self) = @_;
+        my $err; $err = need_txn($self) and return $err;
+        delete $self->{cache};
+        delete $self->{txn_max_uid};
+        \"+OK\r\n";
+}
+
+sub cmd_dele {
+        my ($self, $msn) = @_;
+        my $err; $err = need_txn($self) and return $err;
+        $self->{cache} // _stat_cache($self);
+        $msn =~ /\A[1-9][0-9]*\z/ or return \"-ERR no such message\r\n";
+        mark_dele($self, $msn - 1) ? \"+OK\r\n" : \"-ERR no such message\r\n";
+}
+
+# RFC 2449
+sub cmd_capa {
+        my ($self) = @_;
+        my $STLS = !$self->{ibx} && !$self->{sock}->can('stop_SSL') &&
+                        $self->{pop3d}->{ssl_ctx_opt} ? "\nSTLS\r" : '';
+        $self->{expire} = ''; # "EXPIRE 0" allows clients to avoid DELE commands
+        <<EOM;
++OK Capability list follows\r
+TOP\r
+USER\r
+PIPELINING\r
+UIDL\r
+EXPIRE 0\r
+RESP-CODES\r$STLS
+.\r
+EOM
+}
+
+sub close {
+        my ($self) = @_;
+        $self->{pop3d}->unlock_mailbox($self);
+        $self->SUPER::close;
+}
+
+# must be called inside a state_dbh transaction with flock held
+sub __cleanup_state {
+        my ($self, $txn_id) = @_;
+        my $user_id = $self->{user_id} // die 'BUG: no {user_id}';
+        $self->{pop3d}->{-state_dbh}->prepare_cached(<<'')->execute($txn_id);
+DELETE FROM deletes WHERE txn_id = ? AND uid_dele = -1
+
+        my $sth = $self->{pop3d}->{-state_dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM deletes WHERE user_id = ?
+
+        $sth->execute($user_id);
+        my $nr = $sth->fetchrow_array;
+        if ($nr == 0) {
+                $sth = $self->{pop3d}->{-state_dbh}->prepare_cached(<<'');
+DELETE FROM users WHERE user_id = ?
+
+                $sth->execute($user_id);
+        }
+        $nr;
+}
+
+sub cmd_quit {
+        my ($self) = @_;
+        if (defined(my $txn_id = $self->{txn_id})) {
+                my $user_id = $self->{user_id} // die 'BUG: no {user_id}';
+                if (my $exp = delete $self->{expire}) {
+                        mark_dele($self, $_) for unpack('S*', $exp);
+                }
+                my $keep = 1;
+                my $dbh = $self->{pop3d}->{-state_dbh};
+                my $lk = $self->{pop3d}->lock_for_scope;
+                $dbh->begin_work;
+
+                if (defined(my $max = $self->{txn_max_uid})) {
+                        $dbh->prepare_cached(<<'')->execute($max, $txn_id, $max)
+UPDATE deletes SET uid_dele = ? WHERE txn_id = ? AND uid_dele < ?
+
+                } else {
+                        $keep = $self->__cleanup_state($txn_id);
+                }
+                $dbh->prepare_cached(<<'')->execute(time, $user_id) if $keep;
+UPDATE users SET last_seen = ? WHERE user_id = ?
+
+                $dbh->commit;
+                # we MUST do txn_id F_UNLCK here inside ->lock_for_scope:
+                $self->{did_quit} = 1;
+                $self->{pop3d}->unlock_mailbox($self);
+        }
+        $self->write(\"+OK public-inbox POP3 server signing off\r\n");
+        $self->shutdn;
+        undef;
+}
+
+# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
+sub process_line ($$) {
+        my ($self, $l) = @_;
+        my ($req, @args) = split(/[ \t]+/, $l);
+        return 1 unless defined($req); # skip blank line
+        $req = $self->can('cmd_'.lc($req));
+        my $res = $req ? eval { $req->($self, @args) } :
+                \"-ERR command not recognized\r\n";
+        my $err = $@;
+        if ($err && $self->{sock}) {
+                $l =~ s/\r?\n//s;
+                warn("error from: $l ($err)\n");
+                $res = \"-ERR program fault - command not performed\r\n";
+        }
+        defined($res) ? $self->write($res) : 0;
+}
+
+# callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
+sub event_step {
+        my ($self) = @_;
+        local $SIG{__WARN__} = $self->{pop3d}->{warn_cb};
+        return unless $self->flush_write && $self->{sock} && !$self->{long_cb};
+
+        # only read more requests if we've drained the write buffer,
+        # otherwise we can be buffering infinitely w/o backpressure
+        my $rbuf = $self->{rbuf} // \(my $x = '');
+        my $line = index($$rbuf, "\n");
+        while ($line < 0) {
+                return $self->close if length($$rbuf) >= LINE_MAX;
+                $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return;
+                $line = index($$rbuf, "\n");
+        }
+        $line = substr($$rbuf, 0, $line + 1, '');
+        $line =~ s/\r?\n\z//s;
+        return $self->close if $line =~ /[[:cntrl:]]/s;
+        my $t0 = now();
+        my $fd = fileno($self->{sock}); # may become invalid after process_line
+        my $r = eval { process_line($self, $line) };
+        my $pending = $self->{wbuf} ? ' pending' : '';
+        out($self, "[$fd] %s - %0.6f$pending - $r", $line, now() - $t0);
+        return $self->close if $r < 0;
+        $self->rbuf_idle($rbuf);
+
+        # maybe there's more pipelined data, or we'll have
+        # to register it for socket-readiness notifications
+        $self->requeue unless $pending;
+}
+
+no warnings 'once';
+*cmd_top = \&cmd_retr;
+
+1;
diff --git a/lib/PublicInbox/POP3D.pm b/lib/PublicInbox/POP3D.pm
new file mode 100644
index 00000000..bd440434
--- /dev/null
+++ b/lib/PublicInbox/POP3D.pm
@@ -0,0 +1,277 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# represents an POP3D
+package PublicInbox::POP3D;
+use v5.12;
+use parent qw(PublicInbox::Lock);
+use DBI qw(:sql_types); # SQL_BLOB
+use Carp ();
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use PublicInbox::Config;
+use PublicInbox::POP3;
+use PublicInbox::Syscall;
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use Fcntl qw(F_SETLK F_UNLCK F_WRLCK SEEK_SET);
+my ($FLOCK_TMPL, @FLOCK_ORDER);
+# are all BSDs the same "struct flock"? tested Free+Net+Open...
+if ($^O =~ /\A(?:linux|dragonfly)\z/ || $^O =~ /bsd/) {
+        require Config;
+        my $off_t;
+        my @LE_pad = ('', '');
+        my $sz = $Config::Config{lseeksize};
+        if ($sz == 8) {
+                if (eval('length(pack("q", 1)) == 8')) {
+                        $off_t = 'q';
+                } elsif ($Config::Config{byteorder} == 1234) { # OpenBSD i386
+                        $off_t = 'l';
+                        @LE_pad = ('@8', '@16');
+                } else { # I have no 32-bit BE machine to test on...
+                        warn <<EOM;
+Perl built with 64-bit file support but not 64-bit int (pack("q")) support.
+byteorder=$Config::Config{byteorder}
+EOM
+                }
+        } elsif ($sz == 4) {
+                $off_t = 'l';
+        } else {
+                warn "sizeof(off_t)=$sz requires File::FcntlLock\n"
+        }
+        if (defined($off_t)) {
+                if ($^O eq 'linux') {
+                        $FLOCK_TMPL = 'ss@8'.$off_t.$LE_pad[0].$off_t.'@32';
+                        @FLOCK_ORDER = qw(l_type l_whence l_start l_len);
+                } else { # *bsd including dragonfly
+                        $FLOCK_TMPL = $off_t.$LE_pad[0].$off_t.$LE_pad[1].
+                                        'lss@256';
+                        @FLOCK_ORDER = qw(l_start l_len l_pid l_type l_whence);
+                }
+        }
+}
+@FLOCK_ORDER or eval { require File::FcntlLock } or
+        die "File::FcntlLock required for POP3 on $^O: $@\n";
+
+sub new {
+        my ($cls) = @_;
+        bless {
+                err => \*STDERR,
+                out => \*STDOUT,
+                # pi_cfg => PublicInbox::Config
+                # lock_path => ...
+                # interprocess lock is the $pop3state/txn.locks file
+                # txn_locks => {}, # intraworker locks
+                # ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... }
+        }, $cls;
+}
+
+sub refresh_groups { # PublicInbox::Daemon callback
+        my ($self, $sig) = @_;
+        # TODO share pi_cfg with nntpd/imapd inside -netd
+        my $new = PublicInbox::Config->new;
+        my $d = $new->{'publicinbox.pop3state'} //
+                die "publicinbox.pop3state undefined ($new->{-f})\n";
+        -d $d or do {
+                require File::Path;
+                File::Path::make_path($d, { mode => 0700 });
+                PublicInbox::Syscall::nodatacow_dir($d);
+        };
+        $self->{lock_path} //= "$d/db.lock";
+        if (my $old = $self->{pi_cfg}) {
+                my $s = 'publicinbox.pop3state';
+                $new->{$s} //= $old->{$s};
+                return warn <<EOM if $new->{$s} ne $old->{$s};
+$s changed: `$old->{$s}' => `$new->{$s}', config reload ignored
+EOM
+        }
+        $self->{pi_cfg} = $new;
+}
+
+# persistent tables
+sub create_state_tables ($$) {
+        my ($self, $dbh) = @_;
+
+        $dbh->do(<<''); # map publicinbox.<name>.newsgroup to integers
+CREATE TABLE IF NOT EXISTS newsgroups (
+        newsgroup_id INTEGER PRIMARY KEY NOT NULL,
+        newsgroup VARBINARY NOT NULL,
+        UNIQUE (newsgroup) )
+
+        # the $NEWSGROUP_NAME.$SLICE_INDEX is part of the POP3 username;
+        # POP3 has no concept of folders/mailboxes like IMAP/JMAP
+        $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS mailboxes (
+        mailbox_id INTEGER PRIMARY KEY NOT NULL,
+        newsgroup_id INTEGER NOT NULL REFERENCES newsgroups,
+        slice INTEGER NOT NULL, /* -1 for most recent slice */
+        UNIQUE (newsgroup_id, slice) )
+
+        $dbh->do(<<''); # actual users are differentiated by their UUID
+CREATE TABLE IF NOT EXISTS users (
+        user_id INTEGER PRIMARY KEY NOT NULL,
+        uuid VARBINARY NOT NULL,
+        last_seen INTEGER NOT NULL, /* to expire idle accounts */
+        UNIQUE (uuid) )
+
+        # we only track the highest-numbered deleted message per-UUID@mailbox
+        $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS deletes (
+        txn_id INTEGER PRIMARY KEY NOT NULL, /* -1 == txn lock offset */
+        user_id INTEGER NOT NULL REFERENCES users,
+        mailbox_id INTEGER NOT NULL REFERENCES mailboxes,
+        uid_dele INTEGER NOT NULL DEFAULT -1, /* IMAP UID, NNTP article */
+        UNIQUE(user_id, mailbox_id) )
+
+}
+
+sub state_dbh_new {
+        my ($self) = @_;
+        my $f = "$self->{pi_cfg}->{'publicinbox.pop3state'}/db.sqlite3";
+        my $creat = !-s $f;
+        if ($creat) {
+                open my $fh, '+>>', $f or Carp::croak "open($f): $!";
+                PublicInbox::Syscall::nodatacow_fh($fh);
+        }
+
+        my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
+                AutoCommit => 1,
+                RaiseError => 1,
+                PrintError => 0,
+                sqlite_use_immediate_transaction => 1,
+                sqlite_see_if_its_a_number => 1,
+        });
+        $dbh->do('PRAGMA journal_mode = WAL') if $creat;
+        $dbh->do('PRAGMA foreign_keys = ON'); # don't forget this
+
+        # ensure the interprocess fcntl lock file exists
+        $f = "$self->{pi_cfg}->{'publicinbox.pop3state'}/txn.locks";
+        open my $fh, '+>>', $f or Carp::croak("open($f): $!");
+        $self->{txn_fh} = $fh;
+
+        create_state_tables($self, $dbh);
+        $dbh;
+}
+
+sub _setlk ($%) {
+        my ($self, %lk) = @_;
+        $lk{l_pid} = 0; # needed for *BSD
+        $lk{l_whence} = SEEK_SET;
+        if (@FLOCK_ORDER) {
+                fcntl($self->{txn_fh}, F_SETLK,
+                        pack($FLOCK_TMPL, @lk{@FLOCK_ORDER}));
+        } else {
+                my $fs = File::FcntlLock->new(%lk);
+                $fs->lock($self->{txn_fh}, F_SETLK);
+        }
+}
+
+sub lock_mailbox {
+        my ($self, $pop3) = @_; # pop3 - PublicInbox::POP3 client object
+        my $lk = $self->lock_for_scope; # lock the SQLite DB, only
+        my $dbh = $self->{-state_dbh} //= state_dbh_new($self);
+        my ($user_id, $ngid, $mbid, $txn_id);
+        my $uuid = delete $pop3->{uuid};
+        $dbh->begin_work;
+        my $creat = 0;
+
+        # 1. make sure the user exists, update `last_seen'
+        my $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO users (uuid, last_seen) VALUES (?,?)
+
+        $sth->bind_param(1, $uuid, SQL_BLOB);
+        $sth->bind_param(2, time);
+        if ($sth->execute == 0) { # existing user
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT user_id FROM users WHERE uuid = ?
+
+                $sth->bind_param(1, $uuid, SQL_BLOB);
+                $sth->execute;
+                $user_id = $sth->fetchrow_array //
+                        die 'BUG: user '.unpack('H*', $uuid).' not found';
+                $sth = $dbh->prepare_cached(<<'');
+UPDATE users SET last_seen = ? WHERE user_id = ?
+
+                $sth->execute(time, $user_id);
+        } else { # new user
+                $user_id = $dbh->last_insert_id(undef, undef,
+                                                'users', 'user_id')
+        }
+
+        # 2. make sure the newsgroup has an integer ID
+        $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO newsgroups (newsgroup) VALUES (?)
+
+        my $ng = $pop3->{ibx}->{newsgroup};
+        $sth->bind_param(1, $ng, SQL_BLOB);
+        if ($sth->execute == 0) {
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT newsgroup_id FROM newsgroups WHERE newsgroup = ?
+
+                $sth->bind_param(1, $ng, SQL_BLOB);
+                $sth->execute;
+                $ngid = $sth->fetchrow_array // die "BUG: `$ng' not found";
+        } else {
+                $ngid = $dbh->last_insert_id(undef, undef,
+                                                'newsgroups', 'newsgroup_id');
+        }
+
+        # 3. ensure the mailbox exists
+        $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO mailboxes (newsgroup_id, slice) VALUES (?,?)
+
+        if ($sth->execute($ngid, $pop3->{slice}) == 0) {
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT mailbox_id FROM mailboxes WHERE newsgroup_id = ? AND slice = ?
+
+                $sth->execute($ngid, $pop3->{slice});
+                $mbid = $sth->fetchrow_array //
+                        die "BUG: mailbox_id for $ng.$pop3->{slice} not found";
+        } else {
+                $mbid = $dbh->last_insert_id(undef, undef,
+                                                'mailboxes', 'mailbox_id');
+        }
+
+        # 4. ensure the (max) deletes row exists for locking
+        $sth = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO deletes (user_id,mailbox_id) VALUES (?,?)
+
+        if ($sth->execute($user_id, $mbid) == 0) { # fetching into existing
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT txn_id,uid_dele FROM deletes WHERE user_id = ? AND mailbox_id = ?
+
+                $sth->execute($user_id, $mbid);
+                ($txn_id, $pop3->{uid_dele}) = $sth->fetchrow_array;
+        } else { # new user/mailbox combo
+                $txn_id = $dbh->last_insert_id(undef, undef,
+                                                'deletes', 'txn_id');
+        }
+        $dbh->commit;
+
+        # see if it's locked by the same worker:
+        return if $self->{txn_locks}->{$txn_id};
+
+        # see if it's locked by another worker:
+        _setlk($self, l_type => F_WRLCK, l_start => $txn_id - 1, l_len => 1)
+                or return;
+
+        $pop3->{user_id} = $user_id;
+        $pop3->{txn_id} = $txn_id;
+        $self->{txn_locks}->{$txn_id} = 1;
+}
+
+sub unlock_mailbox {
+        my ($self, $pop3) = @_;
+        my $txn_id = delete($pop3->{txn_id}) // return;
+        if (!$pop3->{did_quit}) { # deal with QUIT-less disconnects
+                my $lk = $self->lock_for_scope;
+                $self->{-state_dbh}->begin_work;
+                $pop3->__cleanup_state($txn_id);
+                $self->{-state_dbh}->commit;
+        }
+        delete $self->{txn_locks}->{$txn_id}; # same worker
+
+        # other workers
+        _setlk($self, l_type => F_UNLCK, l_start => $txn_id - 1, l_len => 1)
+                or die "F_UNLCK: $!";
+}
+
+1;
diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm
new file mode 100644
index 00000000..1bcdd799
--- /dev/null
+++ b/lib/PublicInbox/PktOp.pm
@@ -0,0 +1,72 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# op dispatch socket, reads a message, runs a sub
+# There may be multiple producers, but (for now) only one consumer
+# Used for lei_xsearch and maybe other things
+# "command" => [ $sub, @fixed_operands ]
+package PublicInbox::PktOp;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use Errno qw(EAGAIN ECONNRESET);
+use PublicInbox::Syscall qw(EPOLLIN);
+use Socket qw(AF_UNIX SOCK_SEQPACKET);
+use PublicInbox::IPC qw(ipc_freeze ipc_thaw);
+use Scalar::Util qw(blessed);
+
+sub new {
+        my ($cls, $r) = @_;
+        my $self = bless { sock => $r }, $cls;
+        $r->blocking(0);
+        $self->SUPER::new($r, EPOLLIN);
+}
+
+# returns a blessed objects as the consumer and producer
+sub pair {
+        my ($cls) = @_;
+        my ($c, $p);
+        socketpair($c, $p, AF_UNIX, SOCK_SEQPACKET, 0) or die "socketpair: $!";
+        (new($cls, $c), bless { op_p => $p }, $cls);
+}
+
+sub pkt_do { # for the producer to trigger event_step in consumer
+        my ($self, $cmd, @args) = @_;
+        send($self->{op_p}, @args ? "$cmd\0".ipc_freeze(\@args) : $cmd, 0)
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $c = $self->{sock};
+        my $n = recv($c, my $msg, 4096, 0);
+        unless (defined $n) {
+                return if $! == EAGAIN;
+                die "recv: $!" if $! != ECONNRESET; # we may be bidirectional
+        }
+        my ($cmd, @pargs);
+        if (index($msg, "\0") > 0) {
+                ($cmd, my $pargs) = split(/\0/, $msg, 2);
+                @pargs = @{ipc_thaw($pargs)};
+        } else {
+                # for compatibility with the script/lei in client mode,
+                # it doesn't load Sereal||Storable for startup speed
+                ($cmd, @pargs) = split(/ /, $msg);
+        }
+        my $op = $self->{ops}->{$cmd //= $msg};
+        if ($op) {
+                my ($obj, @args) = (@$op, @pargs);
+                if (blessed($args[0]) && $args[0]->can('do_env')) {
+                        my $lei = shift @args;
+                        $lei->do_env($obj, @args);
+                } elsif (blessed($obj)) {
+                        $obj->can('do_env') ? $obj->do_env($cmd, @args)
+                                                : $obj->$cmd(@args);
+                } else {
+                        $obj->(@args);
+                }
+        } elsif ($msg ne '') {
+                die "BUG: unknown message: `$cmd'";
+        }
+        $self->close if $msg eq ''; # close on EOF
+}
+
+1;
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
deleted file mode 100644
index 2ce7eb8f..00000000
--- a/lib/PublicInbox/ProcessPipe.pm
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
-package PublicInbox::ProcessPipe;
-use strict;
-use warnings;
-
-sub TIEHANDLE {
-        my ($class, $pid, $fh) = @_;
-        bless { pid => $pid, fh => $fh }, $class;
-}
-
-sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
-
-sub READLINE { readline($_[0]->{fh}) }
-
-sub CLOSE {
-        my $fh = delete($_[0]->{fh});
-        my $ret = defined $fh ? close($fh) : '';
-        my $pid = delete $_[0]->{pid};
-        if (defined $pid) {
-                # PublicInbox::DS may not be loaded
-                eval { PublicInbox::DS::dwaitpid($pid, undef, undef) };
-
-                if ($@) { # ok, not in the event loop, work synchronously
-                        waitpid($pid, 0);
-                        $ret = '' if $?;
-                }
-        }
-        $ret;
-}
-
-sub FILENO { fileno($_[0]->{fh}) }
-
-sub DESTROY {
-        CLOSE(@_);
-        undef;
-}
-
-sub pid { $_[0]->{pid} }
-
-1;
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 88b6d390..0bf857c6 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Like most Perl modules in public-inbox, this is internal and
@@ -12,21 +12,28 @@
 # operate in.  This can be useful to ensure smaller inboxes can
 # be cloned while cloning of large inboxes is maxed out.
 #
-# This does not depend on PublicInbox::DS or any other external
-# scheduling mechanism, you just need to call start() and finish()
-# appropriately. However, public-inbox-httpd (which uses PublicInbox::DS)
-# will be able to schedule this based on readability of stdout from
-# the spawned process.  See GitHTTPBackend.pm and SolverGit.pm for
-# usage examples.  It does not depend on any form of threading.
+# This does not depend on the PublicInbox::DS::event_loop or any
+# other external scheduling mechanism, you just need to call
+# start() and finish() appropriately. However, public-inbox-httpd
+# (which uses PublicInbox::DS)  will be able to schedule this
+# based on readability of stdout from the spawned process.
+# See GitHTTPBackend.pm and SolverGit.pm for usage examples.
+# It does not depend on any form of threading.
 #
 # This is useful for scheduling CGI execution of both long-lived
 # git-http-backend(1) process (for "git clone") as well as short-lived
 # processes such as git-apply(1).
 
 package PublicInbox::Qspawn;
-use strict;
+use v5.12;
 use PublicInbox::Spawn qw(popen_rd);
 use PublicInbox::GzipFilter;
+use Scalar::Util qw(blessed);
+use PublicInbox::Limiter;
+use PublicInbox::Aspawn qw(run_await);
+use PublicInbox::Syscall qw(EPOLLIN);
+use PublicInbox::InputPipe;
+use Carp qw(carp confess);
 
 # n.b.: we get EAGAIN with public-inbox-httpd, and EINTR on other PSGI servers
 use Errno qw(EAGAIN EINTR);
@@ -37,57 +44,45 @@ my $def_limiter;
 # $cmd is the command to spawn
 # $cmd_env is the environ for the child process (not PSGI env)
 # $opt can include redirects and perhaps other process spawning options
-sub new ($$$;) {
+# {qsp_err} is an optional error buffer callers may access themselves
+sub new {
         my ($class, $cmd, $cmd_env, $opt) = @_;
-        bless { args => [ $cmd, $cmd_env, $opt ] }, $class;
+        bless { args => [ $cmd, $cmd_env, $opt ? { %$opt } : {} ] }, $class;
 }
 
 sub _do_spawn {
         my ($self, $start_cb, $limiter) = @_;
-        my $err;
-        my ($cmd, $cmd_env, $opt) = @{delete $self->{args}};
+        my ($cmd, $cmd_env, $opt) = @{$self->{args}};
         my %o = %{$opt || {}};
         $self->{limiter} = $limiter;
-        foreach my $k (@PublicInbox::Spawn::RLIMITS) {
-                if (defined(my $rlimit = $limiter->{$k})) {
-                        $o{$k} = $rlimit;
-                }
+        for my $k (@PublicInbox::Spawn::RLIMITS) {
+                $opt->{$k} = $limiter->{$k} // next;
+        }
+        $self->{-quiet} = 1 if $o{quiet};
+        $limiter->{running}++;
+        if ($start_cb) {
+                eval { # popen_rd may die on EMFILE, ENFILE
+                        $self->{rpipe} = popen_rd($cmd, $cmd_env, $opt,
+                                                        \&waitpid_err, $self);
+                        $start_cb->($self); # EPOLL_CTL_ADD may ENOSPC/ENOMEM
+                };
+        } else {
+                eval { run_await($cmd, $cmd_env, $opt, \&wait_await, $self) };
+                warn "E: $@" if $@;
         }
-        $self->{cmd} = $o{quiet} ? undef : $cmd;
-        eval {
-                # popen_rd may die on EMFILE, ENFILE
-                ($self->{rpipe}, $self->{pid}) = popen_rd($cmd, $cmd_env, \%o);
-
-                die "E: $!" unless defined($self->{pid});
-
-                $limiter->{running}++;
-                $start_cb->($self); # EPOLL_CTL_ADD may ENOSPC/ENOMEM
-        };
         finish($self, $@) if $@;
 }
 
-sub child_err ($) {
-        my ($child_error) = @_; # typically $?
-        my $exitstatus = ($child_error >> 8) or return;
-        my $sig = $child_error & 127;
-        my $msg = "exit status=$exitstatus";
-        $msg .= " signal=$sig" if $sig;
-        $msg;
-}
-
-sub log_err ($$) {
-        my ($env, $msg) = @_;
-        $env->{'psgi.errors'}->print($msg, "\n");
+sub psgi_status_err { # Qspawn itself is useful w/o PSGI
+        require PublicInbox::WwwStatic;
+        PublicInbox::WwwStatic::r($_[0] // 500);
 }
 
-sub finalize ($$) {
-        my ($self, $err) = @_;
-
-        my ($env, $qx_cb, $qx_arg, $qx_buf) =
-                delete @$self{qw(psgi_env qx_cb qx_arg qx_buf)};
+sub finalize ($) {
+        my ($self) = @_;
 
-        # done, spawn whatever's in the queue
-        my $limiter = $self->{limiter};
+        # process is done, spawn whatever's in the queue
+        my $limiter = delete $self->{limiter} or return;
         my $running = --$limiter->{running};
 
         if ($running < $limiter->{max}) {
@@ -95,61 +90,69 @@ sub finalize ($$) {
                         _do_spawn(@$next, $limiter);
                 }
         }
-
-        if ($err) {
-                if (defined $self->{err}) {
-                        $self->{err} .= "; $err";
-                } else {
-                        $self->{err} = $err;
-                }
-                if ($env && $self->{cmd}) {
-                        log_err($env, join(' ', @{$self->{cmd}}) . ": $err");
+        if (my $err = $self->{_err}) { # set by finish or waitpid_err
+                utf8::decode($err);
+                if (my $dst = $self->{qsp_err}) {
+                        $$dst .= $$dst ? " $err" : "; $err";
                 }
+                warn "E: @{$self->{args}->[0]}: $err\n" if !$self->{-quiet};
         }
-        if ($qx_cb) {
-                eval { $qx_cb->($qx_buf, $qx_arg) };
-        } elsif (my $wcb = delete $env->{'qspawn.wcb'}) {
+
+        my ($env, $qx_cb_arg) = delete @$self{qw(psgi_env qx_cb_arg)};
+        if ($qx_cb_arg) {
+                my $cb = shift @$qx_cb_arg;
+                eval { $cb->($self->{args}->[2]->{1}, @$qx_cb_arg) };
+                return unless $@;
+                warn "E: $@"; # hope qspawn.wcb can handle it
+        }
+        return if $self->{passed}; # another command chained it
+        if (my $wcb = delete $env->{'qspawn.wcb'}) {
                 # have we started writing, yet?
-                require PublicInbox::WwwStatic;
-                $wcb->(PublicInbox::WwwStatic::r(500));
+                $wcb->(psgi_status_err($env->{'qspawn.fallback'}));
         }
 }
 
-# callback for dwaitpid
-sub waitpid_err ($$) {
-        my ($self, $pid) = @_;
-        my $xpid = delete $self->{pid};
-        my $err;
-        if (defined $pid) {
-                if ($pid > 0) { # success!
-                        $err = child_err($?);
-                } elsif ($pid < 0) { # ??? does this happen in our case?
-                        $err = "W: waitpid($xpid, 0) => $pid: $!";
-                } # else should not be called with pid == 0
+sub waitpid_err { # callback for awaitpid
+        my (undef, $self) = @_; # $_[0]: pid
+        $self->{_err} = ''; # for defined check in ->finish
+        if ($?) { # XXX this may be redundant
+                my $status = $? >> 8;
+                my $sig = $? & 127;
+                $self->{_err} .= "exit status=$status";
+                $self->{_err} .= " signal=$sig" if $sig;
         }
-        finalize($self, $err);
+        finalize($self) if !$self->{rpipe};
 }
 
-sub do_waitpid ($) {
-        my ($self) = @_;
-        my $pid = $self->{pid};
-        # PublicInbox::DS may not be loaded
-        eval { PublicInbox::DS::dwaitpid($pid, \&waitpid_err, $self) };
-        # done if we're running in PublicInbox::DS::EventLoop
-        if ($@) {
-                # non public-inbox-{httpd,nntpd} callers may block:
-                my $ret = waitpid($pid, 0);
-                waitpid_err($self, $ret);
-        }
+sub wait_await { # run_await cb
+        my ($pid, $cmd, $cmd_env, $opt, $self) = @_;
+        waitpid_err($pid, $self);
+}
+
+sub yield_chunk { # $_[-1] is sysread buffer (or undef)
+        my ($self, $ipipe) = @_;
+        if (!defined($_[-1])) {
+                warn "error reading body: $!";
+        } elsif ($_[-1] eq '') { # normal EOF
+                $self->finish;
+                $self->{qfh}->close;
+        } elsif (defined($self->{qfh}->write($_[-1]))) {
+                return; # continue while HTTP client is reading our writes
+        } # else { # HTTP client disconnected
+        delete $self->{rpipe};
+        $ipipe->close;
 }
 
 sub finish ($;$) {
         my ($self, $err) = @_;
-        if (delete $self->{rpipe}) {
-                do_waitpid($self);
-        } else {
-                finalize($self, $err);
-        }
+        $self->{_err} //= $err; # only for $@
+
+        # we can safely finalize if pipe was closed before, or if
+        # {_err} is defined by waitpid_err.  Deleting {rpipe} will
+        # trigger PublicInbox::IO::DESTROY -> waitpid_err,
+        # but it may not fire right away if inside the event loop.
+        my $closed_before = !delete($self->{rpipe});
+        finalize($self) if $closed_before || defined($self->{_err});
 }
 
 sub start ($$$) {
@@ -161,138 +164,92 @@ sub start ($$$) {
         }
 }
 
-sub psgi_qx_init_cb {
-        my ($self) = @_;
-        my $async = delete $self->{async};
-        my ($r, $buf);
-        my $qx_fh = $self->{qx_fh};
-reread:
-        $r = sysread($self->{rpipe}, $buf, 65536);
-        if ($async) {
-                $async->async_pass($self->{psgi_env}->{'psgix.io'},
-                                        $qx_fh, \$buf);
-        } elsif (defined $r) {
-                $r ? (print $qx_fh $buf) : event_step($self, undef);
-        } else {
-                return if $! == EAGAIN; # try again when notified
-                goto reread if $! == EINTR;
-                event_step($self, $!);
-        }
-}
-
-sub psgi_qx_start {
-        my ($self) = @_;
-        if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) {
-                # PublicInbox::HTTPD::Async->new(rpipe, $cb, cb_arg, $end_obj)
-                $self->{async} = $async->($self->{rpipe},
-                                        \&psgi_qx_init_cb, $self, $self);
-                # init_cb will call ->async_pass or ->close
-        } else { # generic PSGI
-                psgi_qx_init_cb($self) while $self->{qx_fh};
-        }
-}
-
-# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with
+# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls @qx_cb_arg with
 # the stdout of the given command when done; but respects the given limiter
 # $env is the PSGI env.  As with ``/qx; only use this when output is small
 # and safe to slurp.
 sub psgi_qx {
-        my ($self, $env, $limiter, $qx_cb, $qx_arg) = @_;
+        my ($self, $env, $limiter, @qx_cb_arg) = @_;
         $self->{psgi_env} = $env;
-        my $qx_buf = '';
-        open(my $qx_fh, '+>', \$qx_buf) or die; # PerlIO::scalar
-        $self->{qx_cb} = $qx_cb;
-        $self->{qx_arg} = $qx_arg;
-        $self->{qx_fh} = $qx_fh;
-        $self->{qx_buf} = \$qx_buf;
-        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
-        start($self, $limiter, \&psgi_qx_start);
+        $self->{qx_cb_arg} = \@qx_cb_arg;
+        $limiter ||= $def_limiter ||= PublicInbox::Limiter->new(32);
+        start($self, $limiter, undef);
 }
 
-# this is called on pipe EOF to reap the process, may be called
-# via PublicInbox::DS event loop OR via GetlineBody for generic
-# PSGI servers.
-sub event_step {
-        my ($self, $err) = @_; # $err: $!
-        log_err($self->{psgi_env}, "psgi_{return,qx} $err") if defined($err);
-        finish($self);
-        my ($fh, $qx_fh) = delete(@$self{qw(fh qx_fh)});
-        $fh->close if $fh; # async-only (psgi_return)
-}
+sub yield_pass {
+        my ($self, $ipipe, $res) = @_; # $ipipe = InputPipe
+        my $env = $self->{psgi_env};
+        my $wcb = delete $env->{'qspawn.wcb'} // confess('BUG: no qspawn.wcb');
+        if (ref($res) eq 'CODE') { # chain another command
+                delete $self->{rpipe};
+                $ipipe->close if $ipipe;
+                $res->($wcb);
+                $self->{passed} = 1;
+                return; # all done
+        }
+        confess("BUG: $res unhandled") if ref($res) ne 'ARRAY';
 
-sub rd_hdr ($) {
-        my ($self) = @_;
-        # typically used for reading CGI headers
-        # we must loop until EAGAIN for EPOLLET in HTTPD/Async.pm
-        # We also need to check EINTR for generic PSGI servers.
-        my $ret;
-        my $total_rd = 0;
-        my $hdr_buf = $self->{hdr_buf};
-        my ($ph_cb, $ph_arg) = @{$self->{parse_hdr}};
-        do {
-                my $r = sysread($self->{rpipe}, $$hdr_buf, 4096,
-                                length($$hdr_buf));
-                if (defined($r)) {
-                        $total_rd += $r;
-                        eval { $ret = $ph_cb->($total_rd, $hdr_buf, $ph_arg) };
-                        if ($@) {
-                                log_err($self->{psgi_env}, "parse_hdr: $@");
-                                $ret = [ 500, [], [ "Internal error\n" ] ];
-                        }
-                } else {
-                        # caller should notify us when it's ready:
-                        return if $! == EAGAIN;
-                        next if $! == EINTR; # immediate retry
-                        log_err($self->{psgi_env}, "error reading header: $!");
-                        $ret = [ 500, [], [ "Internal error\n" ] ];
-                }
-        } until (defined $ret);
-        delete $self->{parse_hdr}; # done parsing headers
-        $ret;
+        my $filter = blessed($res->[2]) && $res->[2]->can('attach') ?
+                        pop(@$res) : delete($env->{'qspawn.filter'});
+        $filter //= PublicInbox::GzipFilter::qsp_maybe($res->[1], $env);
+
+        if (scalar(@$res) == 3) { # done early (likely error or static file)
+                delete $self->{rpipe};
+                $ipipe->close if $ipipe;
+                $wcb->($res); # all done
+                return;
+        }
+        scalar(@$res) == 2 or confess("BUG: scalar(res) != 2: @$res");
+        return ($wcb, $filter) if !$ipipe; # generic PSGI
+        # streaming response
+        my $qfh = $wcb->($res); # get PublicInbox::HTTP::(Chunked|Identity)
+        $qfh = $filter->attach($qfh) if $filter;
+        my ($bref) = @{delete $self->{yield_parse_hdr}};
+        $qfh->write($$bref) if $$bref ne '';
+        $self->{qfh} = $qfh; # keep $ipipe open
 }
 
-sub psgi_return_init_cb {
+sub parse_hdr_done ($$) {
         my ($self) = @_;
-        my $r = rd_hdr($self) or return;
-        my $env = $self->{psgi_env};
-        my $filter = delete $env->{'qspawn.filter'} //
-                PublicInbox::GzipFilter::qsp_maybe($r->[1], $env);
-
-        my $wcb = delete $env->{'qspawn.wcb'};
-        my $async = delete $self->{async};
-        if (scalar(@$r) == 3) { # error
-                if ($async) {
-                        # calls rpipe->close && ->event_step
-                        $async->close;
-                } else {
-                        $self->{rpipe}->close;
-                        event_step($self);
+        my ($ret, $err);
+        if (defined $_[-1]) {
+                my ($bref, $ph_cb, @ph_arg) = @{$self->{yield_parse_hdr}};
+                $$bref .= $_[-1];
+                $ret = eval { $ph_cb->(length($_[-1]), $bref, @ph_arg) };
+                if (($err = $@)) {
+                        $ret = psgi_status_err();
+                } elsif (!$ret && $_[-1] eq '') {
+                        $err = 'EOF';
+                        $ret = psgi_status_err();
                 }
-                $wcb->($r);
-        } elsif ($async) {
-                # done reading headers, handoff to read body
-                my $fh = $wcb->($r); # scalar @$r == 2
-                $fh = $filter->attach($fh) if $filter;
-                $self->{fh} = $fh;
-                $async->async_pass($env->{'psgix.io'}, $fh,
-                                        delete($self->{hdr_buf}));
-        } else { # for synchronous PSGI servers
-                require PublicInbox::GetlineBody;
-                $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe},
-                                        \&event_step, $self,
-                                        ${$self->{hdr_buf}}, $filter);
-                $wcb->($r);
+        } else {
+                $err = "$!";
+                $ret = psgi_status_err();
         }
+        carp <<EOM if $err;
+E: $err @{$self->{args}->[0]} ($self->{psgi_env}->{REQUEST_URI})
+EOM
+        $ret; # undef if headers incomplete
 }
 
-sub psgi_return_start { # may run later, much later...
+sub ipipe_cb { # InputPipe callback
+        my ($ipipe, $self) = @_; # $_[-1] rbuf
+        if ($self->{qfh}) { # already streaming
+                yield_chunk($self, $ipipe, $_[-1]);
+        } elsif (my $res = parse_hdr_done($self, $_[-1])) {
+                yield_pass($self, $ipipe, $res);
+        } # else: headers incomplete, keep reading
+}
+
+sub _yield_start { # may run later, much later...
         my ($self) = @_;
-        if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) {
-                # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, $end_obj)
-                $self->{async} = $async->($self->{rpipe},
-                                        \&psgi_return_init_cb, $self, $self);
-        } else { # generic PSGI
-                psgi_return_init_cb($self) while $self->{parse_hdr};
+        if ($self->{psgi_env}->{'pi-httpd.async'}) {
+                my $rpipe = $self->{rpipe};
+                $rpipe->blocking(0);
+                PublicInbox::InputPipe::consume($rpipe, \&ipipe_cb, $self);
+        } else {
+                require PublicInbox::GetlineResponse;
+                PublicInbox::GetlineResponse::response($self);
         }
 }
 
@@ -303,7 +260,7 @@ sub psgi_return_start { # may run later, much later...
 #   $env->{'qspawn.wcb'} - the write callback from the PSGI server
 #                          optional, use this if you've already
 #                          captured it elsewhere.  If not given,
-#                          psgi_return will return an anonymous
+#                          psgi_yield will return an anonymous
 #                          sub for the PSGI server to call
 #
 #   $env->{'qspawn.filter'} - filter object, responds to ->attach for
@@ -312,76 +269,33 @@ sub psgi_return_start { # may run later, much later...
 #
 # $limiter - the Limiter object to use (uses the def_limiter if not given)
 #
-# $parse_hdr - Initial read function; often for parsing CGI header output.
+# @parse_hdr_arg - Initial read cb+args; often for parsing CGI header output.
 #              It will be given the return value of sysread from the pipe
 #              and a string ref of the current buffer.  Returns an arrayref
 #              for PSGI responses.  2-element arrays in PSGI mean the
 #              body will be streamed, later, via writes (push-based) to
 #              psgix.io.  3-element arrays means the body is available
 #              immediately (or streamed via ->getline (pull-based)).
-sub psgi_return {
-        my ($self, $env, $limiter, $parse_hdr, $hdr_arg) = @_;
+
+sub psgi_yield {
+        my ($self, $env, $limiter, @parse_hdr_arg)= @_;
         $self->{psgi_env} = $env;
-        $self->{hdr_buf} = \(my $hdr_buf = '');
-        $self->{parse_hdr} = [ $parse_hdr, $hdr_arg ];
-        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+        $self->{yield_parse_hdr} = [ \(my $buf = ''), @parse_hdr_arg ];
+        $limiter ||= $def_limiter ||= PublicInbox::Limiter->new(32);
 
         # the caller already captured the PSGI write callback from
         # the PSGI server, so we can call ->start, here:
-        $env->{'qspawn.wcb'} and
-                return start($self, $limiter, \&psgi_return_start);
-
-        # the caller will return this sub to the PSGI server, so
-        # it can set the response callback (that is, for
-        # PublicInbox::HTTP, the chunked_wcb or identity_wcb callback),
-        # but other HTTP servers are supported:
-        sub {
+        $env->{'qspawn.wcb'} ? start($self, $limiter, \&_yield_start) : sub {
+                # the caller will return this sub to the PSGI server, so
+                # it can set the response callback (that is, for
+                # PublicInbox::HTTP, the chunked_wcb or identity_wcb callback),
+                # but other HTTP servers are supported:
                 $env->{'qspawn.wcb'} = $_[0];
-                start($self, $limiter, \&psgi_return_start);
+                start($self, $limiter, \&_yield_start);
         }
 }
 
-package PublicInbox::Qspawn::Limiter;
-use strict;
-use warnings;
-
-sub new {
-        my ($class, $max) = @_;
-        bless {
-                # 32 is same as the git-daemon connection limit
-                max => $max || 32,
-                running => 0,
-                run_queue => [],
-                # RLIMIT_CPU => undef,
-                # RLIMIT_DATA => undef,
-                # RLIMIT_CORE => undef,
-        }, $class;
-}
-
-sub setup_rlimit {
-        my ($self, $name, $config) = @_;
-        foreach my $rlim (@PublicInbox::Spawn::RLIMITS) {
-                my $k = lc($rlim);
-                $k =~ tr/_//d;
-                $k = "publicinboxlimiter.$name.$k";
-                defined(my $v = $config->{$k}) or next;
-                my @rlimit = split(/\s*,\s*/, $v);
-                if (scalar(@rlimit) == 1) {
-                        push @rlimit, $rlimit[0];
-                } elsif (scalar(@rlimit) != 2) {
-                        warn "could not parse $k: $v\n";
-                }
-                eval { require BSD::Resource };
-                if ($@) {
-                        warn "BSD::Resource missing for $rlim";
-                        next;
-                }
-                foreach my $i (0..$#rlimit) {
-                        next if $rlimit[$i] ne 'INFINITY';
-                        $rlimit[$i] = BSD::Resource::RLIM_INFINITY();
-                }
-                $self->{$rlim} = \@rlimit;
-        }
-}
+no warnings 'once';
+*DESTROY = \&finalize; # ->finalize is idempotent
 
 1;
diff --git a/lib/PublicInbox/Reply.pm b/lib/PublicInbox/Reply.pm
index 5058ff8c..091f20bc 100644
--- a/lib/PublicInbox/Reply.pm
+++ b/lib/PublicInbox/Reply.pm
@@ -1,23 +1,17 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # For reply instructions and address generation in WWW UI
 package PublicInbox::Reply;
 use strict;
-use warnings;
+use v5.10.1;
 use URI::Escape qw/uri_escape_utf8/;
 use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href);
 use PublicInbox::Address;
 use PublicInbox::MID qw(mid_clean);
+use PublicInbox::Config;
 
-sub squote_maybe ($) {
-        my ($val) = @_;
-        if ($val =~ m{([^\w@\./,\%\+\-])}) {
-                $val =~ s/(['!])/'\\$1'/g; # '!' for csh
-                return "'$val'";
-        }
-        $val;
-}
+*squote_maybe = \&PublicInbox::Config::squote_maybe;
 
 sub add_addrs {
         my ($to, $cc, @addrs) = @_;
@@ -34,7 +28,7 @@ my $reply_headers = join('|', @reply_headers);
 sub mailto_arg_link {
         my ($ibx, $hdr) = @_;
         my $cc = {}; # everyone else
-        my $to; # this is the From address by defaultq
+        my $to; # this is the From address by default
         my $reply_to_all = 'reply-to-all'; # the only good default :P
         my $reply_to_cfg = $ibx->{replyto};
 
@@ -77,6 +71,7 @@ sub mailto_arg_link {
         my $mid = $hdr->header_raw('Message-ID');
         push @arg, '--in-reply-to='.squote_maybe(mid_clean($mid));
         my $irt = mid_href($mid);
+        add_addrs(\$to, $cc, $ibx->{-primary_address}) unless defined($to);
         delete $cc->{$to};
         if ($obfs) {
                 my $arg_to = $to;
@@ -85,7 +80,6 @@ sub mailto_arg_link {
                 # no $subj for $href below
         } else {
                 push @arg, "--to=$to";
-                $to = uri_escape_utf8($to);
                 $subj = uri_escape_utf8($subj);
         }
         my @cc = sort values %$cc;
@@ -108,6 +102,10 @@ sub mailto_arg_link {
         # anyways.
         return (\@arg, '', $reply_to_all) if $obfs;
 
+        # keep `@' instead of using `%40' for RFC 6068
+        utf8::encode($to);
+        $to =~ s!([^A-Za-z0-9\-\._~\@])!$URI::Escape::escapes{$1}!ge;
+
         # order matters, Subject is the least important header,
         # so it is last in case it's lost/truncated in a copy+paste
         my $href = "mailto:$to?In-Reply-To=$irt${cc}&Subject=$subj";
diff --git a/lib/PublicInbox/RepoAtom.pm b/lib/PublicInbox/RepoAtom.pm
new file mode 100644
index 00000000..eb0ed3c7
--- /dev/null
+++ b/lib/PublicInbox/RepoAtom.pm
@@ -0,0 +1,126 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# git log => Atom feed (cgit-compatible: $REPO/atom/[PATH]?h=$tip
+package PublicInbox::RepoAtom;
+use v5.12;
+use parent qw(PublicInbox::GzipFilter);
+use POSIX qw(strftime);
+use URI::Escape qw(uri_escape);
+use Scalar::Util ();
+use PublicInbox::Hval qw(ascii_html utf8_maybe);
+
+# git for-each-ref and log use different format fields :<
+my $ATOM_FMT = '--pretty=tformat:'.join('%n',
+                                map { "%$_" } qw(H ct an ae at s b)).'%x00';
+
+my $EACH_REF_FMT = '--format='.join(';', map { "\$r{'$_'}=%($_)" } qw(
+        objectname refname:short creator contents:subject contents:body
+        *subject *body)).'%00';
+
+sub atom_ok { # parse_hdr for qspawn
+        my ($r, $bref, $ctx) = @_;
+        return [ 404, [], [ "Not Found\n"] ] if $r == 0;
+        bless $ctx, __PACKAGE__;
+        my $h = [ 'Content-Type' => 'application/atom+xml; charset=UTF-8' ];
+        $ctx->{gz} = $ctx->can('gz_or_noop')->($h, $ctx->{env});
+        my $title = ascii_html(delete $ctx->{-feed_title});
+        my $desc = ascii_html($ctx->{git}->description);
+        my $url = ascii_html($ctx->{git}->base_url($ctx->{env}));
+        $ctx->{-base_url} = $url;
+        $ctx->zmore(<<EOM);
+<?xml version="1.0"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+<title>$title</title><subtitle>$desc</subtitle><link
+rel="alternate" type="text/html" href="$url"/>
+EOM
+        [ 200, $h, $ctx ]; # [2] is qspawn.filter
+}
+
+# called by GzipFilter->close
+sub zflush { $_[0]->SUPER::zflush('</feed>') }
+
+# called by GzipFilter->write or GetlineResponse->getline
+sub translate {
+        my $self = shift;
+        $_[0] // return zflush($self); # getline caller
+        my @out;
+        my $lbuf = delete($self->{lbuf}) // shift;
+        $lbuf .= shift while @_;
+        my $is_tag = $self->{-is_tag};
+        my ($H, $ct, $an, $ae, $at, $s, $bdy);
+        while ($lbuf =~ s/\A([^\0]+)\0\n//s) {
+                utf8_maybe($bdy = $1);
+                if ($is_tag) {
+                        my %r;
+                        eval "$bdy";
+                        for (qw(contents:subject contents:body)) {
+                                $r{$_} =~ /\S/ or delete($r{$_})
+                        }
+                        $H = $r{objectname};
+                        $s = $r{'contents:subject'} // $r{'*subject'};
+                        $bdy = $r{'contents:body'} // $r{'*body'};
+                        $s .= " ($r{'refname:short'})";
+                        $_ = ascii_html($_) for ($s, $bdy, $r{creator});
+                        ($an, $ae, $at) = split(/\s*&[gl]t;\s*/, $r{creator});
+                        $at =~ s/ .*\z//; # no TZ
+                        $ct = $at = strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($at));
+                } else {
+                        $bdy = ascii_html($bdy);
+                        ($H, $ct, $an, $ae, $at, $s, $bdy) =
+                                                        split(/\n/, $bdy, 7);
+                        $at = strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($at));
+                        $ct = strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($ct));
+                }
+                $bdy //= '';
+                push @out, <<"";
+<entry><title>$s</title><updated>$ct</updated><author><name>$an</name>
+<email>$ae</email></author><published>$at</published><link
+rel="alternate" type="text/html" href="$self->{-base_url}$H/s/"
+/><id>$H</id>
+
+                push @out, <<'', $bdy, '</pre></div></content>' if $bdy ne '';
+<content type="xhtml"><div
+xmlns="http://www.w3.org/1999/xhtml"><pre style="white-space:pre-wrap">
+
+                push @out, '</entry>';
+        }
+        $self->{lbuf} = $lbuf;
+        chomp @out;
+        @out ? $self->SUPER::translate(@out) : ''; # not EOF, yet
+}
+
+# $REPO/tags.atom endpoint
+sub srv_tags_atom {
+        my ($ctx) = @_;
+        my $max = 50; # TODO configurable
+        my $cmd = $ctx->{git}->cmd(qw(for-each-ref --sort=-creatordate),
+                        "--count=$max", '--perl', $EACH_REF_FMT, 'refs/tags');
+        $ctx->{-feed_title} = "$ctx->{git}->{nick} tags";
+        my $qsp = PublicInbox::Qspawn->new($cmd);
+        $ctx->{-is_tag} = 1;
+        $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx);
+}
+
+sub srv_atom {
+        my ($ctx, $path) = @_;
+        return if index($path, '//') >= 0 || index($path, '/') == 0;
+        my $max = 50; # TODO configurable
+        my $cmd = $ctx->{git}->cmd(qw(log --no-notes --no-color --no-abbrev),
+                                $ATOM_FMT, "-$max");
+        my $tip = $ctx->{qp}->{h}; # same as cgit
+        $ctx->{-feed_title} = $ctx->{git}->{nick};
+        $ctx->{-feed_title} .= " $path" if $path ne '';
+        if (defined($tip)) {
+                push @$cmd, $tip;
+                $ctx->{-feed_title} .= ", $tip";
+        }
+        # else: let git decide based on HEAD if $tip isn't defined
+        push @$cmd, '--';
+        push @$cmd, $path if $path ne '';
+        my $qsp = PublicInbox::Qspawn->new($cmd, undef,
+                                        { quiet => 1, 2 => $ctx->{lh} });
+        $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx);
+}
+
+1;
diff --git a/lib/PublicInbox/RepoList.pm b/lib/PublicInbox/RepoList.pm
new file mode 100644
index 00000000..39dc9c0b
--- /dev/null
+++ b/lib/PublicInbox/RepoList.pm
@@ -0,0 +1,39 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::RepoList;
+use v5.12;
+use parent qw(PublicInbox::WwwStream);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
+require PublicInbox::CodeSearch;
+
+sub html_top_fallback { # WwwStream->html_repo_top
+        my ($ctx) = @_;
+        my $title = delete($ctx->{-title_html}) //
+                ascii_html("$ctx->{env}->{PATH_INFO}*");
+        my $upfx = $ctx->{-upfx} // '';
+        "<html><head><title>$title</title>" .
+                $ctx->{www}->style($upfx) . '</head><body>';
+}
+
+sub html ($$$) {
+        my ($wcr, $ctx, $re) = @_;
+        my $cr = $wcr->{pi_cfg}->{-coderepos};
+        my @nicks = grep(m!$re!, keys %$cr) or return; # 404
+        __PACKAGE__->html_init($ctx);
+        my $zfh = $ctx->zfh;
+        print $zfh "<pre>matching coderepos\n";
+        my @recs = PublicInbox::CodeSearch::repos_sorted($wcr->{pi_cfg},
+                                                        @$cr{@nicks});
+        my $env = $ctx->{env};
+        for (@recs) {
+                my ($t, $git) = @$_;
+                my $nick = ascii_html("$git->{nick}");
+                for my $u ($git->pub_urls($env)) {
+                        $u = prurl($env, $u);
+                        print $zfh "\n".fmt_ts($t).qq{ <a\nhref="$u">$nick</a>}
+                }
+        }
+        $ctx->html_done('</pre>');
+}
+
+1;
diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm
new file mode 100644
index 00000000..bff97bc8
--- /dev/null
+++ b/lib/PublicInbox/RepoSnapshot.pm
@@ -0,0 +1,85 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# cgit-compatible /snapshot/ endpoint for WWW coderepos
+package PublicInbox::RepoSnapshot;
+use v5.12;
+use PublicInbox::Qspawn;
+use PublicInbox::ViewVCS;
+use PublicInbox::WwwStatic qw(r);
+
+# Not using standard mime types since the compressed tarballs are
+# special or do not match my /etc/mime.types.  Choose what gitweb
+# and cgit agree on for compatibility.
+our %FMT_TYPES = (
+        'tar' => 'application/x-tar',
+        'tar.gz' => 'application/x-gzip',
+        'tar.bz2' => 'application/x-bzip2',
+        'tar.xz' => 'application/x-xz',
+        'zip' => 'application/x-zip',
+);
+
+our %FMT_CFG = (
+        'tar.xz' => 'xz -c',
+        'tar.bz2' => 'bzip2 -c',
+        # not supporting lz nor zstd for now to avoid format proliferation
+        # and increased cache overhead required to handle extra formats.
+);
+
+my $SUFFIX = join('|', map { quotemeta } keys %FMT_TYPES);
+
+# TODO deal with tagged blobs
+
+sub archive_hdr { # parse_hdr for Qspawn
+        my ($r, $bref, $ctx) = @_;
+        $r or return [500, [qw(Content-Type text/plain Content-Length 0)], []];
+        my $fn = "$ctx->{snap_pfx}.$ctx->{snap_fmt}";
+        my $type = $FMT_TYPES{$ctx->{snap_fmt}} //
+                                die "BUG: bad fmt: $ctx->{snap_fmt}";
+        [ 200, [ 'Content-Type', "$type; charset=UTF-8",
+                'Content-Disposition', qq(inline; filename="$fn"),
+                'ETag', qq("$ctx->{etag}") ] ];
+}
+
+sub ver_check { # git->check_async callback
+        my (undef, $oid, $type, $size, $ctx) = @_;
+        return if defined $ctx->{etag};
+        my $treeish = shift @{$ctx->{-try}} // die 'BUG: no {-try}';
+        if ($type eq 'missing') {
+                scalar(@{$ctx->{-try}}) or
+                        delete($ctx->{env}->{'qspawn.wcb'})->(r(404));
+        } else { # found, done:
+                $ctx->{etag} = $oid;
+                my $cmd = $ctx->{git}->cmd;
+                if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) {
+                        push @$cmd, '-c', "tar.$ctx->{snap_fmt}.command=$cmd";
+                }
+                push @$cmd, 'archive', "--prefix=$ctx->{snap_pfx}/",
+                                "--format=$ctx->{snap_fmt}", $treeish;
+                my $qsp = PublicInbox::Qspawn->new($cmd, undef, { quiet => 1 });
+                $qsp->psgi_yield($ctx->{env}, undef, \&archive_hdr, $ctx);
+        }
+}
+
+sub srv {
+        my ($ctx, $fn) = @_;
+        return if $fn =~ /["\s]/s;
+        my $fmt = $ctx->{wcr}->{snapshots}; # TODO per-repo snapshots
+        $fn =~ s/\.($SUFFIX)\z//o and $fmt->{$1} or return;
+        $ctx->{snap_fmt} = $1;
+        my $pfx = $ctx->{git}->local_nick // return;
+        $pfx =~ s/(?:\.git)?\z/-/;
+        ($pfx) = ($pfx =~ m!([^/]+)\z!);
+        substr($fn, 0, length($pfx)) eq $pfx or return;
+        $ctx->{snap_pfx} = $fn;
+        my $v = $ctx->{snap_ver} = substr($fn, length($pfx), length($fn));
+        # try without [vV] prefix, first
+        my @try = map { "$_$v" } ('', 'v', 'V'); # cf. cgit:ui-snapshot.c
+        @{$ctx->{-try}} = @try;
+        sub {
+                $ctx->{env}->{'qspawn.wcb'} = $_[0];
+                PublicInbox::ViewVCS::do_check_async($ctx, \&ver_check, @try);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
new file mode 100644
index 00000000..4c85f9a8
--- /dev/null
+++ b/lib/PublicInbox/RepoTree.pm
@@ -0,0 +1,99 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# cgit-compatible $REPO/tree/[PATH]?h=$tip redirector
+package PublicInbox::RepoTree;
+use v5.12;
+use PublicInbox::ViewDiff qw(uri_escape_path);
+use PublicInbox::WwwStatic qw(r);
+use PublicInbox::Qspawn;
+use PublicInbox::WwwStream qw(html_oneshot);
+use PublicInbox::Hval qw(ascii_html utf8_maybe);
+
+sub rd_404_log {
+        my ($bref, $ctx) = @_;
+        my $path = $ctx->{-q_value_html} = ascii_html($ctx->{-path});
+        my $tip = 'HEAD';
+        $tip = ascii_html($ctx->{qp}->{h}) if defined($ctx->{qp}->{h});
+        PublicInbox::WwwStream::html_init($ctx);
+        my $zfh = $ctx->{zfh};
+        print $zfh "<pre>\$ git log -1 $tip -- $path\n";
+        my $code = 200;
+        if ($$bref eq '') {
+                say $zfh "found no record of `$path' in git history in `$tip'";
+                $ctx->{-has_srch} and
+                        say $zfh 'perhaps try searching mail (above)';
+                $code = 404;
+        } else {
+                my ($H, $h, $s_as) = split(/ /, $$bref, 3);
+                utf8_maybe($s_as);
+                my $x = uri_escape_path($ctx->{-path});
+                $s_as = ascii_html($s_as);
+                print $zfh <<EOM;
+found last record of `$path' in the following commit:
+
+<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
+EOM
+        }
+        my $res = $ctx->html_done;
+        $res->[0] = $code;
+        delete($ctx->{-wcb})->($res);
+}
+
+sub find_missing {
+        my ($ctx) = @_;
+        if ($ctx->{-path} eq '') {
+                my $tip = 'HEAD';
+                $tip = ascii_html($ctx->{qp}->{h}) if defined($ctx->{qp}->{h});
+                PublicInbox::WwwStream::html_init($ctx);
+                print { $ctx->{zfh} } "<pre>`$tip' ref not found</pre>";
+                my $res = $ctx->html_done;
+                $res->[0] = 404;
+                return delete($ctx->{-wcb})->($res);
+        }
+        my $cmd = $ctx->{git}->cmd(qw(log --no-color -1),
+                                '--pretty=%H %h %s (%as)');
+        push @$cmd, $ctx->{qp}->{h} if defined($ctx->{qp}->{h});
+        push @$cmd, '--';
+        push @$cmd, $ctx->{-path};
+        my $qsp = PublicInbox::Qspawn->new($cmd, undef,
+                                        { quiet => 1, 2 => $ctx->{lh} });
+        $qsp->psgi_qx($ctx->{env}, undef, \&rd_404_log, $ctx);
+}
+
+sub tree_show { # git check_async callback
+        my (undef, $oid, $type, $size, $ctx) = @_;
+        return find_missing($ctx) if $type eq 'missing';
+
+        my $res = [ $ctx->{git}, $oid, $type, $size ];
+        my ($bn) = ($ctx->{-path} =~ m!/?([^/]+)\z!);
+        if ($type eq 'blob') {
+                my $obj = ascii_html($ctx->{-obj});
+                $ctx->{-q_value_html} = 'dfn:'.ascii_html($ctx->{-path}) .
+                        ' dfpost:'.substr($oid, 0, 7);
+                $ctx->{-paths} = [ $bn, qq[(<a
+href="$ctx->{-upfx}$oid/s/$bn">raw</a>)
+\$ git show $obj\t# shows this blob on the CLI] ];
+        }
+        PublicInbox::ViewVCS::solve_result($res, $ctx);
+}
+
+sub srv_tree {
+        my ($ctx, $path) = @_;
+        return if index($path, '//') >= 0 || index($path, '/') == 0;
+        my $tip = $ctx->{qp}->{h} // 'HEAD';
+        $ctx->{-upfx} = '../' x (($path =~ tr!/!/!) + 1);
+        $path =~ s!/\z!!;
+        my $obj = $ctx->{-obj} = "$tip:$path";
+        $ctx->{-path} = $path;
+
+        # "\n" breaks with `git cat-file --batch-check', and there's no
+        # legitimate use of "\n" in filenames anyways.
+        return if index($obj, "\n") >= 0;
+        sub {
+                $ctx->{-wcb} = $_[0]; # HTTP::{Chunked,Identity}
+                PublicInbox::ViewVCS::do_check_async($ctx, \&tree_show, $obj);
+        };
+}
+
+1;
diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm
new file mode 100644
index 00000000..3fa8530e
--- /dev/null
+++ b/lib/PublicInbox/SHA.pm
@@ -0,0 +1,67 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# OpenSSL exception added in commit 22711f81f4e79da6b796820e37803a05cae14645
+# (README: add OpenSSL exception, 2015-10-05)
+
+# Replaces most uses of Digest::SHA with OpenSSL via Net::SSLeay if
+# possible.  OpenSSL SHA-256 is nearly twice as fast as Digest::SHA on
+# x86-64, and SHA-1 is a bit faster as well.
+# I don't think we can implement Digest::SHA->clone with what Net::SSLeay
+# gives us...  (maybe EVP_MD_CTX_copy+EVP_MD_CTX_copy_ex need to be added
+# to Net::SSLeay?)
+package PublicInbox::SHA;
+use v5.12;
+require Exporter;
+our @EXPORT_OK = qw(sha1_hex sha256_hex sha256 sha_all);
+use autodie qw(sysread);
+our @ISA;
+
+BEGIN {
+        push @ISA, 'Exporter';
+        unless (eval(<<'EOM')) {
+use Net::SSLeay 1.43;
+my %SHA = (
+        1 => Net::SSLeay::EVP_sha1(),
+        256 => Net::SSLeay::EVP_sha256(),
+);
+
+sub new {
+        my ($cls, $n) = @_;
+        my $mdctx = Net::SSLeay::EVP_MD_CTX_create();
+        Net::SSLeay::EVP_DigestInit($mdctx, $SHA{$n}) or
+                        die "EVP_DigestInit $n: $!";
+        bless \$mdctx, $cls;
+}
+
+sub add {
+        my $self = shift;
+        Net::SSLeay::EVP_DigestUpdate($$self, $_) for @_;
+        $self;
+}
+
+sub digest { Net::SSLeay::EVP_DigestFinal(${$_[0]}) };
+sub hexdigest { unpack('H*', Net::SSLeay::EVP_DigestFinal(${$_[0]})) }
+sub DESTROY { Net::SSLeay::EVP_MD_CTX_destroy(${$_[0]}) };
+
+sub sha1_hex { unpack('H*', Net::SSLeay::SHA1($_[0])) };
+sub sha256_hex { unpack('H*', Net::SSLeay::SHA256($_[0])) };
+*sha256 = \&Net::SSLeay::SHA256;
+# end of eval
+EOM
+        require Digest::SHA; # stdlib fallback
+        push @ISA, 'Digest::SHA';
+        *sha1_hex = \&Digest::SHA::sha1_hex;
+        *sha256_hex = \&Digest::SHA::sha256_hex;
+        *sha256 = \&Digest::SHA::sha256;
+}
+
+} # /BEGIN
+
+sub sha_all ($$) {
+        my ($n, $fh) = @_;
+        my ($dig, $buf) = (PublicInbox::SHA->new($n));
+        while (sysread($fh, $buf, 65536)) { $dig->add($buf) }
+        $dig
+}
+
+1;
diff --git a/lib/PublicInbox/SaPlugin/ListMirror.pm b/lib/PublicInbox/SaPlugin/ListMirror.pm
index a2a54944..06903cad 100644
--- a/lib/PublicInbox/SaPlugin/ListMirror.pm
+++ b/lib/PublicInbox/SaPlugin/ListMirror.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # SpamAssassin rules useful for running a mailing list mirror.  We want to:
@@ -39,7 +39,11 @@ sub check_list_mirror_received {
                 my $v = $pms->get($hdr) or next;
                 local $/ = "\n";
                 chomp $v;
-                next if $v ne $hval;
+                if (ref($hval)) {
+                        next if $v !~ $hval;
+                } else {
+                        next if $v ne $hval;
+                }
                 return 1 if $recvd !~ $host_re;
         }
 
@@ -91,6 +95,8 @@ sub config_list_mirror {
         $host_glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
         my $host_re = qr/\A\s*from\s+$host_glob(?:\s|$)/si;
 
+        (lc($hdr) eq 'list-id' && $hval =~ /<([^>]+)>/) and
+                $hval = qr/\A<\Q$1\E>\z/;
         push @{$self->{list_mirror_check}}, [ $hdr, $hval, $host_re, $addr ];
 }
 
diff --git a/lib/PublicInbox/SaPlugin/ListMirror.pod b/lib/PublicInbox/SaPlugin/ListMirror.pod
index 3c4ec8c1..e6a6c2ad 100644
--- a/lib/PublicInbox/SaPlugin/ListMirror.pod
+++ b/lib/PublicInbox/SaPlugin/ListMirror.pod
@@ -6,11 +6,11 @@ PublicInbox::SaPlugin::ListMirror - SpamAssassin plugin for mailing list mirrors
 
   loadplugin PublicInbox::SaPlugin::ListMirror
 
-Declare some mailing lists based on the expected List-Id value,
+Declare some mailing lists based on the expected List-ID value,
 expected servers, and mailing list address:
 
-  list_mirror List-Id <foo.example.com> *.example.com foo@example.com
-  list_mirror List-Id <bar.example.com> *.example.com bar@example.com
+  list_mirror List-ID <foo.example.com> *.example.com foo@example.com
+  list_mirror List-ID <bar.example.com> *.example.com bar@example.com
 
 Bump the score for messages which come from unexpected servers:
 
@@ -43,14 +43,25 @@ C<allow_user_rules 1>
 
 =item list_mirror HEADER HEADER_VALUE HOSTNAME_GLOB [LIST_ADDRESS]
 
-Declare a list based on an expected C<HEADER> matching C<HEADER_NAME>
-exactly coming from C<HOSTNAME_GLOB>.  C<LIST_ADDRESS> is optional,
+Declare a list based on an expected C<HEADER> matching C<HEADER_VALUE>
+coming from C<HOSTNAME_GLOB>.  C<LIST_ADDRESS> is optional,
 but may specify the address of the mailing list being mirrored.
 
-C<List-Id> or C<X-Mailing-List> are common values of C<HEADER>
+C<List-ID> is the recommended value of C<HEADER> as most
+mailing lists support it.
 
 An example of C<HEADER_VALUE> is C<E<lt>foo.example.orgE<gt>>
-if C<HEADER> is C<List-Id>.
+if C<HEADER> is C<List-ID>.
+
+As of public-inbox 2.0, using C<List-ID> as the C<HEADER> and a
+C<HEADER_VALUE> contained by angle brackets (E<lt>list-idE<gt>),
+matching is done in accordance with
+L<RFC 2919|https://tools.ietf.org/html/rfc2919>.  That is,
+C<HEADER_VALUE> will be a case-insensitive substring match
+and ignore the optional description C<phrase> as documented
+in RFC 2919.
+
+All other C<HEADER> values use exact matches for backwards-compatibility.
 
 C<HOSTNAME_GLOB> may be a wildcard match for machines where mail can
 come from or an exact match.
@@ -101,11 +112,11 @@ This rule allows users to assign a score to Bcc-ed messages
 Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
 
 The mail archives are hosted at L<https://public-inbox.org/meta/>
-and L<http://hjrcffqmbrq6wope.onion/meta/>
+and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright (C) 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright (C) all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 0321ca93..e5c5d6ab 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -1,20 +1,24 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # based on notmuch, but with no concept of folders, files or flags
 #
 # Read-only search interface for use by the web and NNTP interfaces
 package PublicInbox::Search;
 use strict;
+use v5.10.1;
 use parent qw(Exporter);
-our @EXPORT_OK = qw(mdocid);
+our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
+use POSIX qw(strftime);
+use Carp ();
+our $XHC = 0; # defined but false
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
 use constant {
-        TS => 0, # Received: header in Unix time (IMAP INTERNALDATE)
-        YYYYMMDD => 1, # Date: header for searching in the WWW UI
-        DT => 2, # Date: YYYYMMDDHHMMSS
+        TS => 0, # Received: in Unix time (IMAP INTERNALDATE, JMAP receivedAt)
+        YYYYMMDD => 1, # redundant with DT below
+        DT => 2, # Date: YYYYMMDDHHMMSS (IMAP SENT*, JMAP sentAt)
 
         # added for public-inbox 1.6.0+
         BYTES => 3, # IMAP RFC822.SIZE
@@ -53,19 +57,48 @@ use constant {
 };
 
 use PublicInbox::Smsg;
-use PublicInbox::Over;
-my $QP_FLAGS;
-our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem);
-our $Xap; # 'Search::Xapian' or 'Xapian'
-my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
-my $ENQ_ASCENDING;
+eval { require PublicInbox::Over };
+our $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query);
+our $Xap; # 'Xapian' or 'Search::Xapian'
+our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
+
+# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16,
+# let's hope the ABI is stable
+our $ENQ_DESCENDING = 0;
+our $ENQ_ASCENDING = 1;
+our @MAIL_VMAP = (
+        [ YYYYMMDD, 'd:'],
+        [ TS, 'rt:' ],
+        # these are undocumented for WWW, but lei and IMAP use them
+        [ DT, 'dt:' ],
+        [ BYTES, 'z:' ],
+        [ UID, 'uid:' ]
+);
+our @MAIL_NRP;
+
+# Getopt::Long spec, only short options for portability in C++ implementation
+our @XH_SPEC = (
+        'a', # ascending sort
+        'c', # code search
+        'd=s@', # shard dirs
+        'g=s', # git dir (with -c)
+        'k=i', # sort column (like sort(1))
+        'm=i', # maximum number of results
+        'o=i', # offset
+        'r', # 1=relevance then column
+        't', # collapse threads
+        'A=s@', # prefixes
+        'K=i', # timeout kill after i seconds
+        'O=s', # eidx_key
+        'T=i', # threadid
+        'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX"
+);
 
 sub load_xapian () {
         return 1 if defined $Xap;
-        # n.b. PI_XAPIAN is intended for development use only.  We still
-        # favor Search::Xapian since that's what's available in current
-        # Debian stable (10.x) and derived distros.
-        for my $x (($ENV{PI_XAPIAN} // 'Search::Xapian'), 'Xapian') {
+        # n.b. PI_XAPIAN is intended for development use only
+        for my $x (($ENV{PI_XAPIAN} // 'Xapian'), 'Search::Xapian') {
                 eval "require $x";
                 next if $@;
 
@@ -78,18 +111,13 @@ sub load_xapian () {
 
                 # NumberRangeProcessor was added in Xapian 1.3.6,
                 # NumberValueRangeProcessor was removed for 1.5.0+,
-                # favor the older /Value/ variant since that's what our
-                # (currently) preferred Search::Xapian supports
+                # continue with the older /Value/ variant for now...
                 $NVRP = $x.'::'.($x eq 'Xapian' && $xver ge v1.5 ?
                         'NumberRangeProcessor' : 'NumberValueRangeProcessor');
                 $X{$_} = $Xap.'::'.$_ for (keys %X);
 
-                # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
-                # so lets hope this part of the ABI is stable because it's
-                # just an integer:
-                $ENQ_ASCENDING = $x eq 'Xapian' ?
-                                1 : Search::Xapian::ENQ_ASCENDING();
-
+                *sortable_serialise = $x.'::sortable_serialise';
+                *sortable_unserialise = $x.'::sortable_unserialise';
                 # n.b. FLAG_PURE_NOT is expensive not suitable for a public
                 # website as it could become a denial-of-service vector
                 # FLAG_PHRASE also seems to cause performance problems chert
@@ -98,6 +126,7 @@ sub load_xapian () {
                 # or make indexlevel=medium as default
                 $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() |
                                 FLAG_WILDCARD();
+                @MAIL_NRP = map { $NVRP->new(@$_) } @MAIL_VMAP;
                 return 1;
         }
         undef;
@@ -107,42 +136,50 @@ sub load_xapian () {
 # a prefix common in patch emails
 our $LANG = 'english';
 
+our %PATCH_BOOL_COMMON = (
+        dfpre => 'XDFPRE',
+        dfpost => 'XDFPOST',
+        dfblob => 'XDFPRE XDFPOST',
+        patchid => 'XDFID',
+);
+
 # note: the non-X term prefix allocations are shared with
 # Xapian omega, see xapian-applications/omega/docs/termprefixes.rst
 my %bool_pfx_external = (
         mid => 'Q', # Message-ID (full/exact), this is mostly uniQue
         lid => 'G', # newsGroup (or similar entity), just inside <>
-        dfpre => 'XDFPRE',
-        dfpost => 'XDFPOST',
-        dfblob => 'XDFPRE XDFPOST',
+        %PATCH_BOOL_COMMON
 );
 
-my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
-my %prob_prefix = (
-        # for mairix compatibility
+# for mairix compatibility
+our $NON_QUOTED_BODY = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID';
+our %PATCH_PROB_COMMON = (
         s => 'S',
-        m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial
-        l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial
         f => 'A',
-        t => 'XTO',
-        tc => 'XTO XCC',
-        c => 'XCC',
-        tcf => 'XTO XCC A',
-        a => 'XTO XCC A',
-        b => $non_quoted_body . ' XQUOT',
-        bs => $non_quoted_body . ' XQUOT S',
+        b => $NON_QUOTED_BODY . ' XQUOT',
+        bs => $NON_QUOTED_BODY . ' XQUOT S',
         n => 'XFN',
 
         q => 'XQUOT',
-        nq => $non_quoted_body,
+        nq => $NON_QUOTED_BODY,
         dfn => 'XDFN',
         dfa => 'XDFA',
         dfb => 'XDFB',
         dfhh => 'XDFHH',
         dfctx => 'XDFCTX',
+);
 
+my %prob_prefix = (
+        m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial
+        l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial
+        t => 'XTO',
+        tc => 'XTO XCC',
+        c => 'XCC',
+        tcf => 'XTO XCC A',
+        a => 'XTO XCC A',
+        %PATCH_PROB_COMMON,
         # default:
-        '' => 'XM S A XQUOT XFN ' . $non_quoted_body,
+        '' => 'XM S A XQUOT XFN ' . $NON_QUOTED_BODY,
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
@@ -152,12 +189,9 @@ my %prob_prefix = (
 our @HELP = (
         's:' => 'match within Subject  e.g. s:"a quick brown fox"',
         'd:' => <<EOF,
-date range as YYYYMMDD  e.g. d:19931002..20101002
-Open-ended ranges such as d:19931002.. and d:..20101002
-are also supported
-EOF
-        'dt:' => <<EOF,
-date-time range as YYYYMMDDhhmmss (e.g. dt:19931002011000..19931002011200)
+match date-time range, git "approxidate" formats supported
+Open-ended ranges such as `d:last.week..' and
+`d:..2.days.ago' are supported
 EOF
         'b:' => 'match within message body, including text attachments',
         'nq:' => 'match non-quoted text within message body',
@@ -178,6 +212,10 @@ EOF
         'dfpre:' => 'match pre-image git blob ID',
         'dfpost:' => 'match post-image git blob ID',
         'dfblob:' => 'match either pre or post-image git blob ID',
+        'patchid:' => "match `git patch-id --stable' output",
+        'rt:' => <<EOF,
+match received time, like `d:' if sender's clock was correct
+EOF
 );
 chomp @HELP;
 
@@ -185,43 +223,38 @@ sub xdir ($;$) {
         my ($self, $rdonly) = @_;
         if ($rdonly || !defined($self->{shard})) {
                 $self->{xpfx};
-        } else { # v2 only:
+        } else { # v2, extindex, cindex only:
                 "$self->{xpfx}/$self->{shard}";
         }
 }
 
-sub _xdb ($) {
+# returns shard directories as an array of strings, does not verify existence
+sub shard_dirs ($) {
         my ($self) = @_;
-        my $dir = xdir($self, 1);
-        my ($xdb, $slow_phrase);
-        my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
-        if ($self->{ibx_ver} >= 2) {
-                my @xdb;
-                opendir(my $dh, $dir) or return; # not initialized yet
-
+        my $xpfx = $self->{xpfx};
+        if ($xpfx =~ m!/xapian[0-9]+\z!) { # v1 inbox
+                ($xpfx);
+        } else { # v2 inbox, eidx, cidx
+                opendir(my $dh, $xpfx) or return (); # not initialized yet
                 # We need numeric sorting so shard[0] is first for reading
                 # Xapian metadata, if needed
-                my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
-                return if !defined($last);
-                for (0..$last) {
-                        my $shard_dir = "$dir/$_";
-                        if (-d $shard_dir && -r _) {
-                                push @xdb, $X{Database}->new($shard_dir);
-                                $slow_phrase ||= -f "$shard_dir/iamchert";
-                        } else { # gaps from missing epochs throw off mdocid()
-                                warn "E: $shard_dir missing or unreadable\n";
-                                return;
-                        }
-                }
-                $self->{nshard} = scalar(@xdb);
-                $xdb = shift @xdb;
-                $xdb->add_database($_) for @xdb;
-        } else {
-                $slow_phrase = -f "$dir/iamchert";
-                $xdb = $X{Database}->new($dir);
+                my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return ();
+                map { "$xpfx/$_" } (0..$last);
         }
-        $$qpf |= FLAG_PHRASE() unless $slow_phrase;
-        $xdb;
+}
+
+# returns all shards as separate Xapian::Database objects w/o combining
+sub xdb_shards_flat ($) {
+        my ($self) = @_;
+        load_xapian();
+        $self->{qp_flags} //= $QP_FLAGS;
+        my $slow_phrase;
+        my @xdb = map {
+                $slow_phrase ||= -f "$_/iamchert";
+                $X{Database}->new($_); # raises if missing
+        } shard_dirs($self);
+        $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
+        @xdb;
 }
 
 # v2 Xapian docids don't conflict, so they're identical to
@@ -233,38 +266,36 @@ sub mdocid {
         int(($docid - 1) / $nshard) + 1;
 }
 
+sub docids_to_artnums {
+        my $nshard = shift->{nshard};
+        # XXX does array vs arrayref make a difference in modern Perls?
+        map { int(($_ - 1) / $nshard) + 1 } @_;
+}
+
 sub mset_to_artnums {
         my ($self, $mset) = @_;
-        my $nshard = $self->{nshard} // 1;
+        my $nshard = $self->{nshard};
         [ map { mdocid($nshard, $_) } $mset->items ];
 }
 
 sub xdb ($) {
         my ($self) = @_;
-        $self->{xdb} ||= do {
-                load_xapian();
-                _xdb($self);
+        $self->{xdb} // do {
+                my @xdb = $self->xdb_shards_flat or return;
+                $self->{nshard} = scalar(@xdb);
+                my $xdb = shift @xdb;
+                $xdb->add_database($_) for @xdb;
+                $self->{xdb} = $xdb;
         };
 }
 
-sub xpfx_init ($) {
-        my ($self) = @_;
-        if ($self->{ibx_ver} == 1) {
-                $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION;
-        } else {
-                $self->{xpfx} .= '/xap'.SCHEMA_VERSION;
-        }
-}
-
 sub new {
         my ($class, $ibx) = @_;
         ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
-        my $self = bless {
-                xpfx => $ibx->{inboxdir}, # for xpfx_init
-                altid => $ibx->{altid},
-                ibx_ver => $ibx->version,
-        }, $class;
-        xpfx_init($self);
+        my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
+        my $xpfx = "$ibx->{inboxdir}/$xap".SCHEMA_VERSION;
+        my $self = bless { xpfx => $xpfx }, $class;
+        $self->{altid} = $ibx->{altid} if defined($ibx->{altid});
         $self;
 }
 
@@ -276,46 +307,237 @@ sub reopen {
         $self; # make chaining easier
 }
 
-# read-only
+# Convert git "approxidate" ranges to something usable with our
+# Xapian indices.  At the moment, Xapian only offers a C++-only API
+# and neither the SWIG nor XS bindings allow us to use custom code
+# to parse dates (and libgit2 doesn't expose git__date_parse, either,
+# so we're running git-rev-parse(1)).
+# This replaces things we need to send to $git->date_parse with
+# "\0".$strftime_format.['+'|$idx]."\0" placeholders
+sub date_parse_prepare {
+        my ($to_parse, $pfx, $range) = @_;
+        # are we inside a parenthesized statement?
+        my $end = $range =~ s/([\)\s]*)\z// ? $1 : '';
+        my @r = split(/\.\./, $range, 2);
+
+        # expand "dt:2010-10-02" => "dt:2010-10-02..2010-10-03" and like
+        # n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD
+        # We upgrade "d:" to "dt:" unconditionally
+        if ($pfx eq 'd') {
+                $pfx = 'dt';
+                # upgrade YYYYMMDD to YYYYMMDDHHMMSS
+                $_ .= ' 00:00:00' for (grep(m!\A[0-9]{4}[^[:alnum:]]
+                                        [0-9]{2}[^[:alnum:]]
+                                        [0-9]{2}\z!x, @r));
+                $_ .= '000000' for (grep(m!\A[0-9]{8}\z!, @r));
+        }
+        if ($pfx eq 'dt') {
+                if (!defined($r[1])) { # git needs gaps and not /\d{14}/
+                        if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})
+                                        ([0-9]{2})([0-9]{2})([0-9]{2})\z/x) {
+                                push @$to_parse, "$1-$2-$3 $4:$5:$6";
+                        } else {
+                                push @$to_parse, $r[0];
+                        }
+                        $r[0] = "\0%Y%m%d%H%M%S$#$to_parse\0";
+                        $r[1] = "\0%Y%m%d%H%M%S+\0";
+                } else {
+                        for my $x (@r) {
+                                next if $x eq '' || $x =~ /\A[0-9]{14}\z/;
+                                push @$to_parse, $x;
+                                $x = "\0%Y%m%d%H%M%S$#$to_parse\0";
+                        }
+                }
+        } else { # (rt|ct), let git interpret "YYYY", deal with Y10K later :P
+                for my $x (@r) {
+                        next if $x eq '' || $x =~ /\A[0-9]{5,}\z/;
+                        push @$to_parse, $x;
+                        $x = "\0%s$#$to_parse\0";
+                }
+                $r[1] //= "\0%s+\0"; # add 1 day
+        }
+        "$pfx:".join('..', @r).$end;
+}
+
+sub date_parse_finalize {
+        my ($git, $to_parse) = @_;
+        # git-rev-parse can handle any number of args up to system
+        # limits (around (4096*32) bytes on Linux).
+        my @r = $git->date_parse(@$to_parse);
+        # n.b. git respects TZ, times stored in SQLite/Xapian are always UTC,
+        # and gmtime doesn't seem to do the right thing when TZ!=UTC
+        my ($i, $t);
+        $_[2] =~ s/\0(%[%YmdHMSs]+)([0-9\+]+)\0/
+                $t = $2 eq '+' ? ($r[$i]+86400) : $r[$i=$2+0];
+                $1 eq '%s' ? $t : strftime($1, gmtime($t))/sge;
+}
+
+# n.b. argv never has NUL, though we'll need to filter it out
+# if this $argv isn't from a command execution
+sub query_argv_to_string {
+        my (undef, $git, $argv) = @_;
+        my $to_parse;
+        my $tmp = join(' ', map {;
+                if (s!\b(d|rt|dt):(\S+)\z!date_parse_prepare(
+                                                $to_parse //= [], $1, $2)!sge) {
+                        $_;
+                } elsif (/\s/) {
+                        s/(.*?)\b(\w+:)// ? qq{$1$2"$_"} : qq{"$_"};
+                } else {
+                        $_
+                }
+        } @$argv);
+        date_parse_finalize($git, $to_parse, $tmp) if $to_parse;
+        $tmp
+}
+
+# this is for the WWW "q=" query parameter and "lei q --stdin"
+# it can't do d:"5 days ago", but it will do d:5.days.ago
+sub query_approxidate {
+        my (undef, $git) = @_; # $_[2] = $query_string (modified in-place)
+        my $DQ = qq<"\x{201c}\x{201d}>; # Xapian can use curly quotes
+        $_[2] =~ tr/\x00/ /; # Xapian doesn't do NUL, we use it as a placeholder
+        my ($terms, $phrase, $to_parse);
+        $_[2] =~ s{([^$DQ]*)([$DQ][^$DQ]*[$DQ])?}{
+                ($terms, $phrase) = ($1, $2);
+                $terms =~ s!\b(d|rt|dt):(\S+)!
+                        date_parse_prepare($to_parse //= [], $1, $2)!sge;
+                $terms.($phrase // '');
+                }sge;
+        date_parse_finalize($git, $to_parse, $_[2]) if $to_parse;
+}
+
+# read-only, for mail only (codesearch has different rules)
 sub mset {
-        my ($self, $query_string, $opts) = @_;
-        $opts ||= {};
-        my $qp = $self->{qp} //= qparse_new($self);
-        my $query = $qp->parse_query($query_string, $self->{qp_flags});
-        $opts->{relevance} = 1 unless exists $opts->{relevance};
-        _do_enquire($self, $query, $opts);
+        my ($self, $qry_str, $opt) = @_;
+        my $qp = $self->{qp} //= $self->qparse_new;
+        my $qry = $qp->parse_query($qry_str, $self->{qp_flags});
+        if (defined(my $eidx_key = $opt->{eidx_key})) {
+                $qry = $X{Query}->new(OP_FILTER(), $qry, 'O'.$eidx_key);
+        }
+        if (defined(my $uid_range = $opt->{uid_range})) {
+                my $range = $X{Query}->new(OP_VALUE_RANGE(), UID,
+                                        sortable_serialise($uid_range->[0]),
+                                        sortable_serialise($uid_range->[1]));
+                $qry = $X{Query}->new(OP_FILTER(), $qry, $range);
+        }
+        if (defined(my $tid = $opt->{threadid})) {
+                $tid = sortable_serialise($tid);
+                $qry = $X{Query}->new(OP_FILTER(), $qry,
+                        $X{Query}->new(OP_VALUE_RANGE(), THREADID, $tid, $tid));
+        }
+        do_enquire($self, $qry, $opt, TS);
+}
+
+sub xhc_start_maybe (@) {
+        require PublicInbox::XapClient;
+        my $xhc = PublicInbox::XapClient::start_helper(@_);
+        require PublicInbox::XhcMset if $xhc;
+        $xhc;
+}
+
+sub xh_opt ($$) {
+        my ($self, $opt) = @_;
+        my $lim = $opt->{limit} || 50;
+        my @ret;
+        push @ret, '-o', $opt->{offset} if $opt->{offset};
+        push @ret, '-m', $lim;
+        my $rel = $opt->{relevance} // 0;
+        if ($rel == -2) { # ORDER BY docid/UID (highest first)
+                push @ret, '-k', '-1';
+        } elsif ($rel == -1) { # ORDER BY docid/UID (lowest first)
+                push @ret, '-k', '-1';
+                push @ret, '-a';
+        } elsif ($rel == 0) {
+                push @ret, '-k', $opt->{sort_col} // TS;
+                push @ret, '-a' if $opt->{asc};
+        } else { # rel > 0
+                push @ret, '-r';
+                push @ret, '-k', $opt->{sort_col} // TS;
+                push @ret, '-a' if $opt->{asc};
+        }
+        push @ret, '-t' if $opt->{threads};
+        push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
+        push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
+        my $apfx = $self->{-alt_pfx} //= do {
+                my @tmp;
+                for (grep /\Aserial:/, @{$self->{altid} // []}) {
+                        my (undef, $pfx) = split /:/, $_;
+                        push @tmp, '-Q', "$pfx=X\U$pfx";
+                }
+                # TODO: arbitrary header indexing goes here
+                \@tmp;
+        };
+        (@ret, @$apfx);
+}
+
+# returns a true value if actually handled asynchronously,
+# and a falsy value if handled synchronously
+sub async_mset {
+        my ($self, $qry_str, $opt, $cb, @args) = @_;
+        if ($XHC) { # unconditionally retrieving pct + rank for now
+                xdb($self); # populate {nshards}
+                my @margs = ($self->xh_args, xh_opt($self, $opt));
+                my $ret = eval {
+                        my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
+                        PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
+                };
+                $cb->(@args, undef, $@) if $@;
+                $ret;
+        } else { # synchronous
+                my $mset = $self->mset($qry_str, $opt);
+                $cb->(@args, $mset);
+                undef;
+        }
+}
+
+sub do_enquire { # shared with CodeSearch
+        my ($self, $qry, $opt, $col) = @_;
+        my $enq = $X{Enquire}->new(xdb($self));
+        $enq->set_query($qry);
+        my $rel = $opt->{relevance} // 0;
+        if ($rel == -2) { # ORDER BY docid/UID (highest first)
+                $enq->set_weighting_scheme($X{BoolWeight}->new);
+                $enq->set_docid_order($ENQ_DESCENDING);
+        } elsif ($rel == -1) { # ORDER BY docid/UID (lowest first)
+                $enq->set_weighting_scheme($X{BoolWeight}->new);
+                $enq->set_docid_order($ENQ_ASCENDING);
+        } elsif ($rel == 0) {
+                $enq->set_sort_by_value_then_relevance($col, !$opt->{asc});
+        } else { # rel > 0
+                $enq->set_sort_by_relevance_then_value($col, !$opt->{asc});
+        }
+
+        # `lei q -t / --threads' or JMAP collapseThreads; but don't collapse
+        # on `-tt' ({threads} > 1) which sets the Flagged|Important keyword
+        (($opt->{threads} // 0) == 1 && has_threadid($self)) and
+                $enq->set_collapse_key(THREADID);
+        retry_reopen($self, \&enquire_once, $enq,
+                        $opt->{offset} || 0, $opt->{limit} || 50);
 }
 
 sub retry_reopen {
-        my ($self, $cb, $arg) = @_;
+        my ($self, $cb, @arg) = @_;
         for my $i (1..10) {
                 if (wantarray) {
-                        my @ret;
-                        eval { @ret = $cb->($arg) };
+                        my @ret = eval { $cb->($self, @arg) };
                         return @ret unless $@;
                 } else {
-                        my $ret;
-                        eval { $ret = $cb->($arg) };
+                        my $ret = eval { $cb->($self, @arg) };
                         return $ret unless $@;
                 }
                 # Exception: The revision being read has been discarded -
                 # you should call Xapian::Database::reopen()
                 if (ref($@) =~ /\bDatabaseModifiedError\b/) {
-                        warn "reopen try #$i on $@\n";
                         reopen($self);
                 } else {
                         # let caller decide how to spew, because ExtMsg queries
                         # get wonky and trigger:
                         # "something terrible happened at .../Xapian/Enquire.pm"
-                        die;
+                        Carp::croak($@);
                 }
         }
-        die "Too many Xapian database modifications in progress\n";
-}
-
-sub _do_enquire {
-        my ($self, $query, $opts) = @_;
-        retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
+        Carp::croak("Too many Xapian database modifications in progress\n");
 }
 
 # returns true if all docs have the THREADID value
@@ -324,32 +546,14 @@ sub has_threadid ($) {
         (xdb($self)->get_metadata('has_threadid') // '') eq '1';
 }
 
-sub _enquire_once { # retry_reopen callback
-        my ($self, $query, $opts) = @{$_[0]};
-        my $xdb = xdb($self);
-        my $enquire = $X{Enquire}->new($xdb);
-        $enquire->set_query($query);
-        $opts ||= {};
-        my $desc = !$opts->{asc};
-        if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID
-                $enquire->set_docid_order($ENQ_ASCENDING);
-                $enquire->set_weighting_scheme($X{BoolWeight}->new);
-        } elsif ($opts->{relevance}) {
-                $enquire->set_sort_by_relevance_then_value(TS, $desc);
-        } else {
-                $enquire->set_sort_by_value_then_relevance(TS, $desc);
-        }
-
-        # `mairix -t / --threads' or JMAP collapseThreads
-        if ($opts->{thread} && has_threadid($self)) {
-                $enquire->set_collapse_key(THREADID);
-        }
-        $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50);
+sub enquire_once { # retry_reopen callback
+        my (undef, $enq, $offset, $limit) = @_;
+        $enq->get_mset($offset, $limit);
 }
 
 sub mset_to_smsg {
         my ($self, $ibx, $mset) = @_;
-        my $nshard = $self->{nshard} // 1;
+        my $nshard = $self->{nshard};
         my $i = 0;
         my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
         my @msgs = sort {
@@ -361,29 +565,27 @@ sub mset_to_smsg {
 # read-write
 sub stemmer { $X{Stem}->new($LANG) }
 
-# read-only
-sub qparse_new ($) {
+sub qp_init_common {
         my ($self) = @_;
-
-        my $xdb = xdb($self);
         my $qp = $X{QueryParser}->new;
         $qp->set_default_op(OP_AND());
-        $qp->set_database($xdb);
+        $qp->set_database(xdb($self));
         $qp->set_stemmer(stemmer($self));
         $qp->set_stemming_strategy(STEM_SOME());
         my $cb = $qp->can('set_max_wildcard_expansion') //
                 $qp->can('set_max_expansion'); # Xapian 1.5.0+
         $cb->($qp, 100);
-        $cb = $qp->can('add_valuerangeprocessor') //
-                $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
-        $cb->($qp, $NVRP->new(YYYYMMDD, 'd:'));
-        $cb->($qp, $NVRP->new(DT, 'dt:'));
+        $qp;
+}
 
-        # for IMAP, undocumented for WWW and may be split off go away
-        $cb->($qp, $NVRP->new(BYTES, 'bytes:'));
-        $cb->($qp, $NVRP->new(TS, 'ts:'));
-        $cb->($qp, $NVRP->new(UID, 'uid:'));
+# read-only
+sub qparse_new {
+        my ($self) = @_;
+        my $qp = qp_init_common($self);
+        my $cb = $qp->can('add_valuerangeprocessor') //
+                $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
 
+        $cb->($qp, $_) for @MAIL_NRP;
         while (my ($name, $prefix) = each %bool_pfx_external) {
                 $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix);
         }
@@ -413,9 +615,43 @@ EOF
         $qp;
 }
 
+sub generate_cxx () { # generates snippet for xap_helper.h
+        my $ret = <<EOM;
+# line ${\__LINE__} "${\__FILE__}"
+static NRP *mail_nrp[${\scalar(@MAIL_VMAP)}];
+static void mail_nrp_init(void)
+{
+EOM
+        for (0..$#MAIL_VMAP) {
+                my $x = $MAIL_VMAP[$_];
+                $ret .= qq{\tmail_nrp[$_] = new NRP($x->[0], "$x->[1]");\n}
+        }
+$ret .= <<EOM;
+}
+
+# line ${\__LINE__} "${\__FILE__}"
+static void qp_init_mail_search(Xapian::QueryParser *qp)
+{
+        for (size_t i = 0; i < MY_ARRAY_SIZE(mail_nrp); i++)
+                qp->ADD_RP(mail_nrp[i]);
+EOM
+        for my $name (sort keys %bool_pfx_external) {
+                for (split(/ /, $bool_pfx_external{$name})) {
+                        $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
+                }
+        }
+        # altid support is handled in xh_opt and srch_init_extra in XH
+        for my $name (sort keys %prob_prefix) {
+                for (split(/ /, $prob_prefix{$name})) {
+                        $ret .= qq{\tqp->add_prefix("$name", "$_");\n}
+                }
+        }
+        $ret .= "}\n";
+}
+
 sub help {
         my ($self) = @_;
-        $self->{qp} //= qparse_new($self); # parse altids
+        $self->{qp} //= $self->qparse_new; # parse altids
         my @ret = @HELP;
         if (my $user_pfx = $self->{-user_pfx}) {
                 push @ret, @$user_pfx;
@@ -423,4 +659,74 @@ sub help {
         \@ret;
 }
 
+# always returns a scalar value
+sub int_val ($$) {
+        my ($doc, $col) = @_;
+        my $val = $doc->get_value($col) or return undef; # undef is '' in Xapian
+        sortable_unserialise($val) + 0; # PV => IV conversion
+}
+
+sub get_pct ($) { # mset item
+        # Capped at "99%" since "100%" takes an extra column in the
+        # thread skeleton view.  <xapian/mset.h> says the value isn't
+        # very meaningful, anyways.
+        my $n = $_[0]->get_percent;
+        $n > 99 ? 99 : $n;
+}
+
+sub xap_terms ($$;@) {
+        my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
+        my $end = $xdb_or_doc->termlist_end(@docid);
+        my $cur = $xdb_or_doc->termlist_begin(@docid);
+        $cur->skip_to($pfx);
+        my (@ret, $tn);
+        my $pfxlen = length($pfx);
+        for (; $cur != $end; $cur++) {
+                $tn = $cur->get_termname;
+                index($tn, $pfx) ? last : push(@ret, substr($tn, $pfxlen));
+        }
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
+}
+
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme for mail)
+sub num2docid ($$) {
+        my ($self, $num) = @_;
+        my $nshard = $self->{nshard};
+        ($num - 1) * $nshard + $num % $nshard + 1;
+}
+
+sub all_terms {
+        my ($self, $pfx) = @_;
+        my $cur = xdb($self)->allterms_begin($pfx);
+        my $end = $self->{xdb}->allterms_end($pfx);
+        my $pfxlen = length($pfx);
+        my @ret;
+        for (; $cur != $end; $cur++) {
+                push @ret, substr($cur->get_termname, $pfxlen);
+        }
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
+}
+
+sub xh_args { # prep getopt args to feed to xap_helper.h socket
+        map { ('-d', $_) } shard_dirs($_[0]);
+}
+
+sub docids_by_postlist ($$) {
+        my ($self, $q) = @_;
+        my $cur = $self->xdb->postlist_begin($q);
+        my $end = $self->{xdb}->postlist_end($q);
+        my @ids;
+        for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
+        @ids;
+}
+
+sub get_doc ($$) {
+        my ($self, $docid) = @_;
+        eval { $self->{xdb}->get_document($docid) } // do {
+                die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
+                undef;
+        }
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 803494f5..4fd493d9 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,6 +1,6 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders, files
 #
 # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
 # with the web and NNTP interfaces.  This index maintains thread
@@ -9,34 +9,46 @@
 package PublicInbox::SearchIdx;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Search PublicInbox::Lock Exporter);
+use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask
+        Exporter);
 use PublicInbox::Eml;
+use PublicInbox::Search qw(xap_terms);
 use PublicInbox::InboxWritable;
 use PublicInbox::MID qw(mids_for_index mids);
 use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
-use Carp qw(croak);
+use Carp qw(croak carp);
 use POSIX qw(strftime);
+use Fcntl qw(SEEK_SET);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
-use PublicInbox::Spawn qw(spawn nodatacow_dir);
+use PublicInbox::Spawn qw(run_wait popen_rd);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size);
+use PublicInbox::Address;
+use Config;
+our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack
+        index_text term_generator add_val is_bad_blob);
 my $X = \%PublicInbox::Search::X;
-my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
-our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
+our $DB_DANGEROUS = 0;
+our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff :
+        # assume a typical 64-bit system has 8x more RAM than a
+        # typical 32-bit system:
+        (($Config{ptrsize} >= 8 ? 8192 : 1024) * 1024);
 use constant DEBUG => !!$ENV{DEBUG};
-
+my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/;
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
+my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters
+our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
+our $PATCHID_BROKEN;
 
 sub new {
         my ($class, $ibx, $creat, $shard) = @_;
         ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
-        my $levels = qr/\A(?:full|medium|basic)\z/;
         my $inboxdir = $ibx->{inboxdir};
         my $version = $ibx->version;
         my $indexlevel = 'full';
@@ -46,27 +58,24 @@ sub new {
                 $altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ];
         }
         if ($ibx->{indexlevel}) {
-                if ($ibx->{indexlevel} =~ $levels) {
+                if ($ibx->{indexlevel} =~ $INDEXLEVELS) {
                         $indexlevel = $ibx->{indexlevel};
                 } else {
                         die("Invalid indexlevel $ibx->{indexlevel}\n");
                 }
         }
+        undef $PATCHID_BROKEN; # retry on new instances in case of upgrades
         $ibx = PublicInbox::InboxWritable->new($ibx);
-        my $self = bless {
-                ibx => $ibx,
-                xpfx => $inboxdir, # for xpfx_init
-                -altid => $altid,
-                ibx_ver => $version,
-                indexlevel => $indexlevel,
-        }, $class;
-        $self->xpfx_init;
+        my $self = PublicInbox::Search->new($ibx);
+        bless $self, $class;
+        $self->{ibx} = $ibx;
+        $self->{-altid} = $altid;
+        $self->{indexlevel} = $indexlevel;
         $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium';
         if ($ibx->{-skip_docdata}) {
                 $self->{-set_skip_docdata_once} = 1;
                 $self->{-skip_docdata} = 1;
         }
-        $ibx->umask_prepare;
         if ($version == 1) {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
@@ -84,12 +93,12 @@ sub new {
         $self;
 }
 
-sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+sub need_xapian ($) { ($_[0]->{indexlevel} // 'full') =~ $xapianlevels }
 
 sub idx_release {
         my ($self, $wake) = @_;
         if (need_xapian($self)) {
-                my $xdb = delete $self->{xdb} or croak 'not acquired';
+                my $xdb = delete $self->{xdb} or croak '{xdb} not acquired';
                 $xdb->close;
         }
         $self->lock_release($wake) if $self->{creat};
@@ -98,19 +107,24 @@ sub idx_release {
 
 sub load_xapian_writable () {
         return 1 if $X->{WritableDatabase};
-        PublicInbox::Search::load_xapian() or return;
+        PublicInbox::Search::load_xapian() or die "failed to load Xapian: $@\n";
         my $xap = $PublicInbox::Search::Xap;
         for (qw(Document TermGenerator WritableDatabase)) {
                 $X->{$_} = $xap.'::'.$_;
         }
         eval 'require '.$X->{WritableDatabase} or die;
         *sortable_serialise = $xap.'::sortable_serialise';
-        *sortable_unserialise = $xap.'::sortable_unserialise';
         $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
         $DB_OPEN = eval($xap.'::DB_OPEN()');
-        my $ver = (eval($xap.'::major_version()') << 16) |
-                (eval($xap.'::minor_version()') << 8);
-        $DB_NO_SYNC = 0x4 if $ver >= 0x10400;
+        my $ver = eval 'v'.join('.', eval($xap.'::major_version()'),
+                                eval($xap.'::minor_version()'),
+                                eval($xap.'::revision()'));
+        if ($ver ge v1.4) { # new flags in Xapian 1.4
+                $DB_NO_SYNC = 0x4;
+                $DB_DANGEROUS = 0x10;
+        }
+        # Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
+        $X->{CLOEXEC_UNSET} = 1 if $ver ge v1.2.21 && $ver le v1.2.24;
         1;
 }
 
@@ -123,6 +137,7 @@ sub idx_acquire {
                 load_xapian_writable();
                 $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN;
         }
+        my $owner = $self->{ibx} // $self->{eidx} // $self;
         if ($self->{creat}) {
                 require File::Path;
                 $self->lock_acquire;
@@ -132,12 +147,15 @@ sub idx_acquire {
                 if (!-d $dir && (!$is_shard ||
                                 ($is_shard && need_xapian($self)))) {
                         File::Path::mkpath($dir);
-                        nodatacow_dir($dir);
-                        $self->{-set_has_threadid_once} = 1;
+                        require PublicInbox::Syscall;
+                        PublicInbox::Syscall::nodatacow_dir($dir);
+                        # owner == self for CodeSearchIdx
+                        $self->{-set_has_threadid_once} = 1 if $owner != $self;
+                        $flag |= $DB_DANGEROUS if $owner->{-dangerous};
                 }
         }
         return unless defined $flag;
-        $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_fsync};
+        $flag |= $DB_NO_SYNC if $owner->{-no_fsync};
         my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         croak "Failed opening $dir: $@" if $@;
         $self->{xdb} = $xdb;
@@ -154,27 +172,48 @@ sub term_generator ($) { # write-only
 
         $self->{term_generator} //= do {
                 my $tg = $X->{TermGenerator}->new;
-                $tg->set_stemmer($self->stemmer);
+                $tg->set_stemmer(PublicInbox::Search::stemmer($self));
                 $tg;
         }
 }
 
+sub index_phrase ($$$$) {
+        my ($self, $text, $wdf_inc, $prefix) = @_;
+
+        term_generator($self)->index_text($text, $wdf_inc, $prefix);
+        $self->{term_generator}->increase_termpos;
+}
+
 sub index_text ($$$$) {
         my ($self, $text, $wdf_inc, $prefix) = @_;
-        my $tg = term_generator($self); # man Search::Xapian::TermGenerator
 
         if ($self->{indexlevel} eq 'full') {
-                $tg->index_text($text, $wdf_inc, $prefix);
-                $tg->increase_termpos;
+                index_phrase($self, $text, $wdf_inc, $prefix);
         } else {
-                $tg->index_text_without_positions($text, $wdf_inc, $prefix);
+                term_generator($self)->index_text_without_positions(
+                                        $text, $wdf_inc, $prefix);
         }
 }
 
 sub index_headers ($$) {
         my ($self, $smsg) = @_;
-        my @x = (from => 'A', # Author
-                subject => 'S', to => 'XTO', cc => 'XCC');
+        my @x = (from => 'A', to => 'XTO', cc => 'XCC'); # A: Author
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $val = $smsg->{$field};
+                next if $val eq '';
+                # include "(comments)" after the address, too, so not using
+                # PublicInbox::Address::names or pairs
+                index_text($self, $val, 1, $pfx);
+
+                # we need positional info for email addresses since they
+                # can be considered phrases
+                if ($self->{indexlevel} eq 'medium') {
+                        for my $addr (PublicInbox::Address::emails($val)) {
+                                index_phrase($self, $addr, 1, $pfx);
+                        }
+                }
+        }
+        @x = (subject => 'S');
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
                 my $val = $smsg->{$field};
                 index_text($self, $val, 1, $pfx) if $val ne '';
@@ -187,7 +226,11 @@ sub index_diff_inc ($$$$) {
                 index_text($self, join("\n", @$xnq), 1, 'XNQ');
                 @$xnq = ();
         }
-        index_text($self, $text, 1, $pfx);
+        if ($pfx eq 'XDFN') {
+                index_phrase($self, $text, 1, $pfx);
+        } else {
+                index_text($self, $text, 1, $pfx);
+        }
 }
 
 sub index_old_diff_fn {
@@ -195,8 +238,8 @@ sub index_old_diff_fn {
 
         # no renames or space support for traditional diffs,
         # find the number of leading common paths to strip:
-        my @fa = split('/', $fa);
-        my @fb = split('/', $fb);
+        my @fa = split(m'/', $fa);
+        my @fb = split(m'/', $fb);
         while (scalar(@fa) && scalar(@fb)) {
                 $fa = join('/', @fa);
                 $fb = join('/', @fb);
@@ -216,37 +259,59 @@ sub index_diff ($$$) {
         my ($self, $txt, $doc) = @_;
         my %seen;
         my $in_diff;
-        my @xnq;
-        my $xnq = \@xnq;
-        foreach (split(/\n/, $txt)) {
-                if ($in_diff && s/^ //) { # diff context
+        my $xnq = [];
+        my @l = split(/\n/, $$txt);
+        undef $$txt;
+        while (defined($_ = shift @l)) {
+                if ($in_diff && /^GIT binary patch/) {
+                        push @$xnq, $_;
+                        while (@l && $l[0] =~ /^(?:literal|delta) /) {
+                                # TODO allow searching by size range?
+                                # allows searching by exact size via:
+                                # "literal $SIZE" or "delta $SIZE"
+                                push @$xnq, shift(@l);
+
+                                # skip base85 and empty lines
+                                while (@l && ($l[0] =~ /\A$BASE85\h*\z/o ||
+                                                $l[0] !~ /\S/)) {
+                                        shift @l;
+                                }
+                                # loop hits trailing "literal 0\nHcmV?d00001\n"
+                        }
+                } elsif ($in_diff && s/^ //) { # diff context
                         index_diff_inc($self, $_, 'XDFCTX', $xnq);
                 } elsif (/^-- $/) { # email signature begins
                         $in_diff = undef;
-                } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
-                        # wait until "---" and "+++" to capture filenames
+                } elsif (m!^diff --git ("?[^/]+/.+) ("?[^/]+/.+)\z!) {
+                        # capture filenames here for binary diffs:
+                        my ($fa, $fb) = ($1, $2);
+                        push @$xnq, $_;
                         $in_diff = 1;
+                        $fa = (split(m'/', git_unquote($fa), 2))[1];
+                        $fb = (split(m'/', git_unquote($fb), 2))[1];
+                        $seen{$fa}++ or index_diff_inc($self, $fa, 'XDFN', $xnq);
+                        $seen{$fb}++ or index_diff_inc($self, $fb, 'XDFN', $xnq);
                 # traditional diff:
                 } elsif (m/^diff -(.+) (\S+) (\S+)$/) {
                         my ($opt, $fa, $fb) = ($1, $2, $3);
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         # only support unified:
                         next unless $opt =~ /[uU]/;
                         $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb,
                                                         $xnq);
                 } elsif (m!^--- ("?[^/]+/.+)!) {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (m!^\+\+\+ ("?[^/]+/.+)!)  {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (/^--- (\S+)/) {
-                        $in_diff = $1;
-                        push @xnq, $_;
+                        $in_diff = $1; # old diff filename
+                        push @$xnq, $_;
                 } elsif (defined $in_diff && /^\+\+\+ (\S+)/) {
                         $in_diff = index_old_diff_fn($self, \%seen, $in_diff,
                                                         $1, $xnq);
@@ -272,19 +337,65 @@ sub index_diff ($$$) {
                                 /^(?:dis)?similarity index / ||
                                 /^\\ No newline at end of file/ ||
                                 /^Binary files .* differ/) {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                 } elsif ($_ eq '') {
                         # possible to be in diff context, some mail may be
                         # stripped by MUA or even GNU diff(1).  "git apply"
                         # treats a bare "\n" as diff context, too
                 } else {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         warn "non-diff line: $_\n" if DEBUG && $_ ne '';
                         $in_diff = undef;
                 }
         }
 
-        index_text($self, join("\n", @xnq), 1, 'XNQ');
+        index_text($self, join("\n", @$xnq), 1, 'XNQ');
+}
+
+sub index_body_text {
+        my ($self, $doc, $sref) = @_;
+        my $rd;
+        # start patch-id in parallel
+        if ($$sref =~ /^(?:diff|---|\+\+\+) /ms && !$PATCHID_BROKEN) {
+                my $git = ($self->{ibx} // $self->{eidx} // $self)->git;
+                my $fh = PublicInbox::IO::write_file '+>:utf8', undef, $$sref;
+                $fh->flush or die "flush: $!";
+                sysseek($fh, 0, SEEK_SET);
+                $rd = popen_rd($git->cmd(qw(patch-id --stable)), undef,
+                                { 0 => $fh });
+        }
+
+        # split off quoted and unquoted blocks:
+        my @sections = PublicInbox::MsgIter::split_quotes($$sref);
+        undef $$sref; # free memory
+        for my $txt (@sections) {
+                if ($txt =~ /\A>/) {
+                        if ($txt =~ /^[>\t ]+GIT binary patch\r?/sm) {
+                                # get rid of Base-85 noise
+                                $txt =~ s/^([>\h]+(?:literal|delta)
+                                                \x20[0-9]+\r?\n)
+                                        (?:[>\h]+$BASE85\h*\r?\n)+/$1/gsmx;
+                        }
+                        index_text($self, $txt, 0, 'XQUOT');
+                } else { # does it look like a diff?
+                        if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
+                                index_diff($self, \$txt, $doc);
+                        } else {
+                                index_text($self, $txt, 1, 'XNQ');
+                        }
+                }
+                undef $txt; # free memory
+        }
+        if (defined $rd) { # reap `git patch-id'
+                (readline($rd) // '') =~ /\A([a-f0-9]{40,})/ and
+                        $doc->add_term('XDFID'.$1);
+                if (!$rd->close) {
+                        my $c = 'git patch-id --stable';
+                        $PATCHID_BROKEN = ($? >> 8) == 129;
+                        $PATCHID_BROKEN ? warn("W: $c requires git v2.1.0+\n")
+                                : warn("W: $c failed: \$?=$? (non-fatal)");
+                }
+        }
 }
 
 sub index_xapian { # msg_iter callback
@@ -293,7 +404,7 @@ sub index_xapian { # msg_iter callback
         my $ct = $part->content_type || 'text/plain';
         my $fn = $part->filename;
         if (defined $fn && $fn ne '') {
-                index_text($self, $fn, 1, 'XFN');
+                index_phrase($self, $fn, 1, 'XFN');
         }
         if ($part->{is_submsg}) {
                 my $mids = mids_for_index($part);
@@ -306,48 +417,39 @@ sub index_xapian { # msg_iter callback
         my ($s, undef) = msg_part_text($part, $ct);
         defined $s or return;
         $_[0]->[0] = $part = undef; # free memory
+        index_body_text($self, $doc, \$s);
+}
 
-        # split off quoted and unquoted blocks:
-        my @sections = PublicInbox::MsgIter::split_quotes($s);
-        undef $s; # free memory
-        for my $txt (@sections) {
-                if ($txt =~ /\A>/) {
-                        index_text($self, $txt, 0, 'XQUOT');
-                } else {
-                        # does it look like a diff?
-                        if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
-                                index_diff($self, $txt, $doc);
-                        } else {
-                                index_text($self, $txt, 1, 'XNQ');
-                        }
-                }
-                undef $txt; # free memory
+sub index_list_id ($$$) {
+        my ($self, $doc, $hdr) = @_;
+        for my $l ($hdr->header_raw('List-Id')) {
+                $l =~ /<([^>]+)>/ or next;
+                my $lid = lc $1;
+                $lid =~ tr/\n\t\r\0//d; # same rules as Message-ID
+                $doc->add_boolean_term('G' . $lid);
+                index_phrase($self, $lid, 1, 'XL'); # probabilistic
         }
 }
 
 sub index_ids ($$$$) {
         my ($self, $doc, $hdr, $mids) = @_;
         for my $mid (@$mids) {
-                index_text($self, $mid, 1, 'XM');
+                index_phrase($self, $mid, 1, 'XM');
 
                 # because too many Message-IDs are prefixed with
                 # "Pine.LNX."...
                 if ($mid =~ /\w{12,}/) {
                         my @long = ($mid =~ /(\w{3,}+)/g);
-                        index_text($self, join(' ', @long), 1, 'XM');
+                        index_phrase($self, join(' ', @long), 1, 'XM');
                 }
         }
         $doc->add_boolean_term('Q' . $_) for @$mids;
-        for my $l ($hdr->header_raw('List-Id')) {
-                $l =~ /<([^>]+)>/ or next;
-                my $lid = $1;
-                $doc->add_boolean_term('G' . $lid);
-                index_text($self, $lid, 1, 'XL'); # probabilistic
-        }
+        index_list_id($self, $doc, $hdr);
 }
 
-sub add_xapian ($$$$) {
+sub eml2doc ($$$;$) {
         my ($self, $eml, $smsg, $mids) = @_;
+        $mids //= mids_for_index($eml);
         my $doc = $X->{Document}->new;
         add_val($doc, PublicInbox::Search::TS(), $smsg->{ts});
         my @ds = gmtime($smsg->{ds});
@@ -359,20 +461,22 @@ sub add_xapian ($$$$) {
         add_val($doc, PublicInbox::Search::UID(), $smsg->{num});
         add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid});
 
-        my $tg = term_generator($self);
-        $tg->set_document($doc);
+        term_generator($self)->set_document($doc);
         index_headers($self, $smsg);
 
+        if (defined(my $eidx_key = $smsg->{eidx_key})) {
+                $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
+        }
         msg_iter($eml, \&index_xapian, [ $self, $doc ]);
         index_ids($self, $doc, $eml, $mids);
 
         # by default, we maintain compatibility with v1.5.0 and earlier
-        # by writing to docdata.glass, users who never exect to downgrade can
+        # by writing to docdata.glass, users who never expect to downgrade can
         # use --skip-docdata
         if (!$self->{-skip_docdata}) {
                 # WWW doesn't need {to} or {cc}, only NNTP
                 $smsg->{to} = $smsg->{cc} = '';
-                PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
+                $smsg->parse_references($eml, $mids);
                 my $data = $smsg->to_doc_data;
                 $doc->set_data($data);
         }
@@ -387,22 +491,38 @@ sub add_xapian ($$$$) {
                         }
                 }
         }
+        $doc;
+}
+
+sub add_xapian ($$$$) {
+        my ($self, $eml, $smsg, $mids) = @_;
+        begin_txn_lazy($self);
+        my $merge_vmd = delete $smsg->{-merge_vmd};
+        my $doc = eml2doc($self, $eml, $smsg, $mids);
+        if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) {
+                my @x = @VMD_MAP;
+                while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                        for my $term (xap_terms($pfx, $old)) {
+                                $doc->add_boolean_term($pfx.$term);
+                        }
+                }
+        }
         $self->{xdb}->replace_document($smsg->{num}, $doc);
 }
 
 sub _msgmap_init ($) {
         my ($self) = @_;
-        die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
-        $self->{mm} //= eval {
+        die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1;
+        $self->{mm} //= do {
                 require PublicInbox::Msgmap;
-                my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1;
-                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw);
+                PublicInbox::Msgmap->new_file($self->{ibx}, 1);
         };
 }
 
 sub add_message {
         # mime = PublicInbox::Eml or Email::MIME object
         my ($self, $mime, $smsg, $sync) = @_;
+        begin_txn_lazy($self);
         my $mids = mids_for_index($mime);
         $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat
         $smsg->{mid} //= $mids->[0]; # v1 compatibility
@@ -436,54 +556,172 @@ sub add_message {
         $smsg->{num};
 }
 
-sub get_val ($$) {
-        my ($doc, $col) = @_;
-        sortable_unserialise($doc->get_value($col));
+sub _get_doc ($$) {
+        my ($self, $docid) = @_;
+        $self->get_doc($docid) // do {
+                warn "E: #$docid missing in Xapian\n";
+                undef;
+        }
+}
+
+sub add_eidx_info {
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        term_generator($self)->set_document($doc);
+
+        # '.' is special for lei_store
+        $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
+
+        index_list_id($self, $doc, $eml);
+        $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub smsg_from_doc ($) {
-        my ($doc) = @_;
-        my $data = $doc->get_data or return;
-        my $smsg = bless {}, 'PublicInbox::Smsg';
-        $smsg->{ts} = get_val($doc, PublicInbox::Search::TS());
-        my $dt = get_val($doc, PublicInbox::Search::DT());
-        my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
-        $smsg->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
-        $smsg->load_from_data($data);
-        $smsg;
+sub get_terms {
+        my ($self, $pfx, $docid) = @_;
+        begin_txn_lazy($self);
+        xap_terms($pfx, $self->{xdb}, $docid);
 }
 
-sub xdb_remove {
-        my ($self, $oid, @removed) = @_;
-        my $xdb = $self->{xdb} or return;
-        for my $num (@removed) {
-                my $doc = eval { $xdb->get_document($num) };
-                unless ($doc) {
-                        warn "E: $@\n" if $@;
-                        warn "E: #$num $oid missing in Xapian\n";
-                        next;
+sub remove_eidx_info {
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        eval { $doc->remove_term('O'.$eidx_key) };
+        warn "W: ->remove_term O$eidx_key: $@\n" if $@;
+        for my $l ($eml ? $eml->header_raw('List-Id') : ()) {
+                $l =~ /<([^>]+)>/ or next;
+                my $lid = lc $1;
+                eval { $doc->remove_term('G' . $lid) };
+                warn "W: ->remove_term G$lid: $@\n" if $@;
+
+                # nb: we don't remove the XL probabilistic terms
+                # since terms may overlap if cross-posted.
+                #
+                # IOW, a message which has both <foo.example.com>
+                # and <bar.example.com> would have overlapping
+                # "XLexample" and "XLcom" as terms and which we
+                # wouldn't know if they're safe to remove if we just
+                # unindex <foo.example.com> while preserving
+                # <bar.example.com>.
+                #
+                # In any case, this entire sub is will likely never
+                # be needed and users using the "l:" prefix are probably
+                # rarer.
+        }
+        $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub set_vmd {
+        my ($self, $docid, $vmd) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my ($v, @rm, @add);
+        my @x = @VMD_MAP;
+        my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end);
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $set = $vmd->{$field} // next;
+                my %keep = map { $_ => 1 } @$set;
+                my %add = %keep;
+                $cur->skip_to($pfx); # works due to @VMD_MAP order
+                for (; $cur != $end; $cur++) {
+                        $v = $cur->get_termname;
+                        $v =~ s/\A$pfx//s or next;
+                        $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
                 }
-                my $smsg = smsg_from_doc($doc);
-                my $blob = $smsg->{blob}; # may be undef if --skip-docdata
-                if (!defined($blob) || $blob eq $oid) {
-                        $xdb->delete_document($num);
-                } else {
-                        warn "E: #$num $oid != $blob in Xapian\n";
+                push(@add, map { $pfx.$_ } keys %add);
+        }
+        return unless scalar(@rm) || scalar(@add);
+        $doc->remove_term($_) for @rm;
+        $doc->add_boolean_term($_) for @add;
+        $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub apply_vmd_mod ($$) {
+        my ($doc, $vmd_mod) = @_;
+        my $updated = 0;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                # field: "L" or "kw"
+                for my $val (@{$vmd_mod->{"-$field"} // []}) {
+                        eval {
+                                $doc->remove_term($pfx . $val);
+                                ++$updated;
+                        };
+                }
+                for my $val (@{$vmd_mod->{"+$field"} // []}) {
+                        $doc->add_boolean_term($pfx . $val);
+                        ++$updated;
+                }
+        }
+        $updated;
+}
+
+sub add_vmd {
+        my ($self, $docid, $vmd) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my @x = @VMD_MAP;
+        my $updated = 0;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $add = $vmd->{$field} // next;
+                $doc->add_boolean_term($pfx . $_) for @$add;
+                $updated += scalar(@$add);
+        }
+        $updated += apply_vmd_mod($doc, $vmd);
+        $self->{xdb}->replace_document($docid, $doc) if $updated;
+}
+
+sub remove_vmd {
+        my ($self, $docid, $vmd) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my $replace;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $rm = $vmd->{$field} // next;
+                for (@$rm) {
+                        eval {
+                                $doc->remove_term($pfx . $_);
+                                $replace = 1;
+                        };
                 }
         }
+        $self->{xdb}->replace_document($docid, $doc) if $replace;
+}
+
+sub update_vmd {
+        my ($self, $docid, $vmd_mod) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my $updated = apply_vmd_mod($doc, $vmd_mod);
+        $self->{xdb}->replace_document($docid, $doc) if $updated;
+        $updated;
+}
+
+sub xdb_remove {
+        my ($self, @docids) = @_;
+        begin_txn_lazy($self);
+        my $xdb = $self->{xdb} // die 'BUG: missing {xdb}';
+        for my $docid (@docids) {
+                eval { $xdb->delete_document($docid) };
+                warn "E: #$docid not in Xapian? $@\n" if $@;
+        }
 }
 
-sub remove_by_oid {
-        my ($self, $oid, $num) = @_;
-        die "BUG: remove_by_oid is v2-only\n" if $self->{oidx};
-        $self->begin_txn_lazy;
-        xdb_remove($self, $oid, $num) if need_xapian($self);
+sub xdb_remove_quiet {
+        my ($self, $docid) = @_;
+        begin_txn_lazy($self);
+        my $xdb = $self->{xdb} // die 'BUG: missing {xdb}';
+        eval { $xdb->delete_document($docid) };
+        ++$self->{-quiet_rm} unless $@;
 }
 
+sub nr_quiet_rm { delete($_[0]->{-quiet_rm}) // 0 }
+
 sub index_git_blob_id {
         my ($doc, $pfx, $objid) = @_;
 
-        my $len = length($objid);
         for (my $len = length($objid); $len >= 7; ) {
                 $doc->add_term($pfx.$objid);
                 $objid = substr($objid, 0, --$len);
@@ -502,8 +740,8 @@ sub unindex_eml {
                 $tmp{$_}++ for @removed;
         }
         if (!$nr) {
-                $mids = join('> <', @$mids);
-                warn "W: <$mids> missing for removal from overview\n";
+                my $m = join('> <', @$mids);
+                warn "W: <$m> missing for removal from overview\n";
         }
         while (my ($num, $nr) = each %tmp) {
                 warn "BUG: $num appears >1 times ($nr) for $oid\n" if $nr != 1;
@@ -513,7 +751,7 @@ sub unindex_eml {
         } else { # just in case msgmap and over.sqlite3 become desynched:
                 $self->{mm}->mid_delete($mids->[0]);
         }
-        xdb_remove($self, $oid, keys %tmp) if need_xapian($self);
+        xdb_remove($self, keys %tmp) if need_xapian($self);
 }
 
 sub index_mm {
@@ -533,53 +771,73 @@ sub index_mm {
         }
 }
 
-# returns the number of bytes to add if given a non-CRLF arg
-sub crlf_adjust ($) {
-        if (index($_[0], "\r\n") < 0) {
-                # common case is LF-only, every \n needs an \r;
-                # so favor a cheap tr// over an expensive m//g
-                $_[0] =~ tr/\n/\n/;
-        } else { # count number of '\n' w/o '\r', expensive:
-                scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+sub is_bad_blob ($$$$) {
+        my ($oid, $type, $size, $expect_oid) = @_;
+        if ($type ne 'blob') {
+                carp "W: $expect_oid is not a blob (type=$type)";
+                return 1;
         }
+        croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
+        $size == 0 ? 1 : 0; # size == 0 means purged
 }
 
 sub index_both { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
         my ($nr, $max) = @$sync{qw(nr max)};
         ++$$nr;
         $$max -= $size;
-        $size += crlf_adjust($$bref);
-        my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg';
+        my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+        $smsg->set_bytes($$bref, $size);
         my $self = $sync->{sidx};
+        local $self->{current_info} = "$self->{current_info}: $oid";
         my $eml = PublicInbox::Eml->new($bref);
         $smsg->{num} = index_mm($self, $eml, $oid, $sync) or
                 die "E: could not generate NNTP article number for $oid";
         add_message($self, $eml, $smsg, $sync);
+        ++$self->{nidx};
+        my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+        ${$sync->{latest_cmt}} = $cur_cmt;
 }
 
 sub unindex_both { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $self) = @_;
+        my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
+        my $self = $sync->{sidx};
+        local $self->{current_info} = "$self->{current_info}: $oid";
         unindex_eml($self, $oid, PublicInbox::Eml->new($bref));
+        # may be undef if leftover
+        if (defined(my $cur_cmt = $sync->{cur_cmt})) {
+                ${$sync->{latest_cmt}} = $cur_cmt;
+        }
+        ++$self->{nidx};
+}
+
+sub with_umask {
+        my $self = shift;
+        my $owner = $self->{ibx} // $self->{eidx};
+        $owner ? $owner->with_umask(@_) : $self->SUPER::with_umask(@_)
 }
 
 # called by public-inbox-index
 sub index_sync {
         my ($self, $opt) = @_;
         delete $self->{lock_path} if $opt->{-skip_lock};
-        $self->{ibx}->with_umask(\&_index_sync, $self, $opt);
-        if ($opt->{reindex}) {
+        $self->with_umask(\&_index_sync, $self, $opt);
+        if ($opt->{reindex} && !$opt->{quit} &&
+                        !grep(defined, @$opt{qw(since until)})) {
                 my %again = %$opt;
                 delete @again{qw(rethread reindex)};
                 index_sync($self, \%again);
+                $opt->{quit} = $again{quit}; # propagate to caller
         }
 }
 
 sub check_size { # check_async cb for -index --max-size=...
-        my ($oid, $type, $size, $arg, $git) = @_;
-        (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}";
+        my (undef, $oid, $type, $size, $arg) = @_;
+        ($type // '') eq 'blob' or die "E: bad $oid in $arg->{git}->{git_dir}";
         if ($size <= $arg->{max_size}) {
-                $git->cat_async($oid, $arg->{index_oid}, $arg);
+                $arg->{git}->cat_async($oid, $arg->{index_oid}, $arg);
         } else {
                 warn "W: skipping $oid ($size > $arg->{max_size})\n";
         }
@@ -587,46 +845,45 @@ sub check_size { # check_async cb for -index --max-size=...
 
 sub v1_checkpoint ($$;$) {
         my ($self, $sync, $stk) = @_;
-        $self->{ibx}->git->check_async_wait;
-        $self->{ibx}->git->cat_async_wait;
-
-        # latest_cmt may be undef
-        my $newest = $stk ? $stk->{latest_cmt} : undef;
-        if ($newest) {
-                my $cur = $self->{mm}->last_commit || '';
-                if (need_update($self, $cur, $newest)) {
+        $self->{ibx}->git->async_wait_all;
+
+        # $newest may be undef
+        my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        if (defined($newest)) {
+                my $cur = $self->{mm}->last_commit;
+                if (need_update($self, $sync, $cur, $newest)) {
                         $self->{mm}->last_commit($newest);
                 }
-        } else {
-                ${$sync->{max}} = $self->{batch_bytes};
         }
+        ${$sync->{max}} = $self->{batch_bytes};
 
         $self->{mm}->{dbh}->commit;
-        if ($newest && need_xapian($self)) {
-                my $xdb = $self->{xdb};
+        eval { $self->{mm}->{dbh}->do('PRAGMA optimize') };
+        my $xdb = $self->{xdb};
+        if ($newest && $xdb) {
                 my $cur = $xdb->get_metadata('last_commit');
-                if (need_update($self, $cur, $newest)) {
+                if (need_update($self, $sync, $cur, $newest)) {
                         $xdb->set_metadata('last_commit', $newest);
                 }
-
+        }
+        if ($stk) { # all done if $stk is passed
                 # let SearchView know a full --reindex was done so it can
                 # generate ->has_threadid-dependent links
-                if ($sync->{reindex} && !ref($sync->{reindex})) {
+                if ($xdb && $sync->{reindex} && !ref($sync->{reindex})) {
                         my $n = $xdb->get_metadata('has_threadid');
                         $xdb->set_metadata('has_threadid', '1') if $n ne '1';
                 }
+                $self->{oidx}->rethread_done($sync->{-opt}); # all done
         }
-
-        $self->{oidx}->rethread_done($sync->{-opt}) if $newest; # all done
         commit_txn_lazy($self);
-        $self->{ibx}->git->cleanup;
+        $sync->{ibx}->git->cleanup;
         my $nr = ${$sync->{nr}};
         idx_release($self, $nr);
         # let another process do some work...
         if (my $pr = $sync->{-opt}->{-progress}) {
                 $pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
         }
-        if (!$stk) { # more to come
+        if (!$stk && !$sync->{quit}) { # more to come
                 begin_txn_lazy($self);
                 $self->{mm}->{dbh}->begin_work;
         }
@@ -635,45 +892,51 @@ sub v1_checkpoint ($$;$) {
 # only for v1
 sub process_stack {
         my ($self, $sync, $stk) = @_;
-        my $git = $self->{ibx}->git;
+        my $git = $sync->{ibx}->git;
         my $max = $self->{batch_bytes};
         my $nr = 0;
         $sync->{nr} = \$nr;
         $sync->{max} = \$max;
         $sync->{sidx} = $self;
+        $sync->{latest_cmt} = \(my $latest_cmt);
 
         $self->{mm}->{dbh}->begin_work;
         if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
                 warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
                 for my $oid (@leftovers) {
+                        last if $sync->{quit};
                         $oid = unpack('H*', $oid);
-                        $git->cat_async($oid, \&unindex_both, $self);
+                        $git->cat_async($oid, \&unindex_both, $sync);
                 }
         }
         if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
                 $sync->{index_oid} = \&index_both;
         }
-        while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
+        while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
+                my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid };
+                last if $sync->{quit};
                 if ($f eq 'm') {
-                        my $arg = { %$sync, autime => $at, cotime => $ct };
+                        $arg->{autime} = $at;
+                        $arg->{cotime} = $ct;
                         if ($sync->{max_size}) {
+                                $arg->{git} = $git;
                                 $git->check_async($oid, \&check_size, $arg);
                         } else {
                                 $git->cat_async($oid, \&index_both, $arg);
                         }
                         v1_checkpoint($self, $sync) if $max <= 0;
                 } elsif ($f eq 'd') {
-                        $git->cat_async($oid, \&unindex_both, $self);
+                        $git->cat_async($oid, \&unindex_both, $arg);
                 }
         }
-        v1_checkpoint($self, $sync, $stk);
+        v1_checkpoint($self, $sync, $sync->{quit} ? undef : $stk);
 }
 
-sub log2stack ($$$$) {
-        my ($sync, $git, $range, $ibx) = @_;
+sub log2stack ($$$) {
+        my ($sync, $git, $range) = @_;
         my $D = $sync->{D}; # OID_BIN => NR (if reindexing, undef otherwise)
         my ($add, $del);
-        if ($ibx->version == 1) {
+        if ($sync->{ibx}->version == 1) {
                 my $path = $hex.'{2}/'.$hex.'{38}';
                 $add = qr!\A:000000 100644 \S+ ($OID) A\t$path$!;
                 $del = qr!\A:100644 000000 ($OID) \S+ D\t$path$!;
@@ -684,43 +947,47 @@ sub log2stack ($$$$) {
 
         # Count the new files so they can be added newest to oldest
         # and still have numbers increasing from oldest to newest
-        my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H
-                                --no-notes --no-color --no-renames --no-abbrev),
-                                $range);
-        my ($at, $ct, $stk);
-        while (<$fh>) {
-                if (/\A([0-9]+)-([0-9]+)-($OID)$/o) {
-                        ($at, $ct) = ($1 + 0, $2 + 0);
-                        $stk //= PublicInbox::IdxStack->new($3);
-                } elsif (/$del/) {
+        my @cmd = qw(log --raw -r --pretty=tformat:%at-%ct-%H
+                        --no-notes --no-color --no-renames --no-abbrev);
+        for my $k (qw(since until)) {
+                my $v = $sync->{-opt}->{$k} // next;
+                next if !$sync->{-opt}->{reindex};
+                push @cmd, "--$k=$v";
+        }
+        my $fh = $git->popen(@cmd, $range);
+        my ($at, $ct, $stk, $cmt, $l);
+        while (defined($l = <$fh>)) {
+                return if $sync->{quit};
+                if ($l =~ /\A([0-9]+)-([0-9]+)-($OID)$/o) {
+                        ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3);
+                        $stk //= PublicInbox::IdxStack->new($cmt);
+                } elsif ($l =~ /$del/) {
                         my $oid = $1;
                         if ($D) { # reindex case
                                 $D->{pack('H*', $oid)}++;
                         } else { # non-reindex case:
-                                $stk->push_rec('d', $at, $ct, $oid);
+                                $stk->push_rec('d', $at, $ct, $oid, $cmt);
                         }
-                } elsif (/$add/) {
+                } elsif ($l =~ /$add/) {
                         my $oid = $1;
                         if ($D) {
                                 my $oid_bin = pack('H*', $oid);
                                 my $nr = --$D->{$oid_bin};
                                 delete($D->{$oid_bin}) if $nr <= 0;
-
                                 # nr < 0 (-1) means it never existed
-                                $stk->push_rec('m', $at, $ct, $oid) if $nr < 0;
-                        } else {
-                                $stk->push_rec('m', $at, $ct, $oid);
+                                next if $nr >= 0;
                         }
+                        $stk->push_rec('m', $at, $ct, $oid, $cmt);
                 }
         }
-        close $fh or die "git log failed: \$?=$?";
+        $fh->close or die "git log failed: \$?=$?";
         $stk //= PublicInbox::IdxStack->new;
         $stk->read_prepare;
 }
 
-sub prepare_stack ($$$) {
-        my ($self, $sync, $range) = @_;
-        my $git = $self->{ibx}->git;
+sub prepare_stack ($$) {
+        my ($sync, $range) = @_;
+        my $git = $sync->{ibx}->git;
 
         if (index($range, '..') < 0) {
                 # don't show annoying git errors to users who run -index
@@ -729,24 +996,28 @@ sub prepare_stack ($$$) {
                 return PublicInbox::IdxStack->new->read_prepare if $?;
         }
         $sync->{D} = $sync->{reindex} ? {} : undef; # OID_BIN => NR
-        log2stack($sync, $git, $range, $self->{ibx});
+        log2stack($sync, $git, $range);
 }
 
 # --is-ancestor requires git 1.8.0+
 sub is_ancestor ($$$) {
         my ($git, $cur, $tip) = @_;
         return 0 unless $git->check($cur);
-        my $cmd = [ 'git', "--git-dir=$git->{git_dir}",
-                qw(merge-base --is-ancestor), $cur, $tip ];
-        my $pid = spawn($cmd);
-        waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish';
-        $? == 0;
+        my $cmd = $git->cmd(qw(merge-base --is-ancestor), $cur, $tip);
+        run_wait($cmd) == 0;
 }
 
-sub need_update ($$$) {
-        my ($self, $cur, $new) = @_;
+sub need_update ($$$$) {
+        my ($self, $sync, $cur, $new) = @_;
         my $git = $self->{ibx}->git;
-        return 1 if $cur && !is_ancestor($git, $cur, $new);
+        $cur //= ''; # XS Search::Xapian ->get_metadata doesn't give undef
+
+        # don't rewind if --{since,until,before,after} are in use
+        return if $cur ne '' &&
+                grep(defined, @{$sync->{-opt}}{qw(since until)}) &&
+                is_ancestor($git, $new, $cur);
+
+        return 1 if $cur ne '' && !is_ancestor($git, $cur, $new);
         my $range = $cur eq '' ? $new : "$cur..$new";
         chomp(my $n = $git->qx(qw(rev-list --count), $range));
         ($n eq '' || $n > 0);
@@ -777,15 +1048,34 @@ sub reindex_from ($$) {
         ref($reindex) eq 'HASH' ? $reindex->{from} : '';
 }
 
+sub quit_cb ($) {
+        my ($sync) = @_;
+        sub {
+                # we set {-opt}->{quit} too, so ->index_sync callers
+                # can abort multi-inbox loops this way
+                $sync->{quit} = $sync->{-opt}->{quit} = 1;
+                warn "gracefully quitting\n";
+        }
+}
+
 # indexes all unindexed messages (v1 only)
 sub _index_sync {
         my ($self, $opt) = @_;
         my $tip = $opt->{ref} || 'HEAD';
-        my $git = $self->{ibx}->git;
+        my $ibx = $self->{ibx};
+        local $self->{current_info} = "$ibx->{inboxdir}";
         $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES;
-        $git->batch_prepare;
+
+        if ($X->{CLOEXEC_UNSET}) {
+                $ibx->git->cat_file($tip);
+                $ibx->git->check($tip);
+        }
         my $pr = $opt->{-progress};
-        my $sync = { reindex => $opt->{reindex}, -opt => $opt };
+        my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx };
+        my $quit = quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
         my $xdb = $self->begin_txn_lazy;
         $self->{oidx}->rethread_prepare($opt);
         my $mm = _msgmap_init($self);
@@ -803,10 +1093,10 @@ sub _index_sync {
         my $lx = reindex_from($sync->{reindex}, $last_commit);
         my $range = $lx eq '' ? $tip : "$lx..$tip";
         $pr->("counting changes\n\t$range ... ") if $pr;
-        my $stk = prepare_stack($self, $sync, $range);
+        my $stk = prepare_stack($sync, $range);
         $sync->{ntodo} = $stk ? $stk->num_records : 0;
         $pr->("$sync->{ntodo}\n") if $pr; # continue previous line
-        process_stack($self, $sync, $stk);
+        process_stack($self, $sync, $stk) if !$sync->{quit};
 }
 
 sub DESTROY {
@@ -815,8 +1105,10 @@ sub DESTROY {
         $_[0]->{lockfh} = undef;
 }
 
-sub _begin_txn {
+sub begin_txn_lazy {
         my ($self) = @_;
+        return if $self->{txn};
+        my $restore = $self->with_umask;
         my $xdb = $self->{xdb} || idx_acquire($self);
         $self->{oidx}->begin_lazy if $self->{oidx};
         $xdb->begin_transaction if $xdb;
@@ -824,13 +1116,8 @@ sub _begin_txn {
         $xdb;
 }
 
-sub begin_txn_lazy {
-        my ($self) = @_;
-        $self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn};
-}
-
 # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
-# This metadata is read by Admin::detect_indexlevel:
+# This metadata is read by InboxWritable->detect_indexlevel:
 sub set_metadata_once {
         my ($self) = @_;
 
@@ -852,8 +1139,14 @@ sub set_metadata_once {
         }
 }
 
-sub _commit_txn {
+sub commit_txn_lazy {
         my ($self) = @_;
+        return unless delete($self->{txn});
+        my $restore = $self->with_umask;
+        if (my $eidx = $self->{eidx}) {
+                $eidx->git->async_wait_all;
+                $eidx->{transact_bytes} = 0;
+        }
         if (my $xdb = $self->{xdb}) {
                 set_metadata_once($self);
                 $xdb->commit_transaction;
@@ -861,18 +1154,18 @@ sub _commit_txn {
         $self->{oidx}->commit_lazy if $self->{oidx};
 }
 
-sub commit_txn_lazy {
-        my ($self) = @_;
-        delete($self->{txn}) and
-                $self->{ibx}->with_umask(\&_commit_txn, $self);
-}
-
-sub worker_done {
-        my ($self) = @_;
-        if (need_xapian($self)) {
-                die "$$ $0 xdb not released\n" if $self->{xdb};
-        }
-        die "$$ $0 still in transaction\n" if $self->{txn};
+sub eidx_shard_new {
+        my ($class, $eidx, $shard) = @_;
+        my $self = bless {
+                eidx => $eidx,
+                xpfx => $eidx->{xpfx},
+                indexlevel => $eidx->{indexlevel},
+                -skip_docdata => 1,
+                shard => $shard,
+                creat => 1,
+        }, $class;
+        $self->{-set_indexlevel_once} = 1 if $self->{indexlevel} eq 'medium';
+        $self;
 }
 
 1;
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index f23d23d0..ea261bda 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -1,155 +1,75 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Internal interface for a single Xapian shard in V2 inboxes.
 # See L<public-inbox-v2-format(5)> for more info on how we shard Xapian
 package PublicInbox::SearchIdxShard;
-use strict;
-use v5.10.1;
-use parent qw(PublicInbox::SearchIdx);
-use IO::Handle (); # autoflush
-use PublicInbox::Eml;
+use v5.12;
+use parent qw(PublicInbox::SearchIdx PublicInbox::IPC);
+use PublicInbox::OnDestroy;
+use PublicInbox::Syscall qw($F_SETPIPE_SZ);
 
 sub new {
-        my ($class, $v2w, $shard) = @_;
+        my ($class, $v2w, $shard) = @_; # v2w may be ExtSearchIdx
         my $ibx = $v2w->{ibx};
-        my $self = $class->SUPER::new($ibx, 1, $shard);
+        my $self = $ibx ? $class->SUPER::new($ibx, 1, $shard)
+                        : $class->eidx_shard_new($v2w, $shard);
         # create the DB before forking:
         $self->idx_acquire;
         $self->set_metadata_once;
         $self->idx_release;
-        $self->spawn_worker($v2w, $shard) if $v2w->{parallel};
+        if ($v2w->{parallel}) {
+                local $self->{-v2w_afc} = $v2w;
+                $self->ipc_worker_spawn("shard[$shard]");
+                # Increasing the pipe size for requests speeds V2 batch imports
+                # across 8 cores by nearly 20%.  Since many of our responses
+                # are small, make the response pipe as small as possible
+                if ($F_SETPIPE_SZ) {
+                        fcntl($self->{-ipc_req}, $F_SETPIPE_SZ, 1048576);
+                        fcntl($self->{-ipc_res}, $F_SETPIPE_SZ, 4096);
+                }
+        }
         $self;
 }
 
-sub spawn_worker {
-        my ($self, $v2w, $shard) = @_;
-        my ($r, $w);
-        pipe($r, $w) or die "pipe failed: $!\n";
-        $w->autoflush(1);
-        my $pid = fork;
-        defined $pid or die "fork failed: $!\n";
-        if ($pid == 0) {
-                my $bnote = $v2w->atfork_child;
-                close $w or die "failed to close: $!";
-
-                # F_SETPIPE_SZ = 1031 on Linux; increasing the pipe size here
-                # speeds V2Writable batch imports across 8 cores by nearly 20%
-                fcntl($r, 1031, 1048576) if $^O eq 'linux';
-
-                eval { shard_worker_loop($self, $v2w, $r, $shard, $bnote) };
-                die "worker $shard died: $@\n" if $@;
-                die "unexpected MM $self->{mm}" if $self->{mm};
-                exit;
-        }
-        $self->{pid} = $pid;
-        $self->{w} = $w;
-        close $r or die "failed to close: $!";
+sub _worker_done { # OnDestroy cb
+        my ($self) = @_;
+        die "BUG: $$ $0 xdb active" if $self->need_xapian && $self->{xdb};
+        die "BUG: $$ $0 txn active" if $self->{txn};
 }
 
-# this reads all the writes to $self->{w} from the parent process
-sub shard_worker_loop ($$$$$) {
-        my ($self, $v2w, $r, $shard, $bnote) = @_;
-        $0 = "pi-v2-shard[$shard]";
+sub ipc_atfork_child { # called automatically before ipc_worker_loop
+        my ($self) = @_;
+        my $v2w = delete $self->{-v2w_afc} or die 'BUG: {-v2w_afc} missing';
+        $v2w->atfork_child; # calls ipc_sibling_atfork_child on our siblings
+        $v2w->{current_info} = "[$self->{shard}]"; # for $SIG{__WARN__}
         $self->begin_txn_lazy;
-        while (my $line = readline($r)) {
-                $v2w->{current_info} = "[$shard] $line";
-                if ($line eq "commit\n") {
-                        $self->commit_txn_lazy;
-                } elsif ($line eq "close\n") {
-                        $self->idx_release;
-                } elsif ($line eq "barrier\n") {
-                        $self->commit_txn_lazy;
-                        # no need to lock < 512 bytes is atomic under POSIX
-                        print $bnote "barrier $shard\n" or
-                                        die "write failed for barrier $!\n";
-                } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) {
-                        $self->remove_by_oid($1, $2 + 0);
-                } else {
-                        chomp $line;
-                        # n.b. $mid may contain spaces(!)
-                        my ($to_read, $bytes, $num, $blob, $ds, $ts, $tid, $mid)
-                                = split(/ /, $line, 8);
-                        $self->begin_txn_lazy;
-                        my $n = read($r, my $msg, $to_read) or die "read: $!\n";
-                        $n == $to_read or die "short read: $n != $to_read\n";
-                        my $mime = PublicInbox::Eml->new(\$msg);
-                        my $smsg = bless {
-                                bytes => $bytes,
-                                num => $num + 0,
-                                blob => $blob,
-                                mid => $mid,
-                                tid => $tid,
-                                ds => $ds,
-                                ts => $ts,
-                        }, 'PublicInbox::Smsg';
-                        $self->add_message($mime, $smsg);
-                }
-        }
-        $self->worker_done;
-}
-
-sub index_raw {
-        my ($self, $msgref, $eml, $smsg) = @_;
-        if (my $w = $self->{w}) {
-                # mid must be last, it can contain spaces (but not LF)
-                print $w join(' ', @$smsg{qw(raw_bytes bytes
-                                                num blob ds ts tid mid)}),
-                        "\n", $$msgref or die "failed to write shard $!\n";
-        } else {
-                if ($eml) {
-                        undef $$msgref;
-                } else { # --xapian-only + --sequential-shard:
-                        $eml = PublicInbox::Eml->new($msgref);
-                }
-                $self->begin_txn_lazy;
-                $self->add_message($eml, $smsg);
-        }
+        # caller (ipc_worker_spawn) must capture this:
+        on_destroy \&_worker_done, $self;
 }
 
-sub atfork_child {
-        close $_[0]->{w} or die "failed to close write pipe: $!\n";
+sub index_eml {
+        my ($self, $eml, $smsg, $eidx_key) = @_;
+        $smsg->{eidx_key} = $eidx_key if defined $eidx_key;
+        $self->ipc_do('add_xapian', $eml, $smsg);
 }
 
-sub shard_barrier {
-        my ($self) = @_;
-        if (my $w = $self->{w}) {
-                print $w "barrier\n" or die "failed to print: $!";
-        } else {
-                $self->commit_txn_lazy;
-        }
+# wait for return to determine when ipc_do('commit_txn_lazy') is done
+sub echo {
+        shift;
+        "@_";
 }
 
-sub shard_commit {
+sub idx_close {
         my ($self) = @_;
-        if (my $w = $self->{w}) {
-                print $w "commit\n" or die "failed to write commit: $!";
-        } else {
-                $self->commit_txn_lazy;
-        }
+        die "BUG: $$ $0 txn active" if $self->{txn};
+        $self->idx_release if $self->{xdb};
 }
 
 sub shard_close {
         my ($self) = @_;
-        if (my $w = delete $self->{w}) {
-                my $pid = delete $self->{pid} or die "no process to wait on\n";
-                print $w "close\n" or die "failed to write to pid:$pid: $!\n";
-                close $w or die "failed to close pipe for pid:$pid: $!\n";
-                waitpid($pid, 0) == $pid or die "remote process did not finish";
-                $? == 0 or die ref($self)." pid:$pid exited with: $?";
-        } else {
-                die "transaction in progress $self\n" if $self->{txn};
-                $self->idx_release if $self->{xdb};
-        }
-}
-
-sub shard_remove {
-        my ($self, $oid, $num) = @_;
-        if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
-                print $w "D $oid $num\n" or die "failed to write remove $!";
-        } else { # same process
-                $self->remove_by_oid($oid, $num);
-        }
+        $self->ipc_do('idx_close');
+        $self->ipc_worker_stop;
 }
 
 1;
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 6724ae39..747e3249 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,12 +1,12 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
 package PublicInbox::SearchQuery;
 use strict;
 use v5.10.1;
 use URI::Escape qw(uri_escape);
-use PublicInbox::MID qw(MID_ESC);
+use PublicInbox::Hval qw(ascii_html);
 our $LIM = 200;
 
 sub new {
@@ -16,10 +16,11 @@ sub new {
         my $t = $qp->{t}; # collapse threads
         my ($l) = (($qp->{l} || '') =~ /([0-9]+)/);
         $l = $LIM if !$l || $l > $LIM;
+        my ($o) = (($qp->{o} || '0') =~ /(-?[0-9]+)/);
         bless {
                 q => $qp->{'q'},
                 x => $qp->{x} || '',
-                o => (($qp->{o} || '0') =~ /(-?[0-9]+)/),
+                o => $o,
                 l => $l,
                 r => (defined $r && $r ne '0'),
                 t => (defined $t && $t ne '0'),
@@ -32,11 +33,16 @@ sub qs_html {
         if (scalar(keys(%override))) {
                 $self = bless { (%$self, %override) }, ref($self);
         }
-
-        my $q = uri_escape($self->{'q'}, MID_ESC);
-        $q =~ s/%20/+/g; # improve URL readability
-        my $qs = "q=$q";
-
+        my $qs = '';
+        if (defined(my $q = $self->{'q'})) {
+                # not using MID_ESC since that's for the path component and
+                # this is for the query component.  Unlike MID_ESC,
+                # this disallows [\&\'\+=] and allows slash [/] for
+                # nicer looking dfn: queries
+                $q = uri_escape($q, '^A-Za-z0-9\-\._~!\$\(\)\*,;:@/');
+                $q =~ s/%20/+/g; # improve URL readability
+                $qs .= 'q='.ascii_html($q);
+        }
         if (my $o = $self->{o}) { # ignore o == 0
                 $qs .= "&amp;o=$o";
         }
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 60f692b2..00ae9fac 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -24,70 +24,74 @@ use PublicInbox::MID qw($MID_EXTRACT);
 
 sub thread {
         my ($msgs, $ordersub, $ctx) = @_;
+        my (%id_table, @imposters);
+        keys(%id_table) = scalar @$msgs; # pre-size
 
-        # A. put all current $msgs (non-ghosts) into %id_table
-        my %id_table = map {;
+        # A. put all current non-imposter $msgs (non-ghosts) into %id_table
+        # (imposters are messages with reused Message-IDs)
+        # Sadly, we sort here anyways since the fill-in-the-blanks References:
+        # can be shakier if somebody used In-Reply-To with multiple, disparate
+        # messages.  So, take the client Date: into account since we can't
+        # always determine ordering when somebody uses multiple In-Reply-To.
+        my @kids = sort { $a->{ds} <=> $b->{ds} } grep {
                 # this delete saves around 4K across 1K messages
                 # TODO: move this to a more appropriate place, breaks tests
                 # if we do it during psgi_cull
                 delete $_->{num};
-
-                $_->{mid} => PublicInbox::SearchThread::Msg::cast($_);
+                bless $_, 'PublicInbox::SearchThread::Msg';
+                if (exists $id_table{$_->{mid}}) {
+                        $_->{children} = [];
+                        push @imposters, $_; # we'll deal with them later
+                        undef;
+                } else {
+                        $_->{children} = {}; # will become arrayref later
+                        $id_table{$_->{mid}} = $_;
+                        defined($_->{references});
+                }
         } @$msgs;
+        for my $smsg (@kids) {
+                # This loop exists to help fill in gaps left from missing
+                # messages.  It is not needed in a perfect world where
+                # everything is perfectly referenced, only the last ref
+                # matters.
+                my $prev;
+                for my $ref ($smsg->{references} =~ m/$MID_EXTRACT/go) {
+                        # Find a Container object for the given Message-ID
+                        my $cont = $id_table{$ref} //=
+                                PublicInbox::SearchThread::Msg::ghost($ref);
+
+                        # Link the References field's Containers together in
+                        # the order implied by the References header
+                        #
+                        # * If they are already linked don't change the
+                        #   existing links
+                        # * Do not add a link if adding that link would
+                        #   introduce a loop...
+                        if ($prev &&
+                                !$cont->{parent} &&  # already linked
+                                !$cont->has_descendent($prev) # would loop
+                           ) {
+                                $prev->add_child($cont);
+                        }
+                        $prev = $cont;
+                }
 
-        # Sadly, we sort here anyways since the fill-in-the-blanks References:
-        # can be shakier if somebody used In-Reply-To with multiple, disparate
-        # messages.  So, take the client Date: into account since we can't
-        # always determine ordering when somebody uses multiple In-Reply-To.
-        # We'll trust the client Date: header here instead of the Received:
-        # time since this is for display (and not retrieval)
-        _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
-        my $ibx = $ctx->{-inbox};
-        my $rootset = [ grep {
-                        !delete($_->{parent}) && $_->visible($ibx)
-                } values %id_table ];
-        $rootset = $ordersub->($rootset);
-        $_->order_children($ordersub, $ctx) for @$rootset;
-        $rootset;
-}
-
-sub _set_parent ($$) {
-        my ($id_table, $this) = @_;
-
-        # B. For each element in the message's References field:
-        defined(my $refs = $this->{references}) or return;
-
-        # This loop exists to help fill in gaps left from missing
-        # messages.  It is not needed in a perfect world where
-        # everything is perfectly referenced, only the last ref
-        # matters.
-        my $prev;
-        foreach my $ref ($refs =~ m/$MID_EXTRACT/go) {
-                # Find a Container object for the given Message-ID
-                my $cont = $id_table->{$ref} //=
-                        PublicInbox::SearchThread::Msg::ghost($ref);
-
-                # Link the References field's Containers together in
-                # the order implied by the References header
-                #
-                # * If they are already linked don't change the
-                #   existing links
-                # * Do not add a link if adding that link would
-                #   introduce a loop...
-                if ($prev &&
-                        !$cont->{parent} &&  # already linked
-                        !$cont->has_descendent($prev) # would loop
-                   ) {
-                        $prev->add_child($cont);
+                # C. Set the parent of this message to be the last element in
+                # References.
+                if (defined $prev && !$smsg->has_descendent($prev)) {
+                        $prev->add_child($smsg);
                 }
-                $prev = $cont;
         }
+        my $ibx = $ctx->{ibx};
+        my @rootset = grep { # n.b.: delete prevents cyclic refs
+                        !delete($_->{parent}) && $_->visible($ibx)
+                } values %id_table;
+        $ordersub->(\@rootset);
+        $_->order_children($ordersub, $ctx) for @rootset;
 
-        # C. Set the parent of this message to be the last element in
-        # References.
-        if (defined $prev && !$this->has_descendent($prev)) { # would loop
-                $prev->add_child($this);
-        }
+        # parent imposter messages with reused Message-IDs
+        unshift(@{$id_table{$_->{mid}}->{children}}, $_) for @imposters;
+        \@rootset;
 }
 
 package PublicInbox::SearchThread::Msg;
@@ -104,13 +108,6 @@ sub ghost {
         }, __PACKAGE__;
 }
 
-# give a existing smsg the methods of this class
-sub cast {
-        my ($smsg) = @_;
-        $smsg->{children} = {};
-        bless $smsg, __PACKAGE__;
-}
-
 sub topmost {
         my ($self) = @_;
         my @q = ($self);
@@ -166,14 +163,14 @@ sub order_children {
 
         my %seen = ($cur => 1); # self-referential loop prevention
         my @q = ($cur);
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         while (defined($cur = shift @q)) {
-                my $c = $cur->{children}; # The hashref here...
-
-                $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ];
-                $c = $ordersub->($c) if scalar @$c > 1;
-                $cur->{children} = $c; # ...becomes an arrayref
-                push @q, @$c;
+                # the {children} hashref here...
+                my @c = grep { !$seen{$_}++ && visible($_, $ibx) }
+                        values %{delete $cur->{children}};
+                $ordersub->(\@c) if scalar(@c) > 1;
+                $cur->{children} = \@c; # ...becomes an arrayref
+                push @q, @c;
         }
 }
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index c482f1c9..9919e25c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Displays search results for the web interface
@@ -14,7 +14,7 @@ use PublicInbox::WwwAtomStream;
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::SearchThread;
 use PublicInbox::SearchQuery;
-use PublicInbox::Search qw(mdocid);
+use PublicInbox::Search qw(get_pct);
 my %rmap_inc;
 
 sub mbox_results {
@@ -30,58 +30,67 @@ sub mbox_results {
 
 sub sres_top_html {
         my ($ctx) = @_;
-        my $srch = $ctx->{-inbox}->search or
+        my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
         my $q = PublicInbox::SearchQuery->new($ctx->{qp});
-        my $x = $q->{x};
-        my $query = $q->{'q'};
-        my $o = $q->{o};
+        my $o = $q->{o} // 0;
         my $asc;
         if ($o < 0) {
                 $asc = 1;
                 $o = -($o + 1); # so [-1] is the last element, like Perl lists
         }
 
-        my $code = 200;
         # double the limit for expanded views:
-        my $opts = {
+        my $opt = {
                 limit => $q->{l},
                 offset => $o,
                 relevance => $q->{r},
-                thread => $q->{t},
+                threads => $q->{t},
                 asc => $asc,
         };
-        my ($mset, $total, $err, $html);
-retry:
-        eval {
-                $mset = $srch->mset($query, $opts);
-                $total = $mset->get_matches_estimated;
-        };
-        $err = $@;
+        my $qs = $q->{'q'};
+        $srch->query_approxidate($ctx->{ibx}->git, $qs);
+        sub {
+                $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+                $srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q);
+        }
+}
+
+sub sres_html_cb { # async_mset cb
+        my ($ctx, $opt, $q, $mset, $err) = @_;
+        my $code = 200;
+        my $total = $mset ? $mset->get_matches_estimated : undef;
         ctx_prepare($q, $ctx);
+        my ($res, $html);
         if ($err) {
                 $code = 400;
                 $html = '<pre>'.err_txt($ctx, $err).'</pre><hr>';
         } elsif ($total == 0) {
-                if (defined($ctx->{-uxs_retried})) {
-                        # undo retry damage:
+                if (defined($ctx->{-uxs_retried})) { # undo retry damage:
                         $q->{'q'} = $ctx->{-uxs_retried};
-                } elsif (index($q->{'q'}, '%') >= 0) {
+                } elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped
                         $ctx->{-uxs_retried} = $q->{'q'};
-                        $q->{'q'} = uri_unescape($q->{'q'});
-                        goto retry;
+                        my $qs = $q->{'q'} = uri_unescape($q->{'q'});
+                        $ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs);
+                        return $ctx->{srch}->async_mset($qs, $opt,
+                                                \&sres_html_cb, $ctx, $opt, $q);
                 }
                 $code = 404;
                 $html = "<pre>\n[No results found]</pre><hr>";
+        } elsif ($q->{x} eq 'A') {
+                $res = adump($mset, $q, $ctx);
         } else {
-                return adump($_[0], $mset, $q, $ctx) if $x eq 'A';
-
                 $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx);
-                return mset_thread($ctx, $mset, $q) if $x eq 't';
-                mset_summary($ctx, $mset, $q); # appends to {-html_tip}
-                $html = '';
+                if ($q->{x} eq 't') {
+                        $res = mset_thread($ctx, $mset, $q);
+                } else {
+                        mset_summary($ctx, $mset, $q); # appends to {-html_tip}
+                        $html = '';
+                }
         }
-        html_oneshot($ctx, $code);
+        $res //= html_oneshot($ctx, $code, $html);
+        my $wcb = delete $ctx->{wcb};
+        ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
 }
 
 # display non-nested search results similar to what users expect from
@@ -93,24 +102,24 @@ sub mset_summary {
         my $pad = length("$total");
         my $pfx = ' ' x $pad;
         my $res = \($ctx->{-html_tip});
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
-        my @nums = @{$ibx->search->mset_to_artnums($mset)};
+        my @nums = @{$ibx->isrch->mset_to_artnums($mset)};
         my %num2msg = map { $_->{num} => $_ } @{$ibx->over->get_all(@nums)};
-        my ($min, $max);
+        my ($min, $max, %seen);
 
         foreach my $m ($mset->items) {
-                my $rank = sprintf("%${pad}d", $m->get_rank + 1);
-                my $pct = get_pct($m);
                 my $num = shift @nums;
-                my $smsg = delete($num2msg{$num}) or do {
-                        eval {
-                                $m = "$m $num expired\n";
-                                $ctx->{env}->{'psgi.errors'}->print($m);
-                        };
+                my $smsg = delete($num2msg{$num}) // do {
+                        warn "$m $num expired\n";
                         next;
                 };
+                my $mid = $smsg->{mid};
+                next if $seen{$mid}++;
+                $mid = mid_href($mid);
                 $ctx->{-t_max} //= $smsg->{ts};
+                my $rank = sprintf("%${pad}d", $m->get_rank + 1);
+                my $pct = get_pct($m);
 
                 # only when sorting by relevance, ->items is always
                 # ordered descending:
@@ -118,13 +127,12 @@ sub mset_summary {
                 $min = $pct;
 
                 my $s = ascii_html($smsg->{subject});
-                my $f = ascii_html($smsg->{from_name});
+                my $f = ascii_html(delete $smsg->{from_name});
                 if ($obfs_ibx) {
                         obfuscate_addrs($obfs_ibx, $s);
                         obfuscate_addrs($obfs_ibx, $f);
                 }
                 my $date = fmt_ts($smsg->{ds});
-                my $mid = mid_href($smsg->{mid});
                 $s = '(no subject)' if $s eq '';
                 $$res .= qq{$rank. <b><a\nhref="$mid/">}.
                         $s . "</a></b>\n";
@@ -134,7 +142,7 @@ sub mset_summary {
                 $q->{-min_pct} = $min;
                 $q->{-max_pct} = $max;
         }
-        $$res .= search_nav_bot($mset, $q);
+        $$res .= search_nav_bot($ctx, $mset, $q);
         undef;
 }
 
@@ -156,7 +164,7 @@ sub path2inc ($) {
 
 sub err_txt {
         my ($ctx, $err) = @_;
-        my $u = $ctx->{-inbox}->base_url($ctx->{env}) . '_/text/help/';
+        my $u = $ctx->{ibx}->base_url($ctx->{env}) . '_/text/help/';
         $err =~ s/^\s*Exception:\s*//; # bad word to show users :P
         $err =~ s!(\S+)!path2inc($1)!sge;
         $err = ascii_html($err);
@@ -167,7 +175,7 @@ sub err_txt {
 sub search_nav_top {
         my ($mset, $q, $ctx) = @_;
         my $m = $q->qs_html(x => 'm', r => undef, t => undef);
-        my $rv = qq{<form\naction="?$m"\nmethod="post"><pre>};
+        my $rv = qq{<form\nid=d\naction="?$m"\nmethod=post><pre>};
         my $initial_q = $ctx->{-uxs_retried};
         if (defined $initial_q) {
                 my $rewritten = $q->{'q'};
@@ -192,17 +200,25 @@ sub search_nav_top {
         $rv .= ']  view[';
 
         my $x = $q->{x};
-        if ($x eq '') {
-                my $t = $q->qs_html(x => 't');
-                $rv .= qq{<b>summary</b>|<a\nhref="?$t">nested</a>}
-        } elsif ($q->{x} eq 't') {
+        my $pfx = "\t\t\t";
+        if ($x eq 't') {
                 my $s = $q->qs_html(x => '');
                 $rv .= qq{<a\nhref="?$s">summary</a>|<b>nested</b>};
+                $pfx = "thread overview <a\nhref=#t>below</a> | ";
+        } else {
+                my $t = $q->qs_html(x => 't');
+                $rv .= qq{<b>summary</b>|<a\nhref="?$t">nested</a>}
         }
         my $A = $q->qs_html(x => 'A', r => undef);
-        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]};
-        if ($ctx->{-inbox}->search->has_threadid) {
-                $rv .= qq{\n\t\t\tdownload mbox.gz: } .
+        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]\n};
+        $rv .= <<EOM if $x ne 't' && $q->{t};
+*** "t=1" collapses threads in summary, "full threads" requires mbox.gz ***
+EOM
+        $rv .= <<EOM if $x eq 'm';
+*** "x=m" ignored for GET requests, use download buttons below ***
+EOM
+        if ($ctx->{ibx}->isrch->has_threadid) {
+                $rv .= qq{${pfx}download mbox.gz: } .
                         # we set name=z w/o using it since it seems required for
                         # lynx (but works fine for w3m).
                         qq{<input\ntype=submit\nname=z\n} .
@@ -210,14 +226,14 @@ sub search_nav_top {
                         qq{|<input\ntype=submit\nname=x\n} .
                                 q{value="full threads"/>};
         } else { # BOFH needs to --reindex
-                $rv .= qq{\n\t\t\t\t\t\tdownload: } .
+                $rv .= qq{${pfx}download: } .
                         qq{<input\ntype=submit\nname=z\nvalue="mbox.gz"/>}
         }
         $rv .= qq{</pre></form><pre>};
 }
 
-sub search_nav_bot {
-        my ($mset, $q) = @_;
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
+        my ($ctx, $mset, $q) = @_;
         my $total = $mset->get_matches_estimated;
         my $l = $q->{l};
         my $rv = '</pre><hr><pre id=t>';
@@ -266,41 +282,37 @@ sub search_nav_bot {
         $rv .= qq{<a\nhref="?$prev"\nrel=prev>prev $pd</a>} if $prev;
 
         my $rev = $q->qs_html(o => $o < 0 ? 0 : -1);
-        $rv .= qq{ | <a\nhref="?$rev">reverse</a></pre>};
+        $rv .= qq{ | <a\nhref="?$rev">reverse</a>};
+        exists($ctx->{ibx}) and
+                $rv .= q{ | options <a href=#d>above</a></pre>};
+        $rv;
 }
 
 sub sort_relevance {
-        [ sort {
+        @{$_[0]} = sort {
                 (eval { $b->topmost->{pct} } // 0) <=>
                 (eval { $a->topmost->{pct} } // 0)
-        } @{$_[0]} ]
-}
-
-sub get_pct ($) {
-        # Capped at "99%" since "100%" takes an extra column in the
-        # thread skeleton view.  <xapian/mset.h> says the value isn't
-        # very meaningful, anyways.
-        my $n = $_[0]->get_percent;
-        $n > 99 ? 99 : $n;
+        } @{$_[0]};
 }
 
 sub mset_thread {
         my ($ctx, $mset, $q) = @_;
-        my $ibx = $ctx->{-inbox};
-        my $nshard = $ibx->search->{nshard} // 1;
-        my %pct = map { mdocid($nshard, $_) => get_pct($_) } $mset->items;
-        my $msgs = $ibx->over->get_all(keys %pct);
-        $_->{pct} = $pct{$_->{num}} for @$msgs;
+        my $ibx = $ctx->{ibx};
+        my @pct = map { get_pct($_) } $mset->items;
+        my $msgs = $ibx->isrch->mset_to_smsg($ibx, $mset);
+        my $i = 0;
+        $_->{pct} = $pct[$i++] for @$msgs;
         my $r = $q->{r};
         if ($r) { # for descriptions in search_nav_bot
-                my @pct = values %pct;
                 $q->{-min_pct} = min(@pct);
                 $q->{-max_pct} = max(@pct);
         }
         my $rootset = PublicInbox::SearchThread::thread($msgs,
                 $r ? \&sort_relevance : \&PublicInbox::View::sort_ds,
                 $ctx);
-        my $skel = search_nav_bot($mset, $q). "<pre>";
+        my $skel = search_nav_bot($ctx, $mset, $q).'<pre>'. <<EOM;
+-- pct% links below jump to the message on this page, permalinks otherwise --
+EOM
         $ctx->{-upfx} = '';
         $ctx->{anchor_idx} = 1;
         $ctx->{cur_level} = 0;
@@ -318,20 +330,20 @@ sub mset_thread {
 
         # link $INBOX_DIR/description text to "recent" view around
         # the newest message in this result set:
-        $ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs);
+        $ctx->{-t_max} = max(map { $_->{ts} } @$msgs);
 
         @$msgs = reverse @$msgs if $r;
         $ctx->{msgs} = $msgs;
-        PublicInbox::WwwStream::aresponse($ctx, 200, \&mset_thread_i);
+        PublicInbox::WwwStream::aresponse($ctx, \&mset_thread_i);
 }
 
 # callback for PublicInbox::WwwStream::getline
 sub mset_thread_i {
         my ($ctx, $eml) = @_;
-        $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+        print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
         $eml and return PublicInbox::View::eml_entry($ctx, $eml);
         my $smsg = shift @{$ctx->{msgs}} or
-                $ctx->zmore(${delete($ctx->{skel})});
+                print { $ctx->zfh } ${delete($ctx->{skel})};
         $smsg;
 }
 
@@ -353,17 +365,17 @@ sub ctx_prepare {
 }
 
 sub adump {
-        my ($cb, $mset, $q, $ctx) = @_;
-        $ctx->{ids} = $ctx->{-inbox}->search->mset_to_artnums($mset);
+        my ($mset, $q, $ctx) = @_;
+        $ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
         $ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
-        PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
+        PublicInbox::WwwAtomStream->response($ctx, \&adump_i);
 }
 
 # callback for PublicInbox::WwwAtomStream::getline
 sub adump_i {
         my ($ctx) = @_;
         while (my $num = shift @{$ctx->{ids}}) {
-                my $smsg = eval { $ctx->{-inbox}->over->get_art($num) } or next;
+                my $smsg = eval { $ctx->{ibx}->over->get_art($num) } or next;
                 return $smsg;
         }
 }
diff --git a/lib/PublicInbox/Select.pm b/lib/PublicInbox/Select.pm
new file mode 100644
index 00000000..face8edc
--- /dev/null
+++ b/lib/PublicInbox/Select.pm
@@ -0,0 +1,43 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# This makes select(2) look like epoll to simplify the code in DS.pm.
+# Unlike IO::Select, it does NOT hold references to IO handles.
+# This is NOT meant to be an all encompassing emulation of epoll
+# via select, but only to support cases we care about.
+package PublicInbox::Select;
+use v5.12;
+use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT);
+use Errno;
+
+sub new { bless {}, __PACKAGE__ } # fd => events
+
+sub ep_wait {
+        my ($self, $msec, $events) = @_;
+        my ($rvec, $wvec) = ('', ''); # we don't use EPOLLERR
+        while (my ($fd, $ev) = each %$self) {
+                vec($rvec, $fd, 1) = 1 if $ev & EPOLLIN;
+                vec($wvec, $fd, 1) = 1 if $ev & EPOLLOUT;
+        }
+        @$events = ();
+        my $to = $msec < 0 ? undef : ($msec/1000);
+        my $n = select $rvec, $wvec, undef, $to or return; # timeout expired
+        return if $n < 0 && $! == Errno::EINTR; # caller recalculates timeout
+        die "select: $!" if $n < 0;
+        while (my ($fd, $ev) = each %$self) {
+                if (vec($rvec, $fd, 1) || vec($wvec, $fd, 1)) {
+                        delete($self->{$fd}) if $ev & EPOLLONESHOT;
+                        push @$events, $fd;
+                }
+        }
+        $n == scalar(@$events) or
+                warn "BUG? select() returned $n, but got ".scalar(@$events);
+}
+
+sub ep_del { delete($_[0]->{fileno($_[1])}); 0 }
+sub ep_add { $_[0]->{fileno($_[1])} = $_[2]; 0 }
+
+no warnings 'once';
+*ep_mod = \&ep_add;
+
+1;
diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm
new file mode 100644
index 00000000..89ab3f74
--- /dev/null
+++ b/lib/PublicInbox/SharedKV.pm
@@ -0,0 +1,184 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# fork()-friendly key-value store.  Will be used for making
+# augmenting Maildirs and mboxes less expensive, maybe.
+# We use flock(2) to avoid SQLite lock problems (busy timeouts, backoff)
+package PublicInbox::SharedKV;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use File::Temp qw(tempdir);
+use DBI qw(:sql_types); # SQL_BLOB
+use PublicInbox::Spawn;
+use File::Path qw(rmtree);
+
+sub dbh {
+        my ($self, $lock) = @_;
+        $self->{dbh} // do {
+                my $f = $self->{filename};
+                $lock //= $self->lock_for_scope_fast;
+                my $dbh = DBI->connect("dbi:SQLite:dbname=$f", '', '', {
+                        AutoCommit => 1,
+                        RaiseError => 1,
+                        PrintError => 0,
+                        sqlite_use_immediate_transaction => 1,
+                        # no sqlite_unicode here, this is for binary data
+                });
+                my $opt = $self->{opt} // {};
+                $dbh->do('PRAGMA synchronous = OFF') if !$opt->{fsync};
+                $dbh->do('PRAGMA journal_mode = '.
+                                ($opt->{journal_mode} // 'WAL'));
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS kv (
+        k VARBINARY PRIMARY KEY NOT NULL,
+        v VARBINARY NOT NULL,
+        UNIQUE (k)
+)
+
+                $self->{dbh} = $dbh;
+        }
+}
+
+sub new {
+        my ($cls, $dir, $base, $opt) = @_;
+        my $self = bless { opt => $opt }, $cls;
+        File::Path::mkpath($dir) if defined($dir);
+        $dir //= $self->{"tmp$$.$self"} = tempdir("skv.$$-XXXX", TMPDIR => 1);
+        $base //= '';
+        my $f = $self->{filename} = "$dir/$base.sqlite3";
+        $self->{lock_path} = $opt->{lock_path} // "$dir/$base.flock";
+        unless (-s $f) {
+                require PublicInbox::Syscall;
+                PublicInbox::Syscall::nodatacow_dir($dir); # for journal/shm/wal
+                open my $fh, '+>>', $f or die "failed to open $f: $!";
+        }
+        $self;
+}
+
+sub set_maybe {
+        my ($self, $key, $val, $lock) = @_;
+        $lock //= $self->lock_for_scope_fast;
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+INSERT OR IGNORE INTO kv (k,v) VALUES (?, ?)
+
+        $sth->bind_param(1, $key, SQL_BLOB);
+        $sth->bind_param(2, $val, SQL_BLOB);
+        my $e = $sth->execute;
+        $e == 0 ? undef : $e;
+}
+
+# caller calls sth->fetchrow_array
+sub each_kv_iter {
+        my ($self) = @_;
+        my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT k,v FROM kv
+
+        $sth->execute;
+        $sth
+}
+
+sub keys {
+        my ($self, @pfx) = @_;
+        my $sql = 'SELECT k FROM kv';
+        if (defined $pfx[0]) {
+                $sql .= ' WHERE k LIKE ? ESCAPE ?';
+                my $anywhere = !!$pfx[1];
+                $pfx[1] = '\\';
+                $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
+                $pfx[0] .= '%';
+                substr($pfx[0], 0, 0, '%') if $anywhere;
+        } else {
+                @pfx = (); # [0] may've been undef
+        }
+        my $sth = $self->dbh->prepare($sql);
+        if (@pfx) {
+                $sth->bind_param(1, $pfx[0], SQL_BLOB);
+                $sth->bind_param(2, $pfx[1]);
+        }
+        $sth->execute;
+        map { $_->[0] } @{$sth->fetchall_arrayref};
+}
+
+sub set {
+        my ($self, $key, $val) = @_;
+        if (defined $val) {
+                my $sth = $self->{dbh}->prepare_cached(<<'');
+INSERT OR REPLACE INTO kv (k,v) VALUES (?,?)
+
+                $sth->bind_param(1, $key, SQL_BLOB);
+                $sth->bind_param(2, $val, SQL_BLOB);
+                my $e = $sth->execute;
+                $e == 0 ? undef : $e;
+        } else {
+                my $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM kv WHERE k = ?
+
+                $sth->bind_param(1, $key, SQL_BLOB);
+        }
+}
+
+sub get {
+        my ($self, $key) = @_;
+        my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT v FROM kv WHERE k = ?
+
+        $sth->bind_param(1, $key, SQL_BLOB);
+        $sth->execute;
+        $sth->fetchrow_array;
+}
+
+sub xchg {
+        my ($self, $key, $newval, $lock) = @_;
+        $lock //= $self->lock_for_scope_fast;
+        my $oldval = get($self, $key);
+        if (defined $newval) {
+                set($self, $key, $newval);
+        } else {
+                my $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM kv WHERE k = ?
+
+                $sth->bind_param(1, $key, SQL_BLOB);
+                $sth->execute;
+        }
+        $oldval;
+}
+
+sub count {
+        my ($self) = @_;
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+SELECT COUNT(k) FROM kv
+
+        $sth->execute;
+        $sth->fetchrow_array;
+}
+
+# faster than ->count due to how SQLite works
+sub has_entries {
+        my ($self) = @_;
+        my @n = $self->{dbh}->selectrow_array('SELECT k FROM kv LIMIT 1');
+        scalar(@n) ? 1 : undef;
+}
+
+sub dbh_release {
+        my ($self, $lock) = @_;
+        my $dbh = delete $self->{dbh} or return;
+        $lock //= $self->lock_for_scope_fast; # may be needed for WAL
+        %{$dbh->{CachedKids}} = (); # cleanup prepare_cached
+        $dbh->disconnect;
+}
+
+sub DESTROY {
+        my ($self) = @_;
+        dbh_release($self);
+        my $dir = delete $self->{"tmp$$.$self"} or return;
+        my $tries = 0;
+        do {
+                $! = 0;
+                eval { rmtree($dir) };
+        } while ($@ && $!{ENOENT} && $tries++ < 5);
+        warn "error removing $dir: $@" if $@;
+        warn "Took $tries tries to remove $dir\n" if $tries;
+}
+
+1;
diff --git a/lib/PublicInbox/Sigfd.pm b/lib/PublicInbox/Sigfd.pm
index 5d61e630..b8a1ddfb 100644
--- a/lib/PublicInbox/Sigfd.pm
+++ b/lib/PublicInbox/Sigfd.pm
@@ -1,44 +1,35 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Wraps a signalfd (or similar) for PublicInbox::DS
 # fields: (sig: hashref similar to %SIG, but signal numbers as keys)
 package PublicInbox::Sigfd;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::DS);
-use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET $SFD_NONBLOCK);
-use POSIX qw(:signal_h);
-use IO::Handle ();
+use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET %SIGNUM);
+use POSIX ();
 
 # returns a coderef to unblock signals if neither signalfd or kqueue
 # are available.
 sub new {
-        my ($class, $sig, $flags) = @_;
+        my ($class, $sig) = @_;
         my %signo = map {;
-                my $cb = $sig->{$_};
-                # SIGWINCH is 28 on FreeBSD, NetBSD, OpenBSD
-                my $num = ($_ eq 'WINCH' && $^O =~ /linux|bsd/i) ? 28 : do {
-                        my $m = "SIG$_";
-                        POSIX->$m;
-                };
-                $num => $cb;
+                # $num => [ $cb, $signame ];
+                ($SIGNUM{$_} // POSIX->can("SIG$_")->()) => [ $sig->{$_}, $_ ]
         } keys %$sig;
         my $self = bless { sig => \%signo }, $class;
         my $io;
-        my $fd = signalfd(-1, [keys %signo], $flags);
+        my $fd = signalfd([keys %signo]);
         if (defined $fd && $fd >= 0) {
-                $io = IO::Handle->new_from_fd($fd, 'r+');
+                open($io, '+<&=', $fd) or die "open: $!";
         } elsif (eval { require PublicInbox::DSKQXS }) {
-                $io = PublicInbox::DSKQXS->signalfd([keys %signo], $flags);
+                $io = PublicInbox::DSKQXS->signalfd([keys %signo]);
         } else {
                 return; # wake up every second to check for signals
         }
-        if ($flags & $SFD_NONBLOCK) { # it can go into the event loop
-                $self->SUPER::new($io, EPOLLIN | EPOLLET);
-        } else { # master main loop
-                $self->{sock} = $io;
-                $self;
-        }
+        $self->SUPER::new($io, EPOLLIN | EPOLLET);
+        $self->{is_kq} = 1 if tied(*$io);
+        $self;
 }
 
 # PublicInbox::Daemon in master main loop (blocking)
@@ -51,8 +42,8 @@ sub wait_once ($) {
                 for my $off (0..$nr) {
                         # the first uint32_t of signalfd_siginfo: ssi_signo
                         my $signo = unpack('L', substr($buf, 128 * $off, 4));
-                        my $cb = $self->{sig}->{$signo};
-                        $cb->($signo) if $cb ne 'IGNORE';
+                        my ($cb, $signame) = @{$self->{sig}->{$signo}};
+                        $cb->($signame) if $cb ne 'IGNORE';
                 }
         }
         $r;
@@ -63,14 +54,4 @@ sub event_step {
         while (wait_once($_[0])) {} # non-blocking
 }
 
-sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
-
-sub block_signals () {
-        my $oldset = POSIX::SigSet->new;
-        my $newset = POSIX::SigSet->new;
-        $newset->fillset or die "fillset: $!";
-        sig_setmask($newset, $oldset);
-        $oldset;
-}
-
 1;
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 14086538..b132381b 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # A small/skeleton/slim representation of a message.
@@ -9,13 +9,15 @@
 # large threads in our WWW UI and the NNTP range responses.
 package PublicInbox::Smsg;
 use strict;
-use warnings;
-use base qw(Exporter);
+use v5.10.1;
+use parent qw(Exporter);
 our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 
+sub oidbin { pack('H*', $_[0]->{blob}) }
+
 sub to_doc_data {
         my ($self) = @_;
         join("\n",
@@ -57,19 +59,36 @@ sub load_from_data ($$) {
 sub psgi_cull ($) {
         my ($self) = @_;
 
-        # ghosts don't have ->{from}
-        my $from = delete($self->{from}) // '';
-        my @n = PublicInbox::Address::names($from);
-        $self->{from_name} = join(', ', @n);
-
         # drop NNTP-only fields which aren't relevant to PSGI results:
         # saves ~80K on a 200 item search result:
         # TODO: we may need to keep some of these for JMAP...
-        delete @$self{qw(tid to cc bytes lines)};
+        my ($f) = delete @$self{qw(from tid to cc bytes lines)};
+        # ghosts don't have ->{from}
+        $self->{from_name} = join(', ', PublicInbox::Address::names($f // ''));
         $self;
 }
 
-# for Import and v1 non-SQLite WWW code paths
+sub parse_references ($$$) {
+        my ($smsg, $hdr, $mids) = @_;
+        my $refs = references($hdr);
+        push(@$refs, @$mids) if scalar(@$mids) > 1;
+        return $refs if scalar(@$refs) == 0;
+
+        # prevent circular references here:
+        my %seen = ( ($smsg->{mid} // '') => 1 );
+        my @keep;
+        foreach my $ref (@$refs) {
+                if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+                        warn "References: <$ref> too long, ignoring\n";
+                        next;
+                }
+                $seen{$ref} //= push(@keep, $ref);
+        }
+        $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+        \@keep;
+}
+
+# used for v2, Import and v1 non-SQLite WWW code paths
 sub populate {
         my ($self, $hdr, $sync) = @_;
         for my $f (qw(From To Cc Subject)) {
@@ -80,9 +99,6 @@ sub populate {
                 # to protect git and NNTP clients
                 $val =~ tr/\0\t\n/   /;
 
-                # rare: in case headers have wide chars (not RFC2047-encoded)
-                utf8::decode($val);
-
                 # lower-case fields for read-only stuff
                 $self->{lc($f)} = $val;
 
@@ -96,13 +112,13 @@ sub populate {
                 $self->{$f} = $val if $val ne '';
         }
         $sync //= {};
-        $self->{-ds} = [ my @ds = msg_datestamp($hdr, $sync->{autime}) ];
-        $self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
+        my @ds = msg_datestamp($hdr, $sync->{autime} // $self->{ds});
+        my @ts = msg_timestamp($hdr, $sync->{cotime} // $self->{ts});
+        $self->{-ds} = \@ds;
+        $self->{-ts} = \@ts;
         $self->{ds} //= $ds[0]; # no zone
         $self->{ts} //= $ts[0];
-
-        # for v1 users w/o SQLite
-        $self->{mid} //= eval { mids($hdr)->[0] } // '';
+        $self->{mid} //= mids($hdr)->[0];
 }
 
 # no strftime, that is locale-dependent and not for RFC822
@@ -127,6 +143,8 @@ sub internaldate { # for IMAP
 
 our $REPLY_RE = qr/^re:\s+/i;
 
+# TODO: see RFC 5256 sec 2.1 "Base Subject" and evaluate compatibility
+# w/ existing indices...
 sub subject_normalized ($) {
         my ($subj) = @_;
         $subj =~ s/\A\s+//s; # no leading space
@@ -137,4 +155,17 @@ sub subject_normalized ($) {
         $subj;
 }
 
+# returns the number of bytes to add if given a non-CRLF arg
+sub crlf_adjust ($) {
+        if (index($_[0], "\r\n") < 0) {
+                # common case is LF-only, every \n needs an \r;
+                # so favor a cheap tr// over an expensive m//g
+                $_[0] =~ tr/\n/\n/;
+        } else { # count number of '\n' w/o '\r', expensive:
+                scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+        }
+}
+
+sub set_bytes { $_[0]->{bytes} = $_[2] + crlf_adjust($_[1]) }
+
 1;
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 83f7a4ee..b5f6b96e 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # "Solve" blobs which don't exist in git code repositories by
@@ -11,13 +11,16 @@ package PublicInbox::SolverGit;
 use strict;
 use v5.10.1;
 use File::Temp 0.19 (); # 0.19 for ->newdir
+use autodie qw(mkdir);
 use Fcntl qw(SEEK_SET);
-use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::Git qw(git_unquote git_quote git_exe);
+use PublicInbox::IO qw(write_file);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Qspawn;
 use PublicInbox::Tmpfile;
 use PublicInbox::GitAsyncCat;
 use PublicInbox::Eml;
+use PublicInbox::Compat qw(uniqstr);
 use URI::Escape qw(uri_escape_utf8);
 
 # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
@@ -79,11 +82,13 @@ sub solve_existing ($$) {
         my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy
         my $git = shift @$try or die 'BUG {try_gits} empty';
         my $oid_b = $want->{oid_b};
+
+        # can't use async_check due to last_check_err :<
         my ($oid_full, $type, $size) = $git->check($oid_b);
+        $git->schedule_cleanup if $self->{psgi_env}->{'pi-httpd.async'};
 
-        # other than {oid_b, try_gits, try_ibxs}
-        my $have_hints = scalar keys %$want > 3;
-        if (defined($type) && (!$have_hints || $type eq 'blob')) {
+        if ($oid_b eq ($oid_full // '') || (defined($type) &&
+                                (!$self->{have_hints} || $type eq 'blob'))) {
                 delete $want->{try_gits};
                 return [ $git, $oid_full, $type, int($size) ]; # done, success
         }
@@ -106,13 +111,16 @@ sub solve_existing ($$) {
         scalar(@$try);
 }
 
+sub _tmp {
+        $_[0]->{tmp} //=
+                File::Temp->newdir("solver.$_[0]->{oid_want}-XXXX", TMPDIR => 1);
+}
+
 sub extract_diff ($$) {
         my ($p, $arg) = @_;
         my ($self, $want, $smsg) = @$arg;
         my ($part) = @$p; # ignore $depth and @idx;
         my $ct = $part->content_type || 'text/plain';
-        my ($s, undef) = msg_part_text($part, $ct);
-        defined $s or return;
         my $post = $want->{oid_b};
         my $pre = $want->{oid_a};
         if (!defined($pre) || $pre !~ /\A[a-f0-9]+\z/) {
@@ -122,11 +130,18 @@ sub extract_diff ($$) {
         # Email::MIME::Encodings forces QP to be CRLF upon decoding,
         # change it back to LF:
         my $cte = $part->header('Content-Transfer-Encoding') || '';
+        my ($s, undef) = msg_part_text($part, $ct);
+        defined $s or return;
+        delete $part->{bdy};
         if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
                 $s =~ s/\r\n/\n/sg;
         }
 
-
+        # Quiet "Complex regular subexpression recursion limit" warning.
+        # Not much we can do about it, but it's no longer relevant to
+        # Perl 5.3x (the warning was removed in 5.37.1, and actual
+        # recursino sometime before then).
+        no warnings 'regexp';
         $s =~ m!( # $1 start header lines we save for debugging:
 
                 # everything before ^index is optional, but we don't
@@ -169,6 +184,7 @@ sub extract_diff ($$) {
                 # because git-apply(1) handles that case, too
                 (?:^(?:[\@\+\x20\-\\][^\n]*|)$LF)+
         )!smx or return;
+        undef $s; # free memory
 
         my $di = {
                 hdr_lines => $1,
@@ -193,10 +209,8 @@ sub extract_diff ($$) {
 
         my $path = ++$self->{tot};
         $di->{n} = $path;
-        open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or
-                die "open(tmp): $!";
-        print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!";
-        close $tmp or die "close(tmp): $!";
+        my $f = _tmp($self)->dirname."/$path";
+        write_file '>:utf8', $f, $di->{hdr_lines}, $patch;
 
         # for debugging/diagnostics:
         $di->{ibx} = $want->{cur_ibx};
@@ -216,7 +230,7 @@ sub filename_query ($) {
 
 sub find_smsgs ($$$) {
         my ($self, $ibx, $want) = @_;
-        my $srch = $ibx->search or return;
+        my $srch = $ibx->isrch or return;
 
         my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
         $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
@@ -242,14 +256,18 @@ sub find_smsgs ($$$) {
 
 sub update_index_result ($$) {
         my ($bref, $self) = @_;
-        my ($qsp, $msg) = delete @$self{qw(-qsp -msg)};
-        if (my $err = $qsp->{err}) {
-                ERR($self, "git update-index error: $err");
-        }
+        my ($qsp_err, $msg) = delete @$self{qw(-qsp_err -msg)};
+        ERR($self, "git update-index error:$qsp_err") if $qsp_err;
         dbg($self, $msg);
         next_step($self); # onto do_git_apply
 }
 
+sub qsp_qx ($$$) {
+        my ($self, $qsp, $cb) = @_;
+        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
+        $qsp->psgi_qx($self->{psgi_env}, $self->{limiter}, $cb, $self);
+}
+
 sub prepare_index ($) {
         my ($self) = @_;
         my $patches = $self->{patches};
@@ -275,49 +293,42 @@ sub prepare_index ($) {
 
         dbg($self, 'preparing index');
         my $rdr = { 0 => $in };
-        my $cmd = [ qw(git update-index -z --index-info) ];
+        my $cmd = [ git_exe, qw(update-index -z --index-info) ];
         my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
         $path_a = git_quote($path_a);
-        $self->{-qsp} = $qsp;
         $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self);
+        qsp_qx $self, $qsp, \&update_index_result;
 }
 
 # pure Perl "git init"
 sub do_git_init ($) {
         my ($self) = @_;
-        my $dir = $self->{tmp}->dirname;
-        my $git_dir = "$dir/git";
-
-        foreach ('', qw(objects refs objects/info refs/heads)) {
-                mkdir("$git_dir/$_") or die "mkdir $_: $!";
-        }
-        open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
-        print $fh <<'EOF' or die "print git/config $!";
+        my $git_dir = _tmp($self)->dirname.'/git';
+
+        mkdir("$git_dir/$_") for ('', qw(objects refs objects/info refs/heads));
+        my $first = $self->{gits}->[0];
+        my $fmt = $first->object_format;
+        my ($v, @ext) = defined($$fmt) ? (1, <<EOM) : (0);
+[extensions]
+        objectformat = $$fmt
+EOM
+        write_file '>', "$git_dir/config", <<EOF, @ext;
 [core]
-        repositoryFormatVersion = 0
+        repositoryFormatVersion = $v
         filemode = true
         bare = false
-        fsyncObjectfiles = false
         logAllRefUpdates = false
 EOF
-        close $fh or die "close git/config: $!";
-
-        open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
-        print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
-        close $fh or die "close git/HEAD: $!";
-
-        my $f = 'objects/info/alternates';
-        open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
-        foreach my $git (@{$self->{gits}}) {
-                print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
-        }
-        close $fh or die "close: $f: $!";
+        write_file '>', "$git_dir/HEAD", "ref: refs/heads/master\n";
+        write_file '>', "$git_dir/objects/info/alternates", map {
+                        $_->git_path('objects')."\n"
+                } @{$self->{gits}};
         my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
         $tmp_git->{-tmp} = $self->{tmp};
         $self->{git_env} = {
                 GIT_DIR => $git_dir,
                 GIT_INDEX_FILE => "$git_dir/index",
+                GIT_TEST_FSYNC => 0, # undocumented git env
         };
         prepare_index($self);
 }
@@ -377,12 +388,9 @@ sub event_step ($) {
 }
 
 sub next_step ($) {
-        my ($self) = @_;
         # if outside of public-inbox-httpd, caller is expected to be
         # looping event_step, anyways
-        my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
-        # PublicInbox::HTTPD::Async->new
-        $async->(undef, undef, $self);
+        PublicInbox::DS::requeue($_[0]) if $_[0]->{psgi_env}->{'pi-httpd.async'}
 }
 
 sub mark_found ($$$) {
@@ -398,21 +406,18 @@ sub mark_found ($$$) {
 
 sub parse_ls_files ($$) {
         my ($self, $bref) = @_;
-        my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
-        if (my $err = $qsp->{err}) {
-                die "git ls-files error: $err";
-        }
+        my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)};
+        die "git ls-files -s -z error:$qsp_err" if $qsp_err;
 
-        my ($line, @extra) = split(/\0/, $$bref);
+        my @ls = split(/\0/, $$bref);
+        my ($line, @extra) = grep(/\t\Q$di->{path_b}\E\z/, @ls);
         scalar(@extra) and die "BUG: extra files in index: <",
-                                join('> <', @extra), ">";
-
+                                join('> <', $line, @extra), ">";
+        $line // die "no \Q$di->{path_b}\E in <",join('> <', @ls), '>';
         my ($info, $file) = split(/\t/, $line, 2);
         my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
-        if ($file ne $di->{path_b}) {
-                die
+        $file eq $di->{path_b} or die
 "BUG: index mismatch: file=$file != path_b=$di->{path_b}";
-        }
 
         my $tmp_git = $self->{tmp_git} or die 'no git working tree';
         my (undef, undef, $size) = $tmp_git->check($oid_b_full);
@@ -447,50 +452,49 @@ sub skip_identical ($$$) {
         }
 }
 
-sub apply_result ($$) {
+sub apply_result ($$) { # qx_cb
         my ($bref, $self) = @_;
-        my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
+        my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)};
         dbg($self, $$bref);
         my $patches = $self->{patches};
-        if (my $err = $qsp->{err}) {
-                my $msg = "git apply error: $err";
+        if ($qsp_err) {
+                my $msg = "git apply error:$qsp_err";
                 my $nxt = $patches->[0];
                 if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) {
                         dbg($self, $msg);
                         dbg($self, 'trying '.di_url($self, $nxt));
                         return do_git_apply($self);
                 } else {
-                        ERR($self, $msg);
+                        $msg .= " (no patches left to try for $di->{oid_b})\n";
+                        dbg($self, $msg);
+                        return done($self, undef);
                 }
         } else {
                 skip_identical($self, $patches, $di->{oid_b});
         }
 
-        my @cmd = qw(git ls-files -s -z);
-        $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
+        my @cmd = (git_exe, qw(ls-files -s -z));
+        my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
         $self->{-cur_di} = $di;
-        $self->{-qsp} = $qsp;
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self);
+        qsp_qx $self, $qsp, \&ls_files_result;
 }
 
 sub do_git_apply ($) {
         my ($self) = @_;
-        my $dn = $self->{tmp}->dirname;
         my $patches = $self->{patches};
 
         # we need --ignore-whitespace because some patches are CRLF
-        my @cmd = (qw(git apply --cached --ignore-whitespace
+        my @cmd = (git_exe, qw(apply --cached --ignore-whitespace
                         --unidiff-zero --whitespace=warn --verbose));
         my $len = length(join(' ', @cmd));
-        my $total = $self->{tot};
         my $di; # keep track of the last one for "git ls-files"
         my $prv_oid_b;
 
         do {
                 my $i = ++$self->{nr};
                 $di = shift @$patches;
-                dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
-                        "\n" . $di->{hdr_lines});
+                dbg($self, "\napplying [$i/$self->{nr_p}] " .
+                        di_url($self, $di) . "\n" . $di->{hdr_lines});
                 my $path = $di->{n};
                 $len += length($path) + 1;
                 push @cmd, $path;
@@ -498,11 +502,10 @@ sub do_git_apply ($) {
         } while (@$patches && $len < $ARG_SIZE_MAX &&
                  !oids_same_ish($patches->[0]->{oid_b}, $prv_oid_b));
 
-        my $opt = { 2 => 1, -C => $dn, quiet => 1 };
+        my $opt = { 2 => 1, -C => _tmp($self)->dirname, quiet => 1 };
         my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt);
         $self->{-cur_di} = $di;
-        $self->{-qsp} = $qsp;
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self);
+        qsp_qx $self, $qsp, \&apply_result;
 }
 
 sub di_url ($$) {
@@ -551,8 +554,9 @@ sub extract_diffs_done {
         my $diffs = delete $self->{tmp_diffs};
         if (scalar @$diffs) {
                 unshift @{$self->{patches}}, @$diffs;
-                dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t",
-                        map { di_url($self, $_) } @$diffs));
+                my @u = uniqstr(map { di_url($self, $_) } @$diffs);
+                dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t", @u));
+                ++$self->{nr_p};
 
                 # good, we can find a path to the oid we $want, now
                 # lets see if we need to apply more patches:
@@ -593,8 +597,7 @@ sub resolve_patch ($$) {
         if (my $msgs = $want->{try_smsgs}) {
                 my $smsg = shift @$msgs;
                 if ($self->{psgi_env}->{'pi-httpd.async'}) {
-                        return git_async_cat($want->{cur_ibx}->git,
-                                                $smsg->{blob},
+                        return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
                                                 \&extract_diff_async,
                                                 [$self, $want, $smsg]);
                 } else {
@@ -635,7 +638,7 @@ sub resolve_patch ($$) {
 
         # scan through inboxes to look for emails which results in
         # the oid we want:
-        my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty';
+        my $ibx = shift(@{$want->{try_ibxs}}) or return done($self, undef);
         if (my $msgs = find_smsgs($self, $ibx, $want)) {
                 $want->{try_smsgs} = $msgs;
                 $want->{cur_ibx} = $ibx;
@@ -649,15 +652,19 @@ sub resolve_patch ($$) {
 # so user_cb never references the SolverGit object
 sub new {
         my ($class, $ibx, $user_cb, $uarg) = @_;
+        my $gits = $ibx ? $ibx->{-repo_objs} : undef;
+
+        # FIXME: cindex --join= is super-aggressive and may hit too many
+        $gits = [ @$gits[0..2] ] if $gits && @$gits > 3;
 
-        bless {
-                gits => $ibx->{-repo_objs},
+        bless { # $ibx is undef if coderepo only (see WwwCoderepo)
+                gits => $gits,
                 user_cb => $user_cb,
                 uarg => $uarg,
-                # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
+                # -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks
 
                 # TODO: config option for searching related inboxes
-                inboxes => [ $ibx ],
+                inboxes => $ibx ? [ $ibx ] : [],
         }, $class;
 }
 
@@ -676,17 +683,16 @@ sub solve ($$$$$) {
         $self->{oid_want} = $oid_want;
         $self->{out} = $out;
         $self->{seen_oid} = {};
-        $self->{tot} = 0;
+        $self->{tot} = $self->{nr_p} = 0;
         $self->{psgi_env} = $env;
+        $self->{have_hints} = 1 if scalar keys %$hints;
         $self->{todo} = [ { %$hints, oid_b => $oid_want } ];
         $self->{patches} = []; # [ $di, $di, ... ]
         $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
-        $self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXXXXXX", TMPDIR => 1);
 
         dbg($self, "solving $oid_want ...");
-        if (my $async = $env->{'pi-httpd.async'}) {
-                # PublicInbox::HTTPD::Async->new
-                $async->(undef, undef, $self);
+        if ($env->{'pi-httpd.async'}) {
+                PublicInbox::DS::requeue($self);
         } else {
                 event_step($self) while $self->{user_cb};
         }
diff --git a/lib/PublicInbox/Spamcheck.pm b/lib/PublicInbox/Spamcheck.pm
index ffebb3cf..fbf9355d 100644
--- a/lib/PublicInbox/Spamcheck.pm
+++ b/lib/PublicInbox/Spamcheck.pm
@@ -1,21 +1,17 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Spamchecking used by -watch and -mda tools
 package PublicInbox::Spamcheck;
-use strict;
-use warnings;
+use v5.12;
 
 sub get {
-        my ($config, $key, $default) = @_;
-        my $spamcheck = $config->{$key};
-        $spamcheck = $default unless $spamcheck;
+        my ($cfg, $key, $default) = @_;
+        my $spamcheck = $cfg->{$key} || $default;
 
         return if !$spamcheck || $spamcheck eq 'none';
 
-        if ($spamcheck eq 'spamc') {
-                $spamcheck = 'PublicInbox::Spamcheck::Spamc';
-        }
+        $spamcheck = 'PublicInbox::Spamcheck::Spamc' if $spamcheck eq 'spamc';
         if ($spamcheck =~ /::/) {
                 eval "require $spamcheck";
                 return $spamcheck->new;
diff --git a/lib/PublicInbox/Spamcheck/Spamc.pm b/lib/PublicInbox/Spamcheck/Spamc.pm
index 3ba2c3c9..b4f95e2b 100644
--- a/lib/PublicInbox/Spamcheck/Spamc.pm
+++ b/lib/PublicInbox/Spamcheck/Spamc.pm
@@ -1,18 +1,17 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Default spam filter class for wrapping spamc(1)
 package PublicInbox::Spamcheck::Spamc;
-use strict;
-use warnings;
-use PublicInbox::Spawn qw(popen_rd spawn);
+use v5.12;
+use PublicInbox::Spawn qw(run_qx run_wait);
 use IO::Handle;
 use Fcntl qw(SEEK_SET);
 
 sub new {
         my ($class) = @_;
         bless {
-                checkcmd => [qw(spamc -E --headers)],
+                checkcmd => [qw(spamc -E)],
                 hamcmd => [qw(spamc -L ham)],
                 spamcmd => [qw(spamc -L spam)],
         }, $class;
@@ -21,15 +20,9 @@ sub new {
 sub spamcheck {
         my ($self, $msg, $out) = @_;
 
+        $out = \(my $buf = '') unless ref($out);
         my $rdr = { 0 => _msg_to_fh($self, $msg) };
-        my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
-        unless (ref $out) {
-                my $buf = '';
-                $out = \$buf;
-        }
-        $$out = do { local $/; <$fh> };
-        close $fh or die "close failed: $!";
-        waitpid($pid, 0);
+        $$out = run_qx($self->{checkcmd}, undef, $rdr);
         ($? || $$out eq '') ? 0 : 1;
 }
 
@@ -49,9 +42,7 @@ sub _learn {
         $rdr->{0} = _msg_to_fh($self, $msg);
         $rdr->{1} ||= $self->_devnull;
         $rdr->{2} ||= $self->_devnull;
-        my $pid = spawn($self->{$field}, undef, $rdr);
-        waitpid($pid, 0);
-        !$?;
+        0 == run_wait($self->{$field}, undef, $rdr);
 }
 
 sub _devnull {
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index cb16fcf6..e9e81e88 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # This allows vfork to be used for spawning subprocesses if
@@ -6,35 +6,38 @@
 # is explicitly defined in the environment (and writable).
 # Under Linux, vfork can make a big difference in spawning performance
 # as process size increases (fork still needs to mark pages for CoW use).
-# Currently, we only use this for code intended for long running
-# daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
-# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
-# Perl routines.
+# None of this is intended to be thread-safe since Perl5 maintainers
+# officially discourage the use of threads.
 #
 # There'll probably be more OS-level C stuff here, down the line.
 # We don't want too many DSOs: https://udrepper.livejournal.com/8790.html
 
 package PublicInbox::Spawn;
-use strict;
+use v5.12;
 use parent qw(Exporter);
-use Symbol qw(gensym);
-use PublicInbox::ProcessPipe;
-our @EXPORT_OK = qw/which spawn popen_rd nodatacow_dir/;
-our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
+use PublicInbox::Lock;
+use Fcntl qw(SEEK_SET);
+use IO::Handle ();
+use Carp qw(croak);
+use PublicInbox::IO;
+our @EXPORT_OK = qw(which spawn popen_rd popen_wr run_die run_wait run_qx);
+our (@RLIMITS, %RLIMITS);
+use autodie qw(close open pipe seek sysseek truncate);
 
-my $vfork_spawn = <<'VFORK_SPAWN';
+BEGIN {
+        @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
+        my $all_libc = <<'ALL_LIBC'; # all *nix systems we support
+#include <sys/resource.h>
+#include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/time.h>
-#include <sys/resource.h>
+#include <sys/uio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <errno.h>
-
-/* some platforms need alloca.h, but some don't */
-#if defined(__GNUC__) && !defined(alloca)
-#  define alloca(sz) __builtin_alloca(sz)
-#endif
-
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
 #include <signal.h>
 #include <assert.h>
 
@@ -46,11 +49,17 @@ my $vfork_spawn = <<'VFORK_SPAWN';
  *   This is unlike "sv_len", which returns what you would expect.
  */
 #define AV2C_COPY(dst, src) do { \
+        static size_t dst##__capa; \
         I32 i; \
         I32 top_index = av_len(src); \
         I32 real_len = top_index + 1; \
         I32 capa = real_len + 1; \
-        dst = alloca(capa * sizeof(char *)); \
+        if (capa > dst##__capa) { \
+                dst##__capa = 0; /* in case Newx croaks */ \
+                Safefree(dst); \
+                Newx(dst, capa, char *); \
+                dst##__capa = capa; \
+        } \
         for (i = 0; i < real_len; i++) { \
                 SV **sv = av_fetch(src, i, 0); \
                 dst[i] = SvPV_nolen(*sv); \
@@ -59,9 +68,10 @@ my $vfork_spawn = <<'VFORK_SPAWN';
 } while (0)
 
 /* needs to be safe inside a vfork'ed process */
-static void exit_err(int *cerrnum)
+static void exit_err(const char *fn, volatile int *cerrnum)
 {
         *cerrnum = errno;
+        write(2, fn, strlen(fn));
         _exit(1);
 }
 
@@ -71,49 +81,60 @@ static void exit_err(int *cerrnum)
  * Be sure to update PublicInbox::SpawnPP if this changes
  */
 int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
-                 const char *cd)
+                 const char *cd, int pgid)
 {
         AV *redir = (AV *)SvRV(redirref);
         AV *cmd = (AV *)SvRV(cmdref);
         AV *env = (AV *)SvRV(envref);
         AV *rlim = (AV *)SvRV(rlimref);
         const char *filename = SvPV_nolen(file);
-        pid_t pid;
-        char **argv, **envp;
-        sigset_t set, old, cset;
-        int ret, perrnum, cerrnum = 0;
+        pid_t pid = -1;
+        static char **argv, **envp;
+        sigset_t set, old;
+        int ret, perrnum;
+        volatile int cerrnum = 0; /* shared due to vfork */
+        int chld_is_member; /* needed due to shared memory w/ vfork */
+        I32 max_fd = av_len(redir);
 
         AV2C_COPY(argv, cmd);
         AV2C_COPY(envp, env);
 
-        ret = sigfillset(&set);
-        assert(ret == 0 && "BUG calling sigfillset");
-        ret = sigprocmask(SIG_SETMASK, &set, &old);
-        assert(ret == 0 && "BUG calling sigprocmask to block");
-        ret = sigemptyset(&cset);
-        assert(ret == 0 && "BUG calling sigemptyset");
-        ret = sigaddset(&cset, SIGCHLD);
-        assert(ret == 0 && "BUG calling sigaddset for SIGCHLD");
+        if (sigfillset(&set)) goto out;
+        if (sigdelset(&set, SIGABRT)) goto out;
+        if (sigdelset(&set, SIGBUS)) goto out;
+        if (sigdelset(&set, SIGFPE)) goto out;
+        if (sigdelset(&set, SIGILL)) goto out;
+        if (sigdelset(&set, SIGSEGV)) goto out;
+        /* no XCPU/XFSZ here */
+        if (sigprocmask(SIG_SETMASK, &set, &old)) goto out;
+        chld_is_member = sigismember(&old, SIGCHLD);
+        if (chld_is_member < 0) goto out;
+        if (chld_is_member > 0 && sigdelset(&old, SIGCHLD)) goto out;
+
         pid = vfork();
         if (pid == 0) {
                 int sig;
-                I32 i, child_fd, max = av_len(redir);
+                I32 i, child_fd, max_rlim;
 
-                for (child_fd = 0; child_fd <= max; child_fd++) {
+                for (child_fd = 0; child_fd <= max_fd; child_fd++) {
                         SV **parent = av_fetch(redir, child_fd, 0);
                         int parent_fd = SvIV(*parent);
                         if (parent_fd == child_fd)
                                 continue;
                         if (dup2(parent_fd, child_fd) < 0)
-                                exit_err(&cerrnum);
+                                exit_err("dup2", &cerrnum);
                 }
+                if (pgid >= 0 && setpgid(0, pgid) < 0)
+                        exit_err("setpgid", &cerrnum);
                 for (sig = 1; sig < NSIG; sig++)
                         signal(sig, SIG_DFL); /* ignore errors on signals */
-                if (*cd && chdir(cd) < 0)
-                        exit_err(&cerrnum);
+                if (*cd && chdir(cd) < 0) {
+                        write(2, "cd ", 3);
+                        exit_err(cd, &cerrnum);
+                }
 
-                max = av_len(rlim);
-                for (i = 0; i < max; i += 3) {
+                max_rlim = av_len(rlim);
+                for (i = 0; i < max_rlim; i += 3) {
                         struct rlimit rl;
                         SV **res = av_fetch(rlim, i, 0);
                         SV **soft = av_fetch(rlim, i + 1, 0);
@@ -122,134 +143,207 @@ int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
                         rl.rlim_cur = SvIV(*soft);
                         rl.rlim_max = SvIV(*hard);
                         if (setrlimit(SvIV(*res), &rl) < 0)
-                                exit_err(&cerrnum);
+                                exit_err("setrlimit", &cerrnum);
                 }
 
-                /*
-                 * don't bother unblocking other signals for now, just SIGCHLD.
-                 * we don't want signals to the group taking out a subprocess
-                 */
-                (void)sigprocmask(SIG_UNBLOCK, &cset, NULL);
+                (void)sigprocmask(SIG_SETMASK, &old, NULL);
                 execve(filename, argv, envp);
-                exit_err(&cerrnum);
+                exit_err("execve", &cerrnum);
         }
         perrnum = errno;
+        if (chld_is_member > 0)
+                sigaddset(&old, SIGCHLD);
         ret = sigprocmask(SIG_SETMASK, &old, NULL);
         assert(ret == 0 && "BUG calling sigprocmask to restore");
         if (cerrnum) {
+                int err_fd = STDERR_FILENO;
+                if (err_fd <= max_fd) {
+                        SV **parent = av_fetch(redir, err_fd, 0);
+                        err_fd = SvIV(*parent);
+                }
                 if (pid > 0)
                         waitpid(pid, NULL, 0);
                 pid = -1;
+                /* continue message started by exit_err in child */
+                dprintf(err_fd, ": %s\n", strerror(cerrnum));
                 errno = cerrnum;
         } else if (perrnum) {
                 errno = perrnum;
         }
+out:
+        if (pid < 0)
+                croak("E: fork_exec %s: %s\n", filename, strerror(errno));
         return (int)pid;
 }
-VFORK_SPAWN
 
-# btrfs on Linux is copy-on-write (COW) by default.  As of Linux 5.7,
-# this still leads to fragmentation for SQLite and Xapian files where
-# random I/O happens, so we disable COW just for SQLite files and Xapian
-# directories.  Disabling COW disables checksumming, so we only do this
-# for regeneratable files, and not canonical git storage (git doesn't
-# checksum refs, only data under $GIT_DIR/objects).
-my $set_nodatacow = $^O eq 'linux' ? <<'SET_NODATACOW' : '';
-#include <sys/ioctl.h>
-#include <sys/vfs.h>
-#include <linux/magic.h>
-#include <linux/fs.h>
-#include <dirent.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-
-void nodatacow_fd(int fd)
+static int sendmsg_retry(int *tries)
 {
-        struct statfs buf;
-        int val = 0;
-
-        if (fstatfs(fd, &buf) < 0) {
-                fprintf(stderr, "fstatfs: %s\\n", strerror(errno));
-                return;
+        const struct timespec req = { 0, 100000000 }; /* 100ms */
+        int err = errno;
+        switch (err) {
+        case EINTR: PERL_ASYNC_CHECK(); return 1;
+        case ENOBUFS: case ENOMEM: case ETOOMANYREFS:
+                if (--*tries < 0) return 0;
+                fprintf(stderr, "# sleeping on sendmsg: %s (%d tries left)\n",
+                        strerror(err), *tries);
+                nanosleep(&req, NULL);
+                PERL_ASYNC_CHECK();
+                return 1;
+        default: return 0;
         }
+}
 
-        /* only btrfs is known to have this problem, so skip for non-btrfs */
-        if (buf.f_type != BTRFS_SUPER_MAGIC)
-                return;
+#if defined(CMSG_SPACE) && defined(CMSG_LEN)
+#define SEND_FD_CAPA 10
+#define SEND_FD_SPACE (SEND_FD_CAPA * sizeof(int))
+union my_cmsg {
+        struct cmsghdr hdr;
+        char pad[sizeof(struct cmsghdr) + 16 + SEND_FD_SPACE];
+};
 
-        if (ioctl(fd, FS_IOC_GETFLAGS, &val) < 0) {
-                fprintf(stderr, "FS_IOC_GET_FLAGS: %s\\n", strerror(errno));
-                return;
+SV *send_cmd4_(PerlIO *s, SV *svfds, SV *data, int flags, int tries)
+{
+        struct msghdr msg = { 0 };
+        union my_cmsg cmsg = { 0 };
+        STRLEN dlen = 0;
+        struct iovec iov;
+        ssize_t sent;
+        AV *fds = (AV *)SvRV(svfds);
+        I32 i, nfds = av_len(fds) + 1;
+        int *fdp;
+
+        if (SvOK(data)) {
+                iov.iov_base = SvPV(data, dlen);
+                iov.iov_len = dlen;
+        }
+        if (!dlen) { /* must be non-zero */
+                iov.iov_base = &msg.msg_namelen; /* whatever */
+                iov.iov_len = 1;
         }
-        val |= FS_NOCOW_FL;
-        if (ioctl(fd, FS_IOC_SETFLAGS, &val) < 0)
-                fprintf(stderr, "FS_IOC_SET_FLAGS: %s\\n", strerror(errno));
+        msg.msg_iov = &iov;
+        msg.msg_iovlen = 1;
+        if (nfds) {
+                if (nfds > SEND_FD_CAPA) {
+                        fprintf(stderr, "FIXME: bump SEND_FD_CAPA=%d\n", nfds);
+                        nfds = SEND_FD_CAPA;
+                }
+                msg.msg_control = &cmsg.hdr;
+                msg.msg_controllen = CMSG_SPACE(nfds * sizeof(int));
+                cmsg.hdr.cmsg_level = SOL_SOCKET;
+                cmsg.hdr.cmsg_type = SCM_RIGHTS;
+                cmsg.hdr.cmsg_len = CMSG_LEN(nfds * sizeof(int));
+                fdp = (int *)CMSG_DATA(&cmsg.hdr);
+                for (i = 0; i < nfds; i++) {
+                        SV **fd = av_fetch(fds, i, 0);
+                        *fdp++ = SvIV(*fd);
+                }
+        }
+        do {
+                sent = sendmsg(PerlIO_fileno(s), &msg, flags);
+        } while (sent < 0 && sendmsg_retry(&tries));
+        return sent >= 0 ? newSViv(sent) : &PL_sv_undef;
 }
 
-void nodatacow_dir(const char *dir)
+void recv_cmd4(PerlIO *s, SV *buf, STRLEN n)
 {
-        DIR *dh = opendir(dir);
-        int fd;
+        union my_cmsg cmsg = { 0 };
+        struct msghdr msg = { 0 };
+        struct iovec iov;
+        ssize_t i;
+        Inline_Stack_Vars;
+        Inline_Stack_Reset;
 
-        if (!dh) croak("opendir(%s): %s", dir, strerror(errno));
-        fd = dirfd(dh);
-        if (fd >= 0)
-                nodatacow_fd(fd);
-        /* ENOTSUP probably won't happen under Linux... */
-        closedir(dh);
-}
-SET_NODATACOW
-
-my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
-                $ENV{XDG_CACHE_HOME} //
-                ( ($ENV{HOME} // '/nonexistent').'/.cache' )
-        ).'/public-inbox/inline-c';
+        if (!SvOK(buf))
+                sv_setpvn(buf, "", 0);
+        iov.iov_base = SvGROW(buf, n + 1);
+        iov.iov_len = n;
+        msg.msg_iov = &iov;
+        msg.msg_iovlen = 1;
+        msg.msg_control = &cmsg.hdr;
+        msg.msg_controllen = CMSG_SPACE(SEND_FD_SPACE);
 
-$set_nodatacow = $vfork_spawn = undef unless -d $inline_dir && -w _;
-if (defined $vfork_spawn) {
-        # Inline 0.64 or later has locking in multi-process env,
-        # but we support 0.5 on Debian wheezy
-        use Fcntl qw(:flock);
-        eval {
-                my $f = "$inline_dir/.public-inbox.lock";
-                open my $fh, '>', $f or die "failed to open $f: $!\n";
-                flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
-                eval 'use Inline C => $vfork_spawn . $set_nodatacow';
-                my $err = $@;
-                my $ndc_err;
-                if ($err && $set_nodatacow) { # missing Linux kernel headers
-                        $ndc_err = $err;
-                        undef $set_nodatacow;
-                        eval 'use Inline C => $vfork_spawn';
+        for (;;) {
+                i = recvmsg(PerlIO_fileno(s), &msg, 0);
+                if (i >= 0 || errno != EINTR) break;
+                PERL_ASYNC_CHECK();
+        }
+        if (i >= 0) {
+                SvCUR_set(buf, i);
+                if (cmsg.hdr.cmsg_level == SOL_SOCKET &&
+                                cmsg.hdr.cmsg_type == SCM_RIGHTS) {
+                        size_t len = cmsg.hdr.cmsg_len;
+                        int *fdp = (int *)CMSG_DATA(&cmsg.hdr);
+                        for (i = 0; CMSG_LEN((i + 1) * sizeof(int)) <= len; i++)
+                                Inline_Stack_Push(sv_2mortal(newSViv(*fdp++)));
                 }
-                flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
-                die $err if $err;
-                warn $ndc_err if $ndc_err;
-        };
-        if ($@) {
-                warn "Inline::C failed for vfork: $@\n";
-                $set_nodatacow = $vfork_spawn = undef;
+        } else {
+                Inline_Stack_Push(&PL_sv_undef);
+                SvCUR_set(buf, 0);
         }
+        Inline_Stack_Done;
 }
+#endif /* defined(CMSG_SPACE) && defined(CMSG_LEN) */
 
-unless (defined $vfork_spawn) {
-        require PublicInbox::SpawnPP;
-        *pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
-}
-unless ($set_nodatacow) {
-        require PublicInbox::NDC_PP;
-        no warnings 'once';
-        *nodatacow_fd = \&PublicInbox::NDC_PP::nodatacow_fd;
-        *nodatacow_dir = \&PublicInbox::NDC_PP::nodatacow_dir;
-}
-undef $set_nodatacow;
-undef $vfork_spawn;
+void rlimit_map()
+{
+        Inline_Stack_Vars;
+        Inline_Stack_Reset;
+ALL_LIBC
+        my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} // (
+                        $ENV{XDG_CACHE_HOME} //
+                        ( ($ENV{HOME} // '/nonexistent').'/.cache' )
+                ).'/public-inbox/inline-c';
+        undef $all_libc unless -d $inline_dir;
+        if (defined $all_libc) {
+                for (@RLIMITS, 'RLIM_INFINITY') {
+                        $all_libc .= <<EOM;
+        Inline_Stack_Push(sv_2mortal(newSVpvs("$_")));
+        Inline_Stack_Push(sv_2mortal(newSViv($_)));
+EOM
+                }
+                $all_libc .= <<EOM;
+        Inline_Stack_Done;
+} // rlimit_map
+EOM
+                local $ENV{PERL_INLINE_DIRECTORY} = $inline_dir;
+                # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+                my $lk = PublicInbox::Lock->new($inline_dir.
+                                                '/.public-inbox.lock');
+                my $fh = $lk->lock_acquire;
+                open my $oldout, '>&', \*STDOUT;
+                open my $olderr, '>&', \*STDERR;
+                open STDOUT, '>&', $fh;
+                open STDERR, '>&', $fh;
+                STDERR->autoflush(1);
+                STDOUT->autoflush(1);
+                eval 'use Inline C => $all_libc, BUILD_NOISY => 1';
+                my $err = $@;
+                open(STDERR, '>&', $olderr);
+                open(STDOUT, '>&', $oldout);
+                if ($err) {
+                        seek($fh, 0, SEEK_SET);
+                        my @msg = <$fh>;
+                        truncate($fh, 0);
+                        warn "Inline::C build failed:\n", $err, "\n", @msg;
+                        $all_libc = undef;
+                }
+        }
+        if (defined $all_libc) { # set for Gcf2
+                $ENV{PERL_INLINE_DIRECTORY} = $inline_dir;
+                %RLIMITS = rlimit_map();
+                *send_cmd4 = sub ($$$$;$) {
+                        send_cmd4_($_[0], $_[1], $_[2], $_[3], 50);
+                }
+        } else {
+                require PublicInbox::SpawnPP;
+                *pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
+        }
+} # /BEGIN
 
 sub which ($) {
         my ($file) = @_;
         return $file if index($file, '/') >= 0;
-        foreach my $p (split(':', $ENV{PATH})) {
+        for my $p (split(/:/, $ENV{PATH})) {
                 $p .= "/$file";
                 return $p if -x $p;
         }
@@ -257,53 +351,94 @@ sub which ($) {
 }
 
 sub spawn ($;$$) {
-        my ($cmd, $env, $opts) = @_;
-        my $f = which($cmd->[0]);
-        defined $f or die "$cmd->[0]: command not found\n";
-        my @env;
-        $opts ||= {};
-
-        my %env = $env ? (%ENV, %$env) : %ENV;
+        my ($cmd, $env, $opt) = @_;
+        my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n";
+        my (@env, @rdr);
+        my %env = (%ENV, $env ? %$env : ());
         while (my ($k, $v) = each %env) {
-                push @env, "$k=$v";
+                push @env, "$k=$v" if defined($v);
         }
-        my $redir = [];
         for my $child_fd (0..2) {
-                my $parent_fd = $opts->{$child_fd};
-                if (defined($parent_fd) && $parent_fd !~ /\A[0-9]+\z/) {
-                        defined(my $fd = fileno($parent_fd)) or
-                                        die "$parent_fd not an IO GLOB? $!";
-                        $parent_fd = $fd;
+                my $pfd = $opt->{$child_fd};
+                if ('SCALAR' eq ref($pfd)) {
+                        open my $fh, '+>', undef;
+                        $opt->{"fh.$child_fd"} = $fh; # for read_out_err
+                        if ($child_fd == 0) {
+                                print $fh $$pfd;
+                                $fh->flush or die "flush: $!";
+                                sysseek($fh, 0, SEEK_SET);
+                        }
+                        $pfd = fileno($fh);
+                } elsif (defined($pfd) && $pfd !~ /\A[0-9]+\z/) {
+                        my $fd = fileno($pfd) //
+                                        croak "BUG: $pfd not an IO GLOB? $!";
+                        $pfd = $fd;
                 }
-                $redir->[$child_fd] = $parent_fd // $child_fd;
+                $rdr[$child_fd] = $pfd // $child_fd;
         }
         my $rlim = [];
-
         foreach my $l (@RLIMITS) {
-                defined(my $v = $opts->{$l}) or next;
-                my $r = eval "require BSD::Resource; BSD::Resource::$l();";
-                unless (defined $r) {
-                        warn "$l undefined by BSD::Resource: $@\n";
-                        next;
-                }
+                my $v = $opt->{$l} // next;
+                my $r = $RLIMITS{$l} //
+                        eval "require BSD::Resource; BSD::Resource::$l();" //
+                        do {
+                                warn "$l undefined by BSD::Resource: $@\n";
+                                next;
+                        };
                 push @$rlim, $r, @$v;
         }
-        my $cd = $opts->{'-C'} // ''; # undef => NULL mapping doesn't work?
-        my $pid = pi_fork_exec($redir, $f, $cmd, \@env, $rlim, $cd);
-        die "fork_exec @$cmd failed: $!\n" unless $pid > 0;
-        $pid;
+        my $cd = $opt->{'-C'} // ''; # undef => NULL mapping doesn't work?
+        my $pgid = $opt->{pgid} // -1;
+        pi_fork_exec(\@rdr, $f, $cmd, \@env, $rlim, $cd, $pgid);
 }
 
 sub popen_rd {
-        my ($cmd, $env, $opts) = @_;
-        pipe(my ($r, $w)) or die "pipe: $!\n";
-        $opts ||= {};
-        $opts->{1} = fileno($w);
-        my $pid = spawn($cmd, $env, $opts);
-        return ($r, $pid) if wantarray;
-        my $ret = gensym;
-        tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
-        $ret;
+        my ($cmd, $env, $opt, @cb_arg) = @_;
+        pipe(my $r, local $opt->{1});
+        PublicInbox::IO::attach_pid($r, spawn($cmd, $env, $opt), @cb_arg);
+}
+
+sub popen_wr {
+        my ($cmd, $env, $opt, @cb_arg) = @_;
+        pipe(local $opt->{0}, my $w);
+        $w->autoflush(1);
+        PublicInbox::IO::attach_pid($w, spawn($cmd, $env, $opt), @cb_arg);
+}
+
+sub read_out_err ($) {
+        my ($opt) = @_;
+        for my $fd (1, 2) { # read stdout/stderr
+                my $fh = delete($opt->{"fh.$fd"}) // next;
+                seek($fh, 0, SEEK_SET);
+                PublicInbox::IO::read_all $fh, undef, $opt->{$fd};
+        }
+}
+
+sub run_wait ($;$$) {
+        my ($cmd, $env, $opt) = @_;
+        waitpid(spawn($cmd, $env, $opt), 0);
+        read_out_err($opt);
+        $?
+}
+
+sub run_die ($;$$) {
+        my ($cmd, $env, $rdr) = @_;
+        run_wait($cmd, $env, $rdr) and croak "E: @$cmd failed: \$?=$?";
+}
+
+sub run_qx {
+        my ($cmd, $env, $opt) = @_;
+        my $fh = popen_rd($cmd, $env, $opt);
+        my @ret;
+        if (wantarray) {
+                @ret = <$fh>;
+        } else {
+                local $/;
+                $ret[0] = <$fh>;
+        }
+        $fh->close; # caller should check $?
+        read_out_err($opt);
+        wantarray ? @ret : $ret[0];
 }
 
 1;
diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm
index a72d5a2d..9ad4d0a1 100644
--- a/lib/PublicInbox/SpawnPP.pm
+++ b/lib/PublicInbox/SpawnPP.pm
@@ -1,59 +1,70 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Pure-Perl implementation of "spawn".  This can't take advantage
 # of vfork, so no speedups under Linux for spawning from large processes.
+# Do not require this directly, only use from PublicInbox::Spawn
 package PublicInbox::SpawnPP;
-use strict;
-use warnings;
-use POSIX qw(dup2 :signal_h);
+use v5.12;
+use POSIX qw(dup2 _exit setpgid :signal_h);
+use autodie qw(chdir close pipe);
+use PublicInbox::OnDestroy;
+# this is loaded by PublicInbox::Spawn, so we can't use/require it, here
 
 # Pure Perl implementation for folks that do not use Inline::C
-sub pi_fork_exec ($$$$$$) {
-        my ($redir, $f, $cmd, $env, $rlim, $cd) = @_;
+sub pi_fork_exec ($$$$$$$) {
+        my ($redir, $f, $cmd, $env, $rlim, $cd, $pgid) = @_;
         my $old = POSIX::SigSet->new();
         my $set = POSIX::SigSet->new();
-        $set->fillset or die "fillset failed: $!";
-        sigprocmask(SIG_SETMASK, $set, $old) or die "can't block signals: $!";
-        my $syserr;
-        my $pid = fork;
-        unless (defined $pid) { # compat with Inline::C version
-                $syserr = $!;
-                $pid = -1;
+        $set->fillset or die "sigfillset: $!";
+        for (POSIX::SIGABRT, POSIX::SIGBUS, POSIX::SIGFPE,
+                        POSIX::SIGILL, POSIX::SIGSEGV) {
+                $set->delset($_) or die "delset($_): $!";
         }
+        sigprocmask(SIG_SETMASK, $set, $old) or die "SIG_SETMASK(set): $!";
+        pipe(my $r, my $w);
+        my $pid = PublicInbox::OnDestroy::fork_tmp;
         if ($pid == 0) {
-                while (@$rlim) {
-                        my ($r, $soft, $hard) = splice(@$rlim, 0, 3);
-                        BSD::Resource::setrlimit($r, $soft, $hard) or
-                          warn "failed to set $r=[$soft,$hard]\n";
-                }
+                close $r;
+                $SIG{__DIE__} = sub {
+                        warn(@_);
+                        syswrite($w, my $num = $! + 0);
+                        _exit(1);
+                };
                 for my $child_fd (0..$#$redir) {
                         my $parent_fd = $redir->[$child_fd];
                         next if $parent_fd == $child_fd;
                         dup2($parent_fd, $child_fd) or
-                                die "dup2($parent_fd, $child_fd): $!\n";
+                                die "dup2($parent_fd, $child_fd): $!";
                 }
-                if ($cd ne '') {
-                        chdir $cd or die "chdir $cd: $!";
+                if ($pgid >= 0 && !defined(setpgid(0, $pgid))) {
+                        die "setpgid(0, $pgid): $!";
                 }
-                $SIG{$_} = 'DEFAULT' for keys %SIG;
-                my $cset = POSIX::SigSet->new();
-                $cset->addset(POSIX::SIGCHLD) or die "can't add SIGCHLD: $!";
-                sigprocmask(SIG_UNBLOCK, $cset) or
-                                        die "can't unblock SIGCHLD: $!";
+                $SIG{$_} = 'DEFAULT' for grep(!/\A__/, keys %SIG);
+                chdir($cd) if $cd ne '';
+                while (@$rlim) {
+                        my ($r, $soft, $hard) = splice(@$rlim, 0, 3);
+                        BSD::Resource::setrlimit($r, $soft, $hard) or
+                                die "setrlimit($r=[$soft,$hard]: $!)";
+                }
+                $old->delset(POSIX::SIGCHLD) or die "sigdelset CHLD: $!";
+                sigprocmask(SIG_SETMASK, $old) or die "SIG_SETMASK ~CHLD: $!";
+                $cmd->[0] = $f;
                 if ($ENV{MOD_PERL}) {
-                        exec which('env'), '-i', @$env, @$cmd;
-                        die "exec env -i ... $cmd->[0] failed: $!\n";
+                        $f = PublicInbox::Spawn::which('env');
+                        @$cmd = ('env', '-i', @$env, @$cmd);
                 } else {
-                        local %ENV = map { split(/=/, $_, 2) } @$env;
-                        my @cmd = @$cmd;
-                        $cmd[0] = $f;
-                        exec @cmd;
-                        die "exec $cmd->[0] failed: $!\n";
+                        %ENV = map { split(/=/, $_, 2) } @$env;
                 }
+                exec { $f } @$cmd;
+                die "exec @$cmd failed: $!";
+        }
+        close $w;
+        sigprocmask(SIG_SETMASK, $old) or die "SIG_SETMASK(old): $!";
+        if (my $cerrnum = do { local $/, <$r> }) {
+                $! = $cerrnum;
+                die "forked child $@: $!";
         }
-        sigprocmask(SIG_SETMASK, $old) or die "can't unblock signals: $!";
-        $! = $syserr;
         $pid;
 }
 
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index e4f00a2a..4cbe9623 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -2,33 +2,34 @@
 # specifically the Debian libsys-syscall-perl 0.25-6 version to
 # fix upstream regressions in 0.25.
 #
+# See devel/sysdefs-list in the public-inbox source tree for maintenance
+# <https://80x24.org/public-inbox.git>, and machines from the GCC Farm:
+# <https://portal.cfarm.net/>
+#
 # This license differs from the rest of public-inbox
 #
 # This module is Copyright (c) 2005 Six Apart, Ltd.
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 #
 # All rights reserved.
 #
 # You may distribute under the terms of either the GNU General Public
 # License or the Artistic License, as specified in the Perl README file.
 package PublicInbox::Syscall;
-use strict;
+use v5.12;
 use parent qw(Exporter);
-use POSIX qw(ENOSYS O_NONBLOCK);
+use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
+use Socket qw(SOL_SOCKET SCM_RIGHTS);
 use Config;
+our %SIGNUM = (WINCH => 28); # most Linux, {Free,Net,Open}BSD, *Darwin
+our ($INOTIFY, %PACK);
 
 # $VERSION = '0.25'; # Sys::Syscall version
 our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
-                  EPOLLIN EPOLLOUT EPOLLET
-                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
-                  EPOLLONESHOT EPOLLEXCLUSIVE
-                  signalfd $SFD_NONBLOCK);
-our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
-                             EPOLLIN EPOLLOUT
-                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
-                             EPOLLONESHOT EPOLLEXCLUSIVE)],
-                );
-
+                EPOLLIN EPOLLOUT EPOLLET
+                EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
+                EPOLLONESHOT EPOLLEXCLUSIVE
+                signalfd rename_noreplace %SIGNUM $F_SETPIPE_SZ);
 use constant {
         EPOLLIN => 1,
         EPOLLOUT => 4,
@@ -41,240 +42,493 @@ use constant {
         EPOLL_CTL_ADD => 1,
         EPOLL_CTL_DEL => 2,
         EPOLL_CTL_MOD => 3,
+        SIZEOF_int => $Config{intsize},
+        SIZEOF_size_t => $Config{sizesize},
+        SIZEOF_ptr => $Config{ptrsize},
+        NUL => "\0",
 };
 
-our $loaded_syscall = 0;
-
-sub _load_syscall {
-    # props to Gaal for this!
-    return if $loaded_syscall++;
-    my $clean = sub {
-        delete @INC{qw<syscall.ph asm/unistd.ph bits/syscall.ph
-                        _h2ph_pre.ph sys/syscall.ph>};
-    };
-    $clean->(); # don't trust modules before us
-    my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 };
-    $clean->(); # don't require modules after us trust us
-    return $rv;
-}
-
+use constant TMPL_size_t => SIZEOF_size_t == 8 ? 'Q' : 'L';
 
-our (
-     $SYS_epoll_create,
-     $SYS_epoll_ctl,
-     $SYS_epoll_wait,
-     $SYS_signalfd4,
-     );
+our ($SYS_epoll_create,
+        $SYS_epoll_ctl,
+        $SYS_epoll_wait,
+        $SYS_signalfd4,
+        $SYS_renameat2,
+        $F_SETPIPE_SZ,
+        $SYS_sendmsg,
+        $SYS_recvmsg);
 
+my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
+my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS);
 my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
-our $SFD_NONBLOCK = O_NONBLOCK;
 our $no_deprecated = 0;
 
 if ($^O eq "linux") {
-    my $machine = (POSIX::uname())[-1];
-    # whether the machine requires 64-bit numbers to be on 8-byte
-    # boundaries.
-    my $u64_mod_8 = 0;
-
-    # if we're running on an x86_64 kernel, but a 32-bit process,
-    # we need to use the x32 or i386 syscall numbers.
-    if ($machine eq "x86_64" && $Config{ptrsize} == 4) {
-        $machine = $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? 'x32' : 'i386';
-    }
+        $F_SETPIPE_SZ = 1031;
+        my (undef, undef, $release, undef, $machine) = POSIX::uname();
+        my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/);
+        $SYS_renameat2 = 0 if "$maj.$min" < 3.15;
+        # whether the machine requires 64-bit numbers to be on 8-byte
+        # boundaries.
+        my $u64_mod_8 = 0;
 
-    # Similarly for mips64 vs mips
-    if ($machine eq "mips64" && $Config{ptrsize} == 4) {
-        $machine = "mips";
-    }
-
-    if ($machine =~ m/^i[3456]86$/) {
-        $SYS_epoll_create = 254;
-        $SYS_epoll_ctl    = 255;
-        $SYS_epoll_wait   = 256;
-        $SYS_signalfd4 = 327;
-    } elsif ($machine eq "x86_64") {
-        $SYS_epoll_create = 213;
-        $SYS_epoll_ctl    = 233;
-        $SYS_epoll_wait   = 232;
-        $SYS_signalfd4 = 289;
-    } elsif ($machine eq 'x32') {
-        $SYS_epoll_create = 1073742037;
-        $SYS_epoll_ctl = 1073742057;
-        $SYS_epoll_wait = 1073742056;
-        $SYS_signalfd4 = 1073742113;
-    } elsif ($machine eq 'sparc64') {
-        $SYS_epoll_create = 193;
-        $SYS_epoll_ctl = 194;
-        $SYS_epoll_wait = 195;
-        $u64_mod_8 = 1;
-        $SYS_signalfd4 = 317;
-        $SFD_CLOEXEC = 020000000;
-    } elsif ($machine =~ m/^parisc/) {
-        $SYS_epoll_create = 224;
-        $SYS_epoll_ctl    = 225;
-        $SYS_epoll_wait   = 226;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 309;
-    } elsif ($machine =~ m/^ppc64/) {
-        $SYS_epoll_create = 236;
-        $SYS_epoll_ctl    = 237;
-        $SYS_epoll_wait   = 238;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 313;
-    } elsif ($machine eq "ppc") {
-        $SYS_epoll_create = 236;
-        $SYS_epoll_ctl    = 237;
-        $SYS_epoll_wait   = 238;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 313;
-    } elsif ($machine =~ m/^s390/) {
-        $SYS_epoll_create = 249;
-        $SYS_epoll_ctl    = 250;
-        $SYS_epoll_wait   = 251;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 322;
-    } elsif ($machine eq "ia64") {
-        $SYS_epoll_create = 1243;
-        $SYS_epoll_ctl    = 1244;
-        $SYS_epoll_wait   = 1245;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 289;
-    } elsif ($machine eq "alpha") {
-        # natural alignment, ints are 32-bits
-        $SYS_epoll_create = 407;
-        $SYS_epoll_ctl    = 408;
-        $SYS_epoll_wait   = 409;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 484;
-        $SFD_CLOEXEC = 010000000;
-    } elsif ($machine eq "aarch64") {
-        $SYS_epoll_create = 20;  # (sys_epoll_create1)
-        $SYS_epoll_ctl    = 21;
-        $SYS_epoll_wait   = 22;  # (sys_epoll_pwait)
-        $u64_mod_8        = 1;
-        $no_deprecated    = 1;
-        $SYS_signalfd4 = 74;
-    } elsif ($machine =~ m/arm(v\d+)?.*l/) {
-        # ARM OABI
-        $SYS_epoll_create = 250;
-        $SYS_epoll_ctl    = 251;
-        $SYS_epoll_wait   = 252;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 355;
-    } elsif ($machine =~ m/^mips64/) {
-        $SYS_epoll_create = 5207;
-        $SYS_epoll_ctl    = 5208;
-        $SYS_epoll_wait   = 5209;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 5283;
-    } elsif ($machine =~ m/^mips/) {
-        $SYS_epoll_create = 4248;
-        $SYS_epoll_ctl    = 4249;
-        $SYS_epoll_wait   = 4250;
-        $u64_mod_8        = 1;
-        $SYS_signalfd4 = 4324;
-    } else {
-        # as a last resort, try using the *.ph files which may not
-        # exist or may be wrong
-        _load_syscall();
-        $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
-        $SYS_epoll_ctl    = eval { &SYS_epoll_ctl;    } || 0;
-        $SYS_epoll_wait   = eval { &SYS_epoll_wait;   } || 0;
-
-        # Note: do NOT add new syscalls to depend on *.ph, here.
-        # Better to miss syscalls (so we can fallback to IO::Poll)
-        # than to use wrong ones, since the names are not stable
-        # (at least not on FreeBSD), if the actual numbers are.
-    }
-
-    if ($u64_mod_8) {
-        *epoll_wait = \&epoll_wait_mod8;
-        *epoll_ctl = \&epoll_ctl_mod8;
-    } else {
-        *epoll_wait = \&epoll_wait_mod4;
-        *epoll_ctl = \&epoll_ctl_mod4;
-    }
+        if (SIZEOF_ptr == 4) {
+                # if we're running on an x86_64 kernel, but a 32-bit process,
+                # we need to use the x32 or i386 syscall numbers.
+                if ($machine eq 'x86_64') {
+                        my $s = $Config{cppsymbols};
+                        $machine = ($s =~ /\b__ILP32__=1\b/ &&
+                                        $s =~ /\b__x86_64__=1\b/) ?
+                                'x32' : 'i386'
+                } elsif ($machine eq 'mips64') { # similarly for mips64 vs mips
+                        $machine = 'mips';
+                }
+        }
+        if ($machine =~ m/^i[3456]86$/) {
+                $SYS_epoll_create = 254;
+                $SYS_epoll_ctl = 255;
+                $SYS_epoll_wait = 256;
+                $SYS_signalfd4 = 327;
+                $SYS_renameat2 //= 353;
+                $SYS_fstatfs = 100;
+                $SYS_sendmsg = 370;
+                $SYS_recvmsg = 372;
+                $INOTIFY = { # usage: `use constant $PublicInbox::Syscall::INOTIFY'
+                        SYS_inotify_init1 => 332,
+                        SYS_inotify_add_watch => 292,
+                        SYS_inotify_rm_watch => 293,
+                };
+                $FS_IOC_GETFLAGS = 0x80046601;
+                $FS_IOC_SETFLAGS = 0x40046602;
+        } elsif ($machine eq "x86_64") {
+                $SYS_epoll_create = 213;
+                $SYS_epoll_ctl = 233;
+                $SYS_epoll_wait = 232;
+                $SYS_signalfd4 = 289;
+                $SYS_renameat2 //= 316;
+                $SYS_fstatfs = 138;
+                $SYS_sendmsg = 46;
+                $SYS_recvmsg = 47;
+                $INOTIFY = {
+                        SYS_inotify_init1 => 294,
+                        SYS_inotify_add_watch => 254,
+                        SYS_inotify_rm_watch => 255,
+                };
+                $FS_IOC_GETFLAGS = 0x80086601;
+                $FS_IOC_SETFLAGS = 0x40086602;
+        } elsif ($machine eq 'x32') {
+                $SYS_epoll_create = 1073742037;
+                $SYS_epoll_ctl = 1073742057;
+                $SYS_epoll_wait = 1073742056;
+                $SYS_signalfd4 = 1073742113;
+                $SYS_renameat2 //= 0x40000000 + 316;
+                $SYS_fstatfs = 138;
+                $SYS_sendmsg = 0x40000206;
+                $SYS_recvmsg = 0x40000207;
+                $FS_IOC_GETFLAGS = 0x80046601;
+                $FS_IOC_SETFLAGS = 0x40046602;
+                $INOTIFY = {
+                        SYS_inotify_init1 => 1073742118,
+                        SYS_inotify_add_watch => 1073742078,
+                        SYS_inotify_rm_watch => 1073742079,
+                };
+        } elsif ($machine eq 'sparc64') {
+                $SYS_epoll_create = 193;
+                $SYS_epoll_ctl = 194;
+                $SYS_epoll_wait = 195;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 317;
+                $SYS_renameat2 //= 345;
+                $SFD_CLOEXEC = 020000000;
+                $SYS_fstatfs = 158;
+                $SYS_sendmsg = 114;
+                $SYS_recvmsg = 113;
+                $FS_IOC_GETFLAGS = 0x40086601;
+                $FS_IOC_SETFLAGS = 0x80086602;
+        } elsif ($machine =~ m/^parisc/) { # untested, no machine on cfarm
+                $SYS_epoll_create = 224;
+                $SYS_epoll_ctl = 225;
+                $SYS_epoll_wait = 226;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 309;
+                $SIGNUM{WINCH} = 23;
+        } elsif ($machine =~ m/^ppc64/) {
+                $SYS_epoll_create = 236;
+                $SYS_epoll_ctl = 237;
+                $SYS_epoll_wait = 238;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 313;
+                $SYS_renameat2 //= 357;
+                $SYS_fstatfs = 100;
+                $SYS_sendmsg = 341;
+                $SYS_recvmsg = 342;
+                $FS_IOC_GETFLAGS = 0x40086601;
+                $FS_IOC_SETFLAGS = 0x80086602;
+                $INOTIFY = {
+                        SYS_inotify_init1 => 318,
+                        SYS_inotify_add_watch => 276,
+                        SYS_inotify_rm_watch => 277,
+                };
+        } elsif ($machine eq "ppc") { # untested, no machine on cfarm
+                $SYS_epoll_create = 236;
+                $SYS_epoll_ctl = 237;
+                $SYS_epoll_wait = 238;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 313;
+                $SYS_renameat2 //= 357;
+                $SYS_fstatfs = 100;
+                $FS_IOC_GETFLAGS = 0x40086601;
+                $FS_IOC_SETFLAGS = 0x80086602;
+        } elsif ($machine =~ m/^s390/) { # untested, no machine on cfarm
+                $SYS_epoll_create = 249;
+                $SYS_epoll_ctl = 250;
+                $SYS_epoll_wait = 251;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 322;
+                $SYS_renameat2 //= 347;
+                $SYS_fstatfs = 100;
+                $SYS_sendmsg = 370;
+                $SYS_recvmsg = 372;
+        } elsif ($machine eq 'ia64') { # untested, no machine on cfarm
+                $SYS_epoll_create = 1243;
+                $SYS_epoll_ctl = 1244;
+                $SYS_epoll_wait = 1245;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 289;
+        } elsif ($machine eq "alpha") { # untested, no machine on cfarm
+                # natural alignment, ints are 32-bits
+                $SYS_epoll_create = 407;
+                $SYS_epoll_ctl = 408;
+                $SYS_epoll_wait = 409;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 484;
+                $SFD_CLOEXEC = 010000000;
+        } elsif ($machine =~ /\A(?:loong|a)arch64\z/ || $machine eq 'riscv64') {
+                $SYS_epoll_create = 20; # (sys_epoll_create1)
+                $SYS_epoll_ctl = 21;
+                $SYS_epoll_wait = 22; # (sys_epoll_pwait)
+                $u64_mod_8 = 1;
+                $no_deprecated = 1;
+                $SYS_signalfd4 = 74;
+                $SYS_renameat2 //= 276;
+                $SYS_fstatfs = 44;
+                $SYS_sendmsg = 211;
+                $SYS_recvmsg = 212;
+                $INOTIFY = {
+                        SYS_inotify_init1 => 26,
+                        SYS_inotify_add_watch => 27,
+                        SYS_inotify_rm_watch => 28,
+                };
+                $FS_IOC_GETFLAGS = 0x80086601;
+                $FS_IOC_SETFLAGS = 0x40086602;
+        } elsif ($machine =~ m/arm(v\d+)?.*l/) { # ARM OABI (untested on cfarm)
+                $SYS_epoll_create = 250;
+                $SYS_epoll_ctl = 251;
+                $SYS_epoll_wait = 252;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 355;
+                $SYS_renameat2 //= 382;
+                $SYS_fstatfs = 100;
+                $SYS_sendmsg = 296;
+                $SYS_recvmsg = 297;
+        } elsif ($machine =~ m/^mips64/) { # cfarm only has 32-bit userspace
+                $SYS_epoll_create = 5207;
+                $SYS_epoll_ctl = 5208;
+                $SYS_epoll_wait = 5209;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 5283;
+                $SYS_renameat2 //= 5311;
+                $SYS_fstatfs = 5135;
+                $SYS_sendmsg = 5045;
+                $SYS_recvmsg = 5046;
+                $FS_IOC_GETFLAGS = 0x40046601;
+                $FS_IOC_SETFLAGS = 0x80046602;
+        } elsif ($machine =~ m/^mips/) { # 32-bit, tested on mips64 cfarm host
+                $SYS_epoll_create = 4248;
+                $SYS_epoll_ctl = 4249;
+                $SYS_epoll_wait = 4250;
+                $u64_mod_8 = 1;
+                $SYS_signalfd4 = 4324;
+                $SYS_renameat2 //= 4351;
+                $SYS_fstatfs = 4100;
+                $SYS_sendmsg = 4179;
+                $SYS_recvmsg = 4177;
+                $FS_IOC_GETFLAGS = 0x40046601;
+                $FS_IOC_SETFLAGS = 0x80046602;
+                $SIGNUM{WINCH} = 20;
+                $INOTIFY = {
+                        SYS_inotify_init1 => 4329,
+                        SYS_inotify_add_watch => 4285,
+                        SYS_inotify_rm_watch => 4286,
+                };
+        } else {
+                warn <<EOM;
+machine=$machine ptrsize=$Config{ptrsize} has no syscall definitions
+git clone https://80x24.org/public-inbox.git and
+Send the output of ./devel/sysdefs-list to meta\@public-inbox.org
+EOM
+        }
+        if ($u64_mod_8) {
+                *epoll_wait = \&epoll_wait_mod8;
+                *epoll_ctl = \&epoll_ctl_mod8;
+        } else {
+                *epoll_wait = \&epoll_wait_mod4;
+                *epoll_ctl = \&epoll_ctl_mod4;
+        }
+} elsif ($^O =~ /\A(?:freebsd|openbsd|netbsd|dragonfly)\z/) {
+# don't use syscall.ph here, name => number mappings are not stable on *BSD
+# but the actual numbers are.
+# OpenBSD perl redirects syscall perlop to libc functions
+# https://cvsweb.openbsd.org/src/gnu/usr.bin/perl/gen_syscall_emulator.pl
+# https://www.netbsd.org/docs/internals/en/chap-processes.html#syscall_versioning
+# https://wiki.freebsd.org/AddingSyscalls#Backward_compatibily
+# (I'm assuming Dragonfly copies FreeBSD, here, too)
+        $SYS_recvmsg = 27;
+        $SYS_sendmsg = 28;
 }
 
-elsif ($^O eq "freebsd") {
-    if ($ENV{FREEBSD_SENDFILE}) {
-        # this is still buggy and in development
-    }
-}
+BEGIN {
+        if ($^O eq 'linux') {
+                %PACK = (
+                        TMPL_cmsg_len => TMPL_size_t,
+                        # cmsg_len, cmsg_level, cmsg_type
+                        SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
+                        CMSG_DATA_off => '',
+                        TMPL_msghdr => 'PL' . # msg_name, msg_namelen
+                                '@'.(2 * SIZEOF_ptr).'P'. # msg_iov
+                                'i'. # msg_iovlen
+                                '@'.(4 * SIZEOF_ptr).'P'. # msg_control
+                                'L'. # msg_controllen (socklen_t)
+                                'i', # msg_flags
+                );
+        } elsif ($^O =~ /\A(?:freebsd|openbsd|netbsd|dragonfly)\z/) {
+                %PACK = (
+                        TMPL_cmsg_len => 'L', # socklen_t
+                        SIZEOF_cmsghdr => SIZEOF_int * 3,
+                        CMSG_DATA_off => SIZEOF_ptr == 8 ? '@16' : '',
+                        TMPL_msghdr => 'PL' . # msg_name, msg_namelen
+                                '@'.(2 * SIZEOF_ptr).'P'. # msg_iov
+                                TMPL_size_t. # msg_iovlen
+                                '@'.(4 * SIZEOF_ptr).'P'. # msg_control
+                                TMPL_size_t. # msg_controllen
+                                'i', # msg_flags
 
-############################################################################
-# epoll functions
-############################################################################
+                )
+        }
+        $PACK{CMSG_ALIGN_size} = SIZEOF_size_t;
+        $PACK{SIZEOF_cmsghdr} //= 0;
+        $PACK{TMPL_cmsg_len} //= undef;
+        $PACK{CMSG_DATA_off} //= undef;
+        $PACK{TMPL_msghdr} //= undef;
+}
 
-sub epoll_defined { return $SYS_epoll_create ? 1 : 0; }
+# SFD_CLOEXEC is arch-dependent, so IN_CLOEXEC may be, too
+$INOTIFY->{IN_CLOEXEC} //= 0x80000 if $INOTIFY;
 
 sub epoll_create {
-        syscall($SYS_epoll_create, $no_deprecated ? 0 : ($_[0]||100)+0);
+        syscall($SYS_epoll_create, $no_deprecated ? 0 : 100);
 }
 
 # epoll_ctl wrapper
 # ARGS: (epfd, op, fd, events_mask)
 sub epoll_ctl_mod4 {
-    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0));
+        syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0,
+                pack("LLL", $_[3], $_[2], 0));
 }
+
 sub epoll_ctl_mod8 {
-    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0));
+        syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0,
+                pack("LLLL", $_[3], 0, $_[2], 0));
 }
 
 # epoll_wait wrapper
 # ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
 #  arrayref: values modified to be [$fd, $event]
-our $epoll_wait_events;
+our $epoll_wait_events = '';
 our $epoll_wait_size = 0;
 sub epoll_wait_mod4 {
-    # resize our static buffer if requested size is bigger than we've ever done
-    if ($_[1] > $epoll_wait_size) {
-        $epoll_wait_size = $_[1];
-        $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
-    }
-    my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
-    for (0..$ct-1) {
-        @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
-    }
-    return $ct;
+        my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+        # resize our static buffer if maxevents bigger than we've ever done
+        if ($maxevents > $epoll_wait_size) {
+                $epoll_wait_size = $maxevents;
+                vec($epoll_wait_events, $maxevents * 12 - 1, 8) = 0;
+        }
+        @$events = ();
+        my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+                        $maxevents, $timeout_msec);
+        for (0..$ct - 1) {
+                # 12-byte struct epoll_event
+                # 4 bytes uint32_t events mask (skipped, useless to us)
+                # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
+                # So we skip the first 4 bytes and take the middle 4:
+                $events->[$_] = unpack('L', substr($epoll_wait_events,
+                                                        12 * $_ + 4, 4));
+        }
 }
 
 sub epoll_wait_mod8 {
-    # resize our static buffer if requested size is bigger than we've ever done
-    if ($_[1] > $epoll_wait_size) {
-        $epoll_wait_size = $_[1];
-        $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
-    }
-    my $ct;
-    if ($no_deprecated) {
-        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
-    } else {
-        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
-    }
-    for (0..$ct-1) {
-        # 16 byte epoll_event structs, with format:
-        #    4 byte mask [idx 1]
-        #    4 byte padding (we put it into idx 2, useless)
-        #    8 byte data (first 4 bytes are fd, into idx 0)
-        @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
-    }
-    return $ct;
+        my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+
+        # resize our static buffer if maxevents bigger than we've ever done
+        if ($maxevents > $epoll_wait_size) {
+                $epoll_wait_size = $maxevents;
+                vec($epoll_wait_events, $maxevents * 16 - 1, 8) = 0;
+        }
+        @$events = ();
+        my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+                        $maxevents, $timeout_msec,
+                        $no_deprecated ? undef : ());
+        for (0..$ct - 1) {
+                # 16-byte struct epoll_event
+                # 4 bytes uint32_t events mask (skipped, useless to us)
+                # 4 bytes padding (skipped, useless)
+                # 8 bytes epoll_data_t union (first 4 bytes are the fd)
+                # So skip the first 8 bytes, take 4, and ignore the last 4:
+                $events->[$_] = unpack('L', substr($epoll_wait_events,
+                                                        16 * $_ + 8, 4));
+        }
 }
 
-sub signalfd ($$$) {
-        my ($fd, $signos, $flags) = @_;
+sub signalfd ($) {
+        my ($signos) = @_;
         if ($SYS_signalfd4) {
                 my $set = POSIX::SigSet->new(@$signos);
-                syscall($SYS_signalfd4, $fd, "$$set",
+                syscall($SYS_signalfd4, -1, "$$set",
                         # $Config{sig_count} is NSIG, so this is NSIG/8:
                         int($Config{sig_count}/8),
-                        $flags|$SFD_CLOEXEC);
+                        # SFD_NONBLOCK == O_NONBLOCK for every architecture
+                        O_NONBLOCK|$SFD_CLOEXEC);
         } else {
                 $! = ENOSYS;
                 undef;
         }
 }
 
+sub _rename_noreplace_racy ($$) {
+        my ($old, $new) = @_;
+        if (link($old, $new)) {
+                warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT;
+                1
+        } else {
+                undef;
+        }
+}
+
+# TODO: support FD args?
+sub rename_noreplace ($$) {
+        my ($old, $new) = @_;
+        if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100
+                my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1);
+                if ($ret == 0) {
+                        1; # like rename() perlop
+                } elsif ($! == ENOSYS || $! == EINVAL) {
+                        undef $SYS_renameat2;
+                        _rename_noreplace_racy($old, $new);
+                } else {
+                        undef
+                }
+        } else {
+                _rename_noreplace_racy($old, $new);
+        }
+}
+
+sub nodatacow_fh ($) {
+        my ($fh) = @_;
+        my $buf = "\0" x 120;
+        syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or
+                return warn("fstatfs: $!\n");
+        my $f_type = unpack('l!', $buf); # statfs.f_type is a signed word
+        return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC
+
+        $FS_IOC_GETFLAGS //
+                return warn('FS_IOC_GETFLAGS undefined for platform');
+        ioctl($fh, $FS_IOC_GETFLAGS, $buf) //
+                return warn("FS_IOC_GETFLAGS: $!\n");
+        my $attr = unpack('l!', $buf);
+        return if ($attr & 0x00800000); # FS_NOCOW_FL;
+        ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) //
+                warn("FS_IOC_SETFLAGS: $!\n");
+}
+
+sub nodatacow_dir {
+        if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
+}
+
+use constant \%PACK;
+sub CMSG_ALIGN ($) { ($_[0] + CMSG_ALIGN_size - 1) & ~(CMSG_ALIGN_size - 1) }
+use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);
+sub CMSG_SPACE ($) { CMSG_ALIGN($_[0]) + CMSG_ALIGN_SIZEOF_cmsghdr }
+sub CMSG_LEN ($) { CMSG_ALIGN_SIZEOF_cmsghdr + $_[0] }
+use constant msg_controllen_max =>
+        CMSG_SPACE(10 * SIZEOF_int) + SIZEOF_cmsghdr; # space for 10 FDs
+
+if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
+no warnings 'once';
+require PublicInbox::CmdIPC4;
+
+*send_cmd4 = sub ($$$$;$) {
+        my ($sock, $fds, undef, $flags, $tries) = @_;
+        my $iov = pack('P'.TMPL_size_t,
+                        $_[2] // NUL, length($_[2] // NUL) || 1);
+        my $fd_space = scalar(@$fds) * SIZEOF_int;
+        my $msg_controllen = CMSG_SPACE($fd_space);
+        my $cmsghdr = pack(TMPL_cmsg_len .
+                        'LL' .  # cmsg_level, cmsg_type,
+                        CMSG_DATA_off.('i' x scalar(@$fds)). # CMSG_DATA
+                        '@'.($msg_controllen - 1).'x1', # pad to space, not len
+                        CMSG_LEN($fd_space), # cmsg_len
+                        SOL_SOCKET, SCM_RIGHTS, # cmsg_{level,type}
+                        @$fds); # CMSG_DATA
+        my $mh = pack(TMPL_msghdr,
+                        undef, 0, # msg_name, msg_namelen (unused)
+                        $iov, 1, # msg_iov, msg_iovlen
+                        $cmsghdr, # msg_control
+                        $msg_controllen,
+                        0); # msg_flags
+        my $s;
+        $tries //= 50;
+        do {
+                $s = syscall($SYS_sendmsg, fileno($sock), $mh, $flags);
+        } while ($s < 0 && PublicInbox::CmdIPC4::sendmsg_retry($tries));
+        $s >= 0 ? $s : undef;
+};
+
+*recv_cmd4 = sub ($$$) {
+        my ($sock, undef, $len) = @_;
+        vec($_[1] //= '', $len - 1, 8) = 0;
+        my $cmsghdr = "\0" x msg_controllen_max; # 10 * sizeof(int)
+        my $iov = pack('P'.TMPL_size_t, $_[1], $len);
+        my $mh = pack(TMPL_msghdr,
+                        undef, 0, # msg_name, msg_namelen (unused)
+                        $iov, 1, # msg_iov, msg_iovlen
+                        $cmsghdr, # msg_control
+                        msg_controllen_max,
+                        0); # msg_flags
+        my $r;
+        do {
+                $r = syscall($SYS_recvmsg, fileno($sock), $mh, 0);
+        } while ($r < 0 && $!{EINTR});
+        if ($r < 0) {
+                $_[1] = '';
+                return (undef);
+        }
+        substr($_[1], $r, length($_[1]), '');
+        my @ret;
+        if ($r > 0) {
+                my ($len, $lvl, $type, @fds) = unpack(TMPL_cmsg_len.
+                                        'LL'. # cmsg_level, cmsg_type
+                                        CMSG_DATA_off.'i*', # @fds
+                                        $cmsghdr);
+                if ($lvl == SOL_SOCKET && $type == SCM_RIGHTS) {
+                        $len -= CMSG_ALIGN_SIZEOF_cmsghdr;
+                        @ret = @fds[0..(($len / SIZEOF_int) - 1)];
+                }
+        }
+        @ret;
+};
+}
+
 1;
 
 =head1 WARRANTY
diff --git a/lib/PublicInbox/TLS.pm b/lib/PublicInbox/TLS.pm
index 0f838e25..3ce57f1b 100644
--- a/lib/PublicInbox/TLS.pm
+++ b/lib/PublicInbox/TLS.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # IO::Socket::SSL support code
@@ -6,6 +6,7 @@ package PublicInbox::TLS;
 use strict;
 use IO::Socket::SSL;
 use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT);
+use Carp qw(carp croak);
 
 sub err () { $SSL_ERROR }
 
@@ -13,7 +14,32 @@ sub err () { $SSL_ERROR }
 sub epollbit () {
         return EPOLLIN if $SSL_ERROR == SSL_WANT_READ;
         return EPOLLOUT if $SSL_ERROR == SSL_WANT_WRITE;
-        die "unexpected SSL error: $SSL_ERROR";
+        carp "unexpected SSL error: $SSL_ERROR";
+        undef;
+}
+
+sub _ctx_new ($) {
+        my ($tlsd) = @_;
+        my $ctx = IO::Socket::SSL::SSL_Context->new(
+                                @{$tlsd->{ssl_ctx_opt}}, SSL_server => 1) or
+                croak "SSL_Context->new: $SSL_ERROR";
+
+        # save ~34K per idle connection (cf. SSL_CTX_set_mode(3ssl))
+        # RSS goes from 346MB to 171MB with 10K idle NNTPS clients on amd64
+        # cf. https://rt.cpan.org/Ticket/Display.html?id=129463
+        my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() };
+        if ($mode && $ctx->{context}) {
+                eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) };
+                warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@;
+        }
+        $ctx;
+}
+
+sub start {
+        my ($io, $tlsd) = @_;
+        IO::Socket::SSL->start_SSL($io, SSL_server => 1,
+                SSL_reuse_ctx => ($tlsd->{ssl_ctx} //= _ctx_new($tlsd)),
+                SSL_startHandshake => 0);
 }
 
 1;
diff --git a/lib/PublicInbox/TailNotify.pm b/lib/PublicInbox/TailNotify.pm
new file mode 100644
index 00000000..84340a35
--- /dev/null
+++ b/lib/PublicInbox/TailNotify.pm
@@ -0,0 +1,97 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# only used for tests at the moment...
+package PublicInbox::TailNotify;
+use v5.12;
+use parent qw(PublicInbox::DirIdle); # not optimal, maybe..
+use PublicInbox::DS qw(now);
+
+my ($TAIL_MOD, $ino_cls);
+if ($^O eq 'linux' && eval { require PublicInbox::Inotify; 1 }) {
+        $TAIL_MOD = PublicInbox::Inotify::IN_MOVED_TO() |
+                PublicInbox::Inotify::IN_CREATE() |
+                PublicInbox::Inotify::IN_MODIFY();
+        $ino_cls = 'PublicInbox::Inotify';
+} elsif (eval { require PublicInbox::KQNotify }) {
+        $TAIL_MOD = PublicInbox::KQNotify::MOVED_TO_OR_CREATE() |
+                IO::KQueue::NOTE_DELETE() | IO::KQueue::NOTE_RENAME();
+        $ino_cls = 'PublicInbox::KQNotify';
+} else {
+        require PublicInbox::FakeInotify;
+        $TAIL_MOD = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE() |
+                PublicInbox::FakeInotify::IN_MODIFY() |
+                PublicInbox::FakeInotify::IN_DELETE();
+}
+require IO::Poll if $ino_cls;
+
+sub reopen_file ($) {
+        my ($self) = @_;
+
+        open my $fh, '<', $self->{fn} or return undef;
+        my @st = stat $fh or die "fstat($self->{fn}): $!";
+        $self->{ino_dev} = "@st[0, 1]";
+        $self->{inot}->watch($self->{fn}, $TAIL_MOD);
+        $self->{watch_fh} = $fh; # return value
+}
+
+sub new {
+        my ($cls, $fn) = @_;
+        my $self = bless { fn => $fn }, $cls;
+        if ($ino_cls) {
+                $self->{inot} = $ino_cls->new or die "E: $ino_cls->new: $!";
+                $self->{inot}->blocking(0);
+                my ($dn) = ($fn =~ m!\A(.+)/+[^/]+\z!);
+                $self->{inot}->watch($dn // '.', $TAIL_MOD);
+        } else {
+                $self->{inot} = PublicInbox::FakeInotify->new;
+        }
+        reopen_file($self);
+        $self->{inot}->watch($fn, $TAIL_MOD);
+        $self;
+}
+
+sub delete_self {
+        for (@_) { return 1 if $_->IN_DELETE_SELF }
+        undef;
+}
+
+sub getlines {
+        my ($self, $timeo) = @_;
+        my ($fh, $buf, $rfds, @ret, @events);
+        my $end = defined($timeo) ? now + $timeo : undef;
+again:
+        while (1) {
+                @events = $self->{inot}->read; # Linux::Inotify2::read
+                last if @events;
+                return () if defined($timeo) && (!$timeo || (now > $end));
+                my $wait = 0.1;
+                if ($ino_cls) {
+                        vec($rfds = '', $self->{inot}->fileno, 1) = 1;
+                        if (defined $end) {
+                                $wait = $end - now;
+                                $wait = 0 if $wait < 0;
+                        } else {
+                                undef $wait;
+                        }
+                }
+                select($rfds, undef, undef, $wait);
+        }
+        if ($fh = $self->{watch_fh}) {
+                sysread($fh, $buf, -s $fh) and
+                        push @ret, split(/^/sm, $buf);
+                my @st = stat($self->{fn});
+                if (!@st || "@st[0, 1]" ne $self->{ino_dev} ||
+                                delete_self(@events)) {
+                        delete @$self{qw(ino_dev watch_fh)};
+                }
+        }
+        if ($fh = $self->{watch_fh} // reopen_file($self)) {
+                sysread($fh, $buf, -s $fh) and
+                        push @ret, split(/^/sm, $buf);
+        }
+        goto again if (!@ret && (!defined($end) || now < $end));
+        @ret;
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 299b9c6a..3a67ab54 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # internal APIs used only for tests
@@ -6,68 +6,197 @@ package PublicInbox::TestCommon;
 use strict;
 use parent qw(Exporter);
 use v5.10.1;
-use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek);
+use Fcntl qw(F_SETFD F_GETFD FD_CLOEXEC :seek);
 use POSIX qw(dup2);
 use IO::Socket::INET;
-our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
-        run_script start_script key2sub xsys xqx eml_load tick
-        have_xapian_compact);
+use File::Spec;
+use Scalar::Util qw(isvstring);
+use Carp ();
+our @EXPORT;
+my $lei_loud = $ENV{TEST_LEI_ERR_LOUD};
+our $tail_cmd = $ENV{TAIL};
+our ($lei_opt, $lei_out, $lei_err);
+use autodie qw(chdir close fcntl mkdir open opendir seek unlink);
+$ENV{XDG_CACHE_HOME} //= "$ENV{HOME}/.cache"; # reuse C++ xap_helper builds
+
+$_ = File::Spec->rel2abs($_) for (grep(!m!^/!, @INC));
+our $CURRENT_DAEMON;
+BEGIN {
+        @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
+                run_script start_script key2sub xsys xsys_e xqx eml_load tick
+                have_xapian_compact json_utf8 setup_public_inboxes create_inbox
+                create_dir
+                create_coderepo require_bsd kernel_version check_broken_tmpfs
+                quit_waiter_pipe wait_for_eof require_git_http_backend
+                tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
+                test_httpd xbail require_cmd is_xdeeply tail_f
+                ignore_inline_c_missing no_pollerfd no_coredump cfg_new
+                strace strace_inject lsof_pid oct_is);
+        require Test::More;
+        my @methods = grep(!/\W/, @Test::More::EXPORT);
+        eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
+        die $@ if $@;
+        push @EXPORT, @methods;
+}
+
+sub kernel_version () {
+        state $version = do {
+                require POSIX;
+                my @u = POSIX::uname();
+                if ($u[2] =~ /\A([0-9]+(?:\.[0-9]+)+)/) {
+                        eval "v$1";
+                } else {
+                        local $" = "', `";
+                        diag "Unable to get kernel version from: `@u'";
+                        undef;
+                }
+        };
+}
+
+sub check_broken_tmpfs () {
+        return if $^O ne 'dragonfly' || kernel_version ge v6.5;
+        diag 'EVFILT_VNODE + tmpfs is broken on dragonfly <= 6.4 (have: '.
+                sprintf('%vd', kernel_version).')';
+        1;
+}
+
+sub require_bsd (;$) {
+        state $ok = ($^O =~ m!\A(?:free|net|open)bsd\z! ||
+                        $^O eq 'dragonfly');
+        return 1 if $ok;
+        return if defined(wantarray);
+        my $m = "$0 is BSD-only (\$^O=$^O)";
+        @_ ? skip($m, 1) : plan(skip_all => $m);
+}
+
+sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) }
+
+sub read_all ($;$$$) {
+        require PublicInbox::IO;
+        PublicInbox::IO::read_all($_[0], $_[1], $_[2], $_[3])
+}
 
 sub eml_load ($) {
         my ($path, $cb) = @_;
-        open(my $fh, '<', $path) or die "open $path: $!";
+        open(my $fh, '<', $path);
         require PublicInbox::Eml;
-        PublicInbox::Eml->new(\(do { local $/; <$fh> }));
+        PublicInbox::Eml->new(\(scalar read_all $fh));
 }
 
 sub tmpdir (;$) {
         my ($base) = @_;
         require File::Temp;
-        unless (defined $base) {
-                ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
-        }
-        my $tmpdir = File::Temp->newdir("pi-$base-$$-XXXXXX", TMPDIR => 1);
-        ($tmpdir->dirname, $tmpdir);
+        ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!) unless defined $base;
+        my $tmpdir = File::Temp->newdir("pi-$base-$$-XXXX", TMPDIR => 1);
+        wantarray ? ($tmpdir->dirname, $tmpdir) : $tmpdir;
 }
 
 sub tcp_server () {
-        IO::Socket::INET->new(
-                LocalAddr => '127.0.0.1',
+        my %opt = (
                 ReuseAddr => 1,
                 Proto => 'tcp',
                 Type => Socket::SOCK_STREAM(),
                 Listen => 1024,
                 Blocking => 0,
-        ) or Test::More::BAIL_OUT("failed to create TCP server: $!");
+        );
+        eval {
+                die 'IPv4-only' if $ENV{TEST_IPV4_ONLY};
+                my $pkg;
+                for (qw(IO::Socket::IP IO::Socket::INET6)) {
+                        eval "require $_" or next;
+                        $pkg = $_ and last;
+                }
+                $pkg->new(%opt, LocalAddr => '[::1]');
+        } || eval {
+                die 'IPv6-only' if $ENV{TEST_IPV6_ONLY};
+                IO::Socket::INET->new(%opt, LocalAddr => '127.0.0.1')
+        } || BAIL_OUT "failed to create TCP server: $! ($@)";
+}
+
+sub tcp_host_port ($) {
+        my ($s) = @_;
+        my ($h, $p) = ($s->sockhost, $s->sockport);
+        my $ipv4 = $s->sockdomain == Socket::AF_INET();
+        if (wantarray) {
+                $ipv4 ? ($h, $p) : ("[$h]", $p);
+        } else {
+                $ipv4 ? "$h:$p" : "[$h]:$p";
+        }
 }
 
 sub tcp_connect {
         my ($dest, %opt) = @_;
-        my $addr = $dest->sockhost . ':' . $dest->sockport;
-        my $s = IO::Socket::INET->new(
+        my $addr = tcp_host_port($dest);
+        my $s = ref($dest)->new(
                 Proto => 'tcp',
                 Type => Socket::SOCK_STREAM(),
                 PeerAddr => $addr,
                 %opt,
-        ) or Test::More::BAIL_OUT("failed to connect to $addr: $!");
+        ) or BAIL_OUT "failed to connect to $addr: $!";
         $s->autoflush(1);
         $s;
 }
 
+sub require_cmd ($;$) {
+        my ($cmd, $nr) = @_;
+        require PublicInbox::Spawn;
+        state %CACHE;
+        my $bin = $CACHE{$cmd} //= PublicInbox::Spawn::which($cmd);
+        return $bin if $bin;
+        return plan(skip_all => "$cmd missing from PATH for $0") if !$nr;
+        defined(wantarray) ? undef : skip("$cmd missing", $nr);
+}
+
+sub have_xapian_compact (;$) {
+        require_cmd($ENV{XAPIAN_COMPACT} || 'xapian-compact', @_ ? $_[0] : ());
+}
+
 sub require_git ($;$) {
-        my ($req, $maybe) = @_;
-        my ($req_maj, $req_min, $req_sub) = split(/\./, $req);
-        my ($cur_maj, $cur_min, $cur_sub) = (xqx([qw(git --version)])
-                        =~ /version (\d+)\.(\d+)(?:\.(\d+))?/);
-
-        my $req_int = ($req_maj << 24) | ($req_min << 16) | ($req_sub // 0);
-        my $cur_int = ($cur_maj << 24) | ($cur_min << 16) | ($cur_sub // 0);
-        if ($cur_int < $req_int) {
-                return 0 if $maybe;
-                Test::More::plan(skip_all =>
-                        "git $req+ required, have $cur_maj.$cur_min.$cur_sub");
+        my ($req, $nr) = @_;
+        require PublicInbox::Git;
+        state $cur_vstr = PublicInbox::Git::git_version();
+        $req = eval("v$req") unless isvstring($req);
+
+        return 1 if $cur_vstr ge $req;
+        state $cur_ver = sprintf('%vd', $cur_vstr);
+        return plan skip_all => "git $req+ required, have $cur_ver" if !$nr;
+        defined(wantarray) ? undef :
+                skip("git $req+ required (have $cur_ver)", $nr)
+}
+
+sub require_git_http_backend (;$) {
+        my ($nr) = @_;
+        state $ok = do {
+                require PublicInbox::Git;
+                my $git = PublicInbox::Git::git_exe() or plan
+                        skip_all => 'nothing in public-inbox works w/o git';
+                my $rdr = { 1 => \my $out, 2 => \my $err };
+                xsys([$git, qw(http-backend)], undef, $rdr);
+                $out =~ /^Status:/ism;
+        };
+        if (!$ok) {
+                my $msg = "`git http-backend' not available";
+                defined($nr) ? skip $msg, $nr : plan skip_all => $msg;
         }
-        1;
+        $ok;
+}
+
+my %IPv6_VERSION = (
+        'Net::NNTP' => 3.00,
+        'Mail::IMAPClient' => 3.40,
+        'HTTP::Tiny' => 0.042,
+        'Net::POP3' => 2.32,
+);
+
+sub need_accept_filter ($) {
+        my ($af) = @_;
+        return if $^O eq 'netbsd'; # since NetBSD 5.0, no kldstat needed
+        $^O =~ /\A(?:freebsd|dragonfly)\z/ or
+                skip 'SO_ACCEPTFILTER is FreeBSD/NetBSD/Dragonfly-only so far',
+                        1;
+        state $tried = {};
+        ($tried->{$af} //= system("kldstat -m $af >/dev/null")) and
+                skip "$af not loaded: kldload $af", 1;
 }
 
 sub require_mods {
@@ -75,16 +204,41 @@ sub require_mods {
         my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
         my @need;
         while (my $mod = shift(@mods)) {
-                if ($mod eq 'Search::Xapian') {
+                if ($mod eq 'lei') {
+                        require_git(2.6, $maybe ? $maybe : ());
+                        push @mods, qw(DBD::SQLite Xapian +SCM_RIGHTS);
+                        $mod = 'json'; # fall-through
+                }
+                if ($mod eq 'json') {
+                        $mod = 'Cpanel::JSON::XS||JSON::MaybeXS||JSON||JSON::PP'
+                } elsif ($mod eq '-httpd') {
+                        push @mods, qw(Plack::Builder Plack::Util);
+                        next;
+                } elsif ($mod eq '-imapd') {
+                        push @mods, qw(Parse::RecDescent DBD::SQLite);
+                        next;
+                } elsif ($mod eq '-nntpd' || $mod eq 'v2') {
+                        push @mods, qw(DBD::SQLite);
+                        next;
+                }
+                if ($mod eq 'Xapian') {
                         if (eval { require PublicInbox::Search } &&
                                 PublicInbox::Search::load_xapian()) {
                                 next;
                         }
-                } elsif ($mod eq 'Search::Xapian::WritableDatabase') {
-                        if (eval { require PublicInbox::SearchIdx } &&
-                                PublicInbox::SearchIdx::load_xapian_writable()){
-                                        next;
+                } elsif ($mod eq '+SCM_RIGHTS') {
+                        if (my $msg = need_scm_rights()) {
+                                push @need, $msg;
+                                next;
                         }
+                } elsif ($mod eq ':fcntl_lock') {
+                        next if $^O eq 'linux' || require_bsd;
+                        diag "untested platform: $^O, ".
+                                "requiring File::FcntlLock...";
+                        push @mods, 'File::FcntlLock';
+                } elsif ($mod =~ /\A\+(accf_.*)\z/) {
+                        need_accept_filter($1);
+                        next
                 } elsif (index($mod, '||') >= 0) { # "Foo||Bar"
                         my $ok;
                         for my $m (split(/\Q||\E/, $mod)) {
@@ -98,6 +252,7 @@ sub require_mods {
                         eval "require $mod";
                 }
                 if ($@) {
+                        diag "require $mod: $@" if $mod =~ /Gcf2/;
                         push @need, $mod;
                 } elsif ($mod eq 'IO::Socket::SSL' &&
                         # old versions of IO::Socket::SSL aren't supported
@@ -106,11 +261,15 @@ sub require_mods {
                                 !eval{ IO::Socket::SSL->VERSION(2.007); 1 }) {
                         push @need, $@;
                 }
+                if (defined(my $v = $IPv6_VERSION{$mod})) {
+                        $ENV{TEST_IPV4_ONLY} = 1 if !eval { $mod->VERSION($v) };
+                }
         }
         return unless @need;
         my $m = join(', ', @need)." missing for $0";
-        Test::More::skip($m, $maybe) if $maybe;
-        Test::More::plan(skip_all => $m)
+        $m =~ s/\bEmail::MIME\b/Email::MIME (development purposes only)/;
+        skip($m, $maybe) if $maybe;
+        plan(skip_all => $m)
 }
 
 sub key2script ($) {
@@ -122,8 +281,8 @@ sub key2script ($) {
         'blib/script/'.$key;
 }
 
-my @io_mode = ([ *STDIN{IO}, '<&' ], [ *STDOUT{IO}, '>&' ],
-                [ *STDERR{IO}, '>&' ]);
+my @io_mode = ([ *STDIN{IO}, '+<&' ], [ *STDOUT{IO}, '+>&' ],
+                [ *STDERR{IO}, '+>&' ]);
 
 sub _prepare_redirects ($) {
         my ($fhref) = @_;
@@ -131,9 +290,9 @@ sub _prepare_redirects ($) {
         for (my $fd = 0; $fd <= $#io_mode; $fd++) {
                 my $fh = $fhref->[$fd] or next;
                 my ($oldfh, $mode) = @{$io_mode[$fd]};
-                open my $orig, $mode, $oldfh or die "$$oldfh $mode stash: $!";
+                open(my $orig, $mode, $oldfh);
                 $orig_io->[$fd] = $orig;
-                open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
+                open $oldfh, $mode, $fh;
         }
         $orig_io;
 }
@@ -143,7 +302,7 @@ sub _undo_redirects ($) {
         for (my $fd = 0; $fd <= $#io_mode; $fd++) {
                 my $fh = $orig_io->[$fd] or next;
                 my ($oldfh, $mode) = @{$io_mode[$fd]};
-                open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
+                open $oldfh, $mode, $fh;
         }
 }
 
@@ -164,13 +323,13 @@ sub run_script_exit {
         die RUN_SCRIPT_EXIT;
 }
 
-my %cached_scripts;
+our %cached_scripts;
 sub key2sub ($) {
         my ($key) = @_;
         $cached_scripts{$key} //= do {
                 my $f = key2script($key);
-                open my $fh, '<', $f or die "open $f: $!";
-                my $str = do { local $/; <$fh> };
+                open my $fh, '<', $f;
+                my $str = read_all($fh);
                 my $pkg = (split(m!/!, $f))[-1];
                 $pkg =~ s/([a-z])([a-z0-9]+)(\.t)?\z/\U$1\E$2/;
                 $pkg .= "_T" if $3;
@@ -185,7 +344,7 @@ use subs qw(exit);
 sub main {
 # the below "line" directive is a magic comment, see perlsyn(1) manpage
 # line 1 "$f"
-$str
+{ $str }
         0;
 }
 1;
@@ -214,59 +373,107 @@ sub _run_sub ($$$) {
         }
 }
 
+sub no_coredump (@) {
+        my @dirs = @_;
+        my $cwdfh;
+        opendir($cwdfh, '.') if @dirs;
+        my @found;
+        for (@dirs, '.') {
+                chdir $_;
+                my @cores = glob('core.* *.core');
+                push @cores, 'core' if -d 'core';
+                push(@found, "@cores found in $_") if @cores;
+                chdir $cwdfh if $cwdfh;
+        }
+        return if !@found; # keep it quiet.
+        is(scalar(@found), 0, 'no core dumps found');
+        diag(join("\n", @found) . Carp::longmess());
+        if (-t STDIN) {
+                diag 'press ENTER to continue, (q) to quit';
+                chomp(my $line = <STDIN>);
+                xbail 'user quit' if $line =~ /\Aq/;
+        }
+}
+
 sub run_script ($;$$) {
         my ($cmd, $env, $opt) = @_;
+        no_coredump($opt->{-C} ? ($opt->{-C}) : ());
         my ($key, @argv) = @$cmd;
         my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 1;
         my $sub = $run_mode == 0 ? undef : key2sub($key);
         my $fhref = [];
         my $spawn_opt = {};
+        my @tail_paths;
+        local $tail_cmd = $tail_cmd;
         for my $fd (0..2) {
                 my $redir = $opt->{$fd};
                 my $ref = ref($redir);
                 if ($ref eq 'SCALAR') {
-                        open my $fh, '+>', undef or die "open: $!";
+                        my $fh;
+                        if ($ENV{TAIL_ALL} && $fd > 0) {
+                                # tail -F is better, but not portable :<
+                                $tail_cmd //= 'tail -f';
+                                require File::Temp;
+                                $fh = File::Temp->new("fd.$fd-XXXX", TMPDIR=>1);
+                                push @tail_paths, $fh->filename;
+                        } else {
+                                open $fh, '+>', undef;
+                        }
+                        $fh or xbail $!;
                         $fhref->[$fd] = $fh;
                         $spawn_opt->{$fd} = $fh;
                         next if $fd > 0;
                         $fh->autoflush(1);
                         print $fh $$redir or die "print: $!";
-                        seek($fh, 0, SEEK_SET) or die "seek: $!";
+                        seek($fh, 0, SEEK_SET);
                 } elsif ($ref eq 'GLOB') {
                         $spawn_opt->{$fd} = $fhref->[$fd] = $redir;
                 } elsif ($ref) {
                         die "unable to deal with $ref $redir";
                 }
         }
+        my $tail = @tail_paths ? tail_f(@tail_paths, $opt) : undef;
+        if ($key =~ /-(index|cindex|extindex|convert|xcpdb)\z/) {
+                unshift @argv, '--no-fsync';
+        }
         if ($run_mode == 0) {
                 # spawn an independent new process, like real-world use cases:
                 require PublicInbox::Spawn;
                 my $cmd = [ key2script($key), @argv ];
-                my $pid = PublicInbox::Spawn::spawn($cmd, $env, $spawn_opt);
-                if (defined $pid) {
-                        my $r = waitpid($pid, 0);
-                        defined($r) or die "waitpid: $!";
-                        $r == $pid or die "waitpid: expected $pid, got $r";
+                if (my $d = $opt->{'-C'}) {
+                        $cmd->[0] = File::Spec->rel2abs($cmd->[0]);
+                        $spawn_opt->{'-C'} = $d;
                 }
+                PublicInbox::Spawn::run_wait($cmd, $env, $spawn_opt);
         } else { # localize and run everything in the same process:
                 # note: "local *STDIN = *STDIN;" and so forth did not work in
                 # old versions of perl
+                my $umask = umask;
                 local %ENV = $env ? (%ENV, %$env) : %ENV;
-                local %SIG = %SIG;
+                local @SIG{keys %SIG} = map { undef } values %SIG;
+                local $SIG{FPE} = 'IGNORE'; # Perl default
                 local $0 = join(' ', @$cmd);
                 my $orig_io = _prepare_redirects($fhref);
+                opendir(my $cwdfh, '.');
+                chdir $opt->{-C} if defined $opt->{-C};
                 _run_sub($sub, $key, \@argv);
+                # n.b. all our uses of PublicInbox::DS should be fine
+                # with this and we can't Reset here.
+                chdir($cwdfh);
                 _undo_redirects($orig_io);
+                select STDOUT;
+                umask($umask);
         }
 
+        { local $?; undef $tail };
         # slurp the redirects back into user-supplied strings
         for my $fd (1..2) {
                 my $fh = $fhref->[$fd] or next;
-                seek($fh, 0, SEEK_SET) or die "seek: $!";
-                my $redir = $opt->{$fd};
-                local $/;
-                $$redir = <$fh>;
+                next unless -f $fh;
+                seek($fh, 0, SEEK_SET);
+                ${$opt->{$fd}} = read_all($fh);
         }
+        no_coredump($opt->{-C} ? ($opt->{-C}) : ());
         $? == 0;
 }
 
@@ -276,17 +483,17 @@ sub tick (;$) {
         1;
 }
 
-sub wait_for_tail ($;$) {
+sub wait_for_tail {
         my ($tail_pid, $want) = @_;
-        my $wait = 2;
+        my $wait = 2; # "tail -F" sleeps 1.0s at-a-time w/o inotify/kevent
         if ($^O eq 'linux') { # GNU tail may use inotify
                 state $tail_has_inotify;
-                return tick if $want < 0 && $tail_has_inotify;
-                my $end = time + $wait;
+                return tick if !$want && $tail_has_inotify; # before TERM
+                my $end = time + $wait; # wait for startup:
                 my @ino;
                 do {
                         @ino = grep {
-                                readlink($_) =~ /\binotify\b/
+                                (readlink($_) // '') =~ /\binotify\b/
                         } glob("/proc/$tail_pid/fd/*");
                 } while (!@ino && time <= $end and tick);
                 return if !@ino;
@@ -294,7 +501,7 @@ sub wait_for_tail ($;$) {
                 $ino[0] =~ s!/fd/!/fdinfo/!;
                 my @info;
                 do {
-                        if (open my $fh, '<', $ino[0]) {
+                        if (CORE::open(my $fh, '<', $ino[0])) {
                                 local $/ = "\n";
                                 @info = grep(/^inotify wd:/, <$fh>);
                         }
@@ -318,6 +525,11 @@ sub xsys {
         $? >> 8
 }
 
+sub xsys_e { # like "/bin/sh -e"
+        xsys(@_) == 0 or
+                BAIL_OUT (ref $_[0] ? "@{$_[0]}" : "@_"). " failed \$?=$?"
+}
+
 # like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
 sub xqx {
         my ($cmd, $env, $rdr) = @_;
@@ -326,13 +538,38 @@ sub xqx {
         wantarray ? split(/^/m, $out) : $out;
 }
 
+sub tail_f (@) {
+        my @f = grep(defined, @_);
+        $tail_cmd or return; # "tail -F" or "tail -f"
+        my $opt = (ref($f[-1]) eq 'HASH') ? pop(@f) : {};
+        my $clofork = $opt->{-CLOFORK} // [];
+        my @cfmap = map {
+                my $fl = fcntl($_, F_GETFD, 0);
+                fcntl($_, F_SETFD, $fl | FD_CLOEXEC) unless $fl & FD_CLOEXEC;
+                ($_, $fl);
+        } @$clofork;
+        for (@f) { open(my $fh, '>>', $_) };
+        my $cmd = [ split(/ /, $tail_cmd), @f ];
+        require PublicInbox::Spawn;
+        my $pid = PublicInbox::Spawn::spawn($cmd, undef, { 1 => 2 });
+        while (my ($io, $fl) = splice(@cfmap, 0, 2)) {
+                fcntl($io, F_SETFD, $fl);
+        }
+        wait_for_tail($pid, scalar @f);
+        require PublicInbox::AutoReap;
+        PublicInbox::AutoReap->new($pid, \&wait_for_tail);
+}
+
 sub start_script {
         my ($cmd, $env, $opt) = @_;
         my ($key, @argv) = @$cmd;
         my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2;
         my $sub = $run_mode == 0 ? undef : key2sub($key);
-        my $tail_pid;
-        if (my $tail_cmd = $ENV{TAIL}) {
+        my $tail;
+        my @xh = split(/\s+/, $ENV{TEST_DAEMON_XH} // '');
+        @xh = () if $key !~ /-(?:imapd|netd|httpd|pop3d|nntpd)\z/;
+        push @argv, @xh;
+        if ($tail_cmd) {
                 my @paths;
                 for (@argv) {
                         next unless /\A--std(?:err|out)=(.+)\z/;
@@ -350,34 +587,27 @@ sub start_script {
                                 }
                         }
                 }
-                if (@paths) {
-                        defined($tail_pid = fork) or die "fork: $!\n";
-                        if ($tail_pid == 0) {
-                                # make sure files exist, first
-                                open my $fh, '>>', $_ for @paths;
-                                open(STDOUT, '>&STDERR') or die "1>&2: $!";
-                                exec(split(' ', $tail_cmd), @paths);
-                                die "$tail_cmd failed: $!";
-                        }
-                        wait_for_tail($tail_pid, scalar @paths);
-                }
+                $tail = tail_f(@paths, $opt);
         }
-        defined(my $pid = fork) or die "fork: $!\n";
+        require PublicInbox::DS;
+        my $oset = PublicInbox::DS::block_signals();
+        require PublicInbox::OnDestroy;
+        my $tmp_mask = PublicInbox::OnDestroy::all(
+                                        \&PublicInbox::DS::sig_setmask, $oset);
+        my $pid = PublicInbox::DS::fork_persist();
         if ($pid == 0) {
-                eval { PublicInbox::DS->Reset };
+                close($_) for (@{delete($opt->{-CLOFORK}) // []});
                 # pretend to be systemd (cf. sd_listen_fds(3))
                 # 3 == SD_LISTEN_FDS_START
                 my $fd;
-                for ($fd = 0; 1; $fd++) {
-                        my $s = $opt->{$fd};
-                        last if $fd >= 3 && !defined($s);
-                        next unless $s;
-                        my $fl = fcntl($s, F_GETFD, 0);
-                        if (($fl & FD_CLOEXEC) != FD_CLOEXEC) {
-                                warn "got FD:".fileno($s)." w/o CLOEXEC\n";
+                for ($fd = 0; $fd < 3 || defined($opt->{$fd}); $fd++) {
+                        my $io = $opt->{$fd} // next;
+                        my $old = fileno($io);
+                        if ($old == $fd) {
+                                fcntl($io, F_SETFD, 0);
+                        } else {
+                                dup2($old, $fd) // die "dup2($old, $fd): $!";
                         }
-                        fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC);
-                        dup2(fileno($s), $fd) or die "dup2 failed: $!\n";
                 }
                 %ENV = (%ENV, %$env) if $env;
                 my $fds = $fd - 3;
@@ -385,9 +615,12 @@ sub start_script {
                         $ENV{LISTEN_PID} = $$;
                         $ENV{LISTEN_FDS} = $fds;
                 }
-                $0 = join(' ', @$cmd);
+                if ($opt->{-C}) { chdir($opt->{-C}) }
+                $0 = join(' ', @$cmd, @xh);
+                local @SIG{keys %SIG} = map { undef } values %SIG;
+                local $SIG{FPE} = 'IGNORE'; # Perl default
+                undef $tmp_mask;
                 if ($sub) {
-                        eval { PublicInbox::DS->Reset };
                         _run_sub($sub, $key, \@argv);
                         POSIX::_exit($? >> 8);
                 } else {
@@ -395,48 +628,430 @@ sub start_script {
                         die "FAIL: ",join(' ', $key, @argv), ": $!\n";
                 }
         }
-        PublicInboxTestProcess->new($pid, $tail_pid);
+        undef $tmp_mask;
+        require PublicInbox::AutoReap;
+        my $td = PublicInbox::AutoReap->new($pid);
+        $td->{-extra} = $tail;
+        $td;
+}
+
+# favor lei() or lei_ok() over $lei for new code
+sub lei (@) {
+        my ($cmd, $env, $xopt) = @_;
+        $lei_out = $lei_err = '';
+        if (!ref($cmd)) {
+                ($env, $xopt) = grep { (!defined) || ref } @_;
+                $cmd = [ grep { defined && !ref } @_ ];
+        }
+        my $res = run_script(['lei', @$cmd], $env, $xopt // $lei_opt);
+        if ($lei_err ne '') {
+                if ($lei_err =~ /Use of uninitialized/ ||
+                        $lei_err =~ m!\bArgument .*? isn't numeric in !) {
+                        fail "lei_err=$lei_err";
+                } else {
+                        diag "lei_err=$lei_err" if $lei_loud;
+                }
+        }
+        $res;
+};
+
+sub lei_ok (@) {
+        state $PWD = $ENV{PWD} // Cwd::getcwd();
+        my $msg = ref($_[-1]) eq 'SCALAR' ? pop(@_) : undef;
+        my $tmpdir = quotemeta(File::Spec->tmpdir);
+        # filter out anything that looks like a path name for consistent logs
+        my @msg = ref($_[0]) eq 'ARRAY' ? @{$_[0]} : @_;
+        if (!$lei_loud) {
+                for (@msg) {
+                        s!(127\.0\.0\.1|\[::1\]):(?:\d+)!$1:\$PORT!g;
+                        s!$tmpdir\b/(?:[^/]+/)?!\$TMPDIR/!g;
+                        s!\Q$PWD\E\b!\$PWD!g;
+                }
+        }
+        ok(lei(@_), "lei @msg". ($msg ? " ($$msg)" : '')) or
+                diag "\$?=$? err=$lei_err";
+}
+
+sub json_utf8 () {
+        state $x = ref(PublicInbox::Config->json)->new->utf8->canonical;
+}
+
+sub is_xdeeply ($$$) {
+        my ($x, $y, $desc) = @_;
+        my $ok = is_deeply($x, $y, $desc);
+        diag explain([$x, '!=', $y]) if !$ok;
+        $ok;
+}
+
+sub ignore_inline_c_missing {
+        $_[0] = join('', grep(/\S/, grep(!/compilation aborted/,
+                grep(!/\bInline\b/, split(/^/m, $_[0])))));
+}
+
+sub need_scm_rights () {
+        state $ok = PublicInbox::Spawn->can('send_cmd4') || do {
+                        require PublicInbox::Syscall;
+                        PublicInbox::Syscall->can('send_cmd4'); # Linux only
+                } || eval { require Socket::MsgHdr; 1 };
+        return if $ok;
+        'need SCM_RIGHTS support: Inline::C unconfigured/missing '.
+        '(mkdir -p ~/.cache/public-inbox/inline-c) OR Socket::MsgHdr missing';
 }
 
-sub have_xapian_compact () {
+# returns a pipe with FD_CLOEXEC disabled on the write-end
+sub quit_waiter_pipe () {
+        pipe(my $r, my $w);
+        fcntl($w, F_SETFD, fcntl($w, F_GETFD, 0) & ~FD_CLOEXEC);
+        ($r, $w);
+}
+
+sub wait_for_eof ($$;$) {
+        my ($io, $msg, $sec) = @_;
+        vec(my $rset = '', fileno($io), 1) = 1;
+        ok(select($rset, undef, undef, $sec // 9), "$msg (select)");
+        is(my $line = <$io>, undef, "$msg EOF");
+}
+
+sub test_lei {
+SKIP: {
+        my ($cb) = pop @_;
+        my $test_opt = shift // {};
+        require_git(2.6, 1);
+        my $mods = $test_opt->{mods} // [ 'lei' ];
+        require_mods(@$mods, 2);
+
+        # set PERL_INLINE_DIRECTORY before clobbering XDG_CACHE_HOME
         require PublicInbox::Spawn;
-        # $ENV{XAPIAN_COMPACT} is used by PublicInbox/Xapcmd.pm, too
-        PublicInbox::Spawn::which($ENV{XAPIAN_COMPACT} || 'xapian-compact');
+        require PublicInbox::Config;
+        require File::Path;
+        eval { # use XDG_CACHE_HOME, first:
+                require PublicInbox::XapHelperCxx;
+                PublicInbox::XapHelperCxx::check_build();
+        };
+        local %ENV = %ENV;
+        delete $ENV{XDG_DATA_HOME};
+        delete $ENV{XDG_CONFIG_HOME};
+        delete $ENV{XDG_CACHE_HOME};
+        $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com';
+        $ENV{GIT_COMMITTER_NAME} = 'lei user';
+        $ENV{LANG} = $ENV{LC_ALL} = 'C';
+        my (undef, $fn, $lineno) = caller(0);
+        my $t = "$fn:$lineno";
+        $lei_opt = { 1 => \$lei_out, 2 => \$lei_err };
+        my ($daemon_pid, $for_destroy, $daemon_xrd);
+        my $tmpdir = $test_opt->{tmpdir};
+        File::Path::mkpath($tmpdir) if defined $tmpdir;
+        ($tmpdir, $for_destroy) = tmpdir unless $tmpdir;
+        my ($dead_r, $dead_w);
+        state $persist_xrd = $ENV{TEST_LEI_DAEMON_PERSIST_DIR};
+        SKIP: {
+                $ENV{TEST_LEI_ONESHOT} and
+                        xbail 'TEST_LEI_ONESHOT no longer supported';
+                my $home = "$tmpdir/lei-daemon";
+                mkdir($home, 0700);
+                local $ENV{HOME} = $home;
+                my $persist;
+                if ($persist_xrd && !$test_opt->{daemon_only}) {
+                        $persist = $daemon_xrd = $persist_xrd;
+                } else {
+                        $daemon_xrd = "$home/xdg_run";
+                        mkdir($daemon_xrd, 0700);
+                        ($dead_r, $dead_w) = quit_waiter_pipe;
+                }
+                local $ENV{XDG_RUNTIME_DIR} = $daemon_xrd;
+                $cb->(); # likely shares $dead_w with lei-daemon
+                undef $dead_w; # so select() wakes up when daemon dies
+                if ($persist) { # remove before ~/.local gets removed
+                        File::Path::rmtree([glob("$home/*")]);
+                        File::Path::rmtree("$home/.config");
+                } else {
+                        no_coredump $tmpdir;
+                        lei_ok(qw(daemon-pid), \"daemon-pid after $t");
+                        chomp($daemon_pid = $lei_out);
+                        if (!$daemon_pid) {
+                                fail("daemon not running after $t");
+                                skip 'daemon died unexpectedly', 2;
+                        }
+                        ok(kill(0, $daemon_pid), "daemon running after $t");
+                        lei_ok(qw(daemon-kill), \"daemon-kill after $t");
+                }
+        }; # SKIP for lei_daemon
+        if ($daemon_pid) {
+                wait_for_eof($dead_r, 'daemon quit pipe');
+                no_coredump $tmpdir;
+                my $f = "$daemon_xrd/lei/errors.log";
+                open my $fh, '<', $f;
+                my @l = <$fh>;
+                is_xdeeply(\@l, [],
+                        "$t daemon XDG_RUNTIME_DIR/lei/errors.log empty");
+        }
+}; # SKIP if missing git 2.6+ || Xapian || SQLite || json
+} # /test_lei
+
+# returns the pathname to a ~/.public-inbox/config in scalar context,
+# ($test_home, $pi_config_pathname) in list context
+sub setup_public_inboxes () {
+        my $test_home = "t/home2";
+        my $pi_config = "$test_home/.public-inbox/config";
+        my $stamp = "$test_home/setup-stamp";
+        my @ret = ($test_home, $pi_config);
+        return @ret if -f $stamp;
+
+        require PublicInbox::Lock;
+        my $lk = PublicInbox::Lock->new("$test_home/setup.lock");
+        my $end = $lk->lock_for_scope;
+        return @ret if -f $stamp;
+
+        local $ENV{PI_CONFIG} = $pi_config;
+        for my $V (1, 2) {
+                run_script([qw(-init --skip-docdata), "-V$V",
+                                '--newsgroup', "t.v$V", "t$V",
+                                "$test_home/t$V", "http://example.com/t$V",
+                                "t$V\@example.com" ]) or xbail "init v$V";
+                unlink "$test_home/t$V/description";
+        }
+        require PublicInbox::Config;
+        require PublicInbox::InboxWritable;
+        my $cfg = PublicInbox::Config->new;
+        my $seen = 0;
+        $cfg->each_inbox(sub {
+                my ($ibx) = @_;
+                $ibx->{-no_fsync} = 1;
+                my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+                my $V = $ibx->version;
+                my @eml = (glob('t/*.eml'), 't/data/0001.patch');
+                for (@eml) {
+                        next if $_ eq 't/psgi_v2-old.eml'; # dup mid
+                        $im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
+                        $seen++;
+                }
+                $im->done;
+        });
+        $seen or BAIL_OUT 'no imports';
+        open my $fh, '>', $stamp;
+        @ret;
 }
 
-package PublicInboxTestProcess;
-use strict;
+our %COMMIT_ENV = (
+        GIT_AUTHOR_NAME => 'A U Thor',
+        GIT_COMMITTER_NAME => 'C O Mitter',
+        GIT_AUTHOR_EMAIL => 'a@example.com',
+        GIT_COMMITTER_EMAIL => 'c@example.com',
+);
 
-# prevent new threads from inheriting these objects
-sub CLONE_SKIP { 1 }
+# for memoizing based on coderefs and various create_* params
+sub my_sum {
+        require PublicInbox::SHA;
+        require Data::Dumper;
+        my $d = Data::Dumper->new(\@_);
+        $d->$_(1) for qw(Deparse Sortkeys Terse);
+        my @l = split /\n/s, $d->Dump;
+        @l = grep !/\$\^H\{.+?[A-Z]+\(0x[0-9a-f]+\)/, @l; # autodie addresses
+        my @addr = grep /[A-Za-z]+\(0x[0-9a-f]+\)/, @l;
+        xbail 'undumpable addresses: ', \@addr if @addr;
+        substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
+}
 
-sub new {
-        my ($klass, $pid, $tail_pid) = @_;
-        bless { pid => $pid, tail_pid => $tail_pid, owner => $$ }, $klass;
+sub create_dir (@) {
+        my ($ident, $cb) = (shift, pop);
+        my %opt = @_;
+        require PublicInbox::Lock;
+        require PublicInbox::Import;
+        my $tmpdir = delete $opt{tmpdir};
+        my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+        my $dir = "t/data-gen/$base.$ident-".my_sum($cb, \%opt);
+        require File::Path;
+        my $new = File::Path::make_path($dir);
+        my $lk = PublicInbox::Lock->new("$dir/creat.lock");
+        my $scope = $lk->lock_for_scope;
+        if (!-f "$dir/creat.stamp") {
+                opendir(my $cwd, '.');
+                chdir($dir);
+                local %ENV = (%ENV, %COMMIT_ENV);
+                $cb->($dir);
+                chdir($cwd); # some $cb chdir around
+                open my $s, '>', "$dir/creat.stamp";
+        }
+        return $dir if !defined($tmpdir);
+        xsys_e([qw(/bin/cp -Rp), $dir, $tmpdir]);
+        $tmpdir;
 }
 
-sub kill {
-        my ($self, $sig) = @_;
-        CORE::kill($sig // 'TERM', $self->{pid});
+sub create_coderepo (@) {
+        my $ident = shift;
+        require PublicInbox::Import;
+        my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+        create_dir "$ident-$db", @_;
 }
 
-sub join {
-        my ($self, $sig) = @_;
-        my $pid = delete $self->{pid} or return;
-        CORE::kill($sig, $pid) if defined $sig;
-        my $ret = waitpid($pid, 0);
-        defined($ret) or die "waitpid($pid): $!";
-        $ret == $pid or die "waitpid($pid) != $ret";
+sub create_inbox ($;@) {
+        my $ident = shift;
+        my $cb = pop;
+        my %opt = @_;
+        require PublicInbox::Lock;
+        require PublicInbox::InboxWritable;
+        require PublicInbox::Import;
+        my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+        my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+        my $tmpdir = delete $opt{tmpdir};
+        my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+        require File::Path;
+        my $new = File::Path::make_path($dir);
+        my $lk = PublicInbox::Lock->new("$dir/creat.lock");
+        $opt{inboxdir} = File::Spec->rel2abs($dir);
+        $opt{name} //= $ident;
+        my $scope = $lk->lock_for_scope;
+        my $pre_cb = delete $opt{pre_cb};
+        $pre_cb->($dir) if $pre_cb && $new;
+        $opt{-no_fsync} = 1;
+        my $no_gc = delete $opt{-no_gc};
+        my $addr = $opt{address} // [];
+        $opt{-primary_address} //= $addr->[0] // "$ident\@example.com";
+        my $parallel = delete($opt{importer_parallel}) // 0;
+        my $creat_opt = { nproc => delete($opt{nproc}) // 1 };
+        my $ibx = PublicInbox::InboxWritable->new({ %opt }, $creat_opt);
+        if (!-f "$dir/creat.stamp") {
+                my $im = $ibx->importer($parallel);
+                $cb->($im, $ibx);
+                $im->done if $im;
+                unless ($no_gc) {
+                        my @to_gc = $ibx->version == 1 ? ($ibx->{inboxdir}) :
+                                        glob("$ibx->{inboxdir}/git/*.git");
+                        for my $dir (@to_gc) {
+                                xsys_e([ qw(git gc -q) ], { GIT_DIR => $dir });
+                        }
+                }
+                open my $s, '>', "$dir/creat.stamp";
+        }
+        if ($tmpdir) {
+                undef $ibx;
+                xsys([qw(/bin/cp -Rp), $dir, $tmpdir]) == 0 or
+                        BAIL_OUT "cp $dir $tmpdir";
+                $opt{inboxdir} = $tmpdir;
+                $ibx = PublicInbox::InboxWritable->new(\%opt);
+        }
+        $ibx;
 }
 
-sub DESTROY {
-        my ($self) = @_;
-        return if $self->{owner} != $$;
-        if (my $tail_pid = delete $self->{tail_pid}) {
-                PublicInbox::TestCommon::wait_for_tail($tail_pid, -1);
-                CORE::kill('TERM', $tail_pid);
+sub test_httpd ($$;$$) {
+        my ($env, $client, $skip, $cb) = @_;
+        my ($tmpdir, $for_destroy);
+        $env->{TMPDIR} //= do {
+                ($tmpdir, $for_destroy) = tmpdir();
+                $tmpdir;
+        };
+        for (qw(PI_CONFIG)) { $env->{$_} or BAIL_OUT "$_ unset" }
+        SKIP: {
+                require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent),
+                                $skip // 1);
+                my $sock = tcp_server() or die;
+                my ($out, $err) = map { "$env->{TMPDIR}/std$_.log" } qw(out err);
+                my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
+                my $td = start_script($cmd, $env, { 3 => $sock });
+                my ($h, $p) = tcp_host_port($sock);
+                local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
+                my $ua = LWP::UserAgent->new;
+                $ua->max_redirect(0);
+                local $CURRENT_DAEMON = $td;
+                Plack::Test::ExternalServer::test_psgi(client => $client,
+                                                        ua => $ua);
+                $cb->() if $cb;
+                $td->join('TERM');
+                open my $fh, '<', $err;
+                my $e = read_all($fh);
+                if ($e =~ s/^Plack::Middleware::ReverseProxy missing,\n//gms) {
+                        $e =~ s/^URL generation for redirects .*\n//gms;
+                }
+                is($e, '', 'no errors');
+        }
+};
+
+# TODO: support fstat(1) on OpenBSD, lsof already works on FreeBSD + Linux
+# don't use this for deleted file checks, we only check that on Linux atm
+# and we can readlink /proc/PID/fd/* directly
+sub lsof_pid ($;$) {
+        my ($pid, $rdr) = @_;
+        state $lsof = require_cmd('lsof', 1);
+        $lsof or skip 'lsof missing/broken', 1;
+        my @out = xqx([$lsof, '-p', $pid], undef, $rdr);
+        if ($?) {
+                undef $lsof;
+                skip "lsof -p PID broken \$?=$?", 1;
+        }
+        my @cols = split ' ', $out[0];
+        if (($cols[7] // '') eq 'NODE') { # normal lsof
+                @out;
+        } else { # busybox lsof ignores -p, so we DIY it
+                grep /\b$pid\b/, @out;
         }
-        $self->join('TERM');
+}
+
+sub no_pollerfd ($) {
+        my ($pid) = @_;
+        my ($re, @cmd);
+        $^O eq 'linux' and
+                ($re, @cmd) = (qr/\Q[eventpoll]\E/, qw(lsof -p), $pid);
+        # n.b. *BSDs uses kqueue to emulate signalfd and/or inotify,
+        # and we can't distinguish which is which easily.
+        SKIP: {
+                (@cmd && $re) or
+                        skip 'open poller test is Linux-only', 1;
+                my $bin = require_cmd($cmd[0], 1) or skip "$cmd[0] missing", 1;
+                $cmd[0] = $bin;
+                my @of = xqx(\@cmd, {}, {2 => \(my $e)});
+                my $err = $?;
+                skip "$bin broken? (\$?=$err) ($e)", 1 if $err;
+                @of = grep /\b$pid\b/, @of; # busybox lsof ignores -p
+                is(grep(/$re/, @of), 0, "no $re FDs") or diag explain(\@of);
+        }
+}
+
+sub cfg_new ($;@) {
+        my ($tmpdir, @body) = @_;
+        require PublicInbox::Config;
+        my $f = "$tmpdir/tmp_cfg";
+        open my $fh, '>', $f;
+        print $fh @body;
+        close $fh;
+        PublicInbox::Config->new($f);
+}
+
+our $strace_cmd;
+sub strace (@) {
+        my ($for_daemon) = @_;
+        skip 'linux only test', 1 if $^O ne 'linux';
+        if ($for_daemon) {
+                my $f = '/proc/sys/kernel/yama/ptrace_scope';
+                # TODO: we could fiddle with prctl in the daemon to make
+                # things work, but I'm not sure it's worth it...
+                state $ps = do {
+                        my $fh;
+                        CORE::open($fh, '<', $f) ? readline($fh) : 0;
+                };
+                chomp $ps;
+                skip "strace unusable on daemons\n$f is `$ps' (!= 0)", 1 if $ps;
+        }
+        require_cmd('strace', 1) or skip 'strace not available', 1;
+}
+
+sub strace_inject (;$) {
+        my $cmd = strace(@_);
+        state $ver = do {
+                require PublicInbox::Spawn;
+                my $v = PublicInbox::Spawn::run_qx([$cmd, '-V']);
+                $v =~ m!version\s+([1-9]+\.[0-9]+)! or
+                                xbail "no strace -V: $v";
+                eval("v$1");
+        };
+        $ver ge v4.16 or skip "$cmd too old for syscall injection (".
+                                sprintf('v%vd', $ver). ' < v4.16)', 1;
+        $cmd
+}
+
+sub oct_is ($$$) {
+        my ($got, $exp, $msg) = @_;
+        @_ = (sprintf('0%03o', $got), sprintf('0%03o', $exp), $msg);
+        goto &is; # tail recursion to get lineno from callers on failure
 }
 
 package PublicInbox::TestCommon::InboxWakeup;
diff --git a/lib/PublicInbox/Tmpfile.pm b/lib/PublicInbox/Tmpfile.pm
index 25bb3a52..72dd9d24 100644
--- a/lib/PublicInbox/Tmpfile.pm
+++ b/lib/PublicInbox/Tmpfile.pm
@@ -1,9 +1,9 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Tmpfile;
-use strict;
-use warnings;
-use base qw(Exporter);
+use v5.12;
+use parent qw(Exporter);
+use autodie qw(unlink);
 our @EXPORT = qw(tmpfile);
 use Fcntl qw(:DEFAULT);
 use Errno qw(EEXIST);
@@ -13,12 +13,15 @@ use File::Spec;
 # unlinked filename which makes sense when viewed with lsof
 # (at least on Linux)
 # And if we ever stop caring to have debuggable filenames, O_TMPFILE :)
+#
+# This is also for Perl <5.32 which lacks: open(..., '+>>', undef)
+# <https://rt.perl.org/Ticket/Display.html?id=134221>
 sub tmpfile ($;$$) {
         my ($id, $sock, $append) = @_;
         if (defined $sock) {
                 # add the socket inode number so we can figure out which
                 # socket it belongs to
-                my @st = stat($sock);
+                my @st = stat($sock) or die "stat($sock): $!";
                 $id .= '-ino:'.$st[1];
         }
         $id =~ tr!/!^!;
@@ -28,7 +31,7 @@ sub tmpfile ($;$$) {
         do {
                 my $fn = File::Spec->tmpdir . "/$id-".time.'-'.rand;
                 if (sysopen(my $fh, $fn, $fl, 0600)) { # likely
-                        unlink($fn) or warn "unlink($fn): $!"; # FS broken
+                        unlink($fn);
                         return $fh; # success
                 }
         } while ($! == EEXIST);
diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm
index 56b6002a..41c2842a 100644
--- a/lib/PublicInbox/URIimap.pm
+++ b/lib/PublicInbox/URIimap.pm
@@ -1,20 +1,24 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # cf. RFC 5092, which the `URI' package doesn't support
 #
 # This depends only on the documented public API of the `URI' dist,
 # not on internal `_'-prefixed subclasses such as `URI::_server'
 #
-# <https://metacpan.org/pod/URI::imap> exists, but it's not in
-# common distros.
+# <https://metacpan.org/pod/URI::imap> exists, but it appears
+# unmaintained, isn't in common distros, nor does it support
+# ';FOO=BAR' parameters such as UIDVALIDITY
 #
 # RFC 2192 also describes ";TYPE=<list_type>"
 package PublicInbox::URIimap;
-use strict;
+use v5.12;
 use URI::Split qw(uri_split uri_join); # part of URI
-use URI::Escape qw(uri_unescape);
+use URI::Escape qw(uri_unescape uri_escape);
+use overload '""' => \&as_string;
 
 my %default_ports = (imap => 143, imaps => 993);
+# for enc-auth-type and enc-user in RFC 5092
+my $achar = qr/[A-Za-z0-9%\-_\.\!\$'\(\)\+\,\&\=\*]+/;
 
 sub new {
         my ($class, $url) = @_;
@@ -28,6 +32,9 @@ sub canonical {
         my ($scheme, $auth, $path, $query, $_frag) = uri_split($$self);
         $path =~ s!\A/+!/!; # excessive leading slash
 
+        # upper-case uidvalidity= and uid= parameter names
+        $path =~ s/;([^=]+)=([^;]*)/;\U$1\E=$2/g;
+
         # lowercase the host portion
         $auth =~ s#\A(.*@)?(.*?)(?::([0-9]+))?\z#
                 my $ret = ($1//'').lc($2);
@@ -55,7 +62,7 @@ sub path {
         my ($self) = @_;
         my (undef, undef, $path) = uri_split($$self);
         $path =~ s!\A/+!!;
-        $path =~ s/;.*\z//; # ;UIDVALIDITY=nz-number
+        $path =~ s!/?;.*\z!!; # [;UIDVALIDITY=nz-number]/;UID=nz-number
         $path eq '' ? undef : $path;
 }
 
@@ -65,7 +72,38 @@ sub mailbox {
         defined($path) ? uri_unescape($path) : undef;
 }
 
-# TODO: UIDVALIDITY, search, and other params
+sub uidvalidity { # read/write
+        my ($self, $val) = @_;
+        my ($scheme, $auth, $path, $query, $frag) = uri_split($$self);
+        if (defined $val) {
+                if ($path =~ s!;UIDVALIDITY=[^;/]*\b!;UIDVALIDITY=$val!i or
+                                $path =~ s!/;!;UIDVALIDITY=$val/;!i) {
+                        # s// already changed it
+                } else { # both s// failed, so just append
+                        $path .= ";UIDVALIDITY=$val";
+                }
+                $$self = uri_join($scheme, $auth, $path, $query, $frag);
+        }
+        $path =~ s!\A/+!!;
+        $path =~ m!\A[^;]+;UIDVALIDITY=([1-9][0-9]*)\b!i ?
+                ($1 + 0) : undef;
+}
+
+sub uid {
+        my ($self, $val) = @_;
+        my ($scheme, $auth, $path, $query, $frag) = uri_split($$self);
+        if (scalar(@_) == 2) {
+                if (!defined $val) {
+                        $path =~ s!/;UID=[^;/]*\b!!i;
+                } else {
+                        $path =~ s!/;UID=[^;/]*\b!/;UID=$val!i or
+                                $path .= "/;UID=$val";
+                }
+                $$self = uri_join($scheme, $auth, $path, $query);
+        }
+        $path =~ m!\A/[^;]+(?:;UIDVALIDITY=[^;/]+)?/;UID=([1-9][0-9]*)\b!i ?
+                ($1 + 0) : undef;
+}
 
 sub port {
         my ($self) = @_;
@@ -80,12 +118,34 @@ sub authority {
 }
 
 sub user {
-        my ($self) = @_;
-        my (undef, $auth) = uri_split($$self);
-        $auth =~ s/@.*\z// or return undef; # drop host:port
-        $auth =~ s/;.*\z//; # drop ;AUTH=...
-        $auth =~ s/:.*\z//; # drop password
-        uri_unescape($auth);
+        my ($self, $val) = @_;
+        my ($scheme, $auth, $path, $query) = uri_split($$self);
+        my $at_host_port;
+        $auth =~ s/(@.*)\z// and $at_host_port = $1; # stash host:port for now
+        if (scalar(@_) == 2) { # set, this clobbers password, too
+                if (defined $val) {
+                        my $uval = uri_escape($val);
+                        if (defined($at_host_port)) {
+                                $auth =~ s!\A.*?(;AUTH=$achar).*!$uval$1!ix
+                                        or $auth = $uval;
+                        } else {
+                                substr($auth, 0, 0) = "$uval@";
+                        }
+                } elsif (defined($at_host_port)) { # clobber
+                        $auth =~ s!\A.*?(;AUTH=$achar).*!$1!i or $auth = '';
+                        if ($at_host_port && $auth eq '') {
+                                $at_host_port =~ s/\A\@//;
+                        }
+                }
+                $at_host_port //= '';
+                $$self = uri_join($scheme, $auth.$at_host_port, $path, $query);
+                $val;
+        } else { # read-only
+                $at_host_port // return undef; # explicit undef for scalar
+                $auth =~ s/;.*\z//; # drop ;AUTH=...
+                $auth =~ s/:.*\z//; # drop password
+                $auth eq '' ? undef : uri_unescape($auth);
+        }
 }
 
 sub password {
@@ -97,10 +157,32 @@ sub password {
 }
 
 sub auth {
-        my ($self) = @_;
-        my (undef, $auth) = uri_split($$self);
-        $auth =~ s/@.*\z//; # drop host:port
-        $auth =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef;
+        my ($self, $val) = @_;
+        my ($scheme, $auth, $path, $query) = uri_split($$self);
+        my $at_host_port;
+        $auth =~ s/(@.*)\z// and $at_host_port = $1; # stash host:port for now
+        if (scalar(@_) == 2) {
+                if (defined $val) {
+                        my $uval = uri_escape($val);
+                        if ($auth =~ s!;AUTH=$achar!;AUTH=$uval!ix) {
+                                # replaced existing
+                        } elsif (defined($at_host_port)) {
+                                $auth .= ";AUTH=$uval";
+                        } else {
+                                substr($auth, 0, 0) = ";AUTH=$uval@";
+                        }
+                } else { # clobber
+                        $auth =~ s!;AUTH=$achar!!i;
+                        if ($at_host_port && $auth eq '') {
+                                $at_host_port =~ s/\A\@//;
+                        }
+                }
+                $at_host_port //= '';
+                $$self = uri_join($scheme, $auth.$at_host_port, $path, $query);
+                $val;
+        } else { # read-only
+                $auth =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef;
+        }
 }
 
 sub scheme {
@@ -110,4 +192,6 @@ sub scheme {
 
 sub as_string { ${$_[0]} }
 
+sub clone { ref($_[0])->new(as_string($_[0])) }
+
 1;
diff --git a/lib/PublicInbox/URInntps.pm b/lib/PublicInbox/URInntps.pm
new file mode 100644
index 00000000..88c8d641
--- /dev/null
+++ b/lib/PublicInbox/URInntps.pm
@@ -0,0 +1,18 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# deal with the lack of URI::nntps in upstream URI.
+# nntps is IANA registered, snews is deprecated
+# cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=983419
+# Fixed in URI 5.08, we can drop this by 2035 when LTS distros all have it
+package PublicInbox::URInntps;
+use v5.12;
+use parent qw(URI::snews);
+use URI;
+
+sub new {
+        my ($class, $url) = @_;
+        $url =~ m!\Anntps://!i ? bless(\$url, $class) : URI->new($url);
+}
+
+1;
diff --git a/lib/PublicInbox/Umask.pm b/lib/PublicInbox/Umask.pm
new file mode 100644
index 00000000..2c859e65
--- /dev/null
+++ b/lib/PublicInbox/Umask.pm
@@ -0,0 +1,70 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# base class to ensures Xapian||SQLite files respect core.sharedRepository
+# of git repos
+package PublicInbox::Umask;
+use v5.12;
+use PublicInbox::OnDestroy;
+
+use constant {
+        PERM_UMASK => 0,
+        OLD_PERM_GROUP => 1,
+        OLD_PERM_EVERYBODY => 2,
+        PERM_GROUP => 0660,
+        PERM_EVERYBODY => 0664,
+};
+
+sub _read_git_config_perm {
+        my ($self) = @_;
+        chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
+        $perm;
+}
+
+sub _git_config_perm {
+        my $self = shift;
+        my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
+        $perm //= '';
+        return PERM_UMASK if $perm eq '' || $perm eq 'umask';
+        return PERM_GROUP if $perm eq 'group';
+        return PERM_EVERYBODY if $perm =~ /\A(?:all|world|everybody)\z/;
+        return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
+        return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
+
+        my $i = oct($perm);
+        return PERM_UMASK if $i == PERM_UMASK;
+        return PERM_GROUP if $i == OLD_PERM_GROUP;
+        return PERM_EVERYBODY if $i == OLD_PERM_EVERYBODY;
+
+        if (($i & 0600) != 0600) {
+                die "core.sharedRepository mode invalid: ".
+                    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
+        }
+        ($i & 0666);
+}
+
+sub _umask_for {
+        my ($perm) = @_; # _git_config_perm return value
+        my $rv = $perm;
+        return umask if $rv == 0;
+
+        # set +x bit if +r or +w were set
+        $rv |= 0100 if ($rv & 0600);
+        $rv |= 0010 if ($rv & 0060);
+        $rv |= 0001 if ($rv & 0006);
+        (~$rv & 0777);
+}
+
+sub with_umask {
+        my ($self, $cb, @arg) = @_;
+        my $old = umask($self->{umask} //= umask_prepare($self));
+        my $restore = on_destroy \&CORE::umask, $old;
+        $cb ? $cb->(@arg) : $restore;
+}
+
+sub umask_prepare {
+        my ($self) = @_;
+        _umask_for(_git_config_perm($self));
+}
+
+1;
diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm
index 945e7ae7..ddbd7a2e 100644
--- a/lib/PublicInbox/Unsubscribe.pm
+++ b/lib/PublicInbox/Unsubscribe.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Standalone PSGI app to handle HTTP(s) unsubscribe links generated
@@ -12,7 +12,8 @@ use warnings;
 use Crypt::CBC;
 use Plack::Util;
 use MIME::Base64 qw(decode_base64url);
-my $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+my @CODE_URL = qw(http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git
+        https://public-inbox.org/public-inbox.git);
 my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');
 
 sub new {
@@ -38,13 +39,15 @@ sub new {
         my $unsubscribe = $opt{unsubscribe} or
                 die "`unsubscribe' callback not given\n";
 
+        my $code_url = $opt{code_url} || \@CODE_URL;
+        $code_url = [ $code_url ] if ref($code_url) ne 'ARRAY';
         bless {
-                pi_config => $opt{pi_config}, # PublicInbox::Config
+                pi_cfg => $opt{pi_config}, # PublicInbox::Config
                 owner_email => $opt{owner_email},
                 cipher => $cipher,
                 unsubscribe => $unsubscribe,
                 contact => qq(<a\nhref="mailto:$e">$e</a>),
-                code_url => $opt{code_url} || $CODE_URL,
+                code_url => $code_url,
                 confirm => $opt{confirm},
         }, $class;
 }
@@ -78,10 +81,7 @@ sub _user_list_addr {
         }
         my $user = eval { $self->{cipher}->decrypt(decode_base64url($u)) };
         if (!defined $user || index($user, '@') < 1) {
-                my $err = quotemeta($@);
-                my $errors = $env->{'psgi.errors'};
-                $errors->print("error decrypting: $u\n");
-                $errors->print("$_\n") for split("\n", $err);
+                warn "error decrypting: $u: ", ($@ ? quotemeta($@) : ());
                 $u = Plack::Util::encode_html($u);
                 return r($self, 400, 'Bad request', "Failed to decrypt: $u");
         }
@@ -138,7 +138,7 @@ sub r {
                 "<html><head><title>$title</title></head><body><pre>".
                 join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
                 "<pre>This page is available under AGPL-3.0+\n" .
-                "git clone $self->{code_url}\n" .
+                join('', map { "git clone $_\n" } @{$self->{code_url}}) .
                 qq(Email $self->{contact} if you have any questions).
                 '</pre></body></html>'
         ] ];
@@ -149,9 +149,9 @@ sub archive_info {
         my $archive_url = $self->{archive_urls}->{$list_addr};
 
         unless ($archive_url) {
-                if (my $config = $self->{pi_config}) {
+                if (my $cfg = $self->{pi_cfg}) {
                         # PublicInbox::Config::lookup
-                        my $ibx = $config->lookup($list_addr);
+                        my $ibx = $cfg->lookup($list_addr);
                         # PublicInbox::Inbox::base_url
                         $archive_url = $ibx->base_url if $ibx;
                 }
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
index 789da2f1..f28610f7 100644
--- a/lib/PublicInbox/UserContent.pm
+++ b/lib/PublicInbox/UserContent.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Self-updating module containing a sample CSS for client-side
@@ -17,13 +17,18 @@ sub CSS () {
          * It reduces eyestrain for me, and energy usage for all:
          * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
          */
-        * { background:#000 !important; color:#ccc !important }
+        * { font-size: 100% !important;
+                font-family: monospace !important;
+                background:#000 !important;
+                color:#ccc !important }
+        pre { white-space: pre-wrap !important }
 
         /*
          * Underlined links add visual noise which make them hard-to-read.
          * Use colors to make them stand out, instead.
          */
-        a:link { color:#69f !important; text-decoration:none !important }
+        a:link { color:#69f !important;
+                text-decoration:none !important }
         a:visited { color:#96f !important }
 
         /* quoted text in emails gets a different color */
@@ -101,6 +106,7 @@ if (scalar(@ARGV) == 1 && -r __FILE__) {
 
         open my $rw, '+<', __FILE__ or die $!;
         my $out = do { local $/; <$rw> } or die $!;
+        $css =~ s/; /;\n\t\t/g;
         $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm;
         seek $rw, 0, 0;
         print $rw $out or die $!;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 5ff2af10..15a73158 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # This interface wraps and mimics PublicInbox::Import
@@ -6,70 +6,39 @@
 package PublicInbox::V2Writable;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Lock);
+use parent qw(PublicInbox::Lock PublicInbox::IPC);
 use PublicInbox::SearchIdxShard;
+use PublicInbox::IPC qw(nproc_shards);
 use PublicInbox::Eml;
 use PublicInbox::Git;
 use PublicInbox::Import;
+use PublicInbox::MultiGit;
 use PublicInbox::MID qw(mids references);
-use PublicInbox::ContentHash qw(content_hash content_digest);
+use PublicInbox::ContentHash qw(content_hash content_digest git_sha);
 use PublicInbox::InboxWritable;
 use PublicInbox::OverIdx;
 use PublicInbox::Msgmap;
-use PublicInbox::Spawn qw(spawn popen_rd);
-use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size);
+use PublicInbox::Spawn qw(spawn popen_rd run_die);
+use PublicInbox::Search;
+use PublicInbox::SearchIdx qw(log2stack is_ancestor check_size is_bad_blob);
 use IO::Handle; # ->autoflush
-use File::Temp ();
+use POSIX ();
 
 my $OID = qr/[a-f0-9]{40,}/;
 # an estimate of the post-packed size to the raw uncompressed size
-my $PACKING_FACTOR = 0.4;
-
-# SATA storage lags behind what CPUs are capable of, so relying on
-# nproc(1) can be misleading and having extra Xapian shards is a
-# waste of FDs and space.  It can also lead to excessive IO latency
-# and slow things down.  Users on NVME or other fast storage can
-# use the NPROC env or switches in our script/public-inbox-* programs
-# to increase Xapian shards
-our $NPROC_MAX_DEFAULT = 4;
-
-sub detect_nproc () {
-        # getconf(1) is POSIX, but *NPROCESSORS* vars are not
-        for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
-                `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
-        }
-        for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
-                `$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
-        }
-
-        # should we bother with `sysctl hw.ncpu`?  Those only give
-        # us total processor count, not online processor count.
-        undef
-}
-
-sub nproc_shards ($) {
-        my ($creat_opt) = @_;
-        my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH';
-        $n //= $ENV{NPROC};
-        if (!$n) {
-                # assume 2 cores if not detectable or zero
-                state $NPROC_DETECTED = detect_nproc() || 2;
-                $n = $NPROC_DETECTED;
-                $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
-        }
-
-        # subtract for the main process and git-fast-import
-        $n -= 1;
-        $n < 1 ? 1 : $n;
-}
+our $PACKING_FACTOR = 0.4;
 
 sub count_shards ($) {
         my ($self) = @_;
         # always load existing shards in case core count changes:
         # Also, shard count may change while -watch is running
-        my $srch = $self->{ibx}->search or return 0;
-        delete $self->{ibx}->{search};
-        $srch->{nshard} // 0
+        if (my $ibx = $self->{ibx}) {
+                my $srch = $ibx->search or return 0;
+                delete $ibx->{search};
+                $srch->{nshard} // 0
+        } else { # ExtSearchIdx
+                $self->{nshard} = scalar($self->xdb_shards_flat);
+        }
 }
 
 sub new {
@@ -79,18 +48,14 @@ sub new {
         $v2ibx = PublicInbox::InboxWritable->new($v2ibx);
         my $dir = $v2ibx->assert_usable_dir;
         unless (-d $dir) {
-                if ($creat) {
-                        require File::Path;
-                        File::Path::mkpath($dir);
-                } else {
-                        die "$dir does not exist\n";
-                }
+                die "$dir does not exist\n" if !$creat;
+                require File::Path;
+                File::Path::mkpath($dir);
         }
-        $v2ibx->umask_prepare;
-
         my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
         my $self = {
                 ibx => $v2ibx,
+                mg => PublicInbox::MultiGit->new($dir, 'all.git', 'git'),
                 im => undef, #  PublicInbox::Import
                 parallel => 1,
                 transact_bytes => 0,
@@ -117,35 +82,34 @@ sub init_inbox {
         }
         $self->idx_init;
         $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
-        my $epoch_max = -1;
-        git_dir_latest($self, \$epoch_max);
-        if (defined $skip_epoch && $epoch_max == -1) {
-                $epoch_max = $skip_epoch;
-        }
-        $self->git_init($epoch_max >= 0 ? $epoch_max : 0);
+        my $max = $self->{ibx}->max_git_epoch;
+        $max = $skip_epoch if (defined($skip_epoch) && !defined($max));
+        $self->{mg}->add_epoch($max // 0);
         $self->done;
 }
 
-# returns undef on duplicate or spam
-# mimics Import::add and wraps it for v2
-sub add {
-        my ($self, $eml, $check_cb) = @_;
-        $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
+sub idx_shard ($$) {
+        my ($self, $num) = @_;
+        $self->{idx_shards}->[$num % scalar(@{$self->{idx_shards}})];
 }
 
 # indexes a message, returns true if checkpointing is needed
-sub do_idx ($$$$) {
-        my ($self, $msgref, $mime, $smsg) = @_;
-        $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
-        $self->{oidx}->add_overview($mime, $smsg);
-        my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
-        $idx->index_raw($msgref, $mime, $smsg);
-        my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
+sub do_idx ($$$) {
+        my ($self, $eml, $smsg) = @_;
+        $self->{oidx}->add_overview($eml, $smsg);
+        if ($self->{-need_xapian}) {
+                my $idx = idx_shard($self, $smsg->{num});
+                $idx->index_eml($eml, $smsg);
+        }
+        my $n = $self->{transact_bytes} += $smsg->{bytes};
         $n >= $self->{batch_bytes};
 }
 
-sub _add {
+# returns undef on duplicate or spam
+# mimics Import::add and wraps it for v2
+sub add {
         my ($self, $mime, $check_cb) = @_;
+        my $restore = $self->{ibx}->with_umask;
 
         # spam check:
         if ($check_cb) {
@@ -168,11 +132,9 @@ sub _add {
         $cmt = $im->get_mark($cmt);
         $self->{last_commit}->[$self->{epoch_max}] = $cmt;
 
-        my $msgref = delete $smsg->{-raw_email};
-        if (do_idx($self, $msgref, $mime, $smsg)) {
+        if (do_idx($self, $mime, $smsg)) {
                 $self->checkpoint;
         }
-
         $cmt;
 }
 
@@ -249,11 +211,6 @@ sub v2_num_for_harder {
         ($num, $mid0);
 }
 
-sub idx_shard {
-        my ($self, $shard_i) = @_;
-        $self->{idx_shards}->[$shard_i];
-}
-
 sub _idx_init { # with_umask callback
         my ($self, $opt) = @_;
         $self->lock_acquire unless $opt && $opt->{-skip_lock};
@@ -264,22 +221,31 @@ sub _idx_init { # with_umask callback
         $self->{shards} = $nshards if $nshards && $nshards != $self->{shards};
         $self->{batch_bytes} = $opt->{batch_size} //
                                 $PublicInbox::SearchIdx::BATCH_BYTES;
-        $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
 
         # need to create all shards before initializing msgmap FD
         # idx_shards must be visible to all forked processes
         my $max = $self->{shards} - 1;
         my $idx = $self->{idx_shards} = [];
         push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max);
+        $self->{-need_xapian} = $idx->[0]->need_xapian;
+
+        # SearchIdxShard may do their own flushing, so don't scale
+        # until after forking
+        $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
+
+        my $ibx = $self->{ibx} or return; # ExtIdxSearch
 
         # Now that all subprocesses are up, we can open the FDs
         # for SQLite:
-        my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
-                                "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
-                                $self->{ibx}->{-no_fsync} ? 2 : 1);
+        my $mm = $self->{mm} = PublicInbox::Msgmap->new_file($ibx, 1);
         $mm->{dbh}->begin_work;
 }
 
+sub parallel_init ($$) {
+        my ($self, $indexlevel) = @_;
+        $self->{parallel} = 0 if ($indexlevel // 'full') eq 'basic';
+}
+
 # idempotent
 sub idx_init {
         my ($self, $opt) = @_;
@@ -292,17 +258,7 @@ sub idx_init {
         delete @$ibx{qw(mm search)};
         $ibx->git->cleanup;
 
-        $self->{parallel} = 0 if ($ibx->{indexlevel}//'') eq 'basic';
-        if ($self->{parallel}) {
-                pipe(my ($r, $w)) or die "pipe failed: $!";
-                # pipe for barrier notifications doesn't need to be big,
-                # 1031: F_SETPIPE_SZ
-                fcntl($w, 1031, 4096) if $^O eq 'linux';
-                $self->{bnote} = [ $r, $w ];
-                $w->autoflush(1);
-        }
-
-        $ibx->umask_prepare;
+        parallel_init($self, $ibx->{indexlevel});
         $ibx->with_umask(\&_idx_init, $self, $opt);
 }
 
@@ -312,14 +268,10 @@ sub idx_init {
 sub _replace_oids ($$$) {
         my ($self, $mime, $replace_map) = @_;
         $self->done;
-        my $pfx = "$self->{ibx}->{inboxdir}/git";
+        my $ibx = $self->{ibx};
+        my $pfx = "$ibx->{inboxdir}/git";
         my $rewrites = []; # epoch => commit
-        my $max = $self->{epoch_max};
-
-        unless (defined($max)) {
-                defined(my $latest = git_dir_latest($self, \$max)) or return;
-                $self->{epoch_max} = $max;
-        }
+        my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return;
 
         foreach my $i (0..$max) {
                 my $git_dir = "$pfx/$i.git";
@@ -414,7 +366,7 @@ sub rewrite_internal ($$;$$$) {
                         } else { # ->purge or ->remove
                                 $self->{mm}->num_delete($num);
                         }
-                        unindex_oid_remote($self, $oid, $mid);
+                        unindex_oid_aux($self, $oid, $mid);
                 }
         }
 
@@ -433,17 +385,16 @@ sub rewrite_internal ($$;$$$) {
 # (retval[2]) is not part of the stable API shared with Import->remove
 sub remove {
         my ($self, $eml, $cmt_msg) = @_;
-        my $r = $self->{ibx}->with_umask(\&rewrite_internal,
-                                                $self, $eml, $cmt_msg);
+        my $restore = $self->{ibx}->with_umask;
+        my $r = rewrite_internal($self, $eml, $cmt_msg);
         defined($r) && defined($r->[0]) ? @$r: undef;
 }
 
 sub _replace ($$;$$) {
         my ($self, $old_eml, $new_eml, $sref) = @_;
-        my $arg = [ $self, $old_eml, undef, $new_eml, $sref ];
-        my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal,
-                        $self, $old_eml, undef, $new_eml, $sref) or return;
-
+        my $restore = $self->{ibx}->with_umask;
+        my $rewritten = rewrite_internal($self, $old_eml, undef,
+                                        $new_eml, $sref) or return;
         my $rewrites = $rewritten->{rewrites};
         # ->done is called if there are rewrites since we gc+prune from git
         $self->idx_init if @$rewrites;
@@ -462,23 +413,6 @@ sub purge {
         $rewritten->{rewrites}
 }
 
-# returns the git object_id of $fh, does not write the object to FS
-sub git_hash_raw ($$) {
-        my ($self, $raw) = @_;
-        # grab the expected OID we have to reindex:
-        pipe(my($in, $w)) or die "pipe: $!";
-        my $git_dir = $self->{ibx}->git->{git_dir};
-        my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
-        my $r = popen_rd($cmd, undef, { 0 => $in });
-        print $w $$raw or die "print \$w: $!";
-        close $w or die "close \$w: $!";
-        local $/ = "\n";
-        chomp(my $oid = <$r>);
-        close $r or die "git hash-object failed: $?";
-        $oid =~ /\A$OID\z/ or die "OID not expected: $oid";
-        $oid;
-}
-
 sub _check_mids_match ($$$) {
         my ($old_list, $new_list, $hdrs) = @_;
         my %old_mids = map { $_ => 1 } @$old_list;
@@ -513,7 +447,7 @@ sub replace ($$$) {
         PublicInbox::Import::drop_unwanted_headers($new_mime);
 
         my $raw = $new_mime->as_string;
-        my $expect_oid = git_hash_raw($self, \$raw);
+        my $expect_oid = git_sha(1, \$raw)->hexdigest;
         my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return;
         my $need_reindex = $rewritten->{need_reindex};
 
@@ -531,23 +465,23 @@ W: $list
         }
 
         # make sure we really got the OID:
-        my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid);
+        my ($blob, $type, $bytes) = $self->git->check($expect_oid);
         $blob eq $expect_oid or die "BUG: $expect_oid not found after replace";
 
         # don't leak FDs to Xapian:
-        $self->{ibx}->git->cleanup;
+        $self->git->cleanup;
 
         # reindex modified messages:
         for my $smsg (@$need_reindex) {
                 my $new_smsg = bless {
                         blob => $blob,
-                        raw_bytes => $bytes,
                         num => $smsg->{num},
                         mid => $smsg->{mid},
                 }, 'PublicInbox::Smsg';
                 my $sync = { autime => $smsg->{ds}, cotime => $smsg->{ts} };
                 $new_smsg->populate($new_mime, $sync);
-                do_idx($self, \$raw, $new_mime, $new_smsg);
+                $new_smsg->set_bytes($raw, $bytes);
+                do_idx($self, $new_mime, $new_smsg);
         }
         $rewritten->{rewrites};
 }
@@ -558,7 +492,7 @@ sub last_epoch_commit ($$;$) {
         $self->{mm}->last_commit_xap($v, $i, $cmt);
 }
 
-sub set_last_commits ($) {
+sub set_last_commits ($) { # this is NOT for ExtSearchIdx
         my ($self) = @_;
         defined(my $epoch_max = $self->{epoch_max}) or return;
         my $last_commit = $self->{last_commit};
@@ -569,65 +503,57 @@ sub set_last_commits ($) {
         }
 }
 
-sub barrier_init {
-        my ($self, $n) = @_;
-        $self->{bnote} or return;
-        --$n;
-        my $barrier = { map { $_ => 1 } (0..$n) };
-}
-
-sub barrier_wait {
-        my ($self, $barrier) = @_;
-        my $bnote = $self->{bnote} or return;
-        my $r = $bnote->[0];
-        while (scalar keys %$barrier) {
-                defined(my $l = readline($r)) or die "EOF on barrier_wait: $!";
-                $l =~ /\Abarrier (\d+)/ or die "bad line on barrier_wait: $l";
-                delete $barrier->{$1} or die "bad shard[$1] on barrier wait";
-        }
-}
-
 # public
 sub checkpoint ($;$) {
         my ($self, $wait) = @_;
 
-        if (my $im = $self->{im}) {
-                if ($wait) {
-                        $im->barrier;
-                } else {
-                        $im->checkpoint;
-                }
-        }
+        $self->{im}->barrier if $self->{im};
         my $shards = $self->{idx_shards};
         if ($shards) {
-                my $dbh = $self->{mm}->{dbh};
+                my $dbh = $self->{mm}->{dbh} if $self->{mm};
 
                 # SQLite msgmap data is second in importance
-                $dbh->commit;
+                $dbh->commit if $dbh;
+                eval { $dbh->do('PRAGMA optimize') };
 
                 # SQLite overview is third
                 $self->{oidx}->commit_lazy;
 
                 # Now deal with Xapian
-                if ($wait) {
-                        my $barrier = $self->barrier_init(scalar @$shards);
 
-                        # each shard needs to issue a barrier command
-                        $_->shard_barrier for @$shards;
+                # start commit_txn_lazy asynchronously on all parallel shards
+                # (non-parallel waits here)
+                $_->ipc_do('commit_txn_lazy') for @$shards;
+
+                # transactions started on parallel shards,
+                # wait for them by issuing an echo command (echo can only
+                # run after commit_txn_lazy is done)
+                if ($wait && $self->{parallel}) {
+                        my $i = 0;
+                        for my $shard (@$shards) {
+                                my $echo = $shard->ipc_do('echo', $i);
+                                $echo == $i or die <<"";
+shard[$i] bad echo:$echo != $i waiting for txn commit
+
+                                ++$i;
+                        }
+                }
 
-                        # wait for each Xapian shard
-                        $self->barrier_wait($barrier);
-                } else {
-                        $_->shard_commit for @$shards;
+                my $midx = $self->{midx}; # misc index
+                if ($midx) {
+                        $midx->commit_txn;
+                        $PublicInbox::Search::X{CLOEXEC_UNSET} and
+                                $self->git->cleanup;
                 }
 
                 # last_commit is special, don't commit these until
-                # remote shards are done:
-                $dbh->begin_work;
+                # Xapian shards are done:
+                $dbh->begin_work if $dbh;
                 set_last_commits($self);
-                $dbh->commit;
-
-                $dbh->begin_work;
+                if ($dbh) {
+                        $dbh->commit;
+                        $dbh->begin_work;
+                }
         }
         $self->{total_bytes} += $self->{transact_bytes};
         $self->{transact_bytes} = 0;
@@ -658,6 +584,11 @@ sub done {
                 eval { $mm->{dbh}->$m };
                 $err .= "msgmap $m: $@\n" if $@;
         }
+        if ($self->{oidx} && $self->{oidx}->{dbh} && $err) {
+                eval { $self->{oidx}->rollback_lazy };
+                $err .= "overview rollback: $@\n" if $@;
+        }
+
         my $shards = delete $self->{idx_shards};
         if ($shards) {
                 for (@$shards) {
@@ -667,91 +598,14 @@ sub done {
         }
         eval { $self->{oidx}->dbh_close };
         $err .= "over close: $@\n" if $@;
-        delete $self->{bnote};
+        delete $self->{midx};
         my $nbytes = $self->{total_bytes};
         $self->{total_bytes} = 0;
         $self->lock_release(!!$nbytes) if $shards;
-        $self->{ibx}->git->cleanup;
+        $self->git->cleanup;
         die $err if $err;
 }
 
-sub fill_alternates ($$) {
-        my ($self, $epoch) = @_;
-
-        my $pfx = "$self->{ibx}->{inboxdir}/git";
-        my $all = "$self->{ibx}->{inboxdir}/all.git";
-        PublicInbox::Import::init_bare($all) unless -d $all;
-        my $info_dir = "$all/objects/info";
-        my $alt = "$info_dir/alternates";
-        my (%alt, $new);
-        my $mode = 0644;
-        if (-e $alt) {
-                open(my $fh, '<', $alt) or die "open < $alt: $!\n";
-                $mode = (stat($fh))[2] & 07777;
-
-                # we assign a sort score to every alternate and favor
-                # the newest (highest numbered) one because loose objects
-                # require scanning epochs and only the latest epoch is
-                # expected to see loose objects
-                my $score;
-                my $other = 0; # in case admin adds non-epoch repos
-                %alt = map {;
-                        if (m!\A\Q../../\E([0-9]+)\.git/objects\z!) {
-                                $score = $1 + 0;
-                        } else {
-                                $score = --$other;
-                        }
-                        $_ => $score;
-                } split(/\n+/, do { local $/; <$fh> });
-        }
-
-        foreach my $i (0..$epoch) {
-                my $dir = "../../git/$i.git/objects";
-                if (!exists($alt{$dir}) && -d "$pfx/$i.git") {
-                        $alt{$dir} = $i;
-                        $new = 1;
-                }
-        }
-        return unless $new;
-
-        my $fh = File::Temp->new(TEMPLATE => 'alt-XXXXXXXX', DIR => $info_dir);
-        my $tmp = $fh->filename;
-        print $fh join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"
-                or die "print $tmp: $!\n";
-        chmod($mode, $fh) or die "fchmod $tmp: $!\n";
-        close $fh or die "close $tmp $!\n";
-        rename($tmp, $alt) or die "rename $tmp => $alt: $!\n";
-        $fh->unlink_on_destroy(0);
-}
-
-sub git_init {
-        my ($self, $epoch) = @_;
-        my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git";
-        PublicInbox::Import::init_bare($git_dir);
-        my @cmd = (qw/git config/, "--file=$git_dir/config",
-                        'include.path', '../../all.git/config');
-        PublicInbox::Import::run_die(\@cmd);
-        fill_alternates($self, $epoch);
-        $git_dir
-}
-
-sub git_dir_latest {
-        my ($self, $max) = @_;
-        $$max = -1;
-        my $pfx = "$self->{ibx}->{inboxdir}/git";
-        return unless -d $pfx;
-        my $latest;
-        opendir my $dh, $pfx or die "opendir $pfx: $!\n";
-        while (defined(my $git_dir = readdir($dh))) {
-                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
-                if ($1 > $$max) {
-                        $$max = $1;
-                        $latest = "$pfx/$git_dir";
-                }
-        }
-        $latest;
-}
-
 sub importer {
         my ($self) = @_;
         my $im = $self->{im};
@@ -763,14 +617,14 @@ sub importer {
                         $im->done;
                         $im = undef;
                         $self->checkpoint;
-                        my $git_dir = $self->git_init(++$self->{epoch_max});
-                        my $git = PublicInbox::Git->new($git_dir);
+                        my $dir = $self->{mg}->add_epoch(++$self->{epoch_max});
+                        my $git = PublicInbox::Git->new($dir);
                         return $self->import_init($git, 0);
                 }
         }
         my $epoch = 0;
         my $max;
-        my $latest = git_dir_latest($self, \$max);
+        my $latest = $self->{ibx}->git_dir_latest(\$max);
         if (defined $latest) {
                 my $git = PublicInbox::Git->new($latest);
                 my $packed_bytes = $git->packed_bytes;
@@ -784,8 +638,8 @@ sub importer {
                 }
         }
         $self->{epoch_max} = $epoch;
-        $latest = $self->git_init($epoch);
-        $self->import_init(PublicInbox::Git->new($latest), 0);
+        my $dir = $self->{mg}->add_epoch($epoch);
+        $self->import_init(PublicInbox::Git->new($dir), 0);
 }
 
 sub import_init {
@@ -798,23 +652,6 @@ sub import_init {
         $im;
 }
 
-# XXX experimental
-sub diff ($$$) {
-        my ($mid, $cur, $new) = @_;
-
-        my $ah = File::Temp->new(TEMPLATE => 'email-cur-XXXXXXXX', TMPDIR => 1);
-        print $ah $cur->as_string or die "print: $!";
-        $ah->flush or die "flush: $!";
-        PublicInbox::Import::drop_unwanted_headers($new);
-        my $bh = File::Temp->new(TEMPLATE => 'email-new-XXXXXXXX', TMPDIR => 1);
-        print $bh $new->as_string or die "print: $!";
-        $bh->flush or die "flush: $!";
-        my $cmd = [ qw(diff -u), $ah->filename, $bh->filename ];
-        print STDERR "# MID conflict <$mid>\n";
-        my $pid = spawn($cmd, undef, { 1 => 2 });
-        waitpid($pid, 0) == $pid or die "diff did not finish";
-}
-
 sub get_blob ($$) {
         my ($self, $smsg) = @_;
         if (my $im = $self->{im}) {
@@ -838,52 +675,68 @@ sub content_exists ($$$) {
                 }
                 my $cur = PublicInbox::Eml->new($msg);
                 return 1 if content_matches($chashes, $cur);
-
-                # XXX DEBUG_DIFF is experimental and may be removed
-                diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
         }
         undef;
 }
 
 sub atfork_child {
         my ($self) = @_;
-        if (my $shards = $self->{idx_shards}) {
-                $_->atfork_child foreach @$shards;
+        if (my $older_siblings = $self->{idx_shards}) {
+                $_->ipc_sibling_atfork_child for @$older_siblings;
         }
         if (my $im = $self->{im}) {
                 $im->atfork_child;
         }
-        die "unexpected mm" if $self->{mm};
-        close $self->{bnote}->[0] or die "close bnote[0]: $!\n";
-        $self->{bnote}->[1];
+        die "BUG: unexpected mm" if $self->{mm};
 }
 
 sub reindex_checkpoint ($$) {
         my ($self, $sync) = @_;
 
-        $self->{ibx}->git->cleanup; # *async_wait
+        $self->git->async_wait_all;
+        $self->update_last_commit($sync);
         ${$sync->{need_checkpoint}} = 0;
         my $mm_tmp = $sync->{mm_tmp};
         $mm_tmp->atfork_prepare if $mm_tmp;
-        $self->done; # release lock
+        die 'BUG: {im} during reindex' if $self->{im};
+        if ($self->{ibx_map} && !$sync->{checkpoint_unlocks}) {
+                checkpoint($self, 1); # no need to release lock on pure index
+        } else {
+                $self->done; # release lock
+        }
 
-        if (my $pr = $sync->{-opt}->{-progress}) {
+        if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) {
                 $pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}}));
         }
 
         # allow -watch or -mda to write...
         $self->idx_init($sync->{-opt}); # reacquire lock
+        if (my $intvl = $sync->{check_intvl}) { # eidx
+                $sync->{next_check} = PublicInbox::DS::now() + $intvl;
+        }
         $mm_tmp->atfork_parent if $mm_tmp;
 }
 
+sub index_finalize ($$) {
+        my ($arg, $index) = @_;
+        ++$arg->{self}->{nidx};
+        if (defined(my $cur = $arg->{cur_cmt})) {
+                ${$arg->{latest_cmt}} = $cur;
+        } elsif ($index) {
+                die 'BUG: {cur_cmt} missing';
+        } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
 sub index_oid { # cat_async callback
         my ($bref, $oid, $type, $size, $arg) = @_;
-        return if $size == 0; # purged
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 1); # size == 0 purged returns here
+        my $self = $arg->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
         my ($num, $mid0);
         my $eml = PublicInbox::Eml->new($$bref);
         my $mids = mids($eml);
         my $chash = content_hash($eml);
-        my $self = $arg->{v2w};
 
         if (scalar(@$mids) == 0) {
                 warn "E: $oid has no Message-ID, skipping\n";
@@ -891,16 +744,20 @@ sub index_oid { # cat_async callback
         }
 
         # {unindexed} is unlikely
-        if ((my $unindexed = $arg->{unindexed}) && scalar(@$mids) == 1) {
-                $num = delete($unindexed->{$mids->[0]});
+        if (my $unindexed = $arg->{unindexed}) {
+                my $oidbin = pack('H*', $oid);
+                my $u = $unindexed->{$oidbin};
+                ($num, $mid0) = splice(@$u, 0, 2) if $u;
                 if (defined $num) {
-                        $mid0 = $mids->[0];
                         $self->{mm}->mid_set($num, $mid0);
-                        delete($arg->{unindexed}) if !keys(%$unindexed);
+                        if (scalar(@$u) == 0) { # done with current OID
+                                delete $unindexed->{$oidbin};
+                                delete($arg->{unindexed}) if !keys(%$unindexed);
+                        }
                 }
         }
+        my $oidx = $self->{oidx};
         if (!defined($num)) { # reuse if reindexing (or duplicates)
-                my $oidx = $self->{oidx};
                 for my $mid (@$mids) {
                         ($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid);
                         last if defined $num;
@@ -908,6 +765,11 @@ sub index_oid { # cat_async callback
         }
         $mid0 //= do { # is this a number we got before?
                 $num = $arg->{mm_tmp}->num_for($mids->[0]);
+
+                # don't clobber existing if Message-ID is reused:
+                if (my $x = defined($num) ? $oidx->get_art($num) : undef) {
+                        undef($num) if $x->{blob} ne $oid;
+                }
                 defined($num) ? $mids->[0] : undef;
         };
         if (!defined($num)) {
@@ -941,45 +803,53 @@ sub index_oid { # cat_async callback
         }
         ++${$arg->{nr}};
         my $smsg = bless {
-                raw_bytes => $size,
                 num => $num,
                 blob => $oid,
                 mid => $mid0,
         }, 'PublicInbox::Smsg';
         $smsg->populate($eml, $arg);
-        if (do_idx($self, $bref, $eml, $smsg)) {
+        $smsg->set_bytes($$bref, $size);
+        if (do_idx($self, $eml, $smsg)) {
                 ${$arg->{need_checkpoint}} = 1;
         }
+        index_finalize($arg, 1);
 }
 
 # only update last_commit for $i on reindex iff newer than current
-sub update_last_commit ($$$$) {
-        my ($self, $git, $i, $cmt) = @_;
-        my $last = last_epoch_commit($self, $i);
-        if (defined $last && is_ancestor($git, $last, $cmt)) {
-                my @cmd = (qw(rev-list --count), "$last..$cmt");
-                chomp(my $n = $git->qx(@cmd));
+sub update_last_commit {
+        my ($self, $sync, $stk) = @_;
+        my $unit = $sync->{unit} // return;
+        my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        defined($latest_cmt) or return;
+        my $last = last_epoch_commit($self, $unit->{epoch});
+        if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) {
+                my @cmd = (qw(rev-list --count), "$last..$latest_cmt");
+                chomp(my $n = $unit->{git}->qx(@cmd));
                 return if $n ne '' && $n == 0;
         }
-        last_epoch_commit($self, $i, $cmt);
-}
+        # don't rewind if --{since,until,before,after} are in use
+        return if (defined($last) &&
+                        grep(defined, @{$sync->{-opt}}{qw(since until)}) &&
+                        is_ancestor($self->git, $latest_cmt, $last));
 
-sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" }
+        last_epoch_commit($self, $unit->{epoch}, $latest_cmt);
+}
 
-sub last_commits ($$) {
-        my ($self, $epoch_max) = @_;
+sub last_commits {
+        my ($self, $sync) = @_;
         my $heads = [];
-        for (my $i = $epoch_max; $i >= 0; $i--) {
+        for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
                 $heads->[$i] = last_epoch_commit($self, $i);
         }
         $heads;
 }
 
 # returns a revision range for git-log(1)
-sub log_range ($$$$$) {
-        my ($self, $sync, $git, $i, $tip) = @_;
+sub log_range ($$$) {
+        my ($sync, $unit, $tip) = @_;
         my $opt = $sync->{-opt};
         my $pr = $opt->{-progress} if (($opt->{verbose} || 0) > 1);
+        my $i = $unit->{epoch};
         my $cur = $sync->{ranges}->[$i] or do {
                 $pr->("$i.git indexing all of $tip\n") if $pr;
                 return $tip; # all of it
@@ -993,7 +863,8 @@ sub log_range ($$$$$) {
 
         my $range = "$cur..$tip";
         $pr->("$i.git checking contiguity... ") if $pr;
-        if (is_ancestor($git, $cur, $tip)) { # common case
+        my $git = $unit->{git};
+        if (is_ancestor($sync->{self}->git, $cur, $tip)) { # common case
                 $pr->("OK\n") if $pr;
                 my $n = $git->qx(qw(rev-list --count), $range);
                 chomp($n);
@@ -1018,62 +889,102 @@ Rewritten history? (in $git->{git_dir})
                         warn "discarding history at $cur\n";
                 }
                 warn <<"";
-reindexing $git->{git_dir} starting at
-$range
-
-                $sync->{unindex_range}->{$i} = "$base..$cur";
+reindexing $git->{git_dir}
+starting at $range
+
+                # $cur^0 may no longer exist if pruned by git
+                if ($git->qx(qw(rev-parse -q --verify), "$cur^0")) {
+                        $unit->{unindex_range} = "$base..$cur";
+                } elsif ($base && $git->qx(qw(rev-parse -q --verify), $base)) {
+                        $unit->{unindex_range} = "$base..";
+                } else {
+                        warn "W: unable to unindex before $range\n";
+                }
         }
         $range;
 }
 
-sub sync_prepare ($$$) {
-        my ($self, $sync, $epoch_max) = @_;
+# overridden by ExtSearchIdx
+sub artnum_max { $_[0]->{mm}->num_highwater }
+
+sub sync_prepare ($$) {
+        my ($self, $sync) = @_;
+        $sync->{ranges} = sync_ranges($self, $sync);
         my $pr = $sync->{-opt}->{-progress};
         my $regen_max = 0;
-        my $head = $self->{ibx}->{ref_head} || 'refs/heads/master';
-
-        # reindex stops at the current heads and we later rerun index_sync
-        # without {reindex}
-        my $reindex_heads = last_commits($self, $epoch_max) if $sync->{reindex};
-
-        for (my $i = $epoch_max; $i >= 0; $i--) {
-                my $git_dir = git_dir_n($self, $i);
+        my $head = $sync->{ibx}->{ref_head} || 'HEAD';
+        my $pfx;
+        if ($pr) {
+                ($pfx) = ($sync->{ibx}->{inboxdir} =~ m!([^/]+)\z!g);
+                $pfx //= $sync->{ibx}->{inboxdir};
+        }
+
+        my $reindex_heads;
+        if ($self->{ibx_map}) {
+                # ExtSearchIdx won't index messages unless they're in
+                # over.sqlite3 for a given inbox, so don't read beyond
+                # what's in the per-inbox index.
+                $reindex_heads = [];
+                my $v = PublicInbox::Search::SCHEMA_VERSION;
+                my $mm = $sync->{ibx}->mm;
+                for my $i (0..$sync->{epoch_max}) {
+                        $reindex_heads->[$i] = $mm->last_commit_xap($v, $i);
+                }
+        } elsif ($sync->{reindex}) { # V2 inbox
+                # reindex stops at the current heads and we later
+                # rerun index_sync without {reindex}
+                $reindex_heads = $self->last_commits($sync);
+        }
+        if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
+                $sync->{index_oid} = $self->can('index_oid');
+        }
+        my $git_pfx = "$sync->{ibx}->{inboxdir}/git";
+        for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
+                my $git_dir = "$git_pfx/$i.git";
                 -d $git_dir or next; # missing epochs are fine
                 my $git = PublicInbox::Git->new($git_dir);
+                my $unit = { git => $git, epoch => $i };
+                my $tip;
                 if ($reindex_heads) {
-                        $head = $reindex_heads->[$i] or next;
+                        $tip = $head = $reindex_heads->[$i] or next;
+                } else {
+                        $tip = $git->qx(qw(rev-parse -q --verify), $head);
+                        next if $?; # new repo
+                        chomp $tip;
                 }
-                chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
-
-                next if $?; # new repo
-                my $range = log_range($self, $sync, $git, $i, $tip) or next;
+                my $range = log_range($sync, $unit, $tip) or next;
                 # can't use 'rev-list --count' if we use --diff-filter
-                $pr->("$i.git counting $range ... ") if $pr;
+                $pr->("$pfx $i.git counting $range ... ") if $pr;
                 # Don't bump num_highwater on --reindex by using {D}.
                 # We intentionally do NOT use {D} in the non-reindex case
                 # because we want NNTP article number gaps from unindexed
                 # messages to show up in mirrors, too.
                 $sync->{D} //= $sync->{reindex} ? {} : undef; # OID_BIN => NR
-                my $stk = log2stack($sync, $git, $range, $self->{ibx});
+                my $stk = log2stack($sync, $git, $range);
+                return 0 if $sync->{quit};
                 my $nr = $stk ? $stk->num_records : 0;
                 $pr->("$nr\n") if $pr;
-                $sync->{stacks}->[$i] = $stk if $stk;
+                $unit->{stack} = $stk; # may be undef
+                unshift @{$sync->{todo}}, $unit;
                 $regen_max += $nr;
         }
+        return 0 if $sync->{quit};
 
         # XXX this should not happen unless somebody bypasses checks in
         # our code and blindly injects "d" file history into git repos
         if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
                 warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
-                my $arg = { v2w => $self };
-                my $all = $self->{ibx}->git;
+                local $self->{current_info} = 'leftover ';
+                my $unindex_oid = $self->can('unindex_oid');
                 for my $oid (@leftovers) {
+                        last if $sync->{quit};
                         $oid = unpack('H*', $oid);
-                        $self->{current_info} = "leftover $oid";
-                        $all->cat_async($oid, \&unindex_oid, $arg);
+                        my $req = { %$sync, oid => $oid };
+                        $self->git->cat_async($oid, $unindex_oid, $req);
                 }
-                $all->cat_async_wait;
+                $self->git->async_wait_all;
         }
+        return 0 if $sync->{quit};
         if (!$regen_max) {
                 $sync->{-regen_fmt} = "%u/?\n";
                 return 0;
@@ -1085,22 +996,25 @@ sub sync_prepare ($$$) {
         $sync->{-regen_fmt} = "% ${pad}u/$regen_max\n";
         $sync->{nr} = \(my $nr = 0);
         return -1 if $sync->{reindex};
-        $regen_max + $self->{mm}->num_highwater() || 0;
+        $regen_max + $self->artnum_max || 0;
 }
 
-sub unindex_oid_remote ($$$) {
+sub unindex_oid_aux ($$$) {
         my ($self, $oid, $mid) = @_;
         my @removed = $self->{oidx}->remove_oid($oid, $mid);
+        return unless $self->{-need_xapian};
         for my $num (@removed) {
-                my $idx = idx_shard($self, $num % $self->{shards});
-                $idx->shard_remove($oid, $num);
+                idx_shard($self, $num)->ipc_do('xdb_remove', $num);
         }
 }
 
 sub unindex_oid ($$;$) { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $sync) = @_;
-        my $self = $sync->{v2w};
-        my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+        my ($bref, $oid, $type, $size, $arg) = @_;
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 0);
+        my $self = $arg->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
         my $mm = $self->{mm};
         my $mids = mids(PublicInbox::Eml->new($bref));
         undef $$bref;
@@ -1116,50 +1030,55 @@ sub unindex_oid ($$;$) { # git->cat_async callback
                         warn "BUG: multiple articles linked to $oid\n",
                                 join(',',sort keys %gone), "\n";
                 }
-                foreach my $num (keys %gone) {
+                # reuse (num => mid) mapping in ascending numeric order
+                for my $num (sort { $a <=> $b } keys %gone) {
+                        $num += 0;
                         if ($unindexed) {
                                 my $mid0 = $mm->mid_for($num);
-                                $unindexed->{$mid0} = $num;
+                                my $oidbin = pack('H*', $oid);
+                                push @{$unindexed->{$oidbin}}, $num, $mid0;
                         }
                         $mm->num_delete($num);
                 }
-                unindex_oid_remote($self, $oid, $mid);
+                unindex_oid_aux($self, $oid, $mid);
         }
+        index_finalize($arg, 0);
 }
 
+sub git { $_[0]->{ibx}->git }
+
 # this is rare, it only happens when we get discontiguous history in
 # a mirror because the source used -purge or -edit
-sub unindex ($$$$) {
-        my ($self, $sync, $git, $unindex_range) = @_;
-        my $unindexed = $sync->{unindexed} //= {}; # $mid0 => $num
+sub unindex_todo ($$$) {
+        my ($self, $sync, $unit) = @_;
+        my $unindex_range = delete($unit->{unindex_range}) // return;
+        my $unindexed = $sync->{unindexed} //= {}; # $oidbin => [$num, $mid0]
         my $before = scalar keys %$unindexed;
         # order does not matter, here:
-        my @cmd = qw(log --raw -r
-                        --no-notes --no-color --no-abbrev --no-renames);
-        my $fh = $git->popen(@cmd, $unindex_range);
-        my $all = $self->{ibx}->git;
+        my $fh = $unit->{git}->popen(qw(log --raw -r --no-notes --no-color
+                                --no-abbrev --no-renames), $unindex_range);
         local $sync->{in_unindex} = 1;
+        my $unindex_oid = $self->can('unindex_oid');
         while (<$fh>) {
                 /\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next;
-                $all->cat_async($1, \&unindex_oid, $sync);
+                $self->git->cat_async($1, $unindex_oid, { %$sync, oid => $1 });
         }
-        close $fh or die "git log failed: \$?=$?";
-        $all->cat_async_wait;
+        $fh->close or die "git log failed: \$?=$?";
+        $self->git->async_wait_all;
 
         return unless $sync->{-opt}->{prune};
         my $after = scalar keys %$unindexed;
         return if $before == $after;
 
         # ensure any blob can not longer be accessed via dumb HTTP
-        PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}",
-                qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);
+        run_die($unit->{git}->cmd(qw(-c gc.reflogExpire=now gc
+                                --prune=all --quiet)));
 }
 
-sub sync_ranges ($$$) {
-        my ($self, $sync, $epoch_max) = @_;
+sub sync_ranges ($$) {
+        my ($self, $sync) = @_;
         my $reindex = $sync->{reindex};
-
-        return last_commits($self, $epoch_max) unless $reindex;
+        return $self->last_commits($sync) unless $reindex;
         return [] if ref($reindex) ne 'HASH';
 
         my $ranges = $reindex->{from}; # arrayref;
@@ -1171,11 +1090,10 @@ sub sync_ranges ($$$) {
 
 sub index_xap_only { # git->cat_async callback
         my ($bref, $oid, $type, $size, $smsg) = @_;
-        my $self = $smsg->{v2w};
-        my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
-        $smsg->{raw_bytes} = $size;
-        $idx->index_raw($bref, undef, $smsg);
-        $self->{transact_bytes} += $size;
+        my $self = delete $smsg->{self};
+        my $idx = idx_shard($self, $smsg->{num});
+        $idx->index_eml(PublicInbox::Eml->new($bref), $smsg);
+        $self->{transact_bytes} += $smsg->{bytes};
 }
 
 sub index_xap_step ($$$;$) {
@@ -1190,8 +1108,9 @@ sub index_xap_step ($$$;$) {
                         "$beg..$end (% $step)\n");
         }
         for (my $num = $beg; $num <= $end; $num += $step) {
+                last if $sync->{quit};
                 my $smsg = $ibx->over->get_art($num) or next;
-                $smsg->{v2w} = $self;
+                $smsg->{self} = $self;
                 $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg);
                 if ($self->{transact_bytes} >= $self->{batch_bytes}) {
                         ${$sync->{nr}} = $num;
@@ -1200,42 +1119,59 @@ sub index_xap_step ($$$;$) {
         }
 }
 
-sub index_epoch ($$$) {
-        my ($self, $sync, $i) = @_;
-
-        my $git_dir = git_dir_n($self, $i);
-        -d $git_dir or return; # missing epochs are fine
-        my $git = PublicInbox::Git->new($git_dir);
-        if (my $unindex_range = delete $sync->{unindex_range}->{$i}) { # rare
-                unindex($self, $sync, $git, $unindex_range);
-        }
-        defined(my $stk = $sync->{stacks}->[$i]) or return;
-        $sync->{stacks}->[$i] = undef;
-        my $all = $self->{ibx}->git;
-        while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
-                $self->{current_info} = "$i.git $oid";
+sub index_todo ($$$) {
+        my ($self, $sync, $unit) = @_;
+        return if $sync->{quit};
+        unindex_todo($self, $sync, $unit);
+        my $stk = delete($unit->{stack}) or return;
+        my $all = $self->git;
+        my $index_oid = $self->can('index_oid');
+        my $unindex_oid = $self->can('unindex_oid');
+        my $pfx;
+        if ($unit->{git}->{git_dir} =~ m!/([^/]+)/git/([0-9]+\.git)\z!) {
+                $pfx = "$1 $2"; # v2
+        } else { # v1
+                ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g);
+                $pfx //= $unit->{git}->{git_dir};
+        }
+        local $self->{current_info} = "$pfx ";
+        local $sync->{latest_cmt} = \(my $latest_cmt);
+        local $sync->{unit} = $unit;
+        while (my ($f, $at, $ct, $oid, $cmt) = $stk->pop_rec) {
+                if ($sync->{quit}) {
+                        warn "waiting to quit...\n";
+                        $all->async_wait_all;
+                        $self->update_last_commit($sync);
+                        return;
+                }
+                my $req = {
+                        %$sync,
+                        autime => $at,
+                        cotime => $ct,
+                        oid => $oid,
+                        cur_cmt => $cmt
+                };
                 if ($f eq 'm') {
-                        my $arg = { %$sync, autime => $at, cotime => $ct };
                         if ($sync->{max_size}) {
-                                $all->check_async($oid, \&check_size, $arg);
+                                $req->{git} = $all;
+                                $all->check_async($oid, \&check_size, $req);
                         } else {
-                                $all->cat_async($oid, \&index_oid, $arg);
+                                $all->cat_async($oid, $index_oid, $req);
                         }
                 } elsif ($f eq 'd') {
-                        $all->cat_async($oid, \&unindex_oid, $sync);
+                        $all->cat_async($oid, $unindex_oid, $req);
                 }
                 if (${$sync->{need_checkpoint}}) {
                         reindex_checkpoint($self, $sync);
                 }
         }
-        $all->check_async_wait;
-        $all->cat_async_wait;
-        update_last_commit($self, $git, $i, $stk->{latest_cmt});
+        $all->async_wait_all;
+        $self->update_last_commit($sync, $stk);
 }
 
 sub xapian_only {
         my ($self, $opt, $sync, $art_beg) = @_;
-        my $seq = $opt->{sequential_shard};
+        my $seq = $opt->{'sequential-shard'};
         $art_beg //= 0;
         local $self->{parallel} = 0 if $seq;
         $self->idx_init($opt); # acquire lock
@@ -1243,7 +1179,7 @@ sub xapian_only {
                 $sync //= {
                         need_checkpoint => \(my $bool = 0),
                         -opt => $opt,
-                        v2w => $self,
+                        self => $self,
                         nr => \(my $nr = 0),
                         -regen_fmt => "%u/?\n",
                 };
@@ -1251,6 +1187,7 @@ sub xapian_only {
                 if ($seq || !$self->{parallel}) {
                         my $shard_end = $self->{shards} - 1;
                         for my $i (0..$shard_end) {
+                                last if $sync->{quit};
                                 index_xap_step($self, $sync, $art_beg + $i);
                                 if ($i != $shard_end) {
                                         reindex_checkpoint($self, $sync);
@@ -1260,7 +1197,8 @@ sub xapian_only {
                         index_xap_step($self, $sync, $art_beg, 1);
                 }
         }
-        $self->{ibx}->git->cat_async_wait;
+        $self->git->async_wait_all;
+        $self->{ibx}->cleanup;
         $self->done;
 }
 
@@ -1270,28 +1208,41 @@ sub index_sync {
         $opt //= {};
         return xapian_only($self, $opt) if $opt->{xapian_only};
 
-        my $pr = $opt->{-progress};
         my $epoch_max;
-        my $latest = git_dir_latest($self, \$epoch_max);
-        return unless defined $latest;
+        my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+        if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
+                use Time::HiRes qw(stat);
+                if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {
+                        my $c = $mm[10]; # 10 = ctime (nsec NV)
+                        my @hd = stat("$latest/refs/heads");
+                        my @pr = stat("$latest/packed-refs");
+                        return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0);
+                }
+        }
 
-        my $seq = $opt->{sequential_shard};
+        my $pr = $opt->{-progress};
+        my $seq = $opt->{'sequential-shard'};
         my $art_beg; # the NNTP article number we start xapian_only at
         my $idxlevel = $self->{ibx}->{indexlevel};
         local $self->{ibx}->{indexlevel} = 'basic' if $seq;
 
         $self->idx_init($opt); # acquire lock
-        fill_alternates($self, $epoch_max);
+        $self->{mg}->fill_alternates;
         $self->{oidx}->rethread_prepare($opt);
         my $sync = {
                 need_checkpoint => \(my $bool = 0),
-                unindex_range => {}, # EPOCH => oid_old..oid_new
                 reindex => $opt->{reindex},
                 -opt => $opt,
-                v2w => $self,
+                self => $self,
+                ibx => $self->{ibx},
+                epoch_max => $epoch_max,
         };
-        $sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
-        if (sync_prepare($self, $sync, $epoch_max)) {
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
+
+        if (sync_prepare($self, $sync)) {
                 # tmp_clone seems to fail if inside a transaction, so
                 # we rollback here (because we opened {mm} for reading)
                 # Note: we do NOT rely on DBI transactions for atomicity;
@@ -1303,16 +1254,13 @@ sub index_sync {
 
                 # xapian_only works incrementally w/o --reindex
                 if ($seq && !$opt->{reindex}) {
-                        $art_beg = $sync->{mm_tmp}->max;
-                        $art_beg++ if defined($art_beg);
+                        $art_beg = $sync->{mm_tmp}->max || -1;
+                        $art_beg++;
                 }
         }
-        if ($sync->{max_size} = $opt->{max_size}) {
-                $sync->{index_oid} = \&index_oid;
-        }
         # work forwards through history
-        index_epoch($self, $sync, $_) for (0..$epoch_max);
-        $self->{oidx}->rethread_done($opt);
+        index_todo($self, $sync, $_) for @{delete($sync->{todo}) // []};
+        $self->{oidx}->rethread_done($opt) unless $sync->{quit};
         $self->done;
 
         if (my $nr = $sync->{nr}) {
@@ -1320,14 +1268,21 @@ sub index_sync {
                 $pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr;
         }
 
+        my $quit_warn;
         # deal with Xapian shards sequentially
         if ($seq && delete($sync->{mm_tmp})) {
-                $self->{ibx}->{indexlevel} = $idxlevel;
-                xapian_only($self, $opt, $sync, $art_beg);
+                if ($sync->{quit}) {
+                        $quit_warn = 1;
+                } else {
+                        $self->{ibx}->{indexlevel} = $idxlevel;
+                        xapian_only($self, $opt, $sync, $art_beg);
+                        $quit_warn = 1 if $sync->{quit};
+                }
         }
 
         # --reindex on the command-line
-        if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') {
+        if (!$sync->{quit} && $opt->{reindex} &&
+                        !ref($opt->{reindex}) && $idxlevel ne 'basic') {
                 $self->lock_acquire;
                 my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0);
                 if (my $xdb = $s0->idx_acquire) {
@@ -1339,12 +1294,27 @@ sub index_sync {
         }
 
         # reindex does not pick up new changes, so we rerun w/o it:
-        if ($opt->{reindex}) {
+        if ($opt->{reindex} && !$sync->{quit} &&
+                        !grep(defined, @$opt{qw(since until)})) {
                 my %again = %$opt;
                 $sync = undef;
                 delete @again{qw(rethread reindex -skip_lock)};
                 index_sync($self, \%again);
+                $opt->{quit} = $again{quit}; # propagate to caller
+        }
+        warn <<EOF if $quit_warn;
+W: interrupted, --xapian-only --reindex required upon restart
+EOF
+}
+
+sub ipc_atfork_child {
+        my ($self) = @_;
+        if (my $lei = delete $self->{lei}) {
+                $lei->_lei_atfork_child;
+                my $pkt_op_p = delete $lei->{pkt_op_p};
+                close($pkt_op_p->{op_p});
         }
+        $self->SUPER::ipc_atfork_child;
 }
 
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1d5119cd..44e1f2a8 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -1,13 +1,13 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used for displaying the HTML web interface.
 # See Documentation/design_www.txt for this.
 package PublicInbox::View;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use List::Util qw(max);
+use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge
 use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href
                         ts2str fmt_ts);
@@ -20,8 +20,10 @@ use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Reply;
 use PublicInbox::ViewDiff qw(flush_diff);
 use PublicInbox::Eml;
+use POSIX qw(strftime);
 use Time::Local qw(timegm);
 use PublicInbox::Smsg qw(subject_normalized);
+use PublicInbox::ContentHash qw(content_hash);
 use constant COLS => 72;
 use constant INDENT => '  ';
 use constant TCHILD => '` ';
@@ -31,15 +33,17 @@ sub msg_page_i {
         my ($ctx, $eml) = @_;
         if ($eml) { # called by WwwStream::async_eml or getline
                 my $smsg = $ctx->{smsg};
-                $ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}});
+                my $over = $ctx->{ibx}->over;
+                $ctx->{smsg} = $over ? $over->next_by_mid(@{$ctx->{next_arg}})
+                                : $ctx->gone('over');
                 $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
                                 "../${\mid_href($smsg->{mid})}/" : '';
-                my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx);
-                multipart_text_as_html($eml, $ctx);
-                delete $ctx->{obuf};
-                $$obuf .= '</pre><hr>';
-                $$obuf .= html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
-                $$obuf;
+                if (_msg_page_prepare($eml, $ctx, $smsg->{ts})) {
+                        $eml->each_part(\&add_text_body, $ctx, 1);
+                        print { $ctx->{zfh} } '</pre><hr>';
+                }
+                html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
+                ''; # XXX TODO cleanup
         } else { # called by WwwStream::async_next or getline
                 $ctx->{smsg}; # may be undef
         }
@@ -48,25 +52,25 @@ sub msg_page_i {
 # /$INBOX/$MSGID/ for unindexed v1 inboxes
 sub no_over_html ($) {
         my ($ctx) = @_;
-        my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404
+        my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; # 404
         my $eml = PublicInbox::Eml->new($bref);
         $ctx->{mhref} = '';
         PublicInbox::WwwStream::init($ctx);
-        my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx);
-        multipart_text_as_html($eml, $ctx);
-        delete $ctx->{obuf};
-        $$obuf .= '</pre><hr>';
-        eval { $$obuf .= html_footer($ctx, $eml) };
-        html_oneshot($ctx, 200, $obuf);
+        if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
+                $eml->each_part(\&add_text_body, $ctx, 1);
+                print { $ctx->{zfh} } '</pre><hr>';
+        }
+        html_footer($ctx, $eml);
+        $ctx->html_done;
 }
 
 # public functions: (unstable)
 
 sub msg_page {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-        my $over = $ctx->{over} = $ibx->over or return no_over_html($ctx);
+        my $over = $ibx->over or return no_over_html($ctx);
         my ($id, $prev);
         my $next_arg = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
 
@@ -76,7 +80,8 @@ sub msg_page {
         # allow user to easily browse the range around this message if
         # they have ->over
         $ctx->{-t_max} = $smsg->{ts};
-        PublicInbox::WwwStream::aresponse($ctx, 200, \&msg_page_i);
+        $ctx->{-spfx} = '../' if $ibx->{-repo_objs};
+        PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
 }
 
 # /$INBOX/$MESSAGE_ID/#R
@@ -88,7 +93,7 @@ sub msg_reply ($$) {
          'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style';
 
         my $info = '';
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if (my $url = $ibx->{infourl}) {
                 $url = prurl($ctx->{env}, $url);
                 $info = qq(\n  List information: <a\nhref="$url">$url</a>\n);
@@ -136,6 +141,9 @@ $info
   <a
 href="$se_url">$se_url</a>
 $link</pre>
+
+  Be sure your reply has a <b>Subject:</b> header at the top and a blank line
+  before the message body.
 EOF
 }
 
@@ -175,6 +183,59 @@ sub nr_to_s ($$$) {
         $nr == 1 ? "$nr $singular" : "$nr $plural";
 }
 
+sub addr2urlmap ($) {
+        my ($ctx) = @_;
+        # cache makes a huge difference with /[tT] and large threads
+        my $key = PublicInbox::Git::host_prefix_url($ctx->{env}, '');
+        my $ent = $ctx->{www}->{pi_cfg}->{-addr2urlmap}->{$key} // do {
+                my $by_addr = $ctx->{www}->{pi_cfg}->{-by_addr};
+                my (%addr2url, $url);
+                while (my ($addr, $ibx) = each %$by_addr) {
+                        $url = $ibx->base_url // $ibx->base_url($ctx->{env});
+                        $addr2url{$addr} = ascii_html($url) if defined $url;
+                }
+                # don't allow attackers to randomly change Host: headers
+                # and OOM us if the server handles all hostnames:
+                my $tmp = $ctx->{www}->{pi_cfg}->{-addr2urlmap};
+                my @k = keys %$tmp; # random order
+                delete @$tmp{@k[0..3]} if scalar(@k) > 7;
+                my $re = join('|', map { quotemeta } keys %addr2url);
+                $tmp->{$key} = [ qr/\b($re)\b/i, \%addr2url ];
+        };
+        @$ent;
+}
+
+sub to_cc_html ($$$$) {
+        my ($ctx, $eml, $field, $t) = @_;
+        my @vals = $eml->header($field) or return ('', 0);
+        my (undef, $addr2url) = addr2urlmap($ctx);
+        my $pairs = PublicInbox::Address::pairs(join(', ', @vals));
+        my ($len, $line_len, $html) = (0, 0, '');
+        my ($pair, $url);
+        my ($cur_ibx, $env) = @$ctx{qw(ibx env)};
+        # avoid excessive ascii_html calls (already hot in profiles):
+        my @html = split /\n/, ascii_html(join("\n", map {
+                $_->[0] // (split(/\@/, $_->[1]))[0]; # addr user if no name
+        } @$pairs));
+        for my $n (@html) {
+                $pair = shift @$pairs;
+                if ($line_len) { # 9 = display width of ",\t":
+                        if ($line_len + length($n) > COLS - 9) {
+                                $html .= ",\n\t";
+                                $len += $line_len;
+                                $line_len = 0;
+                        } else {
+                                $html .= ', ';
+                                $line_len += 2;
+                        }
+                }
+                $line_len += length($n);
+                $url = $addr2url->{lc($pair->[1] // '')};
+                $html .= $url ? qq(<a\nhref="$url$t">$n</a>) : $n;
+        }
+        ($html, $len + $line_len);
+}
+
 # Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint
 # this is already inside a <pre>
 sub eml_entry {
@@ -199,7 +260,8 @@ sub eml_entry {
         my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
 
         # Deleting these fields saves about 400K as we iterate across 1K msgs
-        delete @$smsg{qw(ts blob)};
+        my ($t, undef) = delete @$smsg{qw(ts blob)};
+        $t = $t ? '?t='.ts2str($t) : '';
 
         my $from = _hdr_names_html($eml, 'From');
         obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
@@ -208,9 +270,8 @@ sub eml_entry {
         my $mhref = $upfx . mid_href($mid_raw) . '/';
         $rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
         $rv .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
-        my $to = fold_addresses(_hdr_names_html($eml, 'To'));
-        my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
-        my ($tlen, $clen) = (length($to), length($cc));
+        my ($to, $tlen) = to_cc_html($ctx, $eml, 'To', $t);
+        my ($cc, $clen) = to_cc_html($ctx, $eml, 'Cc', $t);
         my $to_cc = '';
         if (($tlen + $clen) > COLS) {
                 $to_cc .= '  To: '.$to."\n" if $tlen;
@@ -233,20 +294,22 @@ sub eml_entry {
                 my $html = ascii_html($irt);
                 $rv .= qq(In-Reply-To: &lt;<a\nhref="$href">$html</a>&gt;\n)
         }
-        $rv .= "\n";
+        say { $ctx->zfh } $rv;
 
         # scan through all parts, looking for displayable text
         $ctx->{mhref} = $mhref;
-        $ctx->{obuf} = \$rv;
-        $eml->each_part(\&add_text_body, $ctx, 1);
-        delete $ctx->{obuf};
+        $ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
+        $eml->each_part(\&add_text_body, $ctx, 1); # expensive
 
         # add the footer
-        $rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
+        $rv = "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
                 "<a\nhref=\"$mhref\">permalink</a>" .
                 " <a\nhref=\"${mhref}raw\">raw</a>" .
                 " <a\nhref=\"${mhref}#R\">reply</a>";
 
+        delete($ctx->{-qry}) and
+                $rv .= qq[ <a\nhref="${mhref}#related">related</a>];
+
         my $hr;
         if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
                 $rv .= "\t[relevance $pct%]";
@@ -254,7 +317,6 @@ sub eml_entry {
         } elsif ($mapping) {
                 my $nested = 'nested';
                 my $flat = 'flat';
-                my $end = '';
                 if ($ctx->{flat}) {
                         $hr = 1;
                         $flat = "<b>$flat</b>";
@@ -276,8 +338,15 @@ sub eml_entry {
 sub pad_link ($$;$) {
         my ($mid, $level, $s) = @_;
         $s ||= '...';
-        my $id = id_compress($mid, 1);
-        (' 'x19).indent_for($level).th_pfx($level)."<a\nhref=#r$id>($s)</a>\n";
+        my $href = defined($mid) ?
+                ("<a\nhref=#r".id_compress($mid, 1).">($s)</a>\n") :
+                "($s)\n";
+        (' 'x19).indent_for($level).th_pfx($level).$href;
+}
+
+sub _skel_hdr {
+        # my ($mapping, $mid) = @_;
+        ($_[0]->{$_[1] // \'bogus'} // [ "(?)\n" ])->[0];
 }
 
 sub _th_index_lite {
@@ -285,8 +354,7 @@ sub _th_index_lite {
         my $rv = '';
         my $mapping = $ctx->{mapping} or return $rv;
         my $pad = '  ';
-        my $mid_map = $mapping->{$mid_raw};
-        defined $mid_map or
+        my $mid_map = $mapping->{$mid_raw} //
                 return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped';
         my ($attr, $node, $idx, $level) = @$mid_map;
         my $children = $node->{children};
@@ -309,39 +377,40 @@ sub _th_index_lite {
                                 my $s = ($idx - 1). ' preceding siblings ...';
                                 $rv .= pad_link($pmid, $level, $s);
                         } elsif ($idx == 2) {
-                                my $ppmid = $siblings->[0]->{mid};
-                                $rv .= $pad . $mapping->{$ppmid}->[0];
+                                $rv .= $pad . _skel_hdr($mapping,
+                                                $siblings->[0] ?
+                                                $siblings->[0]->{mid} : undef);
                         }
-                        $rv .= $pad . $mapping->{$pmid}->[0];
+                        $rv .= $pad . _skel_hdr($mapping, $pmid);
                 }
         }
         my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
         my $s_c = nr_to_s($nr_c, 'reply', 'replies');
-        $attr =~ s!\n\z!</b>\n!s;
+        chop $attr; # remove "\n"
         $attr =~ s!<a\nhref.*</a> (?:&#34; )?!!s; # no point in dup subject
         $attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
-        $rv .= "<b>@ $attr";
+        $rv .= "<b>@ $attr</b>\n";
         if ($nr_c) {
-                my $cmid = $children->[0]->{mid};
-                $rv .= $pad . $mapping->{$cmid}->[0];
+                my $cmid = $children->[0] ? $children->[0]->{mid} : undef;
+                $rv .= $pad . _skel_hdr($mapping, $cmid);
                 if ($nr_c > 2) {
                         my $s = ($nr_c - 1). ' more replies';
                         $rv .= pad_link($cmid, $level + 1, $s);
                 } elsif (my $cn = $children->[1]) {
-                        $rv .= $pad . $mapping->{$cn->{mid}}->[0];
+                        $rv .= $pad . _skel_hdr($mapping, $cn->{mid});
                 }
         }
 
         my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
         if ($next) {
                 my $nmid = $next->{mid};
-                $rv .= $pad . $mapping->{$nmid}->[0];
+                $rv .= $pad . _skel_hdr($mapping, $nmid);
                 my $nnext = $nr_s - $idx;
                 if ($nnext > 2) {
                         my $s = ($nnext - 1).' subsequent siblings';
                         $rv .= pad_link($nmid, $level, $s);
                 } elsif (my $nn = $siblings->[$idx + 2]) {
-                        $rv .= $pad . $mapping->{$nn->{mid}}->[0];
+                        $rv .= $pad . _skel_hdr($mapping, $nn->{mid});
                 }
         }
         $rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -370,7 +439,9 @@ sub pre_thread  { # walk_thread callback
 sub thread_eml_entry {
         my ($ctx, $eml) = @_;
         my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
-        $beg . '<pre>' . eml_entry($ctx, $eml) . '</pre>' . $end;
+        print { $ctx->zfh } $beg, '<pre>';
+        print { $ctx->{zfh} } eml_entry($ctx, $eml), '</pre>';
+        $end;
 }
 
 sub next_in_queue ($$) {
@@ -397,15 +468,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
                                 if (!$ghost_ok) { # first non-ghost
                                         $ctx->{-title_html} =
                                                 ascii_html($smsg->{subject});
-                                        $ctx->zmore($ctx->html_top);
+                                        print { $ctx->zfh } $ctx->html_top;
                                 }
                                 return $smsg;
                         }
                         # buffer the ghost entry and loop
-                        $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg));
+                        print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
                 } else { # all done
-                        $ctx->zmore(join('', thread_adj_level($ctx, 0)));
-                        $ctx->zmore(${delete($ctx->{skel})});
+                        print { $ctx->zfh } thread_adj_level($ctx, 0),
+                                                ${delete($ctx->{skel})};
                         return;
                 }
         }
@@ -413,21 +484,23 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
 
 sub stream_thread ($$) {
         my ($rootset, $ctx) = @_;
-        $ctx->{-queue} = [ map { (0, $_) } @$rootset ];
-        PublicInbox::WwwStream::aresponse($ctx, 200, \&stream_thread_i);
+        @{$ctx->{-queue}} = map { (0, $_) } @$rootset;
+        PublicInbox::WwwStream::aresponse($ctx, \&stream_thread_i);
 }
 
 # /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/
 sub thread_html {
         my ($ctx) = @_;
+        $ctx->{-upfx} = '../../';
         my $mid = $ctx->{mid};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my ($nr, $msgs) = $ibx->over->get_thread($mid);
         return missing_thread($ctx) if $nr == 0;
+        $ctx->{-spfx} = '../../' if $ibx->{-repo_objs};
 
         # link $INBOX_DIR/description text to "index_topics" view around
         # the newest message in this thread
-        my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs));
+        my $t = ts2str($ctx->{-t_max} = max(map { $_->{ts} } @$msgs));
         my $t_fmt = fmt_ts($ctx->{-t_max});
 
         my $skel = '<hr><pre>';
@@ -443,13 +516,12 @@ EOF
         $skel .= " (download: <a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
         $skel .= " / follow: <a\nhref=\"../t.atom\">Atom feed</a>)\n";
         $skel .= "-- links below jump to the message on this page --\n";
-        $ctx->{-upfx} = '../../';
         $ctx->{cur_level} = 0;
         $ctx->{skel} = \$skel;
         $ctx->{prev_attr} = '';
         $ctx->{prev_level} = 0;
-        $ctx->{root_anchor} = anchor_for($mid);
-        $ctx->{mapping} = {};
+        $ctx->{root_anchor} = 'm' . id_compress($mid, 1);
+        $ctx->{mapping} = {}; # mid -> [ header_summary, node, idx, level ]
         $ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message')
                        .' in thread';
 
@@ -465,7 +537,7 @@ EOF
         # flat display: lazy load the full message from smsg
         $ctx->{msgs} = $msgs;
         $ctx->{-html_tip} = '<pre>';
-        PublicInbox::WwwStream::aresponse($ctx, 200, \&thread_html_i);
+        PublicInbox::WwwStream::aresponse($ctx, \&thread_html_i);
 }
 
 sub thread_html_i { # PublicInbox::WwwStream::getline callback
@@ -474,7 +546,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
                 my $smsg = $ctx->{smsg};
                 if (exists $ctx->{-html_tip}) {
                         $ctx->{-title_html} = ascii_html($smsg->{subject});
-                        $ctx->zmore($ctx->html_top);
+                        print { $ctx->zfh } $ctx->html_top;
                 }
                 return eml_entry($ctx, $eml);
         } else {
@@ -482,31 +554,19 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
                         return $smsg if exists($smsg->{blob});
                 }
                 my $skel = delete($ctx->{skel}) or return; # all done
-                $ctx->zmore($$skel);
+                print { $ctx->zfh } $$skel;
                 undef;
         }
 }
 
-sub multipart_text_as_html {
-        # ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"
-
-        # scan through all parts, looking for displayable text
-        $_[0]->each_part(\&add_text_body, $_[1], 1);
-}
-
 sub submsg_hdr ($$) {
         my ($ctx, $eml) = @_;
-        my $obfs_ibx = $ctx->{-obfs_ibx};
-        my $rv = $ctx->{obuf};
-        $$rv .= "\n";
+        my $s = "\n";
         for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) {
-                my @v = $eml->header($h);
-                for my $v (@v) {
-                        obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                        $v = ascii_html($v);
-                        $$rv .= "$h: $v\n";
-                }
+                $s .= "$h: $_\n" for $eml->header($h);
         }
+        obfuscate_addrs($ctx->{-obfs_ibx}, $s) if $ctx->{-obfs_ibx};
+        ascii_html($s);
 }
 
 sub attach_link ($$$$;$) {
@@ -517,8 +577,8 @@ sub attach_link ($$$$;$) {
         # downloads for 0-byte multipart attachments
         return unless $part->{bdy};
 
-        my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
-        my $size = bytes::length($part->body);
+        my $size = length($part->body);
+        delete $part->{bdy}; # save memory
 
         # hide attributes normally, unless we want to aid users in
         # spotting MUA problems:
@@ -532,46 +592,36 @@ sub attach_link ($$$$;$) {
         } else {
                 $sfn = 'a.bin';
         }
-        my $rv = $ctx->{obuf};
-        $$rv .= qq($nl<a\nhref="$ctx->{mhref}$idx-$sfn">);
-        if ($err) {
-                $$rv .= <<EOF;
+        my $rv = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
+        $rv .= qq(<a\nhref="$ctx->{mhref}$idx-$sfn">);
+        $rv .= <<EOF if $err;
 [-- Warning: decoded text below may be mangled, UTF-8 assumed --]
 EOF
-        }
-        $$rv .= "[-- Attachment #$idx: ";
-        my $ts = "Type: $ct, Size: $size bytes";
+        $rv .= "[-- Attachment #$idx: ";
         my $desc = $part->header('Content-Description') // $fn // '';
-        $desc = ascii_html($desc);
-        $$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
-        $$rv .= "</a>\n";
-
-        submsg_hdr($ctx, $part) if $part->{is_submsg};
-
-        undef;
+        $rv .= ascii_html($desc)." --]\n[-- " if $desc ne '';
+        $rv .= "Type: $ct, Size: $size bytes --]</a>\n";
+        $rv .= submsg_hdr($ctx, $part) if $part->{is_submsg};
+        $rv;
 }
 
 sub add_text_body { # callback for each_part
         my ($p, $ctx) = @_;
         my $upfx = $ctx->{mhref};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
         # $p - from each_part: [ Email::MIME-like, depth, $idx ]
         my ($part, $depth, $idx) = @$p;
         my $ct = $part->content_type || 'text/plain';
         my $fn = $part->filename;
         my ($s, $err) = msg_part_text($part, $ct);
-        return attach_link($ctx, $ct, $p, $fn) unless defined $s;
-
-        my $rv = $ctx->{obuf};
-        if ($part->{is_submsg}) {
-                submsg_hdr($ctx, $part);
-                $$rv .= "\n";
-        }
+        my $zfh = $ctx->zfh;
+        $s // return print $zfh (attach_link($ctx, $ct, $p, $fn) // '');
+        say $zfh submsg_hdr($ctx, $part) if $part->{is_submsg};
 
         # makes no difference to browsers, and don't screw up filename
         # link generation in diffs with the extra '%0D'
-        $s =~ s/\r\n/\n/sg;
+        $s =~ s/\r+\n/\n/sg;
 
         # will be escaped to `&#8226;' in HTML
         obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
@@ -590,133 +640,129 @@ sub add_text_body { # callback for each_part
                 $ctx->{-anchors} = {} if $s =~ /^diff --git /sm;
                 $diff = 1;
                 delete $ctx->{-long_path};
-                my $spfx;
-                if ($ibx->{-repo_objs}) {
-                        if (index($upfx, '//') >= 0) { # absolute URL (Atom feeds)
-                                $spfx = $upfx;
-                                $spfx =~ s!/([^/]*)/\z!/!;
-                        } else {
-                                my $n_slash = $upfx =~ tr!/!/!;
-                                if ($n_slash == 0) {
-                                        $spfx = '../';
-                                } elsif ($n_slash == 1) {
-                                        $spfx = '';
-                                } else { # nslash == 2
-                                        $spfx = '../../';
-                                }
-                        }
-                }
-                $ctx->{-spfx} = $spfx;
         };
 
-        # some editors don't put trailing newlines at the end:
-        $s .= "\n" unless $s =~ /\n\z/s;
-
         # split off quoted and unquoted blocks:
         my @sections = PublicInbox::MsgIter::split_quotes($s);
         undef $s; # free memory
         if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
                 # badly-encoded message with $err? tell the world about it!
-                attach_link($ctx, $ct, $p, $fn, $err);
-                $$rv .= "\n";
+                say $zfh attach_link($ctx, $ct, $p, $fn, $err);
         }
-        foreach my $cur (@sections) {
+        delete $part->{bdy}; # save memory
+        for my $cur (@sections) { # $cur may be huge
                 if ($cur =~ /\A>/) {
                         # we use a <span> here to allow users to specify
                         # their own color for quoted text
-                        $$rv .= qq(<span\nclass="q">);
-                        $$rv .= $l->to_html($cur);
-                        $$rv .= '</span>';
+                        print $zfh qq(<span\nclass="q">),
+                                        $l->to_html($cur), '</span>';
                 } elsif ($diff) {
                         flush_diff($ctx, \$cur);
-                } else {
-                        # regular lines, OK
-                        $$rv .= $l->to_html($cur);
+                } else { # regular lines, OK
+                        print $zfh $l->to_html($cur);
                 }
                 undef $cur; # free memory
         }
 }
 
-sub _msg_page_prepare_obuf {
-        my ($eml, $ctx) = @_;
-        my $over = $ctx->{-inbox}->over;
-        my $obfs_ibx = $ctx->{-obfs_ibx};
-        my $rv = '';
+sub _msg_page_prepare {
+        my ($eml, $ctx, $ts) = @_;
+        my $have_over = !!$ctx->{ibx}->over;
         my $mids = mids_for_index($eml);
         my $nr = $ctx->{nr}++;
         if ($nr) { # unlikely
-                $rv .= '<pre>';
+                if ($ctx->{chash} eq content_hash($eml)) {
+                        warn "W: BUG? @$mids not deduplicated properly\n";
+                        return;
+                }
+                $ctx->{-html_tip} =
+qq[<pre>WARNING: multiple messages have this Message-ID (<a
+href="d/">diff</a>)</pre><pre>];
         } else {
                 $ctx->{first_hdr} = $eml->header_obj;
-                if ($ctx->{smsg}) {
-                        $rv .=
-"<pre>WARNING: multiple messages have this Message-ID\n</pre>";
-                }
-                $rv .= "<pre\nid=b>"; # anchor for body start
+                $ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
+                $ctx->{-html_tip} = "<pre\nid=b>"; # anchor for body start
         }
-        $ctx->{-upfx} = '../' if $over;
+        $ctx->{-upfx} = '../';
         my @title; # (Subject[0], From[0])
+        my $hbuf = '';
         for my $v ($eml->header('From')) {
                 my @n = PublicInbox::Address::names($v);
-                $v = ascii_html($v);
-                $title[1] //= ascii_html(join(', ', @n));
-                if ($obfs_ibx) {
-                        obfuscate_addrs($obfs_ibx, $v);
-                        obfuscate_addrs($obfs_ibx, $title[1]);
-                }
-                $rv .= "From: $v\n" if $v ne '';
+                $title[1] //= join(', ', @n);
+                $hbuf .= "From: $v\n" if $v ne '';
         }
-        foreach my $h (qw(To Cc)) {
+        for my $h (qw(To Cc)) {
                 for my $v ($eml->header($h)) {
                         fold_addresses($v);
-                        $v = ascii_html($v);
-                        obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                        $rv .= "$h: $v\n" if $v ne '';
+                        $hbuf .= "$h: $v\n" if $v ne '';
                 }
         }
         my @subj = $eml->header('Subject');
-        if (@subj) {
-                my $v = ascii_html(shift @subj);
-                obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                $rv .= 'Subject: ';
-                $rv .= $over ? qq(<a\nhref="#r"\nid=t>$v</a>\n) : "$v\n";
-                $title[0] = $v;
-                for $v (@subj) { # multi-Subject message :<
-                        $v = ascii_html($v);
-                        obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                        $rv .= "Subject: $v\n";
-                }
-        } else { # dummy anchor for thread skeleton at bottom of page
-                $rv .= qq(<a\nhref="#r"\nid=t></a>) if $over;
-                $title[0] = '(no subject)';
-        }
-        for my $v ($eml->header('Date')) {
-                $v = ascii_html($v);
-                obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
-                $rv .= "Date: $v\n";
+        $hbuf .= "Subject: $_\n" for @subj;
+        $title[0] = $subj[0] // '(no subject)';
+        $hbuf .= "Date: $_\n" for $eml->header('Date');
+        $hbuf = ascii_html($hbuf);
+        my $t = $ts ? '?t='.ts2str($ts) : '';
+        my ($re, $addr2url) = addr2urlmap($ctx);
+        $hbuf =~ s!$re!qq(<a\nhref=").$addr2url->{lc $1}.qq($t">$1</a>)!sge;
+        $ctx->{-title_html} = ascii_html(join(' - ', @title));
+        if (my $obfs_ibx = $ctx->{-obfs_ibx}) {
+                obfuscate_addrs($obfs_ibx, $hbuf);
+                obfuscate_addrs($obfs_ibx, $ctx->{-title_html});
         }
-        if (!$nr) { # first (and only) message, common case
-                $ctx->{-title_html} = join(' - ', @title);
-                $rv = $ctx->html_top . $rv;
+
+        # [thread overview] link is typically added after Date,
+        # but added after Subject, or even nothing.
+        if ($have_over) {
+                chop $hbuf; # drop "\n", or noop if $rv eq ''
+                $hbuf .= qq{\t<a\nhref="#r">[thread overview]</a>\n};
+                $hbuf =~ s!^Subject:\x20(.*?)(\n[A-Z]|\z)
+                                !Subject: <a\nhref="#r"\nid=t>$1</a>$2!msx or
+                        $hbuf .= qq(<a\nhref="#r\nid=t></a>);
         }
         if (scalar(@$mids) == 1) { # common case
-                my $mhtml = ascii_html($mids->[0]);
-                $rv .= "Message-ID: &lt;$mhtml&gt; ";
-                $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+                my $x = ascii_html($mids->[0]);
+                $hbuf .= qq[Message-ID: &lt;$x&gt; (<a href="raw">raw</a>)\n];
+        }
+        if (!$nr) { # first (and only) message, common case
+                print { $ctx->zfh } $ctx->html_top, $hbuf;
         } else {
+                delete $ctx->{-title_html};
+                print { $ctx->zfh } $ctx->{-html_tip}, $hbuf;
+        }
+        $ctx->{-linkify} //= PublicInbox::Linkify->new;
+        $hbuf = '';
+        if (scalar(@$mids) != 1) { # unlikely, but it happens :<
                 # X-Alt-Message-ID can happen if a message is injected from
                 # public-inbox-nntpd because of multiple Message-ID headers.
-                my $lnk = PublicInbox::Linkify->new;
-                my $s = '';
                 for my $h (qw(Message-ID X-Alt-Message-ID)) {
-                        $s .= "$h: $_\n" for ($eml->header_raw($h));
+                        $hbuf .= "$h: $_\n" for ($eml->header_raw($h));
                 }
-                $lnk->linkify_mids('..', \$s, 1);
-                $rv .= $s;
+                $ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
+                print { $ctx->{zfh} } $hbuf;
+                $hbuf = '';
         }
-        $rv .= _parent_headers($eml, $over);
-        $rv .= "\n";
-        \$rv;
+        my @irt = $eml->header_raw('In-Reply-To');
+        my $refs;
+        if (@irt) { # ("so-and-so's message of $DATE") added by some MUAs
+                for (grep(/=\?/, @irt)) {
+                        s/(=\?.*)\z/PublicInbox::Eml::mhdr_decode $1/se;
+                }
+        } else {
+                $refs = references($eml);
+                $irt[0] = pop(@$refs) if scalar @$refs;
+        }
+        $hbuf .= "In-Reply-To: $_\n" for @irt;
+
+        # do not display References: if search is present,
+        # we show the thread skeleton at the bottom, instead.
+        if (!$have_over) {
+                $refs //= references($eml);
+                $hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
+        }
+        $ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
+        say { $ctx->{zfh} } $hbuf;
+        1;
 }
 
 sub SKEL_EXPAND () {
@@ -729,7 +775,7 @@ sub SKEL_EXPAND () {
 sub thread_skel ($$$) {
         my ($skel, $ctx, $hdr) = @_;
         my $mid = mids($hdr)->[0];
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my ($nr, $msgs) = $ibx->over->get_thread($mid);
         my $parent = in_reply_to($hdr);
         $$skel .= "\n<b>Thread overview: </b>";
@@ -738,7 +784,8 @@ sub thread_skel ($$$) {
                         $$skel .= SKEL_EXPAND."\n ";
                         $$skel .= ghost_parent('../', $parent) . "\n";
                 } else {
-                        $$skel .= '[no followups] '.SKEL_EXPAND."\n";
+                        $$skel .= "<a\nid=r>[no followups]</a> ".
+                                        SKEL_EXPAND."\n";
                 }
                 $ctx->{next_msg} = undef;
                 $ctx->{parent_msg} = $parent;
@@ -752,7 +799,6 @@ sub thread_skel ($$$) {
         # when multiple Subject: headers are present, so we follow suit:
         my $subj = $hdr->header('Subject') // '';
         $subj = '(no subject)' if $subj eq '';
-        $ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
         $ctx->{cur} = $mid;
         $ctx->{prev_attr} = '';
         $ctx->{prev_level} = 0;
@@ -765,54 +811,47 @@ sub thread_skel ($$$) {
         $ctx->{parent_msg} = $parent;
 }
 
-sub _parent_headers {
-        my ($hdr, $over) = @_;
-        my $rv = '';
-        my @irt = $hdr->header_raw('In-Reply-To');
-        my $refs;
-        if (@irt) {
-                my $lnk = PublicInbox::Linkify->new;
-                $rv .= "In-Reply-To: $_\n" for @irt;
-                $lnk->linkify_mids('..', \$rv);
-        } else {
-                $refs = references($hdr);
-                my $irt = pop @$refs;
-                if (defined $irt) {
-                        my $html = ascii_html($irt);
-                        my $href = mid_href($irt);
-                        $rv .= "In-Reply-To: &lt;";
-                        $rv .= "<a\nhref=\"../$href/\">$html</a>&gt;\n";
-                }
-        }
-
-        # do not display References: if search is present,
-        # we show the thread skeleton at the bottom, instead.
-        return $rv if $over;
-
-        $refs //= references($hdr);
-        if (@$refs) {
-                @$refs = map { linkify_ref_no_over($_) } @$refs;
-                $rv .= 'References: '. join("\n\t", @$refs) . "\n";
-        }
-        $rv;
-}
-
-# returns a string buffer
+# writes to zbuf
 sub html_footer {
         my ($ctx, $hdr) = @_;
-        my $ibx = $ctx->{-inbox};
         my $upfx = '../';
-        my $skel;
-        my $rv = '<pre>';
-        if ($ibx->over) {
+        my (@related, $skel);
+        my $foot = '<pre>';
+        my $qry = delete $ctx->{-qry};
+        if ($qry && $ctx->{ibx}->isrch) {
+                my $q = ''; # search for either ancestor or descendent patches
+                for (@{$qry->{dfpre}}, @{$qry->{dfpost}}) {
+                        chop if length > 7; # include 1 abbrev "older" patches
+                        $q .= "dfblob:$_ ";
+                }
+                chop $q; # omit trailing SP
+                local $Text::Wrap::columns = COLS;
+                local $Text::Wrap::huge = 'overflow';
+                $q = wrap('', '', $q);
+                my $rows = ($q =~ tr/\n/\n/) + 1;
+                $q = ascii_html($q);
+                $related[0] = <<EOM;
+<form id=related
+action=$upfx
+><pre>find likely ancestor, descendant, or conflicting patches for <a
+href=#t>this message</a>:
+<textarea name=q cols=${\COLS} rows=$rows>$q</textarea>
+<input type=submit value=search
+/>\t(<a href=${upfx}_/text/help/#search>help</a>)</pre></form>
+EOM
+                # TODO: related codesearch
+                # my $csrchv = $ctx->{ibx}->{-csrch} // [];
+                # push @related, '<pre>'.ascii_html(Dumper($csrchv)).'</pre>';
+        }
+        if ($ctx->{ibx}->over) {
                 my $t = ts2str($ctx->{-t_max});
                 my $t_fmt = fmt_ts($ctx->{-t_max});
-                $skel .= <<EOF;
-        other threads:[<a
+                my $fallback = @related ? "\t" : "<a id=related>\t</a>";
+                $skel = <<EOF;
+${fallback}other threads:[<a
 href="$upfx?t=$t">~$t_fmt UTC</a>|<a
 href="$upfx">newest</a>]
 EOF
-
                 thread_skel(\$skel, $ctx, $hdr);
                 my ($next, $prev);
                 my $parent = '       ';
@@ -820,48 +859,32 @@ EOF
 
                 if (my $n = $ctx->{next_msg}) {
                         $n = mid_href($n);
-                        $next = "<a\nhref=\"$upfx$n/\"\nrel=next>next</a>";
+                        $next = qq(<a\nhref="$upfx$n/"\nrel=next>next</a>);
                 }
-                my $u;
                 my $par = $ctx->{parent_msg};
-                if ($par) {
-                        $u = mid_href($par);
-                        $u = "$upfx$u/";
-                }
+                my $u = $par ? $upfx.mid_href($par).'/' : undef;
                 if (my $p = $ctx->{prev_msg}) {
                         $prev = mid_href($p);
                         if ($p && $par && $p eq $par) {
-                                $prev = "<a\nhref=\"$upfx$prev/\"\n" .
+                                $prev = qq(<a\nhref="$upfx$prev/"\n) .
                                         'rel=prev>prev parent</a>';
                                 $parent = '';
                         } else {
-                                $prev = "<a\nhref=\"$upfx$prev/\"\n" .
+                                $prev = qq(<a\nhref="$upfx$prev/"\n) .
                                         'rel=prev>prev</a>';
-                                $parent = " <a\nhref=\"$u\">parent</a>" if $u;
+                                $parent = qq( <a\nhref="$u">parent</a>) if $u;
                         }
                 } elsif ($u) { # unlikely
-                        $parent = " <a\nhref=\"$u\"\nrel=prev>parent</a>";
+                        $parent = qq( <a\nhref="$u"\nrel=prev>parent</a>);
                 }
-                $rv .= "$next $prev$parent ";
+                $foot .= "$next $prev$parent ";
         } else { # unindexed inboxes w/o over
                 $skel = qq( <a\nhref="$upfx">latest</a>);
         }
-        $rv .= qq(<a\nhref="#R">reply</a>);
-        $rv .= $skel;
-        $rv .= '</pre>';
-        $rv .= msg_reply($ctx, $hdr);
-}
-
-sub linkify_ref_no_over {
-        my ($mid) = @_;
-        my $href = mid_href($mid);
-        my $html = ascii_html($mid);
-        "&lt;<a\nhref=\"../$href/\">$html</a>&gt;";
-}
-
-sub anchor_for {
-        my ($msgid) = @_;
-        'm' . id_compress($msgid, 1);
+        # $skel may be big for big threads, don't append it to $foot
+        print { $ctx->zfh } $foot, qq(<a\nhref="#R">reply</a>),
+                                $skel, '</pre>', @related,
+                                msg_reply($ctx, $hdr);
 }
 
 sub ghost_parent {
@@ -922,8 +945,8 @@ sub thread_results {
                         my $tip = splice(@$rootset, $idx, 1);
                         @$rootset = reverse @$rootset;
                         unshift @$rootset, $tip;
-                        $ctx->{sl_note} = strict_loose_note($nr);
                 }
+                $ctx->{sl_note} = strict_loose_note($nr);
         }
         $rootset
 }
@@ -965,7 +988,7 @@ sub skel_dump { # walk_thread callback
                 $$skel .= delete($ctx->{sl_note}) || '';
         }
 
-        my $f = ascii_html($smsg->{from_name});
+        my $f = ascii_html(delete $smsg->{from_name});
         my $obfs_ibx = $ctx->{-obfs_ibx};
         obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx;
 
@@ -1059,11 +1082,13 @@ sub _skel_ghost {
         1;
 }
 
+# note: we favor Date: here because git-send-email increments it
+# to preserve [PATCH $N/$M] ordering in series (it can't control Received:)
 sub sort_ds {
-        [ sort {
+        @{$_[0]} = sort {
                 (eval { $a->topmost->{ds} } || 0) <=>
                 (eval { $b->topmost->{ds} } || 0)
-        } @{$_[0]} ];
+        } @{$_[0]};
 }
 
 # accumulate recent topics if search is supported
@@ -1072,7 +1097,7 @@ sub acc_topic { # walk_thread callback
         my ($ctx, $level, $smsg) = @_;
         my $mid = $smsg->{mid};
         my $has_blob = $smsg->{blob} // do {
-                if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+                if (my $by_mid = $ctx->{ibx}->smsg_by_mid($mid)) {
                         %$smsg = (%$smsg, %$by_mid);
                         1;
                 }
@@ -1080,9 +1105,10 @@ sub acc_topic { # walk_thread callback
         if ($has_blob) {
                 my $subj = subject_normalized($smsg->{subject});
                 $subj = '(no subject)' if $subj eq '';
+                my $ts = $smsg->{ts};
                 my $ds = $smsg->{ds};
                 if ($level == 0) { # new, top-level topic
-                        my $topic = [ $ds, 1, { $subj => $mid }, $subj ];
+                        my $topic = [ $ts, $ds, 1, { $subj => $mid }, $subj ];
                         $ctx->{-cur_topic} = $topic;
                         push @{$ctx->{order}}, $topic;
                         return 1;
@@ -1090,10 +1116,11 @@ sub acc_topic { # walk_thread callback
 
                 # continue existing topic
                 my $topic = $ctx->{-cur_topic}; # should never be undef
-                $topic->[0] = $ds if $ds > $topic->[0];
-                $topic->[1]++; # bump N+ message counter
-                my $seen = $topic->[2];
-                if (scalar(@$topic) == 3) { # parent was a ghost
+                $topic->[0] = $ts if $ts > $topic->[0];
+                $topic->[1] = $ds if $ds > $topic->[1];
+                $topic->[2]++; # bump N+ message counter
+                my $seen = $topic->[3];
+                if (scalar(@$topic) == 4) { # parent was a ghost
                         push @$topic, $subj;
                 } elsif (!defined($seen->{$subj})) {
                         push @$topic, $level, $subj; # @extra messages
@@ -1101,7 +1128,7 @@ sub acc_topic { # walk_thread callback
                 $seen->{$subj} = $mid; # latest for subject
         } else { # ghost message
                 return 1 if $level != 0; # ignore child ghosts
-                my $topic = $ctx->{-cur_topic} = [ -666, 0, {} ];
+                my $topic = $ctx->{-cur_topic} = [ -666, -666, 0, {} ];
                 push @{$ctx->{order}}, $topic;
         }
         1;
@@ -1116,12 +1143,13 @@ sub dump_topics {
         }
 
         my @out;
-        my $ibx = $ctx->{-inbox};
-        my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
-
+        my $obfs_ibx = $ctx->{ibx}->{obfuscate} ? $ctx->{ibx} : undef;
+        if (my $note = delete $ctx->{t_note}) {
+                push @out, $note; # "messages from ... to ..."
+        }
         # sort by recency, this allows new posts to "bump" old topics...
         foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
-                my ($ds, $n, $seen, $top_subj, @extra) = @$topic;
+                my ($ts, $ds, $n, $seen, $top_subj, @extra) = @$topic;
                 @$topic = ();
                 next unless defined $top_subj;  # ghost topic
                 my $mid = delete $seen->{$top_subj};
@@ -1141,13 +1169,11 @@ sub dump_topics {
                         $anchor = '#t'; # thread skeleton
                 }
 
-                my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>);
-                my $atom = qq(<a\nhref="$href/t.atom">Atom</a>);
                 my $s = "<a\nhref=\"$href/T/$anchor\">$top_subj</a>\n" .
-                        " $ds UTC $n - $mbox / $atom\n";
-                for (my $i = 0; $i < scalar(@extra); $i += 2) {
-                        my $level = $extra[$i];
-                        my $subj = $extra[$i + 1]; # already normalized
+                        " $ds UTC $n\n";
+                while (@extra) {
+                        my $level = shift @extra;
+                        my $subj = shift @extra; # already normalized
                         $mid = delete $seen->{$subj};
                         my @subj = split(/ /, $subj);
                         my @next_prev = @subj; # full copy
@@ -1179,7 +1205,11 @@ sub pagination_footer ($$) {
                 $next = $next ? "$next | " : '             | ';
                 $prev .= qq[ | <a\nhref="$latest">latest</a>];
         }
-        "<hr><pre>page: $next$prev</pre>";
+        my $rv = '<hr><pre id=nav>';
+        $rv .= "page: $next$prev\n" if $next || $prev;
+        $rv .= q{- recent:[<b>subjects (threaded)</b>|<a
+href="./topics_new.html">topics (new)</a>|<a
+href="./topics_active.html">topics (active)</a>]</pre>};
 }
 
 sub paginate_recent ($$) {
@@ -1194,23 +1224,30 @@ sub paginate_recent ($$) {
         $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1);
         $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1);
 
-        my $ibx = $ctx->{-inbox};
-        my $msgs = $ibx->recent($opts, $after, $before);
-        my $nr = scalar @$msgs;
-        if ($nr < $lim && defined($after)) {
+        my $msgs = $ctx->{ibx}->over->recent($opts, $after, $before);
+        if (defined($after) && scalar(@$msgs) < $lim) {
                 $after = $before = undef;
-                $msgs = $ibx->recent($opts);
-                $nr = scalar @$msgs;
+                $msgs = $ctx->{ibx}->over->recent($opts);
         }
-        my $more = $nr == $lim;
+        my $more = scalar(@$msgs) == $lim;
         my ($newest, $oldest);
-        if ($nr) {
+        if (@$msgs) {
                 $newest = $msgs->[0]->{ts};
                 $oldest = $msgs->[-1]->{ts};
                 # if we only had $after, our SQL query in ->recent ordered
                 if ($newest < $oldest) {
                         ($oldest, $newest) = ($newest, $oldest);
-                        $more = 0 if defined($after) && $after < $oldest;
+                        $more = undef if defined($after) && $after < $oldest;
+                }
+                if (defined($after // $before)) {
+                        my $n = strftime('%Y-%m-%d %H:%M:%S', gmtime($newest));
+                        my $o = strftime('%Y-%m-%d %H:%M:%S', gmtime($oldest));
+                        $ctx->{t_note} = <<EOM;
+ messages from $o to $n UTC [<a href="#nav">more...</a>]
+EOM
+                        my $s = ts2str($newest);
+                        $ctx->{prev_page} = qq[<a\nhref="?t=$s-"\nrel=prev>] .
+                                                'prev (newer)</a>';
                 }
         }
         if (defined($oldest) && $more) {
@@ -1218,11 +1255,6 @@ sub paginate_recent ($$) {
                 $ctx->{next_page} = qq[<a\nhref="?t=$s"\nrel=next>] .
                                         'next (older)</a>';
         }
-        if (defined($newest) && (defined($before) || defined($after))) {
-                my $s = ts2str($newest);
-                $ctx->{prev_page} = qq[<a\nhref="?t=$s-"\nrel=prev>] .
-                                        'prev (newer)</a>';
-        }
         $msgs;
 }
 
@@ -1230,11 +1262,8 @@ sub paginate_recent ($$) {
 sub index_topics {
         my ($ctx) = @_;
         my $msgs = paginate_recent($ctx, 200); # 200 is our window
-        if (@$msgs) {
-                walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic);
-        }
-        html_oneshot($ctx, dump_topics($ctx), \pagination_footer($ctx, '.'));
-
+        walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic) if @$msgs;
+        html_oneshot($ctx, dump_topics($ctx), pagination_footer($ctx, '.'));
 }
 
 sub thread_adj_level {
@@ -1264,8 +1293,34 @@ sub thread_adj_level {
 sub ghost_index_entry {
         my ($ctx, $level, $node) = @_;
         my ($beg, $end) = thread_adj_level($ctx,  $level);
-        $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{mid})
+        $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{mid} // '?')
                 . '</pre>' . $end;
 }
 
+# /$INBOX/$MSGID/d/ endpoint
+sub diff_msg {
+        my ($ctx) = @_;
+        require PublicInbox::MailDiff;
+        my $ibx = $ctx->{ibx};
+        my $over = $ibx->over or return no_over_html($ctx);
+        my ($id, $prev);
+        my $md = bless { ctx => $ctx }, 'PublicInbox::MailDiff';
+        my $next_arg = $md->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
+        my $smsg = $md->{smsg} = $over->next_by_mid(@$next_arg) or
+                return; # undef == 404
+        $ctx->{-t_max} = $smsg->{ts};
+        $ctx->{-upfx} = '../../';
+        $ctx->{-apfx} = '//'; # fail on to_attr()
+        $ctx->{-linkify} = PublicInbox::Linkify->new;
+        my $mid = ascii_html($smsg->{mid});
+        $ctx->{-title_html} = "diff for duplicates of &lt;$mid&gt;";
+        PublicInbox::WwwStream::html_init($ctx);
+        print { $ctx->{zfh} } '<pre>diff for duplicates of &lt;<a href="../">',
+                                $mid, "</a>&gt;\n\n";
+        sub {
+                $ctx->attach($_[0]->([200, delete $ctx->{-res_hdr}]));
+                $md->begin_mail_diff;
+        };
+}
+
 1;
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 7ec57d8d..d078c5f9 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # used by PublicInbox::View
@@ -7,17 +7,13 @@
 # (or reconstruct) blobs.
 
 package PublicInbox::ViewDiff;
-use 5.010_001;
-use strict;
-use warnings;
-use base qw(Exporter);
-our @EXPORT_OK = qw(flush_diff);
+use v5.12;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(flush_diff uri_escape_path);
 use URI::Escape qw(uri_escape_utf8);
-use PublicInbox::Hval qw(ascii_html to_attr);
+use PublicInbox::Hval qw(ascii_html to_attr utf8_maybe);
 use PublicInbox::Git qw(git_unquote);
 
-sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
-
 my $OID_NULL = '0{7,}';
 my $OID_BLOB = '[a-f0-9]{7,}';
 my $LF = qr!\n!;
@@ -30,7 +26,7 @@ my $DIFFSTAT_COMMENT =
 my $NULL_TO_BLOB = qr/^(index $OID_NULL\.\.)($OID_BLOB)\b/ms;
 my $BLOB_TO_NULL = qr/^index ($OID_BLOB)(\.\.$OID_NULL)\b/ms;
 my $BLOB_TO_BLOB = qr/^index ($OID_BLOB)\.\.($OID_BLOB)/ms;
-my $EXTRACT_DIFFS = qr/(
+our $EXTRACT_DIFFS = qr/(
                 (?:        # begin header stuff, don't capture filenames, here,
                         # but instead wait for the --- and +++ lines.
                         (?:^diff\x20--git\x20$FN\x20$FN$LF)
@@ -41,24 +37,26 @@ my $EXTRACT_DIFFS = qr/(
                 ^index\x20($OID_BLOB)\.\.($OID_BLOB)$ANY*$LF
                 ^---\x20($FN)$LF
                 ^\+{3}\x20($FN)$LF)/msx;
-my $IS_OID = qr/\A$OID_BLOB\z/s;
+our $IS_OID = qr/\A$OID_BLOB\z/s;
+
+sub uri_escape_path {
+        # '/' + $URI::Escape::Unsafe{RFC3986}
+        uri_escape_utf8($_[0], "^A-Za-z0-9\-\._~/");
+}
 
 # link to line numbers in blobs
-sub diff_hunk ($$$$) {
-        my ($dst, $dctx, $ca, $cb) = @_;
+sub diff_hunk ($$$) {
+        my ($dctx, $ca, $cb) = @_;
         my ($oid_a, $oid_b, $spfx) = @$dctx{qw(oid_a oid_b spfx)};
 
         if (defined($spfx) && defined($oid_a) && defined($oid_b)) {
-                my ($n) = ($ca =~ /^-([0-9]+)/);
-                $n = defined($n) ? "#n$n" : '';
+                my $n = ($ca =~ /^-([0-9]+)/) ? "#n$1" : '';
+                my $x = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
 
-                $$dst .= qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
-
-                ($n) = ($cb =~ /^\+([0-9]+)/);
-                $n = defined($n) ? "#n$n" : '';
-                $$dst .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
+                $n = ($cb =~ /^\+([0-9]+)/) ? "#n$1" : '';
+                $x .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
         } else {
-                $$dst .= "@@ $ca $cb @@";
+                "@@ $ca $cb @@";
         }
 }
 
@@ -68,8 +66,8 @@ sub oid ($$$) {
 }
 
 # returns true if diffstat anchor written, false otherwise
-sub anchor0 ($$$$) {
-        my ($dst, $ctx, $fn, $rest) = @_;
+sub anchor0 ($$$) {
+        my ($ctx, $fn, $rest) = @_;
 
         my $orig = $fn;
 
@@ -79,27 +77,24 @@ sub anchor0 ($$$$) {
         # which works well in practice. If projects put "=>", or trailing
         # spaces in filenames, oh well :P
         $fn =~ s/$DIFFSTAT_COMMENT//;
-        $fn =~ s/{(?:.+) => (.+)}/$1/ or $fn =~ s/.* => (.+)/$1/;
+        $fn =~ s/\{(?:.+) => (.+)\}/$1/ or $fn =~ s/.* => (.+)/$1/;
         $fn = git_unquote($fn);
 
         # long filenames will require us to check in anchor1()
         push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!;
 
-        if (my $attr = to_attr($ctx->{-apfx}.$fn)) {
-                $ctx->{-anchors}->{$attr} = 1;
-                my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
-                $$dst .= " <a\nid=i$attr\nhref=#$attr>" .
-                        ascii_html($orig) . '</a>' . $spaces .
+        my $attr = to_attr($ctx->{-apfx}.$fn) // return;
+        $ctx->{-anchors}->{$attr} = 1;
+        my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
+        print { $ctx->{zfh} } " <a\nid=i$attr\nhref=#$attr>",
+                        ascii_html($orig), '</a>', $spaces,
                         $ctx->{-linkify}->to_html($rest);
-                return 1;
-        }
-        undef;
 }
 
 # returns "diff --git" anchor destination, undef otherwise
 sub anchor1 ($$) {
         my ($ctx, $pb) = @_;
-        my $attr = to_attr($ctx->{-apfx}.$pb) or return;
+        my $attr = to_attr($ctx->{-apfx}.$pb) // return;
 
         my $ok = delete $ctx->{-anchors}->{$attr};
 
@@ -107,10 +102,10 @@ sub anchor1 ($$) {
         # assume diffstat and diff output follow the same order,
         # and ignore different ordering (could be malicious input)
         unless ($ok) {
-                my $fn = shift(@{$ctx->{-long_path}}) or return;
+                my $fn = shift(@{$ctx->{-long_path}}) // return;
                 $pb =~ /\Q$fn\E\z/s or return;
-                $attr = to_attr($ctx->{-apfx}.$fn) or return;
-                $ok = delete $ctx->{-anchors}->{$attr} or return;
+                $attr = to_attr($ctx->{-apfx}.$fn) // return;
+                $ok = delete $ctx->{-anchors}->{$attr} // return;
         }
         $ok ? "<a\nhref=#i$attr\nid=$attr>diff</a> --git" : undef
 }
@@ -122,18 +117,14 @@ sub diff_header ($$$) {
         my $dctx = { spfx => $spfx };
 
         # get rid of leading "a/" or "b/" (or whatever --{src,dst}-prefix are)
-        $pa = (split('/', git_unquote($pa), 2))[1] if $pa ne '/dev/null';
-        $pb = (split('/', git_unquote($pb), 2))[1] if $pb ne '/dev/null';
+        $pa = (split(m'/', git_unquote($pa), 2))[1] if $pa ne '/dev/null';
+        $pb = (split(m'/', git_unquote($pb), 2))[1] if $pb ne '/dev/null';
         if ($pa eq $pb && $pb ne '/dev/null') {
-                $dctx->{Q} = "?b=".uri_escape_utf8($pb, UNSAFE);
+                $dctx->{Q} = '?b='.uri_escape_path($pb);
         } else {
                 my @q;
-                if ($pb ne '/dev/null') {
-                        push @q, 'b='.uri_escape_utf8($pb, UNSAFE);
-                }
-                if ($pa ne '/dev/null') {
-                        push @q, 'a='.uri_escape_utf8($pa, UNSAFE);
-                }
+                push @q, 'b='.uri_escape_path($pb) if $pb ne '/dev/null';
+                push @q, 'a='.uri_escape_path($pa) if $pa ne '/dev/null';
                 $dctx->{Q} = '?'.join('&amp;', @q);
         }
 
@@ -143,44 +134,48 @@ sub diff_header ($$$) {
 
         # no need to capture oid_a and oid_b on add/delete,
         # we just linkify OIDs directly via s///e in conditional
-        if (($$x =~ s/$NULL_TO_BLOB/$1 . oid($dctx, $spfx, $2)/e) ||
-                ($$x =~ s/$BLOB_TO_NULL/
-                        'index ' . oid($dctx, $spfx, $1) . $2/e)) {
+        if ($$x =~ s/$NULL_TO_BLOB/$1 . oid($dctx, $spfx, $2)/e) {
+                push @{$ctx->{-qry}->{dfpost}}, $2;
+        } elsif ($$x =~ s/$BLOB_TO_NULL/'index '.oid($dctx, $spfx, $1).$2/e) {
+                push @{$ctx->{-qry}->{dfpre}}, $1;
         } elsif ($$x =~ $BLOB_TO_BLOB) {
                 # modification-only, not add/delete:
                 # linkify hunk headers later using oid_a and oid_b
                 @$dctx{qw(oid_a oid_b)} = ($1, $2);
+                push @{$ctx->{-qry}->{dfpre}}, $1;
+                push @{$ctx->{-qry}->{dfpost}}, $2;
         } else {
                 warn "BUG? <$$x> had no ^index line";
         }
         $$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
-        my $dst = $ctx->{obuf};
-        $$dst .= qq(<span\nclass="head">);
-        $$dst .= $$x;
-        $$dst .= '</span>';
+        print { $ctx->{zfh} } qq(<span\nclass="head">), $$x, '</span>';
         $dctx;
 }
 
 sub diff_before_or_after ($$) {
         my ($ctx, $x) = @_;
-        my $linkify = $ctx->{-linkify};
-        my $dst = $ctx->{obuf};
-        my $anchors = exists($ctx->{-anchors}) ? 1 : 0;
-        for my $y (split(/(^---\n)/sm, $$x)) {
-                if ($y =~ /\A---\n\z/s) {
-                        $$dst .= "---\n"; # all HTML is "\r\n" => "\n"
-                        $anchors |= 2;
-                } elsif ($anchors == 3 && $y =~ /^ [0-9]+ files? changed, /sm) {
-                        # ok, looks like a diffstat, go line-by-line:
-                        for my $l (split(/^/m, $y)) {
-                                if ($l =~ /^ (.+)( +\| .*\z)/s) {
-                                        anchor0($dst, $ctx, $1, $2) and next;
-                                }
-                                $$dst .= $linkify->to_html($l);
-                        }
-                } else { # commit message, notes, etc
-                        $$dst .= $linkify->to_html($y);
+        if (exists $ctx->{-anchors} && $$x =~ # diffstat lines:
+                        /((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
+                        (\x20[0-9]+\x20files?\x20)changed,/msx) {
+                my $pre = substr($$x, 0, $-[0]); # (likely) short prefix
+                substr($$x, 0, $+[0], ''); # sv_chop on $$x ($$x may be long)
+                my @x = ($2, $1);
+                my $lnk = $ctx->{-linkify};
+                my $zfh = $ctx->{zfh};
+                # uninteresting prefix
+                print $zfh $lnk->to_html($pre);
+                for my $l (split(/^/m, pop(@x))) { # $2 per-file stat lines
+                        $l =~ /^ (.+)( +\| .*\z)/s and
+                                anchor0($ctx, $1, $2) and next;
+                         print $zfh $lnk->to_html($l);
                 }
+                my $ch = $ctx->{changed_href} // '#related';
+                print $zfh pop(@x), # $3 /^ \d+ files? /
+                        qq(<a href="$ch">changed</a>,),
+                        # insertions/deletions, notes, commit message, etc:
+                        $lnk->to_html($$x);
+        } else {
+                print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x);
         }
 }
 
@@ -191,9 +186,9 @@ sub flush_diff ($$) {
         my @top = split($EXTRACT_DIFFS, $$cur);
         undef $$cur; # free memory
 
-        my $linkify = $ctx->{-linkify};
-        my $dst = $ctx->{obuf};
+        my $lnk = $ctx->{-linkify};
         my $dctx; # {}, keys: Q, oid_a, oid_b
+        my $zfh = $ctx->zfh;
 
         while (defined(my $x = shift @top)) {
                 if (scalar(@top) >= 4 &&
@@ -201,7 +196,8 @@ sub flush_diff ($$) {
                                 $top[0] =~ $IS_OID) {
                         $dctx = diff_header(\$x, $ctx, \@top);
                 } elsif ($dctx) {
-                        my $after = '';
+                        open(my $afh, '>>:utf8', \(my $after='')) or
+                                die "open: $!";
 
                         # Quiet "Complex regular subexpression recursion limit"
                         # warning.  Perl will truncate matches upon hitting
@@ -216,29 +212,33 @@ sub flush_diff ($$) {
                         for my $s (split(/((?:(?:^\+[^\n]*\n)+)|
                                         (?:(?:^-[^\n]*\n)+)|
                                         (?:^@@ [^\n]+\n))/xsm, $x)) {
+                                undef $x;
                                 if (!defined($dctx)) {
-                                        $after .= $s;
+                                        print $afh $s;
                                 } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
-                                        $$dst .= qq(<span\nclass="hunk">);
-                                        diff_hunk($dst, $dctx, $1, $2);
-                                        $$dst .= $linkify->to_html($s);
-                                        $$dst .= '</span>';
-                                } elsif ($s =~ /\A\+/) {
-                                        $$dst .= qq(<span\nclass="add">);
-                                        $$dst .= $linkify->to_html($s);
-                                        $$dst .= '</span>';
+                                        print $zfh qq(<span\nclass="hunk">),
+                                                diff_hunk($dctx, $1, $2),
+                                                $lnk->to_html($s),
+                                                '</span>';
+                                } elsif ($s =~ /\A\+/) { # $s may be huge
+                                        print $zfh qq(<span\nclass="add">),
+                                                        $lnk->to_html($s),
+                                                        '</span>';
                                 } elsif ($s =~ /\A-- $/sm) { # email sig starts
                                         $dctx = undef;
-                                        $after .= $s;
-                                } elsif ($s =~ /\A-/) {
-                                        $$dst .= qq(<span\nclass="del">);
-                                        $$dst .= $linkify->to_html($s);
-                                        $$dst .= '</span>';
-                                } else {
-                                        $$dst .= $linkify->to_html($s);
+                                        print $afh $s;
+                                } elsif ($s =~ /\A-/) { # $s may be huge
+                                        print $zfh qq(<span\nclass="del">),
+                                                        $lnk->to_html($s),
+                                                        '</span>';
+                                } else { # $s may be huge
+                                        print $zfh $lnk->to_html($s);
                                 }
                         }
-                        diff_before_or_after($ctx, \$after) unless $dctx;
+                        if (!$dctx) {
+                                utf8_maybe($after);
+                                diff_before_or_after($ctx, \$after);
+                        }
                 } else {
                         diff_before_or_after($ctx, \$x);
                 }
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 87927d5e..83a83698 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -1,8 +1,7 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # show any VCS object, similar to "git show"
-# FIXME: we only show blobs for now
 #
 # This can use a "solver" to reconstruct blobs based on git
 # patches (with abbreviated OIDs in the header).  However, the
@@ -15,13 +14,23 @@
 
 package PublicInbox::ViewVCS;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
+use File::Temp 0.19 (); # newdir
 use PublicInbox::SolverGit;
+use PublicInbox::Git;
+use PublicInbox::GitAsyncCat;
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Linkify;
 use PublicInbox::Tmpfile;
-use PublicInbox::Hval qw(ascii_html to_filename);
+use PublicInbox::ViewDiff qw(flush_diff uri_escape_path);
+use PublicInbox::View;
+use PublicInbox::Eml;
+use PublicInbox::OnDestroy;
+use Text::Wrap qw(wrap);
+use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe);
+use POSIX qw(strftime);
+use autodie qw(open seek truncate);
+use Fcntl qw(SEEK_SET);
 my $hl = eval {
         require PublicInbox::HlMod;
         PublicInbox::HlMod->new;
@@ -30,139 +39,565 @@ my $hl = eval {
 my %QP_MAP = ( A => 'oid_a', a => 'path_a', b => 'path_b' );
 our $MAX_SIZE = 1024 * 1024; # TODO: configurable
 my $BIN_DETECT = 8000; # same as git
+my $SHOW_FMT = '--pretty=format:'.join('%n', '%P', '%p', '%H', '%T', '%s', '%f',
+        '%an <%ae>  %ai', '%cn <%ce>  %ci', '%b%x00');
 
-sub html_page ($$$) {
-        my ($ctx, $code, $strref) = @_;
+my %GIT_MODE = (
+        '100644' => ' ', # blob
+        '100755' => 'x', # executable blob
+        '040000' => 'd', # tree
+        '120000' => 'l', # symlink
+        '160000' => 'g', # commit (gitlink)
+);
+
+# TODO: not fork safe, but we don't fork w/o exec in PublicInbox::WWW
+my (@solver_q, $solver_lim);
+my $solver_nr = 0;
+
+sub html_page ($$;@) {
+        my ($ctx, $code) = @_[0, 1];
         my $wcb = delete $ctx->{-wcb};
-        $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
-        my $res = html_oneshot($ctx, $code, $strref);
+        $ctx->{-upfx} //= '../../'; # from "/$INBOX/$OID/s/"
+        my $res = html_oneshot($ctx, $code, @_[2..$#_]);
         $wcb ? $wcb->($res) : $res;
 }
 
+sub dbg_log ($) {
+        my ($ctx) = @_;
+        my $log = delete $ctx->{lh} // die 'BUG: already captured debug log';
+        if (!CORE::seek($log, 0, SEEK_SET)) {
+                warn "seek(log): $!";
+                return '<pre>debug log seek error</pre>';
+        }
+        $log = eval { PublicInbox::IO::read_all $log } // do {
+                warn "read(log): $@";
+                return '<pre>debug log read error</pre>';
+        };
+        return '' if $log eq '';
+        $ctx->{-linkify} //= PublicInbox::Linkify->new;
+        "<hr><pre>debug log:\n\n".
+                $ctx->{-linkify}->to_html($log).'</pre>';
+}
+
 sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn
         my ($r, $bref, $ctx) = @_;
-        my ($res, $logref) = delete @$ctx{qw(-res -logref)};
-        my ($git, $oid, $type, $size, $di) = @$res;
+        my ($git, $oid, $type, $size, $di) = @{$ctx->{-res}};
         my @cl = ('Content-Length', $size);
-        if (!defined $r) { # error
-                html_page($ctx, 500, $logref);
+        if (!defined $r) { # sysread error
+                html_page($ctx, 500, dbg_log($ctx));
         } elsif (index($$bref, "\0") >= 0) {
                 [200, [qw(Content-Type application/octet-stream), @cl] ];
         } else {
-                my $n = bytes::length($$bref);
+                my $n = length($$bref);
                 if ($n >= $BIN_DETECT || $n == $size) {
                         return [200, [ 'Content-Type',
                                 'text/plain; charset=UTF-8', @cl ] ];
                 }
                 if ($r == 0) {
-                        warn "premature EOF on $oid $$logref\n";
-                        return html_page($ctx, 500, $logref);
+                        my $log = dbg_log($ctx);
+                        warn "premature EOF on $oid $log";
+                        return html_page($ctx, 500, $log);
                 }
-                @$ctx{qw(-res -logref)} = ($res, $logref);
                 undef; # bref keeps growing
         }
 }
 
-sub stream_large_blob ($$$$) {
-        my ($ctx, $res, $logref, $fn) = @_;
-        $ctx->{-logref} = $logref;
+sub stream_large_blob ($$) {
+        my ($ctx, $res) = @_;
         $ctx->{-res} = $res;
         my ($git, $oid, $type, $size, $di) = @$res;
-        my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid];
+        my $cmd = $git->cmd('cat-file', $type, $oid);
         my $qsp = PublicInbox::Qspawn->new($cmd);
-        my $env = $ctx->{env};
-        $env->{'qspawn.wcb'} = delete $ctx->{-wcb};
-        $qsp->psgi_return($env, undef, \&stream_blob_parse_hdr, $ctx);
+        $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb};
+        $qsp->psgi_yield($ctx->{env}, undef, \&stream_blob_parse_hdr, $ctx);
 }
 
-sub show_other_result ($$) {
+sub show_other_result ($$) { # future-proofing
         my ($bref, $ctx) = @_;
-        my ($qsp, $logref) = delete @$ctx{qw(-qsp -logref)};
-        if (my $err = $qsp->{err}) {
-                utf8::decode($$err);
-                $$logref .= "git show error: $err";
-                return html_page($ctx, 500, $logref);
+        if (my $qsp_err = delete $ctx->{-qsp_err}) {
+                return html_page($ctx, 500, dbg_log($ctx) .
+                                "git show error:$qsp_err");
         }
         my $l = PublicInbox::Linkify->new;
-        utf8::decode($$bref);
-        $$bref = '<pre>'. $l->to_html($$bref);
-        $$bref .= '</pre><hr>' . $$logref;
-        html_page($ctx, 200, $bref);
+        utf8_maybe($$bref);
+        html_page($ctx, 200, '<pre>', $l->to_html($$bref), '</pre><hr>',
+                dbg_log($ctx));
 }
 
-sub show_other ($$$$) {
-        my ($ctx, $res, $logref, $fn) = @_;
-        my ($git, $oid, $type, $size) = @$res;
-        if ($size > $MAX_SIZE) {
-                $$logref = "$oid is too big to show\n" . $$logref;
-                return html_page($ctx, 200, $logref);
+sub cmt_title { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $ctx_cb) = @_;
+        utf8_maybe($$bref);
+        my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : '';
+        # $ctx_cb is [ $ctx, $cmt_fin ]
+        push @{$ctx_cb->[0]->{-cmt_pt}}, ascii_html($title);
+}
+
+sub do_cat_async {
+        my ($arg, $cb, @req) = @_;
+        # favor git(1) over Gcf2 (libgit2) for SHA-256 support
+        my $ctx = ref $arg eq 'ARRAY' ? $arg->[0] : $arg;
+        $ctx->{git}->cat_async($_, $cb, $arg) for @req;
+        if ($ctx->{env}->{'pi-httpd.async'}) {
+                $ctx->{git}->watch_async;
+        } else { # synchronous, generic PSGI
+                $ctx->{git}->cat_async_wait;
+        }
+}
+
+sub do_check_async {
+        my ($ctx, $cb, @req) = @_;
+        if ($ctx->{env}->{'pi-httpd.async'}) {
+                async_check($ctx, $_, $cb, $ctx) for @req;
+        } else { # synchronous, generic PSGI
+                $ctx->{git}->check_async($_, $cb, $ctx) for @req;
+                $ctx->{git}->check_async_wait;
+        }
+}
+
+sub cmt_hdr_prep { # psgi_qx cb
+        my ($fh, $ctx, $cmt_fin) = @_;
+        return if $ctx->{-qsp_err_h}; # let cmt_fin handle it
+        seek $fh, 0, SEEK_SET;
+        my $buf = do { local $/ = "\0"; <$fh> } // die "readline: $!";
+        chop($buf) eq "\0" or die 'no NUL in git show -z output';
+        utf8_maybe($buf); # non-UTF-8 commits exist
+        chomp $buf;
+        (my $P, my $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
+        truncate $fh, 0;
+        return unless $P;
+        seek $fh, 0, SEEK_SET;
+        my $qsp_p = PublicInbox::Qspawn->new($ctx->{git}->cmd(qw(show
+                --encoding=UTF-8 --pretty=format:%n -M --stat -p), $ctx->{oid}),
+                undef, { 1 => $fh });
+        $qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = '');
+        $qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin);
+        @{$ctx->{-cmt_P}} = split / /, $P;
+        @{$ctx->{-cmt_p}} = split / /, $p; # abbreviated
+        do_cat_async([$ctx, $cmt_fin], \&cmt_title, @{$ctx->{-cmt_P}});
+}
+
+sub read_patchid { # psgi_qx cb
+        my ($bref, $ctx, $cmt_fin) = @_;
+        my ($patchid) = split(/ /, $$bref); # ignore commit
+        $ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
+}
+
+sub cmt_patch_prep { # psgi_qx cb
+        my ($fh, $ctx, $cmt_fin) = @_;
+        return if $ctx->{-qsp_err_p}; # let cmt_fin handle error
+        return if -s $fh > $MAX_SIZE; # too big to show, too big to patch-id
+        seek $fh, 0, SEEK_SET;
+        my $qsp = PublicInbox::Qspawn->new(
+                                $ctx->{git}->cmd(qw(patch-id --stable)),
+                                undef, { 0 => $fh });
+        $qsp->{qsp_err} = \$ctx->{-qsp_err_p};
+        $qsp->psgi_qx($ctx->{env}, undef, \&read_patchid, $ctx, $cmt_fin);
+}
+
+sub ibx_url_for {
+        my ($ctx) = @_;
+        $ctx->{ibx} and return; # fall back to $upfx
+        $ctx->{git} or die 'BUG: no {git}';
+        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
+                if (defined(my $u = $ALL->base_url($ctx->{env}))) {
+                        return wantarray ? ($u) : $u;
+                }
+        }
+        my @ret;
+        if (my $ibx_names = $ctx->{git}->{ibx_names}) {
+                my $by_name = $ctx->{www}->{pi_cfg}->{-by_name};
+                for my $name (@$ibx_names) {
+                        my $ibx = $by_name->{$name} // do {
+                                warn "inbox `$name' no longer exists\n";
+                                next;
+                        };
+                        $ibx->isrch // next;
+                        my $u = defined($ibx->{url}) ?
+                                prurl($ctx->{env}, $ibx->{url}) : $name;
+                        $u .= '/' if substr($u, -1) ne '/';
+                        push @ret, $u;
+                }
+        }
+        wantarray ? (@ret) : $ret[0];
+}
+
+sub cmt_fin { # OnDestroy cb
+        my ($ctx) = @_;
+        my ($eh, $ep) = delete @$ctx{qw(-qsp_err_h -qsp_err_p)};
+        if ($eh || $ep) {
+                my $e = join(' - ', grep defined, $eh, $ep);
+                return html_page($ctx, 500, dbg_log($ctx) .
+                                "git show/patch-id error:$e");
         }
-        my $cmd = ['git', "--git-dir=$git->{git_dir}",
-                qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ];
+        $ctx->{-linkify} //= PublicInbox::Linkify->new;
+        my $upfx = $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
+        my ($H, $T, $s, $f, $au, $co, $bdy) = @{delete $ctx->{cmt_info}};
+        # try to keep author and committer dates lined up
+        my $x = length($au) - length($co);
+        if ($x > 0) {
+                $x = ' ' x $x;
+                $co =~ s/>/>$x/;
+        } elsif ($x < 0) {
+                $x = ' ' x (-$x);
+                $au =~ s/>/>$x/;
+        }
+        $_ = ascii_html($_) for ($au, $co);
+        my $ibx_url = ibx_url_for($ctx) // $upfx;
+        $au =~ s!(&gt; +)([0-9]{4,}-\S+ \S+)!
+                my ($gt, $t) = ($1, $2);
+                $t =~ tr/ :-//d;
+                qq($gt<a
+href="$ibx_url?t=$t"
+title="list contemporary emails">$2</a>)
+                !e;
+
+        $ctx->{-title_html} = $s = $ctx->{-linkify}->to_html($s);
+        my ($P, $p, $pt) = delete @$ctx{qw(-cmt_P -cmt_p -cmt_pt)};
+        $_ = qq(<a href="$upfx$_/s/">).shift(@$p).'</a> '.shift(@$pt) for @$P;
+        if (@$P == 1) {
+                $x = qq{ (<a
+href="$f.patch">patch</a>)\n   <a href=#parent>parent</a> $P->[0]};
+        } elsif (@$P > 1) {
+                $x = qq(\n  <a href=#parents>parents</a> $P->[0]\n);
+                shift @$P;
+                $x .= qq(          $_\n) for @$P;
+                chop $x;
+        } else {
+                $x = ' (<a href=#root_commit>root commit</a>)';
+        }
+        PublicInbox::WwwStream::html_init($ctx);
+        my $zfh = $ctx->zfh;
+        print $zfh <<EOM;
+<pre>   <a href=#commit>commit</a> $H$x
+     <a href=#tree>tree</a> <a href="$upfx$T/s/?b=">$T</a>
+   author $au
+committer $co
+
+<b>$s</b>
+EOM
+        print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
+        undef $bdy; # free memory
+        my $fh = delete $ctx->{patch_fh};
+        if (-s $fh > $MAX_SIZE) {
+                print $zfh '</pre><hr><pre>patch is too large to show</pre>';
+        } else { # prepare flush_diff:
+                seek $fh, 0, SEEK_SET;
+                PublicInbox::IO::read_all $fh, -s _, \$x;
+                utf8_maybe($x);
+                $ctx->{-apfx} = $ctx->{-spfx} = $upfx;
+                $x =~ s/\r?\n/\n/gs;
+                $ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
+                flush_diff($ctx, \$x); # undefs $x
+                # TODO: should there be another textarea which attempts to
+                # search for the exact email which was applied to make this
+                # commit?
+                if (my $qry = delete $ctx->{-qry}) {
+                        my $q = '';
+                        for (@{$qry->{dfpost}}, @{$qry->{dfpre}}) {
+                                # keep blobs as short as reasonable, emails
+                                # are going to be older than what's in git
+                                substr($_, 7, 64, '');
+                                $q .= "dfblob:$_ ";
+                        }
+                        chop $q; # no trailing SP
+                        local $Text::Wrap::columns = PublicInbox::View::COLS;
+                        local $Text::Wrap::huge = 'overflow';
+                        $q = wrap('', '', $q);
+                        my $rows = ($q =~ tr/\n/\n/) + 1;
+                        $q = ascii_html($q);
+                        my $ibx_url = ibx_url_for($ctx);
+                        my $alt;
+                        if (defined $ibx_url) {
+                                $alt = " `$ibx_url'";
+                                $ibx_url =~ m!://! or
+                                        substr($ibx_url, 0, 0, '../../../');
+                                $ibx_url = ascii_html($ibx_url);
+                        } else {
+                                $ibx_url = $upfx;
+                                $alt = '';
+                        }
+                        print $zfh <<EOM;
+</pre><hr><form action="$ibx_url"
+id=related><pre>find related emails, including ancestors/descendants/conflicts
+<textarea name=q cols=${\PublicInbox::View::COLS} rows=$rows>$q</textarea>
+<input type=submit value="search$alt"
+/>\t(<a href="${ibx_url}_/text/help/">help</a>)</pre></form>
+EOM
+                }
+        }
+        chop($x = <<EOM);
+<hr><pre>glossary
+--------
+<dfn
+id=commit>Commit</dfn> objects reference one tree, and zero or more parents.
+
+Single <dfn
+id=parent>parent</dfn> commits can typically generate a patch in
+unified diff format via `git format-patch'.
+
+Multiple <dfn id=parents>parents</dfn> means the commit is a merge.
+
+<dfn id=root_commit>Root commits</dfn> have no ancestor.  Note that it is
+possible to have multiple root commits when merging independent histories.
+
+Every commit references one top-level <dfn id=tree>tree</dfn> object.</pre>
+EOM
+        delete($ctx->{-wcb})->($ctx->html_done($x));
+}
+
+sub stream_patch_parse_hdr { # {parse_hdr} for Qspawn
+        my ($r, $bref, $ctx) = @_;
+        if (!defined $r) { # sysread error
+                html_page($ctx, 500, dbg_log($ctx));
+        } elsif (index($$bref, "\n\n") >= 0) {
+                my $eml = bless { hdr => $bref }, 'PublicInbox::Eml';
+                my $fn = to_filename($eml->header('Subject') // '');
+                $fn = substr($fn // 'PATCH-no-subject', 6); # drop "PATCH-"
+                return [ 200, [ 'Content-Type', 'text/plain; charset=UTF-8',
+                                'Content-Disposition',
+                                qq(inline; filename=$fn.patch) ] ];
+        } elsif ($r == 0) {
+                my $log = dbg_log($ctx);
+                warn "premature EOF on $ctx->{patch_oid} $log";
+                return html_page($ctx, 500, $log);
+        } else {
+                undef; # bref keeps growing until "\n\n"
+        }
+}
+
+sub show_patch ($$) {
+        my ($ctx, $res) = @_;
+        my ($git, $oid) = @$res;
+        my $cmd = $git->cmd(qw(format-patch -1 --stdout -C),
+                "--signature=git format-patch -1 --stdout -C $oid", $oid);
         my $qsp = PublicInbox::Qspawn->new($cmd);
-        my $env = $ctx->{env};
-        $ctx->{-qsp} = $qsp;
-        $ctx->{-logref} = $logref;
-        $qsp->psgi_qx($env, undef, \&show_other_result, $ctx);
+        $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb};
+        $ctx->{patch_oid} = $oid;
+        $qsp->psgi_yield($ctx->{env}, undef, \&stream_patch_parse_hdr, $ctx);
 }
 
-# user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg)
-sub solve_result {
-        my ($res, $ctx) = @_;
-        my ($log, $hints, $fn) = delete @$ctx{qw(log hints fn)};
+sub show_commit ($$) {
+        my ($ctx, $res) = @_;
+        return show_patch($ctx, $res) if ($ctx->{fn} // '') =~ /\.patch\z/;
+        my ($git, $oid) = @$res;
+        # patch-id needs two passes, and we use the initial show to ensure
+        # a patch embedded inside the commit message body doesn't get fed
+        # to patch-id:
+        open $ctx->{patch_fh}, '+>', "$ctx->{-tmp}/show";
+        my $qsp_h = PublicInbox::Qspawn->new($git->cmd('show', $SHOW_FMT,
+                        qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid),
+                        undef, { 1 => $ctx->{patch_fh} });
+        $qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = '');
+        my $cmt_fin = on_destroy \&cmt_fin, $ctx;
+        $ctx->{git} = $git;
+        $ctx->{oid} = $oid;
+        $qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin);
+}
 
-        unless (seek($log, 0, 0)) {
-                $ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n");
-                return html_page($ctx, 500, \'seek error');
+sub show_other ($$) { # just in case...
+        my ($ctx, $res) = @_;
+        my ($git, $oid, $type, $size) = @$res;
+        $size > $MAX_SIZE and return html_page($ctx, 200,
+                ascii_html($type)." $oid is too big to show\n". dbg_log($ctx));
+        my $cmd = $git->cmd(qw(show --encoding=UTF-8
+                        --no-color --no-abbrev), $oid);
+        my $qsp = PublicInbox::Qspawn->new($cmd);
+        $qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
+        $qsp->psgi_qx($ctx->{env}, undef, \&show_other_result, $ctx);
+}
+
+sub show_tree_result ($$) {
+        my ($bref, $ctx) = @_;
+        if (my $qsp_err = delete $ctx->{-qsp_err}) {
+                return html_page($ctx, 500, dbg_log($ctx) .
+                                "git ls-tree -z error:$qsp_err");
         }
-        $log = do { local $/; <$log> };
+        my @ent = split(/\0/, $$bref);
+        my $qp = delete $ctx->{qp};
+        my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
+        my $pfx = $ctx->{-path} // $qp->{b}; # {-path} is from RepoTree
+        $$bref = "<pre><a href=#tree>tree</a> $ctx->{tree_oid}";
+        # $REPO/tree/$path already sets {-upfx}
+        my $upfx = $ctx->{-upfx} //= '../../';
+        if (defined $pfx) {
+                $pfx =~ s!/+\z!!s;
+                if (my $t = $ctx->{-obj}) {
+                        my $t = ascii_html($t);
+                        $$bref .= <<EOM
+\n\$ git ls-tree -l $t        # shows similar output on the CLI
+EOM
+                } elsif ($pfx eq '') {
+                        $$bref .= "  (root)\n";
+                } else {
+                        my $x = ascii_html($pfx);
+                        $pfx .= '/';
+                        $$bref .= qq(  <a href=#path>path</a>: $x</a>\n);
+                }
+        } else {
+                $pfx = '';
+                $$bref .= qq[  (<a href=#path>path</a> unknown)\n];
+        }
+        my ($x, $m, $t, $oid, $sz, $f, $n, $gitlink);
+        $$bref .= "\n        size        name";
+        for (@ent) {
+                ($x, $f) = split(/\t/, $_, 2);
+                undef $_;
+                ($m, $t, $oid, $sz) = split(/ +/, $x, 4);
+                $m = $GIT_MODE{$m} // '?';
+                utf8_maybe($f);
+                $n = ascii_html($f);
+                if ($m eq 'g') { # gitlink submodule commit
+                        $$bref .= "\ng\t\t$n @ <a\nhref=#g>commit</a>$oid";
+                        $gitlink = 1;
+                        next;
+                }
+                my $q = 'b='.ascii_html(uri_escape_path($pfx.$f));
+                if ($m eq 'd') { $n .= '/' }
+                elsif ($m eq 'x') { $n = "<b>$n</b>" }
+                elsif ($m eq 'l') { $n = "<i>$n</i>" }
+                $$bref .= qq(\n$m\t$sz\t<a\nhref="$upfx$oid/s/?$q">$n</a>);
+        }
+        $$bref .= dbg_log($ctx);
+        $$bref .= <<EOM;
+<hr><pre>glossary
+--------
+<dfn
+id=tree>Tree</dfn> objects belong to commits or other tree objects.  Trees may
+reference blobs, sub-trees, or (rarely) commits of submodules.
+
+<dfn
+id=path>Path</dfn> names are stored in tree objects, but trees do not know
+their own path name.  A tree's path name comes from their parent tree,
+or it is the root tree referenced by a commit object.  Thus, this web UI
+relies on the `b=' URI parameter as a hint to display the path name.
+EOM
+
+        $$bref .= <<EOM if $gitlink;
 
-        my $ref = ref($res);
+<dfn title="submodule commit"
+id=g>Commit</dfn> objects may be stored in trees to reference submodules.</pre>
+EOM
+        chop $$bref;
+        html_page($ctx, 200, $$bref);
+}
+
+sub show_tree ($$) { # also used by RepoTree
+        my ($ctx, $res) = @_;
+        my ($git, $oid, undef, $size) = @$res;
+        $size > $MAX_SIZE and return html_page($ctx, 200,
+                        "tree $oid is too big to show\n". dbg_log($ctx));
+        my $cmd = $git->cmd(qw(ls-tree -z -l --no-abbrev), $oid);
+        my $qsp = PublicInbox::Qspawn->new($cmd);
+        $ctx->{tree_oid} = $oid;
+        $qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
+        $qsp->psgi_qx($ctx->{env}, undef, \&show_tree_result, $ctx);
+}
+
+# returns seconds offset from git TZ offset
+sub tz_adj ($) {
+        my ($tz) = @_; # e.g "-0700"
+        $tz = int($tz);
+        my $mm = $tz < 0 ? -$tz : $tz;
+        $mm = int($mm / 100) * 60 + ($mm % 100);
+        $mm = $tz < 0 ? -$mm : $mm;
+        ($mm * 60);
+}
+
+sub show_tag_result { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $ctx) = @_;
+        utf8_maybe($$bref);
         my $l = PublicInbox::Linkify->new;
-        $log = '<pre>debug log:</pre><hr /><pre>' .
-                $l->to_html($log) . '</pre>';
+        $$bref = $l->to_html($$bref);
+        $$bref =~ s!^object ([a-f0-9]+)!object <a
+href=../../$1/s/>$1</a>!;
+
+        $$bref =~ s/^(tagger .*&gt; )([0-9]+) ([\-+]?[0-9]+)/$1.strftime(
+                '%Y-%m-%d %H:%M:%S', gmtime($2 + tz_adj($3)))." $3"/sme;
+        # TODO: download link
+        html_page($ctx, 200, '<pre>', $$bref, '</pre>', dbg_log($ctx));
+}
 
-        $res or return html_page($ctx, 404, \$log);
-        $ref eq 'ARRAY' or return html_page($ctx, 500, \$log);
+sub show_tag ($$) {
+        my ($ctx, $res) = @_;
+        my ($git, $oid) = @$res;
+        $ctx->{git} = $git;
+        do_cat_async($ctx, \&show_tag_result, $oid);
+}
+
+# user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg)
+sub solve_result {
+        my ($res, $ctx) = @_;
+        my $hints = delete $ctx->{hints};
+        $res or return html_page($ctx, 404, 'Not found', dbg_log($ctx));
+        ref($res) eq 'ARRAY' or
+                return html_page($ctx, 500, 'Internal error', dbg_log($ctx));
 
         my ($git, $oid, $type, $size, $di) = @$res;
-        return show_other($ctx, $res, \$log, $fn) if $type ne 'blob';
-        my $path = to_filename($di->{path_b} // $hints->{path_b} // 'blob');
-        my $raw_link = "(<a\nhref=$path>raw</a>)";
+        return show_commit($ctx, $res) if $type eq 'commit';
+        return show_tree($ctx, $res) if $type eq 'tree';
+        return show_tag($ctx, $res) if $type eq 'tag';
+        return show_other($ctx, $res) if $type ne 'blob';
+        my $fn = $di->{path_b} // $hints->{path_b};
+        my $paths = $ctx->{-paths} //= do {
+                my $path = to_filename($fn // 'blob') // 'blob';
+                my $raw_more = qq[(<a\nhref="$path">raw</a>)];
+                my @def;
+
+                # XXX not sure if this is the correct wording
+                if (defined($fn)) {
+                        $raw_more .= qq(
+name: ${\ascii_html($fn)} \t # note: path name is non-authoritative<a
+href="#pathdef" id=top>(*)</a>);
+                        $def[0] = "<hr><pre\nid=pathdef>" .
+'(*) Git path names are given by the tree(s) the blob belongs to.
+    Blobs themselves have no identifier aside from the hash of its contents.'.
+qq(<a\nhref="#top">^</a></pre>);
+                }
+                [ $path, $raw_more, @def ];
+        };
+        $ctx->{-q_value_html} //= do {
+                my $s = defined($fn) ? 'dfn:'.ascii_html($fn).' ' : '';
+                $s.'dfpost:'.substr($oid, 0, 7);
+        };
+
         if ($size > $MAX_SIZE) {
-                return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn;
-                $log = "<pre><b>Too big to show, download available</b>\n" .
-                        "$oid $type $size bytes $raw_link</pre>" . $log;
-                return html_page($ctx, 200, \$log);
+                return stream_large_blob($ctx, $res) if defined $ctx->{fn};
+                return html_page($ctx, 200, <<EOM . dbg_log($ctx));
+<pre><b>Too big to show, download available</b>
+blob $oid $size bytes $paths->[1]</pre>
+EOM
         }
+        bless $ctx, 'PublicInbox::WwwStream'; # for DESTROY
+        $ctx->{git} = $git;
+        do_cat_async($ctx, \&show_blob, $oid);
+}
 
-        my $blob = $git->cat_file($oid);
-        if (!$blob) { # WTF?
+sub show_blob { # git->cat_async callback
+        my ($blob, $oid, $type, $size, $ctx) = @_;
+        if (!$blob) {
                 my $e = "Failed to retrieve generated blob ($oid)";
-                $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n");
-                $log = "<pre><b>$e</b></pre>" . $log;
-                return html_page($ctx, 500, \$log);
+                warn "$e ($ctx->{git}->{git_dir}) type=$type";
+                return html_page($ctx, 500, "<pre><b>$e</b></pre>".dbg_log($ctx))
         }
 
         my $bin = index(substr($$blob, 0, $BIN_DETECT), "\0") >= 0;
-        if (defined $fn) {
+        if (defined $ctx->{fn}) {
                 my $h = [ 'Content-Length', $size, 'Content-Type' ];
                 push(@$h, ($bin ? 'application/octet-stream' : 'text/plain'));
                 return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
         }
 
-        if ($bin) {
-                $log = "<pre>$oid $type $size bytes (binary)" .
-                        " $raw_link</pre>" . $log;
-                return html_page($ctx, 200, \$log);
-        }
+        my ($path, $raw_more, @def) = @{delete $ctx->{-paths}};
+        $bin and return html_page($ctx, 200,
+                                "<pre>blob $oid $size bytes (binary)" .
+                                " $raw_more</pre>".dbg_log($ctx));
 
         # TODO: detect + convert to ensure validity
-        utf8::decode($$blob);
+        utf8_maybe($$blob);
         my $nl = ($$blob =~ s/\r?\n/\n/sg);
         my $pad = length($nl);
 
-        $l->linkify_1($$blob);
+        ($ctx->{-linkify} //= PublicInbox::Linkify->new)->linkify_1($$blob);
         my $ok = $hl->do_hl($blob, $path) if $hl;
         if ($ok) {
                 $blob = $ok;
@@ -171,38 +606,63 @@ sub solve_result {
         }
 
         # using some of the same CSS class names and ids as cgit
-        $log = "<pre>$oid $type $size bytes $raw_link</pre>" .
+        my $x = "<pre>blob $oid $size bytes $raw_more</pre>" .
                 "<hr /><table\nclass=blob>".
-                "<tr><td\nclass=linenumbers><pre>" . join('', map {
-                        sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
-                } (1..$nl)) . '</pre></td>' .
-                '<td><pre> </pre></td>'. # pad for non-CSS users
-                "<td\nclass=lines><pre\nstyle='white-space:pre'><code>" .
-                $l->linkify_2($$blob) .
-                '</code></pre></td></tr></table>' . $log;
+                "<tr><td\nclass=linenumbers><pre>";
+        # scratchpad in this loop is faster here than `printf $zfh':
+        $x .= sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_) for (1..$nl);
+        $x .= '</pre></td><td><pre> </pre></td>'. # pad for non-CSS users
+                "<td\nclass=lines><pre\nstyle='white-space:pre'><code>";
+        html_page($ctx, 200, $x, $ctx->{-linkify}->linkify_2($$blob),
+                '</code></pre></td></tr></table>'.dbg_log($ctx), @def);
+}
 
-        html_page($ctx, 200, \$log);
+sub start_solver ($) {
+        my ($ctx) = @_;
+        while (my ($from, $to) = each %QP_MAP) {
+                my $v = $ctx->{qp}->{$from} // next;
+                $ctx->{hints}->{$to} = $v if $v ne '';
+        }
+        $ctx->{-next_solver} = on_destroy \&next_solver;
+        ++$solver_nr;
+        $ctx->{-tmp} = File::Temp->newdir("solver.$ctx->{oid_b}-XXXX",
+                                                TMPDIR => 1);
+        $ctx->{lh} or open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log";
+        my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
+                                                \&solve_result, $ctx);
+        $solver->{limiter} = $solver_lim;
+        $solver->{gits} //= [ $ctx->{git} ];
+        $solver->{tmp} = $ctx->{-tmp}; # share tmpdir
+        # PSGI server will call this immediately and give us a callback (-wcb)
+        $solver->solve(@$ctx{qw(env lh oid_b hints)});
+}
+
+# run the next solver job when done and DESTROY-ed
+sub next_solver {
+        --$solver_nr;
+        # XXX FIXME: client may've disconnected if it waited a long while
+        start_solver(shift(@solver_q) // return);
+}
+
+sub may_start_solver ($) {
+        my ($ctx) = @_;
+        $solver_lim //= $ctx->{www}->{pi_cfg}->limiter('codeblob');
+        if ($solver_nr >= $solver_lim->{max}) {
+                @solver_q > 128 ? html_page($ctx, 503, 'too busy')
+                                : push(@solver_q, $ctx);
+        } else {
+                start_solver($ctx);
+        }
 }
 
 # GET /$INBOX/$GIT_OBJECT_ID/s/
 # GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
 sub show ($$;$) {
         my ($ctx, $oid_b, $fn) = @_;
-        my $qp = $ctx->{qp};
-        my $hints = $ctx->{hints} = {};
-        while (my ($from, $to) = each %QP_MAP) {
-                defined(my $v = $qp->{$from}) or next;
-                $hints->{$to} = $v if $v ne '';
-        }
-
-        $ctx->{'log'} = tmpfile("solve.$oid_b");
-        $ctx->{fn} = $fn;
-        my $solver = PublicInbox::SolverGit->new($ctx->{-inbox},
-                                                \&solve_result, $ctx);
-        # PSGI server will call this immediately and give us a callback (-wcb)
+        @$ctx{qw(oid_b fn)} = ($oid_b, $fn);
         sub {
                 $ctx->{-wcb} = $_[0]; # HTTP write callback
-                $solver->solve($ctx->{env}, $ctx->{log}, $oid_b, $hints);
+                may_start_solver $ctx;
         };
 }
 
diff --git a/lib/PublicInbox/WQBlocked.pm b/lib/PublicInbox/WQBlocked.pm
new file mode 100644
index 00000000..8d931fa9
--- /dev/null
+++ b/lib/PublicInbox/WQBlocked.pm
@@ -0,0 +1,48 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# non-blocking workqueues, currently used by LeiNoteEvent to track renames
+package PublicInbox::WQBlocked;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLOUT EPOLLONESHOT);
+use PublicInbox::IPC;
+use Carp ();
+
+sub new {
+        my ($cls, $wq, $buf) = @_;
+        my $self = bless { msgq => [$buf], }, $cls;
+        $wq->{wqb} = $self->SUPER::new($wq->{-wq_s1}, EPOLLOUT|EPOLLONESHOT);
+}
+
+sub flush_send {
+        my ($self) = @_;
+        push(@{$self->{msgq}}, $_[1]) if defined($_[1]);
+        while (defined(my $buf = shift @{$self->{msgq}})) {
+                if (ref($buf) eq 'CODE') {
+                        $buf->($self); # could be \&PublicInbox::DS::close
+                } else {
+                        my $wq_s1 = $self->{sock};
+                        my $n = $PublicInbox::IPC::send_cmd->($wq_s1, [], $buf,
+                                                                0);
+                        next if defined($n);
+                        Carp::croak("sendmsg: $!") unless $!{EAGAIN};
+                        PublicInbox::DS::epwait($wq_s1, EPOLLOUT|EPOLLONESHOT);
+                        unshift @{$self->{msgq}}, $buf;
+                        last; # wait for ->event_step
+                }
+        }
+}
+
+sub enq_close { flush_send($_[0], $_[0]->can('close')) }
+
+sub event_step { # called on EPOLLOUT wakeup
+        my ($self) = @_;
+        eval { flush_send($self) } if $self->{sock};
+        if ($@) {
+                warn $@;
+                $self->close;
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/WQWorker.pm b/lib/PublicInbox/WQWorker.pm
new file mode 100644
index 00000000..950bd170
--- /dev/null
+++ b/lib/PublicInbox/WQWorker.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for PublicInbox::IPC wq_* (work queue) workers
+package PublicInbox::WQWorker;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE);
+use Errno qw(EAGAIN ECONNRESET);
+use IO::Handle (); # blocking
+
+sub new {
+        my ($cls, $wq, $sock) = @_;
+        $sock->blocking(0);
+        my $self = bless { sock => $sock, wq => $wq }, $cls;
+        $self->SUPER::new($sock, EPOLLEXCLUSIVE|EPOLLIN);
+        $self;
+}
+
+sub event_step {
+        my ($self) = @_;
+        my $n = $self->{wq}->recv_and_run($self->{sock}) and return;
+        unless (defined $n) {
+                return if $! == EAGAIN;
+                warn "recvmsg: $!" if $! != ECONNRESET;
+        }
+        $self->{sock} == $self->{wq}->{-wq_s2} and
+                $self->{wq}->wq_atexit_child;
+        $self->close; # PublicInbox::DS::close
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e3b589cb..289599b8 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Main web interface for mailing list archives
@@ -11,11 +11,10 @@
 # - Must not rely on static content
 # - UTF-8 is only for user-content, 7-bit US-ASCII for us
 package PublicInbox::WWW;
-use 5.010_001;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::Config;
+use PublicInbox::Git;
 use PublicInbox::Hval;
 use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw(mid_escape);
@@ -25,16 +24,15 @@ use PublicInbox::WwwStatic qw(r path_info_raw);
 use PublicInbox::Eml;
 
 # TODO: consider a routing tree now that we have more endpoints:
-our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
+our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
 our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,}!;
 
 sub new {
-        my ($class, $pi_config) = @_;
-        $pi_config ||= PublicInbox::Config->new;
-        bless { pi_config => $pi_config }, $class;
+        my ($class, $pi_cfg) = @_;
+        bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
 }
 
 # backwards compatibility, do not use
@@ -48,15 +46,21 @@ sub call {
         my $ctx = { env => $env, www => $self };
 
         # we don't care about multi-value
-        %{$ctx->{qp}} = map {
-                utf8::decode($_);
-                tr/+/ /;
-                my ($k, $v) = split('=', $_, 2);
-                $v = uri_unescape($v // '');
-                # none of the keys we care about will need escaping
-                $k => $v;
-        } split(/[&;]+/, $env->{QUERY_STRING});
-
+        # '0' isn't a QUERY_STRING we care about
+        if (my $qs = $env->{QUERY_STRING}) {
+                utf8::decode($qs);
+                $qs =~ tr/+/ /;
+                %{$ctx->{qp}} = map {
+                        # we only use single-char query param keys
+                        if (s/\A([A-Za-z])=//) {
+                                $1 => uri_unescape($_)
+                        } elsif (/\A[a-z]\z/) { # some boolean options
+                                $_ => ''
+                        } else {
+                                () # ignored
+                        }
+                } split(/[&;]+/, $qs);
+        }
         my $path_info = path_info_raw($env);
         my $method = $env->{REQUEST_METHOD};
 
@@ -68,7 +72,13 @@ sub call {
                                 serve_git($ctx, $epoch, $path);
                 } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
                         return get_altid_dump($ctx, $1, $2);
-                } elsif ($path_info =~ m!$INBOX_RE/!o) {
+                } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
+                        my ($idx, $fn) = ($3, $4);
+                        return invalid_inbox_mid($ctx, $1, $2) ||
+                                get_attach($ctx, $idx, $fn);
+                } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/\z!o) {
+                        return invalid_inbox_mid($ctx, $1, $2) || mbox_results($ctx);
+                } elsif ($path_info =~ m!$INBOX_RE/\z!o) {
                         return invalid_inbox($ctx, $1) || mbox_results($ctx);
                 }
         }
@@ -91,6 +101,9 @@ sub call {
                 invalid_inbox($ctx, $1) || get_atom($ctx);
         } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
                 invalid_inbox($ctx, $1) || get_new($ctx);
+        } elsif ($path_info =~
+                        m!$INBOX_RE/topics_(new|active)\.(atom|html)\z!o) {
+                get_topics($ctx, $1, $2, $3);
         } elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
                 get_description($ctx, $1);
         } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
@@ -135,7 +148,8 @@ sub call {
         # convenience redirects order matters
         } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
                 r301($ctx, $1, $2);
-
+        } elsif ($path_info =~ m!\A/\+/([a-zA-Z0-9_\-\.]+)\.css\z!) {
+                get_css($ctx, undef, $1); # for WwwListing
         } else {
                 legacy_redirects($ctx, $path_info);
         }
@@ -169,25 +183,18 @@ sub preload {
                 eval "require PublicInbox::$_;";
         }
         if (ref($self)) {
-                my $pi_config = $self->{pi_config};
-                if (defined($pi_config->{'publicinbox.cgitrc'})) {
-                        $pi_config->limiter('-cgit');
+                my $pi_cfg = $self->{pi_cfg};
+                if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
+                        $pi_cfg->limiter('-cgit');
                 }
+                $pi_cfg->ALL and require PublicInbox::Isearch;
                 $self->cgit;
+                $self->coderepo;
                 $self->stylesheets_prepare($_) for ('', '../', '../../');
                 $self->news_www;
-                $pi_config->each_inbox(\&preload_inbox);
         }
 }
 
-sub preload_inbox {
-        my $ibx = shift;
-        $ibx->altid_map;
-        $ibx->cloneurl;
-        $ibx->description;
-        $ibx->base_url;
-}
-
 # private functions below
 
 sub r404 {
@@ -201,18 +208,29 @@ sub r404 {
 
 sub news_cgit_fallback ($) {
         my ($ctx) = @_;
-        my $www = $ctx->{www};
-        my $env = $ctx->{env};
-        my $res = $www->news_www->call($env);
-        $res->[0] == 404 ? $www->cgit->call($env) : $res;
+        my $res = $ctx->{www}->news_www->call($ctx->{env});
+
+        $res->[0] == 404 and ($ctx->{www}->{cgit_fallback} //= do {
+                my $c = $ctx->{www}->{pi_cfg}->{'publicinbox.cgit'} // 'first';
+                $c ne 'first' # `fallback' and `rewrite' => true
+        } // 0) and $res = $ctx->{www}->coderepo->srv($ctx);
+
+        ref($res) eq 'ARRAY' && $res->[0] == 404 and
+                $res = $ctx->{www}->cgit->call($ctx->{env}, $ctx);
+
+        ref($res) eq 'ARRAY' && $res->[0] == 404 &&
+                        !$ctx->{www}->{cgit_fallback} and
+                $res = $ctx->{www}->coderepo->srv($ctx);
+        $res;
 }
 
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox ($$) {
         my ($ctx, $inbox) = @_;
-        my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
+        my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
+                        $ctx->{www}->{pi_cfg}->lookup_ei($inbox);
         if (defined $ibx) {
-                $ctx->{-inbox} = $ibx;
+                $ctx->{ibx} = $ibx;
                 return;
         }
 
@@ -230,11 +248,11 @@ sub invalid_inbox_mid {
         return $ret if $ret;
 
         my $mid = $ctx->{mid} = uri_unescape($mid_ue);
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
                 my ($x2, $x38) = ($1, $2);
                 # this is horrifically wasteful for legacy URLs:
-                my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
+                my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
                 my $s = PublicInbox::Eml->new($str);
                 $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
                 return r301($ctx, $inbox, mid_escape($mid));
@@ -256,6 +274,13 @@ sub get_new {
         PublicInbox::Feed::new_html($ctx);
 }
 
+# /$INBOX/topics_(new|active).(html|atom)
+sub get_topics {
+        my ($ctx, $ibx_name, $category, $type) = @_;
+        require PublicInbox::WwwTopics;
+        PublicInbox::WwwTopics::response($ctx, $ibx_name, $category, $type);
+}
+
 # /$INBOX/?r=$GIT_COMMIT                 -> HTML only
 sub get_index {
         my ($ctx) = @_;
@@ -272,7 +297,7 @@ sub get_index {
 sub get_mid_txt {
         my ($ctx) = @_;
         require PublicInbox::Mbox;
-        PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
+        PublicInbox::Mbox::emit_raw($ctx) || r(404);
 }
 
 # /$INBOX/$MESSAGE_ID/                   -> HTML content (short quotes)
@@ -285,7 +310,7 @@ sub get_mid_html {
 # /$INBOX/$MESSAGE_ID/t/
 sub get_thread {
         my ($ctx, $flat) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         $ctx->{flat} = $flat;
         require PublicInbox::View;
         PublicInbox::View::thread_html($ctx);
@@ -310,6 +335,7 @@ sub get_vcs_object ($$$;$) {
         my ($ctx, $inbox, $oid, $filename) = @_;
         my $r404 = invalid_inbox($ctx, $inbox);
         return $r404 if $r404;
+        return r(404) if !$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx});
         require PublicInbox::ViewVCS;
         PublicInbox::ViewVCS::show($ctx, $oid, $filename);
 }
@@ -323,11 +349,12 @@ sub get_altid_dump {
 }
 
 sub need {
-        my ($ctx, $extra) = @_;
+        my ($ctx, $extra, $upref) = @_;
         require PublicInbox::WwwStream;
-        PublicInbox::WwwStream::html_oneshot($ctx, 501, \<<EOF);
+        $upref //= '../';
+        PublicInbox::WwwStream::html_oneshot($ctx, 501, <<EOF);
 <pre>$extra is not available for this public-inbox
-<a\nhref="../">Return to index</a></pre>
+<a\nhref="$upref">Return to index</a></pre>
 EOF
 }
 
@@ -338,7 +365,7 @@ EOF
 # especially on older systems.  Stick to zlib since that's what git uses.
 sub get_thread_mbox {
         my ($ctx, $sfx) = @_;
-        my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        my $over = $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Mbox;
         PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
 }
@@ -347,7 +374,7 @@ sub get_thread_mbox {
 # /$INBOX/$MESSAGE_ID/t.atom                  -> thread as Atom feed
 sub get_thread_atom {
         my ($ctx) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Feed;
         PublicInbox::Feed::generate_thread_atom($ctx);
 }
@@ -412,11 +439,11 @@ sub legacy_redirects {
 
 sub r301 {
         my ($ctx, $inbox, $mid_ue, $suffix) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         unless ($ibx) {
                 my $r404 = invalid_inbox($ctx, $inbox);
                 return $r404 if $r404;
-                $ibx = $ctx->{-inbox};
+                $ibx = $ctx->{ibx};
         }
         my $url = $ibx->base_url($ctx->{env});
         my $qs = $ctx->{env}->{QUERY_STRING};
@@ -447,13 +474,17 @@ sub msg_page {
 
         # legacy, but no redirect for compatibility:
         'f/' eq $e and return get_mid_html($ctx);
+        if ($e eq 'd/') {
+                require PublicInbox::View;
+                return PublicInbox::View::diff_msg($ctx);
+        }
         r404($ctx);
 }
 
 sub serve_git {
         my ($ctx, $epoch, $path) = @_;
         my $env = $ctx->{env};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git;
         $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
 }
@@ -461,7 +492,7 @@ sub serve_git {
 sub mbox_results {
         my ($ctx) = @_;
         if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
-                $ctx->{-inbox}->search or return need($ctx, 'search');
+                $ctx->{ibx}->isrch or return need($ctx, 'search');
                 require PublicInbox::SearchView;
                 return PublicInbox::SearchView::mbox_results($ctx);
         }
@@ -478,24 +509,29 @@ sub serve_mbox_range {
 
 sub news_www {
         my ($self) = @_;
-        $self->{news_www} ||= do {
+        $self->{news_www} //= do {
                 require PublicInbox::NewsWWW;
-                PublicInbox::NewsWWW->new($self->{pi_config});
+                PublicInbox::NewsWWW->new($self->{pi_cfg});
         }
 }
 
 sub cgit {
         my ($self) = @_;
-        $self->{cgit} ||= do {
-                my $pi_config = $self->{pi_config};
-
-                if (defined($pi_config->{'publicinbox.cgitrc'})) {
+        $self->{cgit} //=
+                (defined($self->{pi_cfg}->{'publicinbox.cgitrc'}) ? do {
                         require PublicInbox::Cgit;
-                        PublicInbox::Cgit->new($pi_config);
-                } else {
+                        PublicInbox::Cgit->new($self->{pi_cfg});
+                } : undef) // do {
                         require Plack::Util;
                         Plack::Util::inline_object(call => sub { r404() });
-                }
+                };
+}
+
+sub coderepo {
+        my ($self) = @_;
+        $self->{coderepo} //= do {
+                require PublicInbox::WwwCoderepo;
+                PublicInbox::WwwCoderepo->new($self->{pi_cfg});
         }
 }
 
@@ -505,7 +541,7 @@ sub get_inbox_manifest ($$$) {
         my $r404 = invalid_inbox($ctx, $inbox);
         return $r404 if $r404;
         require PublicInbox::ManifestJsGz;
-        PublicInbox::ManifestJsGz->response($ctx);
+        PublicInbox::ManifestJsGz::per_inbox($ctx);
 }
 
 sub get_attach {
@@ -537,7 +573,7 @@ sub stylesheets_prepare ($$) {
         } || sub { $_[0] };
 
         my $css_map = {};
-        my $stylesheets = $self->{pi_config}->{css} || [];
+        my $stylesheets = $self->{pi_cfg}->{css} || [];
         my $links = [];
         my $inline_ok = 1;
 
@@ -564,9 +600,9 @@ sub stylesheets_prepare ($$) {
                                 next;
                         };
                         my $ctime = 0;
-                        my $local = do { local $/; <$fh> };
+                        my $local = PublicInbox::IO::read_all $fh; # sets _
                         if ($local =~ /\S/) {
-                                $ctime = sprintf('%x',(stat($fh))[10]);
+                                $ctime = sprintf('%x',(stat(_))[10]);
                                 $local = $mini->($local);
                         }
 
@@ -628,24 +664,25 @@ sub style {
         };
 }
 
-# /$INBOX/$KEY.css endpoint
+# /$INBOX/$KEY.css and /+/$KEY.css endpoints
 # CSS is configured globally for all inboxes, but we access them on
 # a per-inbox basis.  This allows administrators to setup per-inbox
 # static routes to intercept the request before it hits PSGI
+# inbox == undef => top-level WwwListing
 sub get_css ($$$) {
         my ($ctx, $inbox, $key) = @_;
-        my $r404 = invalid_inbox($ctx, $inbox);
+        my $r404 = defined($inbox) ? invalid_inbox($ctx, $inbox) : undef;
         return $r404 if $r404;
         my $self = $ctx->{www};
-        my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+        my $css_map = $self->{-css_map} ||
+                stylesheets_prepare($self, defined($inbox) ? '' : '+/');
         my $css = $css_map->{$key};
-        if (!defined($css) && $key eq 'userContent') {
+        if (!defined($css) && defined($inbox) && $key eq 'userContent') {
                 my $env = $ctx->{env};
-                $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+                $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
         }
         defined $css or return r404();
-        my $h = [ 'Content-Length', bytes::length($css),
-                'Content-Type', 'text/css' ];
+        my $h = [ 'Content-Length', length($css), 'Content-Type', 'text/css' ];
         PublicInbox::GitHTTPBackend::cache_one_year($h);
         [ 200, $h, [ $css ] ];
 }
@@ -653,10 +690,20 @@ sub get_css ($$$) {
 sub get_description {
         my ($ctx, $inbox) = @_;
         invalid_inbox($ctx, $inbox) || do {
-                my $d = $ctx->{-inbox}->description . "\n";
-                [ 200, [ 'Content-Length', bytes::length($d),
+                my $d = $ctx->{ibx}->description . "\n";
+                utf8::encode($d);
+                [ 200, [ 'Content-Length', length($d),
                         'Content-Type', 'text/plain' ], [ $d ] ];
         };
 }
 
+sub event_step { # called via requeue
+        my ($self) = @_;
+        # gzf = PublicInbox::GzipFilter == $ctx
+        my $gzf = shift(@{$self->{-low_prio_q}}) // return;
+        PublicInbox::DS::requeue($self) if scalar(@{$self->{-low_prio_q}});
+        my $http = $gzf->{env}->{'psgix.io'}; # PublicInbox::HTTP
+        $http->next_step($gzf->can('async_next'));
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pod b/lib/PublicInbox/WWW.pod
index 30fe602d..b55f010e 100644
--- a/lib/PublicInbox/WWW.pod
+++ b/lib/PublicInbox/WWW.pod
@@ -20,13 +20,14 @@ The PSGI web interface for public-inbox.
 
 Using this directly is not needed unless you wish to customize
 your public-inbox PSGI deployment or are using a PSGI server
-other than L<public-inbox-httpd(1)>.
+other than L<public-inbox-netd(1)> (C<-netd>) /
+L<public-inbox-httpd(1)> (C<-httpd>)
 
-While this PSGI application works with all PSGI/Plack web
+While this PSGI application should work with all PSGI/Plack web
 servers such as L<starman(1)>, L<starlet(1)> or L<twiggy(1)>;
-PublicInbox::WWW takes advantage of currently-undocumented APIs
-of L<public-inbox-httpd(1)> to improve fairness when serving
-large responses for thread views and git clones.
+PublicInbox::WWW takes advantage of internal APIs of C<-netd>
+and C<-httpd> to improve fairness when serving large responses
+for thread views, mbox downloads, and git clones.
 
 =head1 ENVIRONMENT
 
@@ -43,14 +44,15 @@ Used to override the default "~/.public-inbox/config" value.
 Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
 
 The mail archives are hosted at L<https://public-inbox.org/meta/>
-and L<http://hjrcffqmbrq6wope.onion/meta/>
+and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright (C) 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright (C) all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
 
 =head1 SEE ALSO
 
-L<http://plackperl.org/>, L<Plack>, L<public-inbox-httpd(1)>
+L<http://plackperl.org/>, L<Plack>, L<public-inbox-netd(1)>,
+L<public-inbox-httpd(1)>
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 8bbce929..eb90d353 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -1,21 +1,22 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # ref: https://cr.yp.to/proto/maildir.html
-#        http://wiki2.dovecot.org/MailboxFormat/Maildir
+#        https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::Watch;
 use strict;
 use v5.10.1;
 use PublicInbox::Eml;
-use PublicInbox::InboxWritable qw(eml_from_path warn_ignore_cb);
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::MdirReader;
+use PublicInbox::NetReader;
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
-use PublicInbox::Sigfd;
-use PublicInbox::DS qw(now);
+use PublicInbox::DS qw(now add_timer awaitpid);
 use PublicInbox::MID qw(mids);
 use PublicInbox::ContentHash qw(content_hash);
-use PublicInbox::EOFpipe;
 use POSIX qw(_exit WNOHANG);
+use constant { D_MAILDIR => 1, D_MH => 2 };
 
 sub compile_watchheaders ($) {
         my ($ibx) = @_;
@@ -40,82 +41,118 @@ sub compile_watchheaders ($) {
         $ibx->{-watchheaders} = $watch_hdrs if scalar @$watch_hdrs;
 }
 
+sub d_type_set ($$$) {
+        my ($d_type, $dir, $is) = @_;
+        my $isnt = D_MAILDIR;
+        if ($is == D_MAILDIR) {
+                $isnt = D_MH;
+                $d_type->{"$dir/cur"} |= $is;
+                $d_type->{"$dir/new"} |= $is;
+        }
+        warn <<EOM if ($d_type->{$dir} |= $is) & $isnt;
+W: `$dir' is both Maildir and MH (non-fatal)
+EOM
+}
+
 sub new {
-        my ($class, $config) = @_;
-        my (%mdmap, $spamc);
+        my ($class, $cfg) = @_;
+        my (%d_map, %d_type);
         my (%imap, %nntp); # url => [inbox objects] or 'watchspam'
+        my (@imap, @nntp);
+        PublicInbox::Import::load_config($cfg);
 
         # "publicinboxwatch" is the documented namespace
         # "publicinboxlearn" is legacy but may be supported
         # indefinitely...
         foreach my $pfx (qw(publicinboxwatch publicinboxlearn)) {
                 my $k = "$pfx.watchspam";
-                defined(my $dirs = $config->{$k}) or next;
-                $dirs = PublicInbox::Config::_array($dirs);
+                my $dirs = $cfg->get_all($k) // next;
                 for my $dir (@$dirs) {
-                        my $url;
+                        my $uri;
                         if (is_maildir($dir)) {
                                 # skip "new", no MUA has seen it, yet.
-                                $mdmap{"$dir/cur"} = 'watchspam';
-                        } elsif ($url = imap_url($dir)) {
-                                $imap{$url} = 'watchspam';
-                        } elsif ($url = nntp_url($dir)) {
-                                $nntp{$url} = 'watchspam';
+                                $d_map{"$dir/cur"} = 'watchspam';
+                                d_type_set \%d_type, $dir, D_MAILDIR;
+                        } elsif (is_mh($dir)) {
+                                $d_map{$dir} = 'watchspam';
+                                d_type_set \%d_type, $dir, D_MH;
+                        } elsif ($uri = imap_uri($dir)) {
+                                $imap{$$uri} = 'watchspam';
+                                push @imap, $uri;
+                        } elsif ($uri = nntp_uri($dir)) {
+                                $nntp{$$uri} = 'watchspam';
+                                push @nntp, $uri;
                         } else {
                                 warn "unsupported $k=$dir\n";
                         }
                 }
         }
-
         my $k = 'publicinboxwatch.spamcheck';
         my $default = undef;
-        my $spamcheck = PublicInbox::Spamcheck::get($config, $k, $default);
+        my $spamcheck = PublicInbox::Spamcheck::get($cfg, $k, $default);
         $spamcheck = _spamcheck_cb($spamcheck) if $spamcheck;
 
-        $config->each_inbox(sub {
+        $cfg->each_inbox(sub {
                 # need to make all inboxes writable for spam removal:
                 my $ibx = $_[0] = PublicInbox::InboxWritable->new($_[0]);
 
                 my $watches = $ibx->{watch} or return;
+
+                $ibx->{indexlevel} //= $ibx->detect_indexlevel;
                 $watches = PublicInbox::Config::_array($watches);
                 for my $watch (@$watches) {
-                        my $url;
-                        if (is_maildir($watch)) {
+                        my $uri;
+                        my $bool = $cfg->git_bool($watch);
+                        if (defined $bool && !$bool) {
+                                $ibx->{-watch_disabled} = 1;
+                        } elsif (is_maildir($watch)) {
                                 compile_watchheaders($ibx);
                                 my ($new, $cur) = ("$watch/new", "$watch/cur");
-                                my $cur_dst = $mdmap{$cur} //= [];
+                                my $cur_dst = $d_map{$cur} //= [];
                                 return if is_watchspam($cur, $cur_dst, $ibx);
-                                push @{$mdmap{$new} //= []}, $ibx;
+                                push @{$d_map{$new} //= []}, $ibx;
                                 push @$cur_dst, $ibx;
-                        } elsif ($url = imap_url($watch)) {
-                                return if is_watchspam($url, $imap{$url}, $ibx);
+                                d_type_set \%d_type, $watch, D_MAILDIR;
+                        } elsif (is_mh($watch)) {
+                                my $cur_dst = $d_map{$watch} //= [];
+                                return if is_watchspam($watch, $cur_dst, $ibx);
                                 compile_watchheaders($ibx);
-                                push @{$imap{$url} ||= []}, $ibx;
-                        } elsif ($url = nntp_url($watch)) {
-                                return if is_watchspam($url, $nntp{$url}, $ibx);
+                                push(@$cur_dst, $ibx);
+                                d_type_set \%d_type, $watch, D_MH;
+                        } elsif ($uri = imap_uri($watch)) {
+                                my $cur_dst = $imap{$$uri} //= [];
+                                return if is_watchspam($uri, $cur_dst, $ibx);
                                 compile_watchheaders($ibx);
-                                push @{$nntp{$url} ||= []}, $ibx;
+                                push(@imap, $uri) if 1 == push(@$cur_dst, $ibx);
+                        } elsif ($uri = nntp_uri($watch)) {
+                                my $cur_dst = $nntp{$$uri} //= [];
+                                return if is_watchspam($uri, $cur_dst, $ibx);
+                                compile_watchheaders($ibx);
+                                push(@nntp, $uri) if 1 == push(@$cur_dst, $ibx);
                         } else {
                                 warn "watch unsupported: $k=$watch\n";
                         }
                 }
         });
 
-        my $mdre;
-        if (scalar keys %mdmap) {
-                $mdre = join('|', map { quotemeta($_) } keys %mdmap);
-                $mdre = qr!\A($mdre)/!;
+        my $d_re;
+        if (scalar keys %d_map) {
+                $d_re = join('|', map quotemeta, keys %d_map);
+                $d_re = qr!\A($d_re)/!;
         }
-        return unless $mdre || scalar(keys %imap) || scalar(keys %nntp);
+        return unless $d_re || scalar(keys %imap) || scalar(keys %nntp);
 
         bless {
                 max_batch => 10, # avoid hogging locks for too long
                 spamcheck => $spamcheck,
-                mdmap => \%mdmap,
-                mdre => $mdre,
-                config => $config,
+                d_map => \%d_map,
+                d_re => $d_re,
+                d_type => \%d_type,
+                pi_cfg => $cfg,
                 imap => scalar keys %imap ? \%imap : undef,
                 nntp => scalar keys %nntp? \%nntp : undef,
+                imap_order => scalar(@imap) ? \@imap : undef,
+                nntp_order => scalar(@nntp) ? \@nntp: undef,
                 importers => {},
                 opendirs => {}, # dirname => dirhandle (in progress scans)
                 ops => [], # 'quit', 'full'
@@ -134,6 +171,7 @@ sub _done_for_now {
 
 sub remove_eml_i { # each_inbox callback
         my ($ibx, $self, $eml, $loc) = @_;
+        return if $ibx->{-watch_disabled};
 
         eval {
                 # try to avoid taking a lock or unnecessary spawning
@@ -174,8 +212,8 @@ sub _remove_spam {
         # path must be marked as (S)een
         $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
         my $eml = eml_from_path($path) or return;
-        local $SIG{__WARN__} = warn_ignore_cb();
-        $self->{config}->each_inbox(\&remove_eml_i, $self, $eml, $path);
+        local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
+        $self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
 }
 
 sub import_eml ($$$) {
@@ -207,17 +245,24 @@ sub import_eml ($$$) {
 
 sub _try_path {
         my ($self, $path) = @_;
-        return unless PublicInbox::InboxWritable::is_maildir_path($path);
-        if ($path !~ $self->{mdre}) {
-                warn "unrecognized path: $path\n";
-                return;
-        }
-        my $inboxes = $self->{mdmap}->{$1};
-        unless ($inboxes) {
-                warn "unmappable dir: $1\n";
-                return;
-        }
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+        $path =~ $self->{d_re} or
+                return warn("BUG? unrecognized path: $path\n");
+        my $dir = $1;
+        my $inboxes = $self->{d_map}->{$dir} //
+                return warn("W: unmappable dir: $dir\n");
+        my ($md_fl, $mh_seq);
+        if ($self->{d_type}->{$dir} & D_MH) {
+                $path =~ m!/([0-9]+)\z! ? ($mh_seq = $1) : return;
+        }
+        $self->{d_type}->{$dir} & D_MAILDIR and
+                $md_fl = PublicInbox::MdirReader::maildir_path_flags($path);
+        $md_fl // $mh_seq // return;
+        return if ($md_fl // '') =~ /[DT]/; # no Drafts or Trash
+        # n.b. none of the MH keywords are relevant for public mail,
+        # mh_seq is only used to validate we're reading an email
+        # and not treating .mh_sequences as an email
+
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
         local $SIG{__WARN__} = sub {
                 my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
                 $warn_cb->($pfx, "path: $path\n", @_);
@@ -236,25 +281,24 @@ sub quit_done ($) {
         return unless $self->{quit};
 
         # don't have reliable wakeups, keep signalling
-        my $done = 1;
-        for (qw(idle_pids poll_pids)) {
-                my $pids = $self->{$_} or next;
-                for (keys %$pids) {
-                        $done = undef if kill('QUIT', $_);
-                }
-        }
-        $done;
+        my $live = grep { kill('QUIT', $_) } keys %{$self->{pids}};
+        add_timer(0.01, \&quit_done, $self) if $live;
+        $live == 0;
 }
 
-sub quit {
+sub quit { # may be called in IMAP/NNTP children
         my ($self) = @_;
         $self->{quit} = 1;
         %{$self->{opendirs}} = ();
         _done_for_now($self);
         quit_done($self);
-        if (my $idle_mic = $self->{idle_mic}) {
+        if (my $dir_idle = delete $self->{dir_idle}) {
+                $dir_idle->close if $dir_idle;
+        }
+        if (my $idle_mic = delete $self->{idle_mic}) { # IMAP child
+                return unless $idle_mic->IsConnected && $idle_mic->Socket;
                 eval { $idle_mic->done };
-                if ($@) {
+                if ($@ && $idle_mic->IsConnected && $idle_mic->Socket) {
                         warn "IDLE DONE error: $@\n";
                         eval { $idle_mic->disconnect };
                         warn "IDLE LOGOUT error: $@\n" if $@;
@@ -274,266 +318,58 @@ sub watch_fs_init ($) {
         };
         require PublicInbox::DirIdle;
         # inotify_create + EPOLL_CTL_ADD
-        PublicInbox::DirIdle->new([keys %{$self->{mdmap}}], $cb);
-}
-
-# avoid exposing deprecated "snews" to users.
-my %SCHEME_MAP = ('snews' => 'nntps');
-
-sub uri_scheme ($) {
-        my ($uri) = @_;
-        my $scheme = $uri->scheme;
-        $SCHEME_MAP{$scheme} // $scheme;
-}
-
-# returns the git config section name, e.g [imap "imaps://user@example.com"]
-# without the mailbox, so we can share connections between different inboxes
-sub uri_section ($) {
-        my ($uri) = @_;
-        uri_scheme($uri) . '://' . $uri->authority;
-}
-
-sub cfg_intvl ($$$) {
-        my ($cfg, $key, $url) = @_;
-        my $v = $cfg->urlmatch($key, $url) // return;
-        $v =~ /\A[0-9]+(?:\.[0-9]+)?\z/s and return $v + 0;
-        if (ref($v) eq 'ARRAY') {
-                $v = join(', ', @$v);
-                warn "W: $key has multiple values: $v\nW: $key ignored\n";
-        } else {
-                warn "W: $key=$v is not a numeric value in seconds\n";
-        }
-}
-
-sub cfg_bool ($$$) {
-        my ($cfg, $key, $url) = @_;
-        my $orig = $cfg->urlmatch($key, $url) // return;
-        my $bool = $cfg->git_bool($orig);
-        warn "W: $key=$orig for $url is not boolean\n" unless defined($bool);
-        $bool;
-}
-
-# flesh out common IMAP-specific data structures
-sub imap_common_init ($) {
-        my ($self) = @_;
-        my $cfg = $self->{config};
-        my $mic_args = {}; # scheme://authority => Mail:IMAPClient arg
-        for my $url (sort keys %{$self->{imap}}) {
-                my $uri = PublicInbox::URIimap->new($url);
-                my $sec = uri_section($uri);
-                for my $k (qw(Starttls Debug Compress)) {
-                        my $bool = cfg_bool($cfg, "imap.$k", $url) // next;
-                        $mic_args->{$sec}->{$k} = $bool;
-                }
-                my $to = cfg_intvl($cfg, 'imap.timeout', $url);
-                $mic_args->{$sec}->{Timeout} = $to if $to;
-                for my $k (qw(pollInterval idleInterval)) {
-                        $to = cfg_intvl($cfg, "imap.$k", $url) // next;
-                        $self->{imap_opt}->{$sec}->{$k} = $to;
-                }
-                my $k = 'imap.fetchBatchSize';
-                my $bs = $cfg->urlmatch($k, $url) // next;
-                if ($bs =~ /\A([0-9]+)\z/) {
-                        $self->{imap_opt}->{$sec}->{batch_size} = $bs;
-                } else {
-                        warn "$k=$bs is not an integer\n";
-                }
-        }
-        $mic_args;
-}
-
-sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback
-
-sub mic_for ($$$) { # mic = Mail::IMAPClient
-        my ($self, $url, $mic_args) = @_;
-        my $uri = PublicInbox::URIimap->new($url);
-        require PublicInbox::GitCredential;
-        my $cred = bless {
-                url => $url,
-                protocol => $uri->scheme,
-                host => $uri->host,
-                username => $uri->user,
-                password => $uri->password,
-        }, 'PublicInbox::GitCredential';
-        my $common = $mic_args->{uri_section($uri)} // {};
-        # IMAPClient and Net::Netrc both mishandles `0', so we pass `127.0.0.1'
-        my $host = $cred->{host};
-        $host = '127.0.0.1' if $host eq '0';
-        my $mic_arg = {
-                Port => $uri->port,
-                Server => $host,
-                Ssl => $uri->scheme eq 'imaps',
-                Keepalive => 1, # SO_KEEPALIVE
-                %$common, # may set Starttls, Compress, Debug ....
-        };
-        my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or
-                die "E: <$url> new: $@\n";
-
-        # default to using STARTTLS if it's available, but allow
-        # it to be disabled since I usually connect to localhost
-        if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) &&
-                        $mic->has_capability('STARTTLS') &&
-                        $mic->can('starttls')) {
-                $mic->starttls or die "E: <$url> STARTTLS: $@\n";
-        }
-
-        # do we even need credentials?
-        if (!defined($cred->{username}) &&
-                        $mic->has_capability('AUTH=ANONYMOUS')) {
-                $cred = undef;
-        }
-        if ($cred) {
-                $cred->check_netrc unless defined $cred->{password};
-                $cred->fill; # may prompt user here
-                $mic->User($mic_arg->{User} = $cred->{username});
-                $mic->Password($mic_arg->{Password} = $cred->{password});
-        } else { # AUTH=ANONYMOUS
-                $mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS');
-                $mic->Authcallback($mic_arg->{Authcallback} = \&auth_anon_cb);
-        }
-        if ($mic->login && $mic->IsAuthenticated) {
-                # success! keep IMAPClient->new arg in case we get disconnected
-                $self->{mic_arg}->{uri_section($uri)} = $mic_arg;
-        } else {
-                warn "E: <$url> LOGIN: $@\n";
-                $mic = undef;
-        }
-        $cred->run($mic ? 'approve' : 'reject') if $cred;
-        $mic;
+        my $dir_idle = $self->{dir_idle} = PublicInbox::DirIdle->new($cb);
+        $dir_idle->add_watches([keys %{$self->{d_map}}]);
 }
 
-sub imap_import_msg ($$$$$) {
-        my ($self, $url, $uid, $raw, $flags) = @_;
-        # our target audience expects LF-only, save storage
-        $$raw =~ s/\r\n/\n/sg;
-
-        my $inboxes = $self->{imap}->{$url};
+sub net_cb { # NetReader::(nntp|imap)_each callback
+        my ($uri, $art, $kw, $eml, $self, $inboxes) = @_;
+        return if grep(/\Adraft\z/, @$kw);
+        local $self->{cur_uid} = $art; # IMAP UID or NNTP article
         if (ref($inboxes)) {
-                for my $ibx (@$inboxes) {
-                        my $eml = PublicInbox::Eml->new($$raw);
-                        import_eml($self, $ibx, $eml);
+                my @ibx = @$inboxes;
+                my $last = pop @ibx;
+                for my $ibx (@ibx) {
+                        my $tmp = PublicInbox::Eml->new(\($eml->as_string));
+                        import_eml($self, $ibx, $tmp);
                 }
+                import_eml($self, $last, $eml);
         } elsif ($inboxes eq 'watchspam') {
-                # we don't remove unseen messages
-                if ($flags =~ /\\Seen\b/) {
-                        local $SIG{__WARN__} = warn_ignore_cb();
-                        my $eml = PublicInbox::Eml->new($raw);
-                        $self->{config}->each_inbox(\&remove_eml_i,
-                                                $self, $eml, "$url UID:$uid");
+                if ($uri->scheme =~ /\Aimaps?\z/ && !grep(/\Aseen\z/, @$kw)) {
+                        return;
                 }
+                $self->{pi_cfg}->each_inbox(\&remove_eml_i,
+                                $self, $eml, "$uri #$art");
         } else {
                 die "BUG: destination unknown $inboxes";
         }
 }
 
-sub imap_fetch_all ($$$) {
-        my ($self, $mic, $url) = @_;
-        my $uri = PublicInbox::URIimap->new($url);
-        my $sec = uri_section($uri);
-        my $mbx = $uri->mailbox;
-        $mic->Clear(1); # trim results history
-        $mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!";
-        my ($r_uidval, $r_uidnext);
-        for ($mic->Results) {
-                /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1;
-                /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1;
-                last if $r_uidval && $r_uidnext;
-        }
-        $r_uidval //= $mic->uidvalidity($mbx) //
-                return "E: $url cannot get UIDVALIDITY";
-        $r_uidnext //= $mic->uidnext($mbx) //
-                return "E: $url cannot get UIDNEXT";
-        my $itrk = PublicInbox::IMAPTracker->new($url);
-        my ($l_uidval, $l_uid) = $itrk->get_last;
-        $l_uidval //= $r_uidval; # first time
-        $l_uid //= 1;
-        if ($l_uidval != $r_uidval) {
-                return "E: $url UIDVALIDITY mismatch\n".
-                        "E: local=$l_uidval != remote=$r_uidval";
-        }
-        my $r_uid = $r_uidnext - 1;
-        if ($l_uid != 1 && $l_uid > $r_uid) {
-                return "E: $url local UID exceeds remote ($l_uid > $r_uid)\n".
-                        "E: $url strangely, UIDVALIDLITY matches ($l_uidval)\n";
-        }
-        return if $l_uid >= $r_uid; # nothing to do
-
-        warn "I: $url fetching UID $l_uid:$r_uid\n";
-        $mic->Uid(1); # the default, we hope
-        my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
-        my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
-
-        # TODO: FLAGS may be useful for personal use
-        my $key = $req;
-        $key =~ s/\.PEEK//;
-        my ($uids, $batch);
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+sub imap_fetch_all ($$) {
+        my ($self, $uri) = @_;
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
+        $self->{incremental} = 1;
+        $self->{on_commit} = [ \&_done_for_now, $self ];
+        local $self->{cur_uid};
         local $SIG{__WARN__} = sub {
-                my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
-                $batch //= '?';
-                $warn_cb->("$pfx$url UID:$batch\n", @_);
+                my $pfx = ($_[0] // '') =~ /^([A-Z]: |# )/g ? $1 : '';
+                my $uid = $self->{cur_uid};
+                $warn_cb->("$pfx$uri", $uid ? (" UID:$uid") : (), "\n", @_);
         };
-        my $err;
-        do {
-                # I wish "UID FETCH $START:*" could work, but:
-                # 1) servers do not need to return results in any order
-                # 2) Mail::IMAPClient doesn't offer a streaming API
-                $uids = $mic->search("UID $l_uid:*") or
-                        return "E: $url UID SEARCH $l_uid:* error: $!";
-                return if scalar(@$uids) == 0;
-
-                # RFC 3501 doesn't seem to indicate order of UID SEARCH
-                # responses, so sort it ourselves.  Order matters so
-                # IMAPTracker can store the newest UID.
-                @$uids = sort { $a <=> $b } @$uids;
-
-                # Did we actually get new messages?
-                return if $uids->[0] < $l_uid;
-
-                $l_uid = $uids->[-1] + 1; # for next search
-                my $last_uid;
-                my $n = $self->{max_batch};
-
-                while (scalar @$uids) {
-                        if (--$n < 0) {
-                                _done_for_now($self);
-                                $itrk->update_last($r_uidval, $last_uid);
-                                $n = $self->{max_batch};
-                        }
-                        my @batch = splice(@$uids, 0, $bs);
-                        $batch = join(',', @batch);
-                        local $0 = "UID:$batch $mbx $sec";
-                        my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
-                        unless ($r) { # network error?
-                                $err = "E: $url UID FETCH $batch error: $!";
-                                last;
-                        }
-                        for my $uid (@batch) {
-                                # messages get deleted, so holes appear
-                                my $per_uid = delete $r->{$uid} // next;
-                                my $raw = delete($per_uid->{$key}) // next;
-                                my $fl = $per_uid->{FLAGS} // '';
-                                imap_import_msg($self, $url, $uid, \$raw, $fl);
-                                $last_uid = $uid;
-                                last if $self->{quit};
-                        }
-                        last if $self->{quit};
-                }
-                _done_for_now($self);
-                $itrk->update_last($r_uidval, $last_uid);
-        } until ($err || $self->{quit});
-        $err;
+        PublicInbox::NetReader::imap_each($self, $uri, \&net_cb, $self,
+                                        $self->{imap}->{$$uri});
 }
 
 sub imap_idle_once ($$$$) {
-        my ($self, $mic, $intvl, $url) = @_;
+        my ($self, $mic, $intvl, $uri) = @_;
         my $i = $intvl //= (29 * 60);
         my $end = now() + $intvl;
-        warn "I: $url idling for ${intvl}s\n";
+        warn "# $uri idling for ${intvl}s\n";
         local $0 = "IDLE $0";
+        return if $self->{quit};
         unless ($mic->idle) {
                 return if $self->{quit};
-                return "E: IDLE failed on $url: $!";
+                return "E: IDLE failed on $uri: $!";
         }
         $self->{idle_mic} = $mic; # for ->quit
         my @res;
@@ -544,27 +380,30 @@ sub imap_idle_once ($$$$) {
         }
         delete $self->{idle_mic};
         unless ($self->{quit}) {
-                $mic->IsConnected or return "E: IDLE disconnected on $url";
-                $mic->done or return "E: IDLE DONE failed on $url: $!";
+                $mic->IsConnected or return "E: IDLE disconnected on $uri";
+                $mic->done or return "E: IDLE DONE failed on $uri: $!";
         }
         undef;
 }
 
 # idles on a single URI
 sub watch_imap_idle_1 ($$$) {
-        my ($self, $url, $intvl) = @_;
-        my $uri = PublicInbox::URIimap->new($url);
+        my ($self, $uri, $intvl) = @_;
         my $sec = uri_section($uri);
-        my $mic_arg = $self->{mic_arg}->{$sec} or
+        my $mic_arg = $self->{net_arg}->{$sec} or
                         die "BUG: no Mail::IMAPClient->new arg for $sec";
         my $mic;
         local $0 = $uri->mailbox." $sec";
         until ($self->{quit}) {
-                $mic //= PublicInbox::IMAPClient->new(%$mic_arg);
+                $mic //= PublicInbox::NetReader::mic_new(
+                                        $self, $mic_arg, $sec, $uri);
                 my $err;
                 if ($mic && $mic->IsConnected) {
-                        $err = imap_fetch_all($self, $mic, $url);
-                        $err //= imap_idle_once($self, $mic, $intvl, $url);
+                        local $self->{mics_cached}->{$sec} = $mic;
+                        my $m = imap_fetch_all($self, $uri);
+                        $m == $mic or die "BUG: wrong mic";
+                        $mic->IsConnected and
+                                $err = imap_idle_once($self, $mic, $intvl, $uri)
                 } else {
                         $err = "E: not connected: $!";
                 }
@@ -578,63 +417,42 @@ sub watch_imap_idle_1 ($$$) {
 
 sub watch_atfork_child ($) {
         my ($self) = @_;
-        delete $self->{idle_pids};
-        delete $self->{poll_pids};
-        delete $self->{opendirs};
-        PublicInbox::DS->Reset;
-        %SIG = (%SIG, %{$self->{sig}}, CHLD => 'DEFAULT');
-        PublicInbox::Sigfd::sig_setmask($self->{oldset});
+        delete @$self{qw(dir_idle pids opendirs)};
+        my $sig = delete $self->{sig};
+        $sig->{CHLD} = $sig->{HUP} = $sig->{USR1} = 'DEFAULT';
+        # TERM/QUIT/INT call ->quit, which works in both parent+child
+        @SIG{keys %$sig} = values %$sig;
+        PublicInbox::DS::sig_setmask(PublicInbox::DS::allowset($sig));
 }
 
-sub watch_atfork_parent ($) {
-        my ($self) = @_;
-        _done_for_now($self);
-        PublicInbox::Sigfd::block_signals();
-}
+sub watch_atfork_parent ($) { _done_for_now($_[0]) }
 
-sub imap_idle_requeue ($) { # DS::add_timer callback
-        my ($self, $url_intvl) = @{$_[0]};
+sub imap_idle_requeue { # DS::add_timer callback
+        my ($self, $uri, $intvl) = @_;
         return if $self->{quit};
-        push @{$self->{idle_todo}}, $url_intvl;
+        push @{$self->{idle_todo}}, $uri, $intvl;
         event_step($self);
 }
 
-sub imap_idle_reap { # PublicInbox::DS::dwaitpid callback
-        my ($self, $pid) = @_;
-        my $url_intvl = delete $self->{idle_pids}->{$pid} or
-                die "BUG: PID=$pid (unknown) reaped: \$?=$?\n";
-
-        my ($url, $intvl) = @$url_intvl;
+sub imap_idle_reap { # awaitpid callback
+        my ($pid, $self, $uri, $intvl) = @_;
+        delete $self->{pids}->{$pid};
         return if $self->{quit};
-        warn "W: PID=$pid on $url died: \$?=$?\n" if $?;
-        PublicInbox::DS::add_timer(60,
-                                \&imap_idle_requeue, [ $self, $url_intvl ]);
+        warn "W: PID=$pid on $uri died: \$?=$?\n" if $?;
+        add_timer(60, \&imap_idle_requeue, $self, $uri, $intvl);
 }
 
-sub reap { # callback for EOFpipe
-        my ($pid, $cb, $self) = @{$_[0]};
-        my $ret = waitpid($pid, 0);
-        if ($ret == $pid) {
-                $cb->($self, $pid); # poll_fetch_reap || imap_idle_reap
-        } else {
-                warn "W: waitpid($pid) => ", $ret // "($!)", "\n";
-        }
-}
-
-sub imap_idle_fork ($$) {
-        my ($self, $url_intvl) = @_;
-        my ($url, $intvl) = @$url_intvl;
-        pipe(my ($r, $w)) or die "pipe: $!";
-        defined(my $pid = fork) or die "fork: $!";
+sub imap_idle_fork {
+        my ($self, $uri, $intvl) = @_;
+        return if $self->{quit};
+        my $pid = PublicInbox::DS::fork_persist;
         if ($pid == 0) {
-                close $r;
                 watch_atfork_child($self);
-                watch_imap_idle_1($self, $url, $intvl);
-                close $w;
+                watch_imap_idle_1($self, $uri, $intvl);
                 _exit(0);
         }
-        $self->{idle_pids}->{$pid} = $url_intvl;
-        PublicInbox::EOFpipe->new($r, \&reap, [$pid, \&imap_idle_reap, $self]);
+        $self->{pids}->{$pid} = undef;
+        awaitpid($pid, \&imap_idle_reap, $self, $uri, $intvl);
 }
 
 sub event_step {
@@ -642,128 +460,87 @@ sub event_step {
         return if $self->{quit};
         my $idle_todo = $self->{idle_todo};
         if ($idle_todo && @$idle_todo) {
-                my $oldset = watch_atfork_parent($self);
+                watch_atfork_parent($self);
                 eval {
-                        while (my $url_intvl = shift(@$idle_todo)) {
-                                imap_idle_fork($self, $url_intvl);
+                        while (my ($uri, $intvl) = splice(@$idle_todo, 0, 2)) {
+                                imap_idle_fork($self, $uri, $intvl);
                         }
                 };
-                PublicInbox::Sigfd::sig_setmask($oldset);
                 die $@ if $@;
         }
-        fs_scan_step($self) if $self->{mdre};
+        fs_scan_step($self) if $self->{d_re};
 }
 
 sub watch_imap_fetch_all ($$) {
-        my ($self, $urls) = @_;
-        for my $url (@$urls) {
-                my $uri = PublicInbox::URIimap->new($url);
-                my $sec = uri_section($uri);
-                my $mic_arg = $self->{mic_arg}->{$sec} or
-                        die "BUG: no Mail::IMAPClient->new arg for $sec";
-                my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or next;
-                my $err = imap_fetch_all($self, $mic, $url);
+        my ($self, $uris) = @_;
+        for my $uri (@$uris) {
+                imap_fetch_all($self, $uri);
                 last if $self->{quit};
-                warn $err, "\n" if $err;
         }
 }
 
 sub watch_nntp_fetch_all ($$) {
-        my ($self, $urls) = @_;
-        for my $url (@$urls) {
-                my $uri = uri_new($url);
-                my $sec = uri_section($uri);
-                my $nn_arg = $self->{nn_arg}->{$sec} or
-                        die "BUG: no Net::NNTP->new arg for $sec";
-                my $nntp_opt = $self->{nntp_opt}->{$sec};
-                my $nn = nn_new($nn_arg, $nntp_opt, $url);
-                unless ($nn) {
-                        warn "E: $url: \$!=$!\n";
-                        next;
-                }
-                last if $self->{quit};
-                if (my $postconn = $nntp_opt->{-postconn}) {
-                        for my $m_arg (@$postconn) {
-                                my ($method, @args) = @$m_arg;
-                                $nn->$method(@args) and next;
-                                warn "E: <$url> $method failed\n";
-                                $nn = undef;
-                                last;
-                        }
-                }
+        my ($self, $uris) = @_;
+        $self->{incremental} = 1;
+        $self->{on_commit} = [ \&_done_for_now, $self ];
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
+        local $self->{cur_uid};
+        my $uri = '';
+        local $SIG{__WARN__} = sub {
+                my $pfx = ($_[0] // '') =~ /^([A-Z]: |# )/g ? $1 : '';
+                my $art = $self->{cur_uid};
+                $warn_cb->("$pfx$uri", $art ? (" ARTICLE $art") : (), "\n", @_);
+        };
+        for $uri (@$uris) {
+                PublicInbox::NetReader::nntp_each($self, $uri, \&net_cb, $self,
+                                        $self->{nntp}->{$$uri});
                 last if $self->{quit};
-                if ($nn) {
-                        my $err = nntp_fetch_all($self, $nn, $url);
-                        warn $err, "\n" if $err;
-                }
         }
 }
 
-sub poll_fetch_fork ($) { # DS::add_timer callback
-        my ($self, $intvl, $urls) = @{$_[0]};
+sub poll_fetch_fork { # DS::add_timer callback
+        my ($self, $intvl, $uris) = @_;
         return if $self->{quit};
-        pipe(my ($r, $w)) or die "pipe: $!";
-        my $oldset = watch_atfork_parent($self);
-        my $pid = fork;
-        if (defined($pid) && $pid == 0) {
-                close $r;
+        watch_atfork_parent($self);
+        my @nntp;
+        my @imap = grep { # push() always returns > 0
+                $_->scheme =~ m!\Aimaps?!i ? 1 : (push(@nntp, $_) < 0)
+        } @$uris;
+        my $pid = PublicInbox::DS::fork_persist;
+        if ($pid == 0) {
                 watch_atfork_child($self);
-                if ($urls->[0] =~ m!\Aimaps?://!i) {
-                        watch_imap_fetch_all($self, $urls);
-                } else {
-                        watch_nntp_fetch_all($self, $urls);
-                }
-                close $w;
+                watch_imap_fetch_all($self, \@imap) if @imap;
+                watch_nntp_fetch_all($self, \@nntp) if @nntp;
                 _exit(0);
         }
-        PublicInbox::Sigfd::sig_setmask($oldset);
-        die "fork: $!"  unless defined $pid;
-        $self->{poll_pids}->{$pid} = [ $intvl, $urls ];
-        PublicInbox::EOFpipe->new($r, \&reap, [$pid, \&poll_fetch_reap, $self]);
+        $self->{pids}->{$pid} = undef;
+        awaitpid($pid, \&poll_fetch_reap, $self, $intvl, $uris);
 }
 
-sub poll_fetch_reap {
-        my ($self, $pid) = @_;
-        my $intvl_urls = delete $self->{poll_pids}->{$pid} or
-                die "BUG: PID=$pid (unknown) reaped: \$?=$?\n";
+sub poll_fetch_reap { # awaitpid callback
+        my ($pid, $self, $intvl, $uris) = @_;
+        delete $self->{pids}->{$pid};
         return if $self->{quit};
-        my ($intvl, $urls) = @$intvl_urls;
         if ($?) {
-                warn "W: PID=$pid died: \$?=$?\n", map { "$_\n" } @$urls;
+                warn "W: PID=$pid died: \$?=$?\n", map { "$_\n" } @$uris;
         }
-        warn("I: will check $_ in ${intvl}s\n") for @$urls;
-        PublicInbox::DS::add_timer($intvl, \&poll_fetch_fork,
-                                        [$self, $intvl, $urls]);
+        warn("# will check $_ in ${intvl}s\n") for @$uris;
+        add_timer($intvl, \&poll_fetch_fork, $self, $intvl, $uris);
 }
 
 sub watch_imap_init ($$) {
         my ($self, $poll) = @_;
-        eval { require PublicInbox::IMAPClient } or
-                die "Mail::IMAPClient is required for IMAP:\n$@\n";
-        eval { require PublicInbox::IMAPTracker } or
-                die "DBD::SQLite is required for IMAP\n:$@\n";
-
-        my $mic_args = imap_common_init($self); # read args from config
-
-        # make sure we can connect and cache the credentials in memory
-        $self->{mic_arg} = {}; # schema://authority => IMAPClient->new args
-        my $mics = {}; # schema://authority => IMAPClient obj
-        for my $url (sort keys %{$self->{imap}}) {
-                my $uri = PublicInbox::URIimap->new($url);
-                $mics->{uri_section($uri)} //= mic_for($self, $url, $mic_args);
-        }
-
-        my $idle = []; # [ [ url1, intvl1 ], [url2, intvl2] ]
-        for my $url (keys %{$self->{imap}}) {
-                my $uri = PublicInbox::URIimap->new($url);
+        my $mics = PublicInbox::NetReader::imap_common_init($self) or return;
+        my $idle = []; # [ uri1, intvl1, uri2, intvl2 ]
+        for my $uri (@{$self->{imap_order}}) {
                 my $sec = uri_section($uri);
                 my $mic = $mics->{$sec};
-                my $intvl = $self->{imap_opt}->{$sec}->{pollInterval};
+                my $intvl = $self->{cfg_opt}->{$sec}->{pollInterval};
                 if ($mic->has_capability('IDLE') && !$intvl) {
-                        $intvl = $self->{imap_opt}->{$sec}->{idleInterval};
-                        push @$idle, [ $url, $intvl // () ];
+                        $intvl = $self->{cfg_opt}->{$sec}->{idleInterval};
+                        push @$idle, $uri, $intvl;
                 } else {
-                        push @{$poll->{$intvl || 120}}, $url;
+                        push @{$poll->{$intvl || 120}}, $uri;
                 }
         }
         if (scalar @$idle) {
@@ -772,249 +549,32 @@ sub watch_imap_init ($$) {
         }
 }
 
-# flesh out common NNTP-specific data structures
-sub nntp_common_init ($) {
-        my ($self) = @_;
-        my $cfg = $self->{config};
-        my $nn_args = {}; # scheme://authority => Net::NNTP->new arg
-        for my $url (sort keys %{$self->{nntp}}) {
-                my $sec = uri_section(uri_new($url));
-
-                # Debug and Timeout are passed to Net::NNTP->new
-                my $v = cfg_bool($cfg, 'nntp.Debug', $url);
-                $nn_args->{$sec}->{Debug} = $v if defined $v;
-                my $to = cfg_intvl($cfg, 'nntp.Timeout', $url);
-                $nn_args->{$sec}->{Timeout} = $to if $to;
-
-                # Net::NNTP post-connect commands
-                for my $k (qw(starttls compress)) {
-                        $v = cfg_bool($cfg, "nntp.$k", $url) // next;
-                        $self->{nntp_opt}->{$sec}->{$k} = $v;
-                }
-
-                # internal option
-                for my $k (qw(pollInterval)) {
-                        $to = cfg_intvl($cfg, "nntp.$k", $url) // next;
-                        $self->{nntp_opt}->{$sec}->{$k} = $to;
-                }
-        }
-        $nn_args;
-}
-
-# Net::NNTP doesn't support CAPABILITIES, yet
-sub try_starttls ($) {
-        my ($host) = @_;
-        return if $host =~ /\.onion\z/s;
-        return if $host =~ /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/s;
-        return if $host eq '::1';
-        1;
-}
-
-sub nn_new ($$$) {
-        my ($nn_arg, $nntp_opt, $url) = @_;
-        my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$url> new: $!\n";
-
-        # default to using STARTTLS if it's available, but allow
-        # it to be disabled for localhost/VPN users
-        if (!$nn_arg->{SSL} && $nn->can('starttls')) {
-                if (!defined($nntp_opt->{starttls}) &&
-                                try_starttls($nn_arg->{Host})) {
-                        # soft fail by default
-                        $nn->starttls or warn <<"";
-W: <$url> STARTTLS tried and failed (not requested)
-
-                } elsif ($nntp_opt->{starttls}) {
-                        # hard fail if explicitly configured
-                        $nn->starttls or die <<"";
-E: <$url> STARTTLS requested and failed
-
-                }
-        } elsif ($nntp_opt->{starttls}) {
-                $nn->can('starttls') or
-                        die "E: <$url> Net::NNTP too old for STARTTLS\n";
-                $nn->starttls or die <<"";
-E: <$url> STARTTLS requested and failed
-
-        }
-        $nn;
-}
-
-sub nn_for ($$$) { # nn = Net::NNTP
-        my ($self, $url, $nn_args) = @_;
-        my $uri = uri_new($url);
-        my $sec = uri_section($uri);
-        my $nntp_opt = $self->{nntp_opt}->{$sec} //= {};
-        my $host = $uri->host;
-        # Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
-        $host = '127.0.0.1' if $host eq '0';
-        my $cred;
-        my ($u, $p);
-        if (defined(my $ui = $uri->userinfo)) {
-                require PublicInbox::GitCredential;
-                $cred = bless {
-                        url => $sec,
-                        protocol => uri_scheme($uri),
-                        host => $host,
-                }, 'PublicInbox::GitCredential';
-                ($u, $p) = split(/:/, $ui, 2);
-                ($cred->{username}, $cred->{password}) = ($u, $p);
-                $cred->check_netrc unless defined $p;
-        }
-        my $common = $nn_args->{$sec} // {};
-        my $nn_arg = {
-                Port => $uri->port,
-                Host => $host,
-                SSL => $uri->secure, # snews == nntps
-                %$common, # may Debug ....
-        };
-        my $nn = nn_new($nn_arg, $nntp_opt, $url);
-
-        if ($cred) {
-                $cred->fill; # may prompt user here
-                if ($nn->authinfo($u, $p)) {
-                        push @{$nntp_opt->{-postconn}}, [ 'authinfo', $u, $p ];
-                } else {
-                        warn "E: <$url> AUTHINFO $u XXXX failed\n";
-                        $nn = undef;
-                }
-        }
-
-        if ($nntp_opt->{compress}) {
-                # https://rt.cpan.org/Ticket/Display.html?id=129967
-                if ($nn->can('compress')) {
-                        if ($nn->compress) {
-                                push @{$nntp_opt->{-postconn}}, [ 'compress' ];
-                        } else {
-                                warn "W: <$url> COMPRESS failed\n";
-                        }
-                } else {
-                        delete $nntp_opt->{compress};
-                        warn <<"";
-W: <$url> COMPRESS not supported by Net::NNTP
-W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
-
-                }
-        }
-
-        $self->{nn_arg}->{$sec} = $nn_arg;
-        $cred->run($nn ? 'approve' : 'reject') if $cred;
-        $nn;
-}
-
-sub nntp_fetch_all ($$$) {
-        my ($self, $nn, $url) = @_;
-        my $uri = uri_new($url);
-        my ($group, $num_a, $num_b) = $uri->group;
-        my $sec = uri_section($uri);
-        my ($nr, $beg, $end) = $nn->group($group);
-        unless (defined($nr)) {
-                chomp(my $msg = $nn->message);
-                return "E: GROUP $group <$sec> $msg";
-        }
-
-        # IMAPTracker is also used for tracking NNTP, UID == article number
-        # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
-        # expensive.  So we assume newsgroups don't change:
-        my $itrk = PublicInbox::IMAPTracker->new($url);
-        my (undef, $l_art) = $itrk->get_last;
-        $l_art //= $beg; # initial import
-
-        # allow users to specify articles to refetch
-        # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt
-        # nntp://example.com/inbox.foo/$num_a-$num_b
-        $l_art = $num_a if defined($num_a) && $num_a < $l_art;
-        $end = $num_b if defined($num_b) && $num_b < $end;
-
-        return if $l_art >= $end; # nothing to do
-        $beg = $l_art + 1;
-
-        warn "I: $url fetching ARTICLE $beg..$end\n";
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
-        my ($err, $art);
-        local $SIG{__WARN__} = sub {
-                my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
-                $warn_cb->("$pfx$url ", $art ? ("ARTICLE $art") : (), "\n", @_);
-        };
-        my $inboxes = $self->{nntp}->{$url};
-        my $last_art;
-        my $n = $self->{max_batch};
-        for ($beg..$end) {
-                last if $self->{quit};
-                $art = $_;
-                if (--$n < 0) {
-                        _done_for_now($self);
-                        $itrk->update_last(0, $last_art);
-                        $n = $self->{max_batch};
-                }
-                my $raw = $nn->article($art);
-                unless (defined($raw)) {
-                        my $msg = $nn->message;
-                        if ($nn->code == 421) { # pseudo response from Net::Cmd
-                                $err = "E: $msg";
-                                last;
-                        } else { # probably just a deleted message (spam)
-                                warn "W: $msg";
-                                next;
-                        }
-                }
-                s/\r\n/\n/ for @$raw;
-                $raw = join('', @$raw);
-                if (ref($inboxes)) {
-                        for my $ibx (@$inboxes) {
-                                my $eml = PublicInbox::Eml->new($raw);
-                                import_eml($self, $ibx, $eml);
-                        }
-                } elsif ($inboxes eq 'watchspam') {
-                        my $eml = PublicInbox::Eml->new(\$raw);
-                        $self->{config}->each_inbox(\&remove_eml_i,
-                                        $self, $eml, "$url ARTICLE $art");
-                } else {
-                        die "BUG: destination unknown $inboxes";
-                }
-                $last_art = $art;
-        }
-        _done_for_now($self);
-        $itrk->update_last(0, $last_art);
-        $err;
-}
-
 sub watch_nntp_init ($$) {
         my ($self, $poll) = @_;
-        eval { require Net::NNTP } or
-                die "Net::NNTP is required for NNTP:\n$@\n";
-        eval { require PublicInbox::IMAPTracker } or
-                die "DBD::SQLite is required for NNTP\n:$@\n";
-
-        my $nn_args = nntp_common_init($self); # read args from config
-
-        # make sure we can connect and cache the credentials in memory
-        $self->{nn_arg} = {}; # schema://authority => Net::NNTP->new args
-        for my $url (sort keys %{$self->{nntp}}) {
-                nn_for($self, $url, $nn_args);
-        }
-        for my $url (keys %{$self->{nntp}}) {
-                my $uri = uri_new($url);
+        PublicInbox::NetReader::nntp_common_init($self);
+        for my $uri (@{$self->{nntp_order}}) {
                 my $sec = uri_section($uri);
-                my $intvl = $self->{nntp_opt}->{$sec}->{pollInterval};
-                push @{$poll->{$intvl || 120}}, $url;
+                my $intvl = $self->{cfg_opt}->{$sec}->{pollInterval};
+                push @{$poll->{$intvl || 120}}, $uri;
         }
 }
 
+sub quit_inprogress { !$_[0]->quit_done } # post_loop_do CB
+
 sub watch { # main entry point
-        my ($self, $sig, $oldset) = @_;
-        $self->{oldset} = $oldset;
-        $self->{sig} = $sig;
-        my $poll = {}; # intvl_seconds => [ url1, url2 ]
+        my ($self, $sig) = @_;
+        my $first_sig;
+        $self->{sig} //= ($first_sig = $sig);
+        my $poll = {}; # intvl_seconds => [ uri1, uri2 ]
         watch_imap_init($self, $poll) if $self->{imap};
         watch_nntp_init($self, $poll) if $self->{nntp};
-        while (my ($intvl, $urls) = each %$poll) {
-                # poll all URLs for a given interval sequentially
-                PublicInbox::DS::add_timer(0, \&poll_fetch_fork,
-                                                [$self, $intvl, $urls]);
+        while (my ($intvl, $uris) = each %$poll) {
+                # poll all URIs for a given interval sequentially
+                add_timer(0, \&poll_fetch_fork, $self, $intvl, $uris);
         }
-        watch_fs_init($self) if $self->{mdre};
-        PublicInbox::DS->SetPostLoopCallback(sub { !$self->quit_done });
-        PublicInbox::DS->EventLoop; # calls ->event_step
+        watch_fs_init($self) if $self->{d_re};
+        local @PublicInbox::DS::post_loop_do = (\&quit_inprogress, $self);
+        PublicInbox::DS::event_loop($first_sig); # calls ->event_step
         _done_for_now($self);
 }
 
@@ -1043,7 +603,7 @@ sub fs_scan_step {
                 $opendirs->{$dir} = $dh if $n < 0;
         }
         if ($op && $op eq 'full') {
-                foreach my $dir (keys %{$self->{mdmap}}) {
+                foreach my $dir (keys %{$self->{d_map}}) {
                         next if $opendirs->{$dir}; # already in progress
                         my $ok = opendir(my $dh, $dir);
                         unless ($ok) {
@@ -1118,6 +678,13 @@ sub is_maildir {
         $_[0];
 }
 
+sub is_mh {
+        $_[0] =~ s!\Amh:!!i or return;
+        $_[0] =~ tr!/!/!s;
+        $_[0] =~ s!/\z!!;
+        $_[0];
+}
+
 sub is_watchspam {
         my ($cur, $ws, $ibx) = @_;
         if ($ws && !ref($ws) && $ws eq 'watchspam') {
@@ -1129,31 +696,6 @@ EOF
         undef;
 }
 
-sub uri_new {
-        my ($url) = @_;
-
-        # URI::snews exists, URI::nntps does not, so use URI::snews
-        $url =~ s!\Anntps://!snews://!i;
-        URI->new($url);
-}
-
-sub imap_url {
-        my ($url) = @_;
-        require PublicInbox::URIimap;
-        my $uri = PublicInbox::URIimap->new($url);
-        $uri ? $uri->canonical->as_string : undef;
-}
-
-my %IS_NNTP = (news => 1, snews => 1, nntp => 1);
-sub nntp_url {
-        my ($url) = @_;
-        require URI;
-        my $uri = uri_new($url);
-        return unless $uri && $IS_NNTP{$uri->scheme} && $uri->group;
-        $url = $uri->canonical->as_string;
-        # nntps is IANA registered, snews is deprecated
-        $url =~ s!\Asnews://!nntps://!;
-        $url;
-}
+sub folder_select { 'select' } # for PublicInbox::NetReader
 
 1;
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
index 2818400e..31d9b607 100644
--- a/lib/PublicInbox/WwwAltId.pm
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -1,9 +1,9 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # dumps using the ".dump" command of sqlite3(1)
 package PublicInbox::WwwAltId;
-use strict;
+use v5.12;
 use PublicInbox::Qspawn;
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::AltId;
@@ -15,8 +15,7 @@ sub check_output {
         my ($r, $bref, $ctx) = @_;
         return html_oneshot($ctx, 500) if !defined($r);
         if ($r == 0) {
-                my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
-                $err->print("unexpected EOF from sqlite3\n");
+                warn 'unexpected EOF from sqlite3';
                 return html_oneshot($ctx, 501);
         }
         [200, [ qw(Content-Type application/gzip), 'Content-Disposition',
@@ -30,49 +29,44 @@ sub check_output {
 sub sqldump ($$) {
         my ($ctx, $altid_pfx) = @_;
         my $env = $ctx->{env};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $altid_map = $ibx->altid_map;
         my $fn = $altid_map->{$altid_pfx};
         unless (defined $fn) {
-                return html_oneshot($ctx, 404, \<<EOF);
+                return html_oneshot($ctx, 404, <<EOF);
 <pre>`$altid_pfx' is not a valid altid for this inbox</pre>
 EOF
         }
 
         if ($env->{REQUEST_METHOD} ne 'POST') {
                 my $url = $ibx->base_url($ctx->{env}) . "$altid_pfx.sql.gz";
-                return html_oneshot($ctx, 405, \<<EOF);
-<pre>A POST request required to retrieve $altid_pfx.sql.gz
+                return html_oneshot($ctx, 405, <<EOF);
+<pre>A POST request is required to retrieve $altid_pfx.sql.gz
 
-        curl -XPOST -O $url
+        curl -d '' -O $url
 
 or
 
-        curl -XPOST $url | \\
+        curl -d '' $url | \\
                 gzip -dc | \\
                 sqlite3 /path/to/$altid_pfx.sqlite3
 </pre>
 EOF
         }
 
-        $sqlite3 //= which('sqlite3') // return html_oneshot($ctx, 501, \<<EOF);
+        $sqlite3 //= which('sqlite3') // return html_oneshot($ctx, 501, <<EOF);
 <pre>sqlite3 not available
 
 The administrator needs to install the sqlite3(1) binary
 to support gzipped sqlite3 dumps.</pre>
 EOF
 
-        # setup stdin, POSIX requires writes <= 512 bytes to succeed so
-        # we can close the pipe right away.
-        pipe(my ($r, $w)) or die "pipe: $!";
-        syswrite($w, ".dump\n") == 6 or die "write: $!";
-        close($w) or die "close: $!";
-
         # TODO: use -readonly if available with newer sqlite3(1)
-        my $qsp = PublicInbox::Qspawn->new([$sqlite3, $fn], undef, { 0 => $r });
+        my $qsp = PublicInbox::Qspawn->new([$sqlite3, $fn], undef,
+                                                        { 0 => \".dump\n" });
         $ctx->{altid_pfx} = $altid_pfx;
         $env->{'qspawn.filter'} = PublicInbox::GzipFilter->new;
-        $qsp->psgi_return($env, undef, \&check_output, $ctx);
+        $qsp->psgi_yield($env, undef, \&check_output, $ctx);
 }
 
 1;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 388def12..26b366f5 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Atom body stream for HTTP responses
@@ -8,14 +8,15 @@ use strict;
 use parent 'PublicInbox::GzipFilter';
 
 use POSIX qw(strftime);
-use Digest::SHA qw(sha1_hex);
+use PublicInbox::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
         my ($class, $ctx, $cb) = @_;
-        $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+        $ctx->{feed_base_url} = $ctx->{ibx}->base_url($ctx->{env});
+        $ctx->{-spfx} = $ctx->{feed_base_url} if $ctx->{ibx}->{coderepo};
         $ctx->{cb} = $cb || \&PublicInbox::GzipFilter::close;
         $ctx->{emit_header} = 1;
         bless $ctx, $class;
@@ -28,7 +29,7 @@ sub async_next ($) {
                 if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) {
                         $ctx->smsg_blob($smsg);
                 } else {
-                        $ctx->{http_out}->write($ctx->translate('</feed>'));
+                        $ctx->write('</feed>');
                         $ctx->close;
                 }
         };
@@ -38,14 +39,15 @@ sub async_next ($) {
 sub async_eml { # for async_blob_cb
         my ($ctx, $eml) = @_;
         my $smsg = delete $ctx->{smsg};
-        $ctx->{http_out}->write($ctx->translate(feed_entry($ctx, $smsg, $eml)))
+        $smsg->{mid} // $smsg->populate($eml);
+        $ctx->write(feed_entry($ctx, $smsg, $eml));
 }
 
 sub response {
-        my ($class, $ctx, $code, $cb) = @_;
+        my ($class, $ctx, $cb) = @_;
         my $res_hdr = [ 'Content-Type' => 'application/atom+xml' ];
         $class->new($ctx, $cb);
-        $ctx->psgi_response($code, $res_hdr);
+        $ctx->psgi_response(200, $res_hdr);
 }
 
 # called once for each message by PSGI server
@@ -53,7 +55,7 @@ sub getline {
         my ($self) = @_;
         my $cb = $self->{cb} or return;
         while (my $smsg = $cb->($self)) {
-                my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
+                my $eml = $self->{ibx}->smsg_eml($smsg) or next;
                 return $self->translate(feed_entry($self, $smsg, $eml));
         }
         delete $self->{cb};
@@ -82,7 +84,7 @@ sub to_uuid ($) {
 
 sub atom_header {
         my ($ctx, $title) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $base_url = $ctx->{feed_base_url};
         my $search_q = $ctx->{search_query};
         my $self_url = $base_url;
@@ -97,11 +99,16 @@ sub atom_header {
                 $base_url .= '?' . $search_q->qs_html(x => undef);
                 $self_url .= '?' . $search_q->qs_html;
                 $page_id = to_uuid("q\n".$query);
+        } elsif (defined(my $cat = $ctx->{topic_category})) {
+                $title = title_tag("$cat topics - ".$ibx->description);
+                $self_url .= "topics_$cat.atom";
         } else {
                 $title = title_tag($ibx->description);
                 $self_url .= 'new.atom';
-                $page_id = "mailto:$ibx->{-primary_address}";
+                my $addr = $ibx->{-primary_address};
+                $page_id = "mailto:$addr" if defined $addr;
         }
+        $page_id //= to_uuid($self_url);
         qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
         qq(<feed\nxmlns="http://www.w3.org/2005/Atom"\n) .
         qq(xmlns:thr="http://purl.org/syndication/thread/1.0">) .
@@ -136,28 +143,29 @@ sub feed_entry {
         $title = title_tag($title);
 
         my $from = $eml->header('From') // $eml->header('Sender') //
-                $ctx->{-inbox}->{-primary_address};
+                $ctx->{ibx}->{-primary_address};
         my ($email) = PublicInbox::Address::emails($from);
         my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
-        $email = ascii_html($email // $ctx->{-inbox}->{-primary_address});
+        $email = ascii_html($email // $ctx->{ibx}->{-primary_address});
 
-        my $s = delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '';
-        $s .= "<entry><author><name>$name</name><email>$email</email>" .
+        print { $ctx->zfh }
+                (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''),
+                "<entry><author><name>$name</name><email>$email</email>" .
                 "</author>$title$updated" .
-                qq(<link\nhref="$href"/>).
+                qq(<link\nhref="$href"/>) .
                 "<id>$uuid</id>$irt" .
                 qq{<content\ntype="xhtml">} .
                 qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
                 qq(<pre\nstyle="white-space:pre-wrap">);
-        $ctx->{obuf} = \$s;
         $ctx->{mhref} = $href;
-        PublicInbox::View::multipart_text_as_html($eml, $ctx);
-        delete $ctx->{obuf};
-        $s .= '</pre></div></content></entry>';
+        $ctx->{changed_href} = "${href}#related";
+        $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+        '</pre></div></content></entry>';
 }
 
 sub feed_updated {
-        '<updated>' . strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(@_)) . '</updated>';
+        my ($t) = @_;
+        '<updated>' . strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($t)) . '</updated>';
 }
 
 1;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 0b2cda90..87844bf3 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -1,23 +1,37 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # For retrieving attachments from messages in the WWW interface
 package PublicInbox::WwwAttach; # internal package
 use strict;
+use v5.10.1;
 use parent qw(PublicInbox::GzipFilter);
-use bytes (); # only for bytes::length
-use PublicInbox::EmlContentFoo qw(parse_content_type);
 use PublicInbox::Eml;
 
+sub referer_match ($) {
+        my ($ctx) = @_;
+        my $env = $ctx->{env};
+        return 1 if $env->{REQUEST_METHOD} eq 'POST';
+        my $referer = lc($env->{HTTP_REFERER} // '');
+        return 1 if $referer eq ''; # no referer is always OK for wget/curl
+
+        # prevent deep-linking from other domains on some browsers (Firefox)
+        # n.b.: $ctx->{ibx}->base_url($env) with INBOX_URL won't work
+        # with dillo, we can only match "$url_scheme://$HTTP_HOST/" without
+        # path components
+        my $base_url = lc($env->{'psgi.url_scheme'} . '://' .
+                        ($env->{HTTP_HOST} //
+                         "$env->{SERVER_NAME}:$env->{SERVER_PORT}") . '/');
+        index($referer, $base_url) == 0;
+}
+
 sub get_attach_i { # ->each_part callback
         my ($part, $depth, $idx) = @{$_[0]};
         my $ctx = $_[1];
         return if $idx ne $ctx->{idx}; # [0-9]+(?:\.[0-9]+)+
         my $res = $ctx->{res};
         $res->[0] = 200;
-        my $ct = $part->content_type;
-        $ct = parse_content_type($ct) if $ct;
-
+        my $ct = $part->ct;
         if ($ct && (($ct->{type} || '') eq 'text')) {
                 # display all text as text/plain:
                 my $cset = $ct->{attributes}->{charset};
@@ -28,10 +42,21 @@ sub get_attach_i { # ->each_part callback
                                                                 $ctx->{env});
                 $part = $ctx->zflush($part->body);
         } else { # TODO: allow user to configure safe types
-                $res->[1]->[1] = 'application/octet-stream';
-                $part = $part->body;
+                if (referer_match($ctx)) {
+                        $res->[1]->[1] = 'application/octet-stream';
+                        $part = $part->body;
+                } else {
+                        $res->[0] = 403;
+                        $res->[1]->[1] = 'text/html';
+                        $part = <<"";
+<html><head><title>download
+attachment</title><body><pre>Deep-linking prevented</pre><form
+method=post\naction=""><input type=submit value="Download attachment"
+/></form></body></html>
+
+                }
         }
-        push @{$res->[1]}, 'Content-Length', bytes::length($part);
+        push @{$res->[1]}, 'Content-Length', length($part);
         $res->[2]->[0] = $part;
 }
 
@@ -66,15 +91,15 @@ sub get_attach ($$$) {
         $ctx->{idx} = $idx;
         bless $ctx, __PACKAGE__;
         my $eml;
-        if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) {
+        if ($ctx->{smsg} = $ctx->{ibx}->smsg_by_mid($ctx->{mid})) {
                 return sub { # public-inbox-httpd-only
                         $ctx->{wcb} = $_[0];
                         scan_attach($ctx);
                 } if $ctx->{env}->{'pi-httpd.async'};
                 # generic PSGI:
-                $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg});
-        } elsif (!$ctx->{-inbox}->over) {
-                if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) {
+                $eml = $ctx->{ibx}->smsg_eml($ctx->{smsg});
+        } elsif (!$ctx->{ibx}->over) {
+                if (my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid})) {
                         $eml = PublicInbox::Eml->new($bref);
                 }
         }
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
new file mode 100644
index 00000000..a5e2dc4a
--- /dev/null
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -0,0 +1,377 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Standalone code repository viewer for users w/o cgit.
+# This isn't intended to replicate all of cgit, but merely to be a
+# "good enough" viewer with search support and some UI hints to encourage
+# cloning + command-line usage.
+package PublicInbox::WwwCoderepo;
+use v5.12;
+use parent qw(PublicInbox::WwwStream);
+use File::Temp 0.19 (); # newdir
+use POSIX qw(O_RDWR F_GETFL);
+use PublicInbox::ViewVCS;
+use PublicInbox::WwwStatic qw(r);
+use PublicInbox::GitHTTPBackend;
+use PublicInbox::WwwStream;
+use PublicInbox::Hval qw(prurl ascii_html utf8_maybe);
+use PublicInbox::ViewDiff qw(uri_escape_path);
+use PublicInbox::RepoSnapshot;
+use PublicInbox::RepoAtom;
+use PublicInbox::RepoTree;
+use PublicInbox::RepoList;
+use PublicInbox::OnDestroy;
+use URI::Escape qw(uri_escape_utf8);
+use File::Spec;
+use autodie qw(fcntl open);
+
+my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
+                "--format=%(HEAD)%00".join('%00', map { "%($_)" }
+                qw(objectname refname:short subject creatordate:short)));
+my $HEADS_CMD = <<'';
+# heads (aka `branches'):
+$ git for-each-ref --sort=-creatordate refs/heads \
+        --format='%(HEAD) %(refname:short) %(subject) (%(creatordate:short))'
+
+my $TAGS_CMD = <<'';
+# tags:
+$ git for-each-ref --sort=-creatordate refs/tags \
+        --format='%(refname:short) %(subject) (%(creatordate:short))'
+
+my $NO_HEADS = "# no heads (branches), yet...\n";
+my $NO_TAGS = "# no tags, yet...\n";
+
+# shared with PublicInbox::Cgit
+sub prepare_coderepos {
+        my ($self) = @_;
+        my $pi_cfg = $self->{pi_cfg};
+
+        # TODO: support gitweb and other repository viewers?
+        $pi_cfg->parse_cgitrc(undef, 0);
+
+        my $coderepos = $pi_cfg->{-coderepos};
+        for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) {
+                $k = substr($k, length('coderepo.'), -length('.dir'));
+                $coderepos->{$k} //= $pi_cfg->fill_coderepo($k);
+        }
+
+        # associate inboxes and extindices with coderepos for search:
+        for my $k (grep(/\Apublicinbox\.(?:.+)\.coderepo\z/, keys %$pi_cfg)) {
+                $k = substr($k, length('publicinbox.'), -length('.coderepo'));
+                my $ibx = $pi_cfg->lookup_name($k) // next;
+                $pi_cfg->repo_objs($ibx);
+        }
+        for my $k (grep(/\Aextindex\.(?:.+)\.coderepo\z/, keys %$pi_cfg)) {
+                $k = substr($k, length('extindex.'), -length('.coderepo'));
+                my $eidx = $pi_cfg->lookup_ei($k) // next;
+                $pi_cfg->repo_objs($eidx);
+        }
+        $pi_cfg->each_cindex('load_coderepos', $pi_cfg);
+}
+
+sub new {
+        my ($cls, $pi_cfg) = @_;
+        my $self = bless { pi_cfg => $pi_cfg }, $cls;
+        prepare_coderepos($self);
+        $self->{snapshots} = do {
+                my $s = $pi_cfg->{'coderepo.snapshots'} // '';
+                $s eq 'all' ? \%PublicInbox::RepoSnapshot::FMT_TYPES :
+                        +{ map { $_ => 1 } split(/\s+/, $s) };
+        };
+        $self->{$_} = 10 for qw(summary_branches summary_tags);
+        $self->{$_} = 10 for qw(summary_log);
+
+        # try reuse STDIN if it's already /dev/null
+        open $self->{log_fh}, '+>', '/dev/null';
+        my @l = stat($self->{log_fh}) or die "stat: $!";
+        my @s = stat(STDIN) or die "stat(STDIN): $!";
+        if ("@l[0, 1]" eq "@s[0, 1]") {
+                my $f = fcntl(STDIN, F_GETFL, 0);
+                $self->{log_fh} = *STDIN{IO} if $f & O_RDWR;
+        }
+        $self;
+}
+
+sub _snapshot_link_prep {
+        my ($ctx) = @_;
+        my @s = sort keys %{$ctx->{wcr}->{snapshots}} or return ();
+        my $n = $ctx->{git}->local_nick // die "BUG: $ctx->{git_dir} nick";
+        $n =~ s!\.git/*\z!!;
+        ($n) = ($n =~ m!([^/]+)/*\z!);
+        (ascii_html($n).'-', @s);
+}
+
+sub _refs_heads_link {
+        my ($line, $upfx) = @_;
+        my ($pfx, $oid, $ref, $s, $cd) = split(/\0/, $line);
+        my $align = length($ref) < 12 ? ' ' x (12 - length($ref)) : '';
+        ("$pfx <a\nhref=$upfx$oid/s/>", ascii_html($ref),
+                "</a>$align ", ascii_html($s), " ($cd)\n")
+}
+
+sub _refs_tags_link {
+        my ($line, $upfx, $snap_pfx, @snap_fmt) = @_;
+        my (undef, $oid, $ref, $s, $cd) = split(/\0/, $line);
+        my $align = length($ref) < 12 ? ' ' x (12 - length($ref)) : '';
+        if (@snap_fmt) {
+                my $v = $ref;
+                $v =~ s/\A[vV]//;
+                @snap_fmt = map {
+                        qq{ <a href="${upfx}snapshot/$snap_pfx$v.$_">$_</a>}
+                } @snap_fmt;
+        }
+        ("<a\nhref=$upfx$oid/s/>", ascii_html($ref),
+                "</a>$align ", ascii_html($s), " ($cd)", @snap_fmt, "\n");
+}
+
+sub emit_joined_inboxes ($) {
+        my ($ctx) = @_;
+        my $names = $ctx->{git}->{ibx_names}; # coderepo directives in config
+        my $score = $ctx->{git}->{ibx_score}; # generated w/ cindex --join
+        ($names || $score) or return;
+        my $pi_cfg = $ctx->{wcr}->{pi_cfg};
+        my ($u, $h);
+        my $zfh = $ctx->zfh;
+        print $zfh "\n# associated public inboxes:",
+                "\n# (number on the left is used for dev purposes)";
+        my @ns = map { [ 0, $_ ] } @$names;
+        my $env = $ctx->{env};
+        for (@ns, @$score) {
+                my ($nr, $name) = @$_;
+                my $ibx = $pi_cfg->lookup_name($name) // do {
+                        warn "W: inbox `$name' gone for $ctx->{git}->{git_dir}";
+                        say $zfh '# ', ascii_html($name), ' (missing inbox?)';
+                        next;
+                };
+                if (scalar(@{$ibx->{url} // []})) {
+                        $u = $h = ascii_html(prurl($env, $ibx->{url}));
+                } else {
+                        $h = ascii_html(prurl($env, uri_escape_utf8($name)));
+                        $h .= '/';
+                        $u = ascii_html($name);
+                }
+                if ($nr) {
+                        printf $zfh "\n% 11u", $nr;
+                } else {
+                        print $zfh "\n", ' 'x11;
+                }
+                print $zfh qq{ <a\nhref="$h">$u</a>};
+        }
+}
+
+sub summary_END { # called via OnDestroy
+        my ($ctx) = @_;
+        my $wcb = delete($ctx->{-wcb}) or return; # already done
+        PublicInbox::WwwStream::html_init($ctx);
+        my $zfh = $ctx->zfh;
+
+        my @r = split(/\n/s, delete($ctx->{qx_res}->{'log'}) // '');
+        my $last = scalar(@r) > $ctx->{wcr}->{summary_log} ? pop(@r) : undef;
+        my $tip_html = '';
+        my $tip = $ctx->{qp}->{h};
+        $tip_html .= ' '.ascii_html($tip).' --' if defined $tip;
+        print $zfh <<EOM;
+<pre><a id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d'$tip_html
+EOM
+        for (@r) {
+                my $d; # decorations
+                s/^ \(([^\)]+)\)// and $d = $1;
+                substr($_, 0, 1, '');
+                my ($H, $h, $cs, $s) = split(/ /, $_, 4);
+                print $zfh "<a\nhref=./$H/s/>$h</a> ", ascii_html($s),
+                        " (", $cs, ")\n";
+                print $zfh "\t(", ascii_html($d), ")\n" if $d;
+        }
+        print $zfh '# no commits in `', ($tip//'HEAD'),"', yet\n\n" if !@r;
+        print $zfh "...\n" if $last;
+
+        # README
+        my ($bref, $oid, $ref_path) = @{delete $ctx->{qx_res}->{readme}};
+        if ($bref) {
+                my $l = PublicInbox::Linkify->new;
+                $$bref =~ s/\s*\z//sm;
+                my (undef, $path) = split(/:/, $ref_path, 2); # HEAD:README
+                print $zfh "\n<a id=readme>\$</a> " .
+                        qq(git cat-file blob <a href="./$oid/s/?b=) .
+                        ascii_html(uri_escape_path($path)) . q(">).
+                        ascii_html($ref_path), "</a>\n",
+                        $l->to_html($$bref), '</pre><hr><pre>';
+        }
+
+        # refs/heads
+        print $zfh '<a id=heads>', $HEADS_CMD , '</a>';
+        @r = split(/^/sm, delete($ctx->{qx_res}->{heads}) // '');
+        $last = scalar(@r) > $ctx->{wcr}->{summary_branches} ? pop(@r) : undef;
+        chomp(@r);
+        for (@r) { print $zfh _refs_heads_link($_, './') }
+        print $zfh $NO_HEADS if !@r;
+        print $zfh qq(<a href="refs/heads/">...</a>\n) if $last;
+        print $zfh "\n<a id=tags>", $TAGS_CMD, '</a>';
+        @r = split(/^/sm, delete($ctx->{qx_res}->{tags}) // '');
+        $last = scalar(@r) > $ctx->{wcr}->{summary_tags} ? pop(@r) : undef;
+        my ($snap_pfx, @snap_fmt) = _snapshot_link_prep($ctx);
+        chomp @r;
+        for (@r) { print $zfh _refs_tags_link($_, './', $snap_pfx, @snap_fmt) }
+        print $zfh $NO_TAGS if !@r;
+        print $zfh qq(<a href="refs/tags/">...</a>\n) if $last;
+        emit_joined_inboxes $ctx;
+        $wcb->($ctx->html_done('</pre>'));
+}
+
+sub capture { # psgi_qx callback to capture git-for-each-ref
+        my ($bref, $ctx, $key) = @_; #  $_[3] = OnDestroy(summary_END)
+        $ctx->{qx_res}->{$key} = $$bref;
+        # summary_END may be called via OnDestroy $arg->[2]
+}
+
+sub set_readme { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $ctx) = @_;
+        my $ref_path = shift @{$ctx->{-readme_tries}}; # e.g. HEAD:README
+        if ($type eq 'blob' && !$ctx->{qx_res}->{readme}) {
+                $ctx->{qx_res}->{readme} = [ $bref, $oid, $ref_path ];
+        } elsif (scalar @{$ctx->{-readme_tries}} == 0) {
+                $ctx->{qx_res}->{readme} //= []; # nothing left to try
+        } # or try another README...
+        # summary_END may be called via OnDestroy ($ctx->{-END})
+}
+
+sub summary ($$) {
+        my ($ctx, $wcb) = @_;
+        $ctx->{-wcb} = $wcb; # PublicInbox::HTTP::{Identity,Chunked}
+        my $tip = $ctx->{qp}->{h}; # same as cgit
+        if (defined $tip && $tip eq '') {
+                delete $ctx->{qp}->{h};
+                undef($tip);
+        }
+        my ($nb, $nt, $nl) = map { $_ + 1 } @{$ctx->{wcr}}{qw(
+                summary_branches summary_tags summary_log)};
+        $ctx->{qx_res} = {};
+        my $qsp_err = \($ctx->{-qsp_err} = '');
+        my %opt = (quiet => 1, 2 => $ctx->{wcr}->{log_fh});
+        my %env = (GIT_DIR => $ctx->{git}->{git_dir});
+        my @log = (qw(git log), "-$nl", '--pretty=format:%d %H %h %cs %s');
+        push(@log, $tip) if defined $tip;
+
+        # limit scope for MockHTTP test (t/solver_git.t)
+        my $END = on_destroy \&summary_END, $ctx;
+        for (['log', \@log],
+                 [ 'heads', [@EACH_REF, "--count=$nb", 'refs/heads'] ],
+                 [ 'tags', [@EACH_REF, "--count=$nt", 'refs/tags'] ]) {
+                my ($k, $cmd) = @$_;
+                my $qsp = PublicInbox::Qspawn->new($cmd, \%env, \%opt);
+                $qsp->{qsp_err} = $qsp_err;
+                $qsp->psgi_qx($ctx->{env}, undef, \&capture, $ctx, $k, $END);
+        }
+        $tip //= 'HEAD';
+        my @try = ("$tip:README", "$tip:README.md"); # TODO: configurable
+        my %ctx = (%$ctx, -END => $END, -readme_tries => [ @try ]);
+        PublicInbox::ViewVCS::do_cat_async(\%ctx, \&set_readme, @try);
+}
+
+# called by GzipFilter->close after translate
+sub zflush { $_[0]->SUPER::zflush('</pre>', $_[0]->_html_end) }
+
+# called by GzipFilter->write or GetlineResponse->getline
+sub translate {
+        my $ctx = shift;
+        $_[0] // return zflush($ctx); # getline caller
+        my @out;
+        my $fbuf = delete($ctx->{fbuf}) // shift;
+        $fbuf .= shift while @_;
+        if ($ctx->{-heads}) {
+                while ($fbuf =~ s/\A([^\n]+)\n//s) {
+                        utf8_maybe(my $x = $1);
+                        push @out, _refs_heads_link($x, '../../');
+                }
+        } else {
+                my ($snap_pfx, @snap_fmt) = _snapshot_link_prep($ctx);
+                while ($fbuf =~ s/\A([^\n]+)\n//s) {
+                        utf8_maybe(my $x = $1);
+                        push @out, _refs_tags_link($x, '../../',
+                                                $snap_pfx, @snap_fmt);
+                }
+        }
+        $ctx->{fbuf} = $fbuf; # may be incomplete
+        @out ? $ctx->SUPER::translate(@out) : ''; # not EOF, yet
+}
+
+sub _refs_parse_hdr { # {parse_hdr} for Qspawn
+        my ($r, $bref, $ctx) = @_;
+        my ($code, $top);
+        if ($r == 0) {
+                $code = 404;
+                $top = $ctx->{-heads} ? $NO_HEADS : $NO_TAGS;
+        } else {
+                $code = 200;
+                $top = $ctx->{-heads} ? $HEADS_CMD : $TAGS_CMD;
+        }
+        PublicInbox::WwwStream::html_init($ctx);
+        bless $ctx, __PACKAGE__; # re-bless for ->translate
+        print { $ctx->{zfh} } '<pre>', $top;
+        [ $code, delete($ctx->{-res_hdr}), $ctx ]; # [2] is qspawn.filter
+}
+
+sub refs_foo { # /$REPO/refs/{heads,tags} endpoints
+        my ($self, $ctx, $pfx) = @_;
+        $ctx->{wcr} = $self;
+        $ctx->{-upfx} = '../../';
+        $ctx->{-heads} = 1 if $pfx eq 'refs/heads';
+        my $qsp = PublicInbox::Qspawn->new([@EACH_REF, $pfx ],
+                                        { GIT_DIR => $ctx->{git}->{git_dir} });
+        $qsp->psgi_yield($ctx->{env}, undef, \&_refs_parse_hdr, $ctx);
+}
+
+sub srv { # endpoint called by PublicInbox::WWW
+        my ($self, $ctx) = @_;
+        my $path_info = $ctx->{env}->{PATH_INFO};
+        my $git;
+        # handle clone requests
+        my $pi_cfg = $self->{pi_cfg};
+        if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!x and
+                ($git = $pi_cfg->get_coderepo($1))) {
+                        PublicInbox::GitHTTPBackend::serve($ctx->{env},$git,$2);
+        } elsif ($path_info =~ m!\A/(.+?)/\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                $ctx->{wcr} = $self;
+                sub { summary($ctx, $_[0]) }; # $_[0] = wcb
+        } elsif ($path_info =~ m!\A/(.+?)/([a-f0-9]+)/s/([^/]+)?\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                $ctx->{lh} = $self->{log_fh};
+                PublicInbox::ViewVCS::show($ctx, $2, $3);
+        } elsif ($path_info =~ m!\A/(.+?)/tree/(.*)\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                $ctx->{lh} = $self->{log_fh};
+                PublicInbox::RepoTree::srv_tree($ctx, $2) // r(404);
+        } elsif ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                $ctx->{wcr} = $self;
+                PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404);
+        } elsif ($path_info =~ m!\A/(.+?)/atom/(.*)\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                $ctx->{lh} = $self->{log_fh};
+                PublicInbox::RepoAtom::srv_atom($ctx, $2) // r(404);
+        } elsif ($path_info =~ m!\A/(.+?)/tags\.atom\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                PublicInbox::RepoAtom::srv_tags_atom($ctx);
+        } elsif ($path_info =~ m!\A/(.+?)/(refs/(?:heads|tags))/\z! and
+                        ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
+                refs_foo($self, $ctx, $2);
+        } elsif ($path_info =~ m!\A/(.*?\*.*?)/*\z!) {
+                my $re = PublicInbox::Config::glob2re($1);
+                PublicInbox::RepoList::html($self, $ctx, qr!$re\z!) // r(404);
+        } elsif ($path_info =~ m!\A/(.+?)/\z!) {
+                my $re = qr!\A\Q$1\E/!;
+                PublicInbox::RepoList::html($self, $ctx, $re) // r(404);
+        } elsif ($path_info =~ m!\A/(.+?)\z! and
+                        ($git = $pi_cfg->get_coderepo($1))) {
+                my $qs = $ctx->{env}->{QUERY_STRING};
+                my $url = $git->base_url($ctx->{env});
+                $url .= "?$qs" if $qs ne '';
+                [ 301, [ Location => $url, 'Content-Type' => 'text/plain' ],
+                        [ "Redirecting to $url\n" ] ];
+        } else {
+                r(404);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
index 170bfcaa..75338806 100644
--- a/lib/PublicInbox/WwwHighlight.pm
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Standalone PSGI app to provide syntax highlighting as-a-service
@@ -20,8 +20,7 @@
 
 package PublicInbox::WwwHighlight;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use parent qw(PublicInbox::HlMod);
 use PublicInbox::Linkify qw();
 use PublicInbox::Hval qw(ascii_html);
@@ -47,7 +46,7 @@ sub read_in_full ($) {
                 return \$buf if $r == 0;
                 $off += $r;
         }
-        $env->{'psgi.errors'}->print("input read error: $!\n");
+        warn "input read error: $!";
         undef;
 }
 
@@ -69,7 +68,7 @@ sub call {
         $l->linkify_2($$bref);
 
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
-        push @$h, 'Content-Length', bytes::length($$bref);
+        push @$h, 'Content-Length', length($$bref);
 
         [ 200, $h, [ $$bref ] ]
 }
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index bda2761c..2d6c74da 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -1,29 +1,39 @@
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Provide an HTTP-accessible listing of inboxes.
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
-use PublicInbox::Linkify;
+use v5.10.1;
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
-use bytes (); # bytes::length
+use PublicInbox::WwwStream;
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::MID qw(mid_escape);
 
 sub ibx_entry {
-        my ($ctx, $ibx) = @_;
-        my $mtime = $ibx->modified;
-        my $ts = fmt_ts($mtime);
-        my $url = prurl($ctx->{env}, $ibx->{url});
-        my $tmp = <<"";
-* $ts - $url
-  ${\$ibx->description}
-
-        if (defined(my $info_url = $ibx->{infourl})) {
-                $tmp .= '  ' . prurl($ctx->{env}, $info_url) . "\n";
+        my ($ctx, $ibx, $ce) = @_;
+        my $desc = ascii_html($ce->{description} //= $ibx->description);
+        my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
+        my ($url, $href);
+        if (scalar(@{$ibx->{url} // []})) {
+                $url = $href = ascii_html(prurl($ctx->{env}, $ibx->{url}));
+        } else {
+                $href = ascii_html(uri_escape_utf8($ibx->{name})) . '/';
+                $url = ascii_html($ibx->{name});
         }
-        push @{$ctx->{-list}}, [ $mtime, $tmp ];
+        my $tmp = <<EOM;
+* $ts - <a\nhref="$href">$url</a>
+  $desc
+EOM
+        if (defined($url = $ibx->{infourl})) {
+                $url = ascii_html(prurl($ctx->{env}, $url));
+                $tmp .= qq(  <a\nhref="$url">$url</a>\n);
+        }
+        push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+                                $tmp : [ $ce->{-modified}, $tmp ] ));
 }
 
 sub list_match_i { # ConfigIter callback
@@ -31,29 +41,30 @@ sub list_match_i { # ConfigIter callback
         if (defined($section)) {
                 return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
                 my $ibx = $cfg->lookup_name($1) or return;
-                if (!$ibx->{-hide}->{$ctx->hide_key} &&
-                                        grep(/$re/, @{$ibx->{url}})) {
-                        $ctx->ibx_entry($ibx);
-                }
+                $ctx->ibx_entry($ibx) unless $ctx->hide_inbox($ibx, $re);
         } else { # undef == "EOF"
                 $ctx->{-wcb}->($ctx->psgi_triple);
         }
 }
 
-sub url_regexp {
+sub url_filter {
         my ($ctx, $key, $default) = @_;
         $key //= 'publicInbox.wwwListing';
         $default //= '404';
-        my $v = $ctx->{www}->{pi_config}->{lc $key} // $default;
+        my $cfg = $ctx->{www}->{pi_cfg};
+        my $v = $cfg->{lc $key} // $default;
 again:
         if ($v eq 'match=domain') {
                 my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
                 $h =~ s/:[0-9]+\z//;
-                qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+                (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
         } elsif ($v eq 'all') {
-                qr/./;
+                my $niu = $cfg->{lc 'publicinbox.nameIsUrl'};
+                defined($niu) && $cfg->git_bool($niu) and
+                        $ctx->{-name_is_url} = [ '.' ];
+                (qr/./, undef);
         } elsif ($v eq '404') {
-                undef;
+                (undef, undef);
         } else {
                 warn <<EOF;
 `$v' is not a valid value for `$key'
@@ -66,42 +77,170 @@ EOF
 
 sub hide_key { 'www' }
 
+sub hide_inbox {
+        my ($ctx, $ibx, $re) = @_;
+        $ibx->{'-hide_'.$ctx->hide_key} ||
+                !grep(/$re/, @{$ibx->{url} // $ctx->{-name_is_url} // []})
+}
+
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+        my ($misc, $ctx, $re, $qs) = @_;
+        require PublicInbox::SearchQuery;
+        my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+        my $o = $q->{o};
+        my ($asc, $min, $max);
+        if ($o < 0) {
+                $asc = 1;
+                $o = -($o + 1); # so [-1] is the last element, like Perl lists
+        }
+        my $r = $q->{r};
+        my $opt = {
+                offset => $o,
+                asc => $asc,
+                relevance => $r,
+                limit => $q->{l}
+        };
+        $qs .= ' type:inbox';
+
+        delete $ctx->{-list}; # reset if retried
+        my $pi_cfg = $ctx->{www}->{pi_cfg};
+        my $user_query = $q->{'q'} // '';
+        if ($user_query =~ /\S/) {
+                $qs = "( $qs ) AND ( $user_query )";
+        } else { # special case for ALL
+                $ctx->ibx_entry($pi_cfg->ALL // die('BUG: ->ALL expected'), {});
+        }
+        my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+
+        for my $mi ($mset->items) {
+                my $doc = $mi->get_document;
+                my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+                $eidx_key // next;
+                my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+                next if $ctx->hide_inbox($ibx, $re);
+                $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+                if ($r) { # for descriptions in search_nav_bot
+                        my $pct = PublicInbox::Search::get_pct($mi);
+                        # only when sorting by relevance, ->items is always
+                        # ordered descending:
+                        $max //= $pct;
+                        $min = $pct;
+                }
+        }
+        if ($r) { # for descriptions in search_nav_bot
+                $q->{-min_pct} = $min;
+                $q->{-max_pct} = $max;
+        }
+        $ctx->{-mset} = $mset;
+        psgi_triple($ctx);
+}
+
 sub response {
         my ($class, $ctx) = @_;
         bless $ctx, $class;
-        my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_config},
+        my ($re, $qs) = $ctx->url_filter;
+        $re // return $ctx->psgi_triple;
+        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+                if ($ctx->{qp}->{a} && # "search all inboxes"
+                                $ctx->{qp}->{'q'}) {
+                        my $u = 'all/?q='.mid_escape($ctx->{qp}->{'q'});
+                        return [ 302, [ 'Location' => $u,
+                                qw(Content-Type text/plain) ],
+                                [ "Redirecting to $u\n" ] ];
+                }
+                # FIXME: test this in t/
+                $ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+                                                $ctx, $re, $qs);
+        } else { # slow path, no [extindex "all"] configured
+                my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
                                                 \&list_match_i, $re, $ctx);
-        sub {
-                $ctx->{-wcb} = $_[0]; # HTTP server callback
-                $ctx->{env}->{'pi-httpd.async'} ?
-                                $iter->event_step : $iter->each_section;
+                sub {
+                        $ctx->{-wcb} = $_[0]; # HTTP server callback
+                        $ctx->{env}->{'pi-httpd.async'} ?
+                                        $iter->event_step : $iter->each_section;
+                }
         }
 }
 
+sub mset_footer ($$) {
+        my ($ctx, $mset) = @_;
+        # no footer if too few matches
+        return '' if $mset->get_matches_estimated == $mset->size;
+        require PublicInbox::SearchView;
+        PublicInbox::SearchView::search_nav_bot($ctx, $mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+        my ($ctx, $mset) = @_;
+        my $q = $ctx->{-sq};
+        my $qh = $q->{'q'} // '';
+        if ($qh ne '') {
+                utf8::decode($qh);
+                $qh = qq[\nvalue="].ascii_html($qh).'"';
+        }
+        chop(my $rv = <<EOM);
+<form action="./"><pre><input name=q type=text$qh/><input
+type=submit value="locate inbox"/><input type=submit name=a
+value="search all inboxes"/></pre></form><pre>
+EOM
+        if (defined($q->{'q'})) {
+                my $initial_q = $ctx->{-uxs_retried};
+                if (defined $initial_q) {
+                        my $rewritten = $q->{'q'};
+                        utf8::decode($initial_q);
+                        utf8::decode($rewritten);
+                        $initial_q = ascii_html($initial_q);
+                        $rewritten = ascii_html($rewritten);
+                        $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+                        $rv .= " returned no results, used:\n";
+                        $rv .= " <b>$rewritten</b>\n instead\n\n";
+                }
+                $rv .= 'Search results ordered by [';
+                if ($q->{r}) {
+                        my $d = $q->qs_html(r => 0);
+                        $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+                } else {
+                        my $d = $q->qs_html(r => 1);
+                        $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+                }
+                $rv .= ']';
+        }
+        $rv .= qq{</pre>};
+}
+
 sub psgi_triple {
         my ($ctx) = @_;
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
                         'Content-Length', undef ];
         my $gzf = gzf_maybe($h, $ctx->{env});
-        $gzf->zmore('<html><head><title>' .
-                                'public-inbox listing</title>' .
-                                '</head><body><pre>');
+        my $zfh = $gzf->zfh;
+        print $zfh '<html><head><title>public-inbox listing</title>',
+                        $ctx->{www}->style('+/'),
+                        '</head><body>';
         my $code = 404;
-        if (my $list = $ctx->{-list}) {
+        if (my $list = delete $ctx->{-list}) {
+                my $mset = delete $ctx->{-mset};
                 $code = 200;
-                # sort by ->modified
-                @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
-                $list = join("\n", @$list);
-                my $l = PublicInbox::Linkify->new;
-                $gzf->zmore($l->to_html($list));
+                if ($mset) { # already sorted, so search bar:
+                        print $zfh mset_nav_top($ctx, $mset);
+                } else { # sort config dump by ->modified
+                        @$list = map { $_->[1] }
+                                sort { $b->[0] <=> $a->[0] } @$list;
+                }
+                print $zfh '<pre>', join("\n", @$list); # big
+                print $zfh mset_footer($ctx, $mset) if $mset;
+        } elsif (my $mset = delete $ctx->{-mset}) {
+                print $zfh mset_nav_top($ctx, $mset),
+                                '<pre>no matching inboxes',
+                                mset_footer($ctx, $mset);
         } else {
-                $gzf->zmore('no inboxes, yet');
+                print $zfh '<pre>no inboxes, yet';
         }
         my $out = $gzf->zflush('</pre><hr><pre>'.
-                        PublicInbox::WwwStream::code_footer($ctx->{env}) .
+qq(This is a listing of public inboxes, see the `mirror' link of each inbox
+for instructions on how to mirror all the data and code on this site.) .
                         '</pre></body></html>');
-        $h->[3] = bytes::length($out);
+        $h->[3] = length($out);
         [ $code, $h, [ $out ] ];
 }
 
diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm
index 051d2e03..d8902193 100644
--- a/lib/PublicInbox/WwwStatic.pm
+++ b/lib/PublicInbox/WwwStatic.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # This package can either be a PSGI response body for a static file
@@ -9,16 +9,15 @@
 # functionality of nginx.
 package PublicInbox::WwwStatic;
 use strict;
+use v5.10.1;
 use parent qw(Exporter);
-use bytes ();
 use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
-use POSIX qw(strftime);
 use HTTP::Date qw(time2str);
 use HTTP::Status qw(status_message);
 use Errno qw(EACCES ENOTDIR ENOENT);
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::GzipFilter qw(gzf_maybe);
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html fmt_ts);
 use Plack::MIME;
 our @EXPORT_OK = qw(@NO_CACHE r path_info_raw);
 
@@ -218,7 +217,7 @@ my %path_re_cache;
 sub path_info_raw ($) {
         my ($env) = @_;
         my $sn = $env->{SCRIPT_NAME};
-        my $re = $path_re_cache{$sn} ||= do {
+        my $re = $path_re_cache{$sn} //= do {
                 $sn = '/'.$sn unless index($sn, '/') == 0;
                 $sn =~ s!/\z!!;
                 qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
@@ -275,12 +274,11 @@ sub dir_response ($$$) {
         my $path_info = $env->{PATH_INFO};
         push @entries, '..' if $path_info ne '/';
         for my $base (@entries) {
+                my @st = stat($fs_path . $base) or next; # unlikely
                 my $href = ascii_html(uri_escape_utf8($base));
                 my $name = ascii_html($base);
-                my @st = stat($fs_path . $base) or next; # unlikely
-                my ($gzipped, $uncompressed, $hsize);
-                my $entry = '';
                 my $mtime = $st[9];
+                my ($entry, $hsize);
                 if (-d _) {
                         $href .= '/';
                         $name .= '/';
@@ -296,12 +294,12 @@ sub dir_response ($$$) {
                         next;
                 }
                 # 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
-                $hsize = sprintf('% 8s', $hsize);
                 my $pad = 54 - length($name);
                 $pad = 1 if $pad <= 0;
-                $entry .= qq(<a\nhref="$href">$name</a>) . (' ' x $pad);
-                $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
-                $entry .= $mtime . $hsize;
+                $entry = qq(\n<a\nhref="$href">$name</a>) .
+                                (' ' x $pad) .
+                                fmt_ts($mtime) .
+                                sprintf('% 8s', $hsize);
         }
 
         # filter out '.gz' files as long as the mtime matches the
@@ -309,17 +307,16 @@ sub dir_response ($$$) {
         delete(@other{keys %want_gz});
         @entries = ((map { ${$dirs{$_}} } sort keys %dirs),
                         (map { ${$other{$_}} } sort keys %other));
-
         my $path_info_html = ascii_html($path_info);
-        my $h = [qw(Content-Type text/html Content-Length), undef];
-        my $gzf = gzf_maybe($h, $env);
-        $gzf->zmore("<html><head><title>Index of $path_info_html</title>" .
-                ${$self->{style}} .
-                "</head><body><pre>Index of $path_info_html</pre><hr><pre>\n");
-        $gzf->zmore(join("\n", @entries));
-        my $out = $gzf->zflush("</pre><hr></body></html>\n");
-        $h->[3] = bytes::length($out);
-        [ 200, $h, [ $out ] ]
+        my @h = qw(Content-Type text/html);
+        my $gzf = gzf_maybe(\@h, $env);
+        print { $gzf->zfh } '<html><head><title>Index of ', $path_info_html,
+                '</title>', ${$self->{style}}, '</head><body><pre>Index of ',
+                $path_info_html, '</pre><hr><pre>', @entries,
+                '</pre><hr></body></html>';
+        my $out = $gzf->zflush;
+        push @h, 'Content-Length', length($out);
+        [ 200, \@h, [ $out ] ]
 }
 
 sub call { # PSGI app endpoint
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 638f4e27..8d32074f 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # HTML body stream for which yields getline+close methods for
@@ -7,16 +7,19 @@
 # See PublicInbox::GzipFilter parent class for more info.
 package PublicInbox::WwwStream;
 use strict;
+use v5.10.1;
 use parent qw(Exporter PublicInbox::GzipFilter);
 our @EXPORT_OK = qw(html_oneshot);
-use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl ts2str);
-our $TOR_URL = 'https://www.torproject.org/';
-our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+
+our $CODE_URL = [ qw(
+http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git
+https://public-inbox.org/public-inbox.git) ];
 
 sub base_url ($) {
-        my $ctx = shift;
-        my $base_url = $ctx->{-inbox}->base_url($ctx->{env});
+        my ($ctx) = @_;
+        my $thing = $ctx->{ibx} // $ctx->{git} // return;
+        my $base_url = $thing->base_url($ctx->{env});
         chop $base_url; # no trailing slash for clone
         $base_url;
 }
@@ -25,22 +28,74 @@ sub init {
         my ($ctx, $cb) = @_;
         $ctx->{cb} = $cb;
         $ctx->{base_url} = base_url($ctx);
+        $ctx->{-res_hdr} = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
+        $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($ctx->{-res_hdr},
+                                                        $ctx->{env});
         bless $ctx, __PACKAGE__;
 }
 
 sub async_eml { # for async_blob_cb
         my ($ctx, $eml) = @_;
-        $ctx->{http_out}->write($ctx->translate($ctx->{cb}->($ctx, $eml)));
+        $ctx->write($ctx->{cb}->($ctx, $eml));
+}
+
+sub html_repo_top ($) {
+        my ($ctx) = @_;
+        my $git = $ctx->{git} // return $ctx->html_top_fallback;
+        my $desc = ascii_html($git->description);
+        my $title = delete($ctx->{-title_html}) // $desc;
+        my $upfx = $ctx->{-upfx} // '';
+        my $atom = $ctx->{-atom} // (substr($upfx, -1) eq '/' ?
+                                        "${upfx}atom/" : "$upfx/atom/");
+        my $top = ascii_html($git->{nick});
+        $top = qq(<a\nhref="$upfx">$top</a>) if length($upfx);
+        $top .= <<EOM;
+  <a href='$upfx#readme'>about</a> / <a
+href='$upfx#heads'>heads</a> / <a
+href='$upfx#tags'>tags</a>
+<b>$desc</b>
+EOM
+        my @url = PublicInbox::ViewVCS::ibx_url_for($ctx);
+        if (@url) {
+                $ctx->{-has_srch} = 1;
+                my $base_url = base_url($ctx);
+                my ($pfx, $sfx) = ($base_url =~ m!\A(https?://[^/]+/)(.*)\z!i);
+                my $iupfx = '../' x (($sfx =~ tr!/!/!) + 1);
+                $pfx = ascii_html($pfx);
+                $pfx = qr/\A\Q$pfx\E/i;
+                my $tmp = $top;
+                $top = '';
+                my ($s, $u);
+                my $q_val = delete($ctx->{-q_value_html}) // '';
+                $q_val = qq(\nvalue="$q_val") if $q_val ne '';
+                for (@url) {
+                        $u = $s = ascii_html($_);
+                        substr($u, 0, 0, $iupfx) if $u !~ m!://!;
+                        $s =~ s!$pfx!!;
+                        $s =~ s!/\z!!;
+                        $top .= qq{<form\naction="$u"><pre>$tmp} .
+                                qq{<input\nname=q type=text$q_val />} .
+                                qq{<input type=submit\n} .
+                                qq{value="search mail in `$s&#39;"/>} .
+                                q{</pre></form>};
+                        $tmp = '';
+                }
+        } else {
+                $top = "<pre>$top</pre>";
+        }
+        "<html><head><title>$title</title>" .
+                qq(<link\nrel=alternate\ntitle="Atom feed"\n).
+                qq(href="$atom"\ntype="application/atom+xml"/>) .
+                $ctx->{www}->style($upfx) .
+                '</head><body>'.$top;
 }
 
 sub html_top ($) {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx} // return html_repo_top($ctx);
         my $desc = ascii_html($ibx->description);
         my $title = delete($ctx->{-title_html}) // $desc;
         my $upfx = $ctx->{-upfx} || '';
-        my $help = $upfx.'_/text/help';
-        my $color = $upfx.'_/text/color';
         my $atom = $ctx->{-atom} || $upfx.'new.atom';
         my $top = "<b>$desc</b>";
         if (my $t_max = $ctx->{-t_max}) {
@@ -49,12 +104,18 @@ sub html_top ($) {
         # we had some kind of query, link to /$INBOX/?t=YYYYMMDDhhmmss
         } elsif ($ctx->{qp}->{t}) {
                 $top = qq(<a\nhref="./">$top</a>);
+        } elsif (length($upfx)) {
+                $top = qq(<a\nhref="$upfx">$top</a>);
         }
-        my $links = qq(<a\nhref="$help">help</a> / ).
-                        qq(<a\nhref="$color">color</a> / ).
-                        qq(<a\nhref=#mirror>mirror</a> / ).
+        my $code = $ibx->{coderepo} ? qq( / <a\nhref=#code>code</a>) : '';
+        # id=mirror must exist for legacy bookmarks
+        my $links = qq(<a\nhref="${upfx}_/text/help/">help</a> / ).
+                        qq(<a\nhref="${upfx}_/text/color/">color</a> / ).
+                        qq(<a\nid=mirror) .
+                        qq(\nhref="${upfx}_/text/mirror/">mirror</a>$code / ).
                         qq(<a\nhref="$atom">Atom feed</a>);
-        if ($ibx->search) {
+        $links .= delete($ctx->{-html_more_links}) if $ctx->{-html_more_links};
+        if ($ibx->isrch) {
                 my $q_val = delete($ctx->{-q_value_html}) // '';
                 $q_val = qq(\nvalue="$q_val") if $q_val ne '';
                 # XXX gross, for SearchView.pm
@@ -76,106 +137,71 @@ sub html_top ($) {
                 '</head><body>'. $top . (delete($ctx->{-html_tip}) // '');
 }
 
+sub inboxes { () } # TODO
+
 sub coderepos ($) {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
-        my @ret;
-        if (defined(my $cr = $ibx->{coderepo})) {
-                my $cfg = $ctx->{www}->{pi_config};
-                my $env = $ctx->{env};
-                for my $cr_name (@$cr) {
-                        my $urls = $cfg->{"coderepo.$cr_name.cgiturl"};
-                        if ($urls) {
-                                $ret[0] //= <<EOF;
-code repositories for the project(s) associated with this inbox:
-EOF
-                                $ret[0] .= "\n\t".prurl($env, $_) for @$urls;
-                        }
+        $ctx->{ibx} // return inboxes($ctx);
+        my $cr = $ctx->{ibx}->{coderepo} // return ();
+        my $upfx = ($ctx->{-upfx} // ''). '../';
+        my $pfx = $ctx->{base_url} //= $ctx->base_url;
+        my $up = $upfx =~ tr!/!/!;
+        $pfx =~ s!/[^/]+\z!/! for (1..$up);
+        $pfx .= '/' if substr($pfx, -1, 1) ne '/';
+        my $buf = '<a id=code>' .
+                'Code repositories for project(s) associated with this '.
+                $ctx->{ibx}->thing_type . "\n";
+        for my $git (@{$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx})}) {
+                for ($git->pub_urls($ctx->{env})) {
+                        my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $pfx.$_;
+                        $u = ascii_html(prurl($ctx->{env}, $u));
+                        $buf .= qq(\n\t<a\nhref="$u">$u</a>);
                 }
         }
-        @ret; # may be empty
-}
-
-sub code_footer ($) {
-        my ($env) = @_;
-        my $u = prurl($env, $CODE_URL);
-        qq(AGPL code for this site: git clone <a\nhref="$u">$u</a>)
+        ($buf);
 }
 
 sub _html_end {
         my ($ctx) = @_;
-        my $urls = <<EOF;
-<a
-id=mirror>This inbox may be cloned and mirrored by anyone:</a>
+        my $upfx = $ctx->{-upfx} || '';
+        my $m = "${upfx}_/text/mirror/";
+        my $x = '';
+        if ($ctx->{ibx} && $ctx->{ibx}->can('cloneurl')) {
+                $x = <<EOF;
+This is a public inbox, see <a
+href="$m">mirroring instructions</a>
+for how to clone and mirror all data and code used for this inbox
 EOF
-
-        my $ibx = $ctx->{-inbox};
-        my $desc = ascii_html($ibx->description);
-
-        my @urls;
-        my $http = $ctx->{base_url};
-        my $max = $ibx->max_git_epoch;
-        my $dir = (split(m!/!, $http))[-1];
-        my %seen = ($http => 1);
-        if (defined($max)) { # v2
-                for my $i (0..$max) {
-                        # old epochs my be deleted:
-                        -d "$ibx->{inboxdir}/git/$i.git" or next;
-                        my $url = "$http/$i";
-                        $seen{$url} = 1;
-                        push @urls, "$url $dir/git/$i.git";
-                }
-                my $nr = scalar(@urls);
-                if ($nr > 1) {
-                        $urls .= "\n\t# this inbox consists of $nr epochs:";
-                        $urls[0] .= "\t# oldest";
-                        $urls[-1] .= "\t# newest";
+                my $has_nntp = @{$ctx->{ibx}->nntp_url($ctx)};
+                my $has_imap = @{$ctx->{ibx}->imap_url($ctx)};
+                if ($has_nntp || $has_imap) {
+                        substr($x, -1, 1) = ";\n"; # s/\n/;\n
+                        if ($has_nntp && $has_imap) {
+                                $x .= <<EOM;
+as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).
+EOM
+                        } elsif ($has_nntp) {
+                                $x .= <<EOM;
+as well as URLs for NNTP newsgroup(s).
+EOM
+                        } else {
+                                $x .= <<EOM;
+as well as URLs for IMAP folder(s).
+EOM
+                        }
                 }
-        } else { # v1
-                push @urls, $http;
-        }
-
-        # FIXME: epoch splits can be different in other repositories,
-        # use the "cloneurl" file as-is for now:
-        foreach my $u (@{$ibx->cloneurl}) {
-                next if $seen{$u}++;
-                push @urls, $u =~ /\Ahttps?:/ ? qq(<a\nhref="$u">$u</a>) : $u;
-        }
-
-        $urls .= "\n" . join('', map { "\tgit clone --mirror $_\n" } @urls);
-        my $addrs = $ibx->{address};
-        $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
-        my $v = defined $max ? '-V2' : '-V1';
-        $urls .= <<EOF;
-
-        # If you have public-inbox 1.1+ installed, you may
-        # initialize and index your mirror using the following commands:
-        public-inbox-init $v $ibx->{name} $dir/ $http \\
-                $addrs
-        public-inbox-index $dir
-EOF
-        my $cfg_link = ($ctx->{-upfx} // '').'_/text/config/raw';
-        $urls .= <<EOF;
-
-Example <a
-href="$cfg_link">config snippet</a> for mirrors.
+        } elsif ($ctx->{ibx}) { # extindex
+                $x = <<EOF;
+This is an external index of several public inboxes,
+see <a href="$m">mirroring instructions</a> on how to clone and mirror
+all data and code used by this external index.
 EOF
-        my @nntp = map { qq(<a\nhref="$_">$_</a>) } @{$ibx->nntp_url};
-        if (@nntp) {
-                $urls .= @nntp == 1 ? 'Newsgroup' : 'Newsgroups are';
-                $urls .= ' available over NNTP:';
-                $urls .= "\n\t" . join("\n\t", @nntp) . "\n";
+        } elsif ($ctx->{git}) { # coderepo
+                $x = join('', map { "git clone $_\n" }
+                        @{$ctx->{git}->cloneurl($ctx->{env})});
         }
-        if ($urls =~ m!\b[^:]+://\w+\.onion/!) {
-                $urls .= " note: .onion URLs require Tor: ";
-                $urls .= qq[<a\nhref="$TOR_URL">$TOR_URL</a>];
-        }
-        '<hr><pre>'.join("\n\n",
-                $desc,
-                $urls,
-                coderepos($ctx),
-                code_footer($ctx->{env})
-        ).'</pre></body></html>';
+        chomp $x;
+        '<hr><pre>'.join("\n\n", coderepos($ctx), $x).'</pre></body></html>'
 }
 
 # callback for HTTP.pm (and any other PSGI servers)
@@ -184,7 +210,7 @@ sub getline {
         my $cb = $ctx->{cb} or return;
         while (defined(my $x = $cb->($ctx))) { # x = smsg or scalar non-ref
                 if (ref($x)) { # smsg
-                        my $eml = $ctx->{-inbox}->smsg_eml($x) or next;
+                        my $eml = $ctx->{ibx}->smsg_eml($x) or next;
                         $ctx->{smsg} = $x;
                         return $ctx->translate($cb->($ctx, $eml));
                 } else { # scalar
@@ -195,19 +221,27 @@ sub getline {
         $ctx->zflush(_html_end($ctx));
 }
 
-sub html_oneshot ($$;$) {
-        my ($ctx, $code, $sref) = @_;
+sub html_done ($;@) {
+        my $ctx = $_[0];
+        my $bdy = $ctx->zflush(@_[1..$#_], _html_end($ctx));
+        my $res_hdr = delete $ctx->{-res_hdr};
+        push @$res_hdr, 'Content-Length', length($bdy);
+        [ 200, $res_hdr, [ $bdy ] ]
+}
+
+sub html_oneshot ($$;@) {
+        my ($ctx, $code) = @_[0, 1];
         my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8',
                 'Content-Length' => undef ];
         bless $ctx, __PACKAGE__;
         $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
-        $ctx->{base_url} //= do {
-                $ctx->zmore(html_top($ctx));
-                base_url($ctx);
+        my @top;
+        $ctx->{base_url} // do {
+                @top = html_top($ctx);
+                $ctx->{base_url} = base_url($ctx);
         };
-        $ctx->zmore($$sref) if $sref;
-        my $bdy = $ctx->zflush(_html_end($ctx));
-        $res_hdr->[3] = bytes::length($bdy);
+        my $bdy = $ctx->zflush(@top, @_[2..$#_], _html_end($ctx));
+        $res_hdr->[3] = length($bdy);
         [ $code, $res_hdr, [ $bdy ] ]
 }
 
@@ -218,8 +252,7 @@ sub async_next ($) {
                 if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) {
                         $ctx->smsg_blob($smsg);
                 } else {
-                        $ctx->{http_out}->write(
-                                        $ctx->translate(_html_end($ctx)));
+                        $ctx->write(_html_end($ctx));
                         $ctx->close; # GzipFilter->close
                 }
         };
@@ -227,10 +260,23 @@ sub async_next ($) {
 }
 
 sub aresponse {
-        my ($ctx, $code, $cb) = @_;
-        my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
+        my ($ctx, $cb) = @_;
         init($ctx, $cb);
-        $ctx->psgi_response($code, $res_hdr);
+        $ctx->psgi_response(200, delete $ctx->{-res_hdr});
+}
+
+sub html_init {
+        my $ctx = $_[-1];
+        $ctx->{base_url} = base_url($ctx);
+        my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
+        $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
+        bless $ctx, @_ > 1 ? $_[0] : __PACKAGE__;
+        print { $ctx->zfh } html_top($ctx);
+}
+
+sub DESTROY {
+        my ($ctx) = @_;
+        $ctx->{git}->cleanup if $ctx->{git} && $ctx->{git}->{-tmp};
 }
 
 1;
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 04c9b1c4..5e23005e 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -1,14 +1,14 @@
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # used for displaying help texts and other non-mail content
 package PublicInbox::WwwText;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::Linkify;
 use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
+use HTTP::Date qw(time2str);
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::GzipFilter qw(gzf_maybe);
 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
@@ -24,32 +24,29 @@ sub get_text {
         my ($ctx, $key) = @_;
         my $code = 200;
 
-        $key = 'help' if !defined $key; # this 302s to _/text/help/
+        $key //= 'help'; # this 302s to _/text/help/
 
         # get the raw text the same way we get mboxrds
         my $raw = ($key =~ s!/raw\z!!);
         my $have_tslash = ($key =~ s!/\z!!) if !$raw;
 
         my $txt = '';
-        my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
-        if (!_default_text($ctx, $key, $hdr, \$txt)) {
+        if (!_default_text($ctx, $key, \$txt)) {
                 $code = 404;
                 $txt = "404 Not Found ($key)\n";
         }
         my $env = $ctx->{env};
         if ($raw) {
-                if ($code == 200) {
-                        my $gzf = gzf_maybe($hdr, $env);
-                        $txt = $gzf->translate($txt);
-                        $txt .= $gzf->zflush;
-                }
-                $hdr->[3] = bytes::length($txt);
-                return [ $code, $hdr, [ $txt ] ]
+                my $h = delete $ctx->{-res_hdr};
+                $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
+                push @$h, 'Content-Type', 'text/plain',
+                        'Content-Length', length($txt);
+                return [ $code, $h, [ $txt ] ]
         }
 
         # enforce trailing slash for "wget -r" compatibility
         if (!$have_tslash && $code == 200) {
-                my $url = $ctx->{-inbox}->base_url($env);
+                my $url = $ctx->{ibx}->base_url($env);
                 $url .= "_/text/$key/";
 
                 return [ 302, [ 'Content-Type', 'text/plain',
@@ -71,14 +68,18 @@ sub get_text {
                 $txt = ascii_html($txt);
         }
         $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
-        PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
+        $txt =~ s!^search$!<a\nid=search>search</a>!sm;
+        $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!;
+        $txt =~ s!\b(Newsgroups?)\b!<a\nid=nntp>$1</a>!;
+        $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!;
+        PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt);
 }
 
 sub _srch_prefix ($$) {
-        my ($srch, $txt) = @_;
+        my ($ibx, $txt) = @_;
         my $pad = 0;
         my $htxt = '';
-        my $help = $srch->help;
+        my $help = $ibx->isrch->help;
         my $i;
         for ($i = 0; $i < @$help; $i += 2) {
                 my $pfx = $help->[$i];
@@ -89,10 +90,9 @@ sub _srch_prefix ($$) {
                 $htxt .= "\f\n";
         }
         $pad += 2;
-        my $padding = ' ' x ($pad + 8);
+        my $padding = ' ' x ($pad + 4);
         $htxt =~ s/^/$padding/gms;
-        $htxt =~ s/^$padding(\S+)\0/"        $1".
-                                (' ' x ($pad - length($1)))/egms;
+        $htxt =~ s/^$padding(\S+)\0/"    $1".(' ' x ($pad - length($1)))/egms;
         $htxt =~ s/\f\n/\n/gs;
         $$txt .= $htxt;
         1;
@@ -100,7 +100,7 @@ sub _srch_prefix ($$) {
 
 sub _colors_help ($$) {
         my ($ctx, $txt) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $env = $ctx->{env};
         my $base_url = $ibx->base_url($env);
         $$txt .= "color customization for $base_url\n";
@@ -113,7 +113,7 @@ Users of browsers such as dillo, Firefox, or some browser
 extensions may start by downloading the following sample CSS file
 to control the colors they see:
 
-        ${base_url}userContent.css
+  ${base_url}userContent.css
 
 CSS sample
 ----------
@@ -130,13 +130,51 @@ sub dq_escape ($) {
         $name;
 }
 
-sub URI_PATH () { '^A-Za-z0-9\-\._~/' }
+sub _coderepo_config ($$) {
+        my ($ctx, $txt) = @_;
+        my $cr = $ctx->{ibx}->{coderepo} // return;
+        # note: this doesn't preserve cgitrc layout, since we parse cgitrc
+        # and drop the original structure
+        $$txt .= "\tcoderepo = $_\n" for @$cr;
+        $$txt .= <<'EOF';
+
+; `coderepo' entries allows blob reconstruction via patch emails if
+; the inbox is indexed with Xapian.  `@@ <from-range> <to-range> @@'
+; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
+; an HTTP endpoint which reconstructs git blobs via git-apply(1).
+EOF
+        my $pi_cfg = $ctx->{www}->{pi_cfg};
+        for my $cr_name (@$cr) {
+                my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
+                my $path = "/path/to/$cr_name";
+                $cr_name = dq_escape($cr_name);
+
+                $$txt .= qq([coderepo "$cr_name"]\n);
+                if ($urls && scalar(@$urls)) {
+                        $$txt .= "\t; ";
+                        $$txt .= join(" ||\n\t;\t", map {;
+                                my $dst = $path;
+                                if ($path !~ m![a-z0-9_/\.\-]!i) {
+                                        $dst = '"'.dq_escape($dst).'"';
+                                }
+                                qq(git clone $_ $dst);
+                        } @$urls);
+                        $$txt .= "\n";
+                }
+                $$txt .= "\tdir = $path\n";
+                $$txt .= "\tcgiturl = https://example.com/";
+                $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
+        }
+}
 
 # n.b. this is a perfect candidate for memoization
-sub inbox_config ($$$) {
-        my ($ctx, $hdr, $txt) = @_;
-        my $ibx = $ctx->{-inbox};
-        push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
+sub inbox_config ($$) {
+        my ($ctx, $txt) = @_;
+        my $ibx = $ctx->{ibx};
+        push @{$ctx->{-res_hdr}},
+                'Content-Disposition', 'inline; filename=inbox.config';
+        my $t = eval { $ibx->mm->created_at };
+        push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t;
         my $name = dq_escape($ibx->{name});
         my $inboxdir = '/path/to/top-level-inbox';
         my $base_url = $ibx->base_url($ctx->{env});
@@ -165,7 +203,7 @@ EOS
         ; gzip(1), and sqlite3(1) as documented:
 EOF
                 for (sort keys %$altid_map) {
-                        $$txt .= "\t;\tcurl -XPOST $base_url$_.sql.gz | \\\n" .
+                        $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
                                 "\t;\tgzip -dc | \\\n" .
                                 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
                         $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
@@ -176,170 +214,328 @@ EOF
                 defined(my $v = $ibx->{$k}) or next;
                 $$txt .= "\t$k = $v\n";
         }
-        $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url});
+        $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
+        $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
+        _coderepo_config($ctx, $txt);
+        1;
+}
 
-        # note: this doesn't preserve cgitrc layout, since we parse cgitrc
-        # and drop the original structure
-        if (defined(my $cr = $ibx->{coderepo})) {
-                $$txt .= "\tcoderepo = $_\n" for @$cr;
-                $$txt .= <<'EOF';
+# n.b. this is a perfect candidate for memoization
+sub extindex_config ($$) {
+        my ($ctx, $txt) = @_;
+        my $ibx = $ctx->{ibx};
+        push @{$ctx->{-res_hdr}},
+                'Content-Disposition', 'inline; filename=extindex.config';
+        my $name = dq_escape($ibx->{name});
+        my $base_url = $ibx->base_url($ctx->{env});
+        $$txt .= <<EOS;
+; Example public-inbox config snippet for the external index (extindex) at:
+; $base_url
+; See public-inbox-config(5)manpage for more details:
+; https://public-inbox.org/public-inbox-config.html
+[extindex "$name"]
+        topdir = /path/to/extindex-topdir
+        url = https://example.com/$name/
+        url = http://example.onion/$name/
+EOS
+        for my $k (qw(infourl)) {
+                defined(my $v = $ibx->{$k}) or next;
+                $$txt .= "\t$k = $v\n";
+        }
+        _coderepo_config($ctx, $txt);
+        1;
+}
 
-; `coderepo' entries allows blob reconstruction via patch emails if
-; the inbox is indexed with Xapian.  `@@ <from-range> <to-range> @@'
-; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
-; an HTTP endpoint which reconstructs git blobs via git-apply(1).
-EOF
-                my $pi_config = $ctx->{www}->{pi_config};
-                for my $cr_name (@$cr) {
-                        my $urls = $pi_config->{"coderepo.$cr_name.cgiturl"};
-                        my $path = "/path/to/$cr_name";
-                        $cr_name = dq_escape($cr_name);
-
-                        $$txt .= qq([coderepo "$cr_name"]\n);
-                        if ($urls && scalar(@$urls)) {
-                                $$txt .= "\t; ";
-                                $$txt .= join(" ||\n\t;\t", map {;
-                                        my $dst = $path;
-                                        if ($path !~ m![a-z0-9_/\.\-]!i) {
-                                                $dst = '"'.dq_escape($dst).'"';
-                                        }
-                                        qq(git clone $_ $dst);
-                                } @$urls);
-                                $$txt .= "\n";
+sub coderepos_raw ($$) {
+        my ($ctx, $top_url) = @_;
+        my $cfg = $ctx->{www}->{pi_cfg};
+        my $cr = $cfg->repo_objs($ctx->{ibx}) or return ();
+        my $buf = 'Code repositories for project(s) associated with this '.
+                $ctx->{ibx}->thing_type . ":\n";
+        my @recs = PublicInbox::CodeSearch::repos_sorted($cfg, @$cr);
+        my $cr_score = $ctx->{ibx}->{-cr_score};
+        my $env = $ctx->{env};
+        for (@recs) {
+                my ($t, $git) = @$_;
+                for ($git->pub_urls($env)) {
+                        my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_;
+                        my $nr = $cr_score->{$git->{nick}};
+                        $buf .= "\n";
+                        $buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9);
+                        $buf .= ' '.fmt_ts($t).' '.prurl($env, $u);
+                }
+        }
+        ($buf);
+}
+
+sub _add_non_http_urls ($$) {
+        my ($ctx, $txt) = @_;
+        $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
+        my $urls = $ctx->{ibx}->imap_url($ctx);
+        if (@$urls) {
+                $urls = join("\n  ", @$urls);
+                $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg;
+                $$txt .= <<EOM
+
+IMAP subfolder(s) are available under:
+  $urls
+  # each subfolder (starting with `0') holds 50K messages at most
+EOM
+        }
+        $urls = $ctx->{ibx}->nntp_url($ctx);
+        if (@$urls) {
+                $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
+                $$txt .= ' available over NNTP:';
+                $$txt .= "\n  " . join("\n  ", @$urls) . "\n";
+        }
+        $urls = $ctx->{ibx}->pop3_url($ctx);
+        if (@$urls) {
+                $urls = join("\n  ", @$urls);
+                $$txt .= <<EOM;
+
+POP3 access is available:
+  $urls
+
+The POP3 password is: anonymous
+The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
+where \$(uuidgen) in the output of the `uuidgen' command on your system.
+The UUID in the username functions as a private cookie (don't share it).
+By default, only 1000 messages are retrieved.  You may download more
+by appending `?limit=NUM' (without quotes) to the username, where
+`NUM' is an integer between 1 and 50000.
+Idle accounts will expire periodically.
+EOM
+        }
+}
+
+sub _add_onion_note ($) {
+        my ($txt) = @_;
+        $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
+
+note: .onion URLs require Tor: https://www.torproject.org/
+
+EOM
+}
+
+sub _mirror_help ($$) {
+        my ($ctx, $txt) = @_;
+        my $ibx = $ctx->{ibx};
+        my $base_url = $ibx->base_url($ctx->{env});
+        chop $base_url; # no trailing slash for "git clone"
+        my $dir = (split(m!/!, $base_url))[-1];
+        my %seen = ($base_url => 1);
+        my $top_url = $base_url;
+        $top_url =~ s!/[^/]+\z!/!;
+        $$txt .= "public-inbox mirroring instructions\n\n";
+        if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
+                $$txt .=
+                  "This public inbox may be cloned and mirrored by anyone:\n";
+                my @urls;
+                my $max = $ibx->max_git_epoch;
+                # TODO: some of these URLs may be too long and we may need to
+                # do something like code_footer() above, but these are local
+                # admin-defined
+                if (defined($max)) { # v2
+                        for my $i (0..$max) {
+                                # old epochs my be deleted:
+                                -d "$ibx->{inboxdir}/git/$i.git" or next;
+                                my $url = "$base_url/$i";
+                                $seen{$url} = 1;
+                                push @urls, "$url $dir/git/$i.git";
+                        }
+                        my $nr = scalar(@urls);
+                        if ($nr > 1) {
+                                chomp($$txt .= <<EOM);
+
+  # this inbox consists of $nr epochs: (no need to clone all of them)
+EOM
+                                $urls[0] .= " # oldest";
+                                $urls[-1] .= " # newest";
                         }
-                        $$txt .= "\tdir = $path\n";
-                        $$txt .= "\tcgiturl = https://example.com/";
-                        $$txt .= uri_escape_utf8($cr_name, URI_PATH)."\n";
+                } else { # v1
+                        push @urls, $base_url;
+                }
+                # FIXME: epoch splits can be different in other repositories,
+                # use the "cloneurl" file as-is for now:
+                for my $u (@{$ibx->cloneurl}) {
+                        next if $seen{$u}++;
+                        push @urls, $u;
+                }
+                $$txt .= "\n";
+                $$txt .= join('', map { "  git clone --mirror $_\n" } @urls);
+                my $addrs = $ibx->{address} // 'inbox@example.com';
+                my $ng = $ibx->{newsgroup} // '';
+                substr($ng, 0, 0, ' --ng ') if $ng;
+                $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
+                my $v = defined $max ? '-V2' : '-V1';
+                $$txt .= <<EOF;
+
+  # If you have public-inbox 1.1+ installed, you may
+  # initialize and index your mirror using the following commands:
+  public-inbox-init $v$ng \\
+    $ibx->{name} ./$dir $base_url \\
+    $addrs
+  public-inbox-index ./$dir
+EOF
+        } else { # PublicInbox::ExtSearch
+                $$txt .= <<EOM;
+This is an external index which is an amalgamation of several public inboxes.
+Each public inbox needs to be mirrored individually.
+EOM
+                my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
+                if (($v // '') =~ /\A(?:all|match=domain)\z/) {
+                        $$txt .= <<EOM;
+A list of them is available at $top_url
+EOM
                 }
         }
+        my $cfg_link = "$base_url/_/text/config/raw";
+        $$txt .= <<EOF;
+
+Example config snippet for mirrors: $cfg_link
+EOF
+        _add_non_http_urls($ctx, $txt);
+        _add_onion_note($txt);
+
+        my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
+        $$txt .= join("\n\n",
+                coderepos_raw($ctx, $top_url), # may be empty
+                "AGPL code for this site:\n  git clone $code_url");
         1;
 }
 
-sub _default_text ($$$$) {
-        my ($ctx, $key, $hdr, $txt) = @_;
-        return _colors_help($ctx, $txt) if $key eq 'color';
-        return inbox_config($ctx, $hdr, $txt) if $key eq 'config';
+sub _default_text ($$$) {
+        my ($ctx, $key, $txt) = @_;
+        if ($key eq 'mirror') {
+                return _mirror_help($ctx, $txt);
+        } elsif ($key eq 'color') {
+                return _colors_help($ctx, $txt);
+        } elsif ($key eq 'config') {
+                return $ctx->{ibx}->can('cloneurl') ?
+                        inbox_config($ctx, $txt) :
+                        extindex_config($ctx, $txt);
+        }
         return if $key ne 'help'; # TODO more keys?
 
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $base_url = $ibx->base_url($ctx->{env});
-        $$txt .= "public-inbox help for $base_url\n";
         $$txt .= <<EOF;
+public-inbox help for $base_url
 
 overview
 --------
 
-    public-inbox uses Message-ID identifiers in URLs.
-    One may look up messages by substituting Message-IDs
-    (without the leading '<' or trailing '>') into the URL.
-    Forward slash ('/') characters in the Message-IDs
-    need to be escaped as "%2F" (without quotes).
+  public-inbox uses Message-ID identifiers in URLs.
+  One may look up messages by substituting Message-IDs
+  (without the leading '<' or trailing '>') into the URL.
+  Forward slash ('/') characters in the Message-IDs
+  need to be escaped as "%2F" (without quotes).
 
-    Thus, it is possible to retrieve any message by its
-    Message-ID by going to:
+  Thus, it is possible to retrieve any message by its
+  Message-ID by going to:
 
-        $base_url<Message-ID>/
+    $base_url<Message-ID>/
+    (without the '<' or '>')
 
-        (without the '<' or '>')
+  Message-IDs are described at:
 
-    Message-IDs are described at:
-
-        $WIKI_URL/Message-ID
+    $WIKI_URL/Message-ID
 
 EOF
 
         # n.b. we use the Xapian DB for any regeneratable,
         # order-of-arrival-independent data.
-        my $srch = $ibx->search;
-        if ($srch) {
+        if ($ibx->isrch) {
                 $$txt .= <<EOF;
 search
 ------
 
-    This public-inbox has search functionality provided by Xapian.
+  This public-inbox has search functionality provided by Xapian.
 
-    It supports typical AND, OR, NOT, '+', '-' queries present
-    in other search engines.
+  It supports typical AND, OR, NOT, '+', '-' queries present
+  in other search engines.
 
-    We also support search prefixes to limit the scope of the
-    search to certain fields.
+  We also support search prefixes to limit the scope of the
+  search to certain fields.
 
-    Prefixes supported in this installation include:
+  Prefixes supported in this installation include:
 
 EOF
-                _srch_prefix($srch, $txt);
-
+                _srch_prefix($ibx, $txt);
                 $$txt .= <<EOF;
 
-    Most prefixes are probabilistic, meaning they support stemming
-    and wildcards ('*').  Ranges (such as 'd:') and boolean prefixes
-    do not support stemming or wildcards.
-    The upstream Xapian query parser documentation fully explains
-    the query syntax:
+  Most prefixes are probabilistic, meaning they support stemming
+  and wildcards ('*').  Ranges (such as 'd:') and boolean prefixes
+  do not support stemming or wildcards.
+  The upstream Xapian query parser documentation fully explains
+  the query syntax:
 
-        $QP_URL
+    $QP_URL
 
 EOF
         } # $srch
-        my $over = $ibx->over;
-        if ($over) {
+        if ($ibx->over) {
                 $$txt .= <<EOF;
 message threading
 -----------------
 
-    Message threading is enabled for this public-inbox,
-    additional endpoints for message threads are available:
+  Message threading is enabled for this public-inbox,
+  additional endpoints for message threads are available:
 
-    * $base_url<Message-ID>/T/#u
+  * $base_url<Message-ID>/T/#u
 
-      Loads the thread belonging to the given <Message-ID>
-      in flat chronological order.  The "#u" anchor
-      focuses the browser on the given <Message-ID>.
+    Loads the thread belonging to the given <Message-ID>
+    in flat chronological order.  The "#u" anchor
+    focuses the browser on the given <Message-ID>.
 
-    * $base_url<Message-ID>/t/#u
+  * $base_url<Message-ID>/t/#u
 
-      Loads the thread belonging to the given <Message-ID>
-      in threaded order with nesting.  For deep threads,
-      this requires a wide display or horizontal scrolling.
+    Loads the thread belonging to the given <Message-ID>
+    in threaded order with nesting.  For deep threads,
+    this requires a wide display or horizontal scrolling.
 
-    Both of these HTML endpoints are suitable for offline reading
-    using the thread overview at the bottom of each page.
+  Both of these HTML endpoints are suitable for offline reading
+  using the thread overview at the bottom of each page.
 
-    Users of feed readers may follow a particular thread using:
+  The gzipped mbox for a thread is available for downloading and
+  importing into your favorite mail client:
 
-    * $base_url<Message-ID>/t.atom
+  * $base_url<Message-ID>/t.mbox.gz
 
-      Which loads the thread in Atom Syndication Standard
-      described at Wikipedia and RFC4287:
+    We use the mboxrd variant of the mbox format described at:
 
-        $WIKI_URL/Atom_(standard)
-        https://tools.ietf.org/html/rfc4287
+    $WIKI_URL/Mbox
 
-      Atom Threading Extensions (RFC4685) is supported:
+  Users of feed readers may follow a particular thread using:
 
-        https://tools.ietf.org/html/rfc4685
+  * $base_url<Message-ID>/t.atom
 
-    Finally, the gzipped mbox for a thread is available for
-    downloading and importing into your favorite mail client:
+    Which loads the thread in Atom Syndication Standard
+    described at Wikipedia and RFC4287:
 
-    * $base_url<Message-ID>/t.mbox.gz
+    $WIKI_URL/Atom_(standard)
+    https://tools.ietf.org/html/rfc4287
 
-    We use the mboxrd variant of the mbox format described
-    at:
+    Atom Threading Extensions (RFC4685) are supported:
 
-        $WIKI_URL/Mbox
+    https://tools.ietf.org/html/rfc4685
 
 EOF
         } # $over
 
+        _add_non_http_urls($ctx, \(my $note = ''));
+        $note and $note =~ s/^/  /gms and $$txt .= <<EOF;
+additional protocols
+--------------------
+$note
+EOF
         $$txt .= <<EOF;
 contact
 -------
 
-    This help text is maintained by public-inbox developers
-    reachable via plain-text email at: meta\@public-inbox.org
-    Their inbox is archived at: https://public-inbox.org/meta/
-
+  This help text is maintained by public-inbox developers
+  reachable via plain-text email at: meta\@public-inbox.org
+  Their inbox is archived at: https://public-inbox.org/meta/
 EOF
         # TODO: support admin contact info in ~/.public-inbox/config
         1;
diff --git a/lib/PublicInbox/WwwTopics.pm b/lib/PublicInbox/WwwTopics.pm
new file mode 100644
index 00000000..9d270732
--- /dev/null
+++ b/lib/PublicInbox/WwwTopics.pm
@@ -0,0 +1,85 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::WwwTopics;
+use v5.12;
+use PublicInbox::Hval qw(ascii_html mid_href fmt_ts);
+
+sub add_topic_html ($$) {
+        my (undef, $smsg) = @_;
+        my $s = ascii_html($smsg->{subject});
+        $s = '(no subject)' if $s eq '';
+        $_[0] .= "\n".fmt_ts($smsg->{ds}) .
+                qq{ <a\nhref="}.mid_href($smsg->{mid}).qq{/#r">$s</a>};
+        my $nr = $smsg->{'COUNT(num)'};
+        $_[0] .= " $nr+ messages" if $nr > 1;
+}
+
+# n.b. the `SELECT DISTINCT(tid)' subquery is critical for performance
+# with giant inboxes and extindices
+sub topics_new ($) {
+        $_[0]->do_get(<<EOS);
+SELECT ds,ddd,COUNT(num) FROM over WHERE tid IN
+(SELECT DISTINCT(tid) FROM over WHERE tid > 0 ORDER BY ts DESC LIMIT 200)
+AND +num > 0
+GROUP BY tid
+ORDER BY ds ASC
+EOS
+}
+
+sub topics_active ($) {
+        $_[0]->do_get(<<EOS);
+SELECT ddd,MAX(ds) as ds,COUNT(num) FROM over WHERE tid IN
+(SELECT DISTINCT(tid) FROM over WHERE tid > 0 ORDER BY ts DESC LIMIT 200)
+AND +num > 0
+GROUP BY tid
+ORDER BY ds ASC
+EOS
+}
+
+sub topics_i { pop @{$_[0]->{msgs}} }
+
+sub topics_atom { # GET /$INBOX_NAME/topics_(new|active).atom
+        my ($ctx) = @_;
+        require PublicInbox::WwwAtomStream;
+        my ($hdr, $smsg, $val);
+        PublicInbox::WwwAtomStream->response($ctx, \&topics_i);
+}
+
+sub topics_html { # GET /$INBOX_NAME/topics_(new|active).html
+        my ($ctx) = @_;
+        require PublicInbox::WwwStream;
+        my $buf = '<pre>';
+        $ctx->{-html_more_links} = qq{\n- recent:[<a
+href="./">subjects (threaded)</a>|};
+
+        if ($ctx->{topic_category} eq 'new') {
+                $ctx->{-html_more_links} .= qq{<b>topics (new)</b>|<a
+href="./topics_active.html">topics (active)</a>]};
+        } else { # topic_category eq "active" - topics with recent replies
+                $ctx->{-html_more_links} .= qq{<a
+href="./topics_new.html">topics (new)</a>|<b>topics (active)</b>]};
+        }
+        # can't use SQL to filter references since our schema wasn't designed
+        # for it, but our SQL sorts by ascending time to favor top-level
+        # messages while our final result (post-references filter) favors
+        # recent messages
+        my $msgs = delete $ctx->{msgs};
+        add_topic_html($buf, pop @$msgs) while scalar(@$msgs);
+        $buf .= '</pre>';
+        PublicInbox::WwwStream::html_oneshot($ctx, 200, $buf);
+}
+
+sub response {
+        my ($ctx, $ibx_name, $category, $type) = @_;
+        my ($ret, $over);
+        $ret = PublicInbox::WWW::invalid_inbox($ctx, $ibx_name) and return $ret;
+        $over = $ctx->{ibx}->over or
+                return PublicInbox::WWW::need($ctx, 'Overview', './');
+        $ctx->{msgs} = $category eq 'new' ? topics_new($over) :
+                        topics_active($over);
+        $ctx->{topic_category} = $category;
+        $type eq 'atom' ? topics_atom($ctx) : topics_html($ctx);
+}
+
+1;
diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm
new file mode 100644
index 00000000..24b3f45e
--- /dev/null
+++ b/lib/PublicInbox/XapClient.pm
@@ -0,0 +1,50 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# This talks to (XapHelperCxx.pm + xap_helper.h) or XapHelper.pm
+# and will eventually allow users with neither XS nor SWIG Perl
+# bindings to use Xapian as long as they have Xapian development
+# headers/libs and a C++ compiler
+package PublicInbox::XapClient;
+use v5.12;
+use PublicInbox::Spawn qw(spawn);
+use Socket qw(AF_UNIX SOCK_SEQPACKET);
+use PublicInbox::IPC;
+use autodie qw(pipe socketpair);
+our $tries = 50;
+
+sub mkreq {
+        my ($self, $ios, @arg) = @_;
+        my ($r, $n);
+        pipe($r, $ios->[0]) if !defined($ios->[0]);
+        my @fds = map fileno($_), @$ios;
+        my $buf = join("\0", @arg, '');
+        $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0, $tries)
+                // die "send_cmd: $!";
+        $n == length($buf) or die "send_cmd: $n != ".length($buf);
+        $r;
+}
+
+sub start_helper (@) {
+        $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS
+        my @argv = @_;
+        socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);
+        my $cls = 'PublicInbox::XapHelperCxx';
+        my $env;
+        my $cmd = eval "require $cls; ${cls}::cmd()";
+        if ($@) { # fall back to Perl + XS|SWIG
+                $cls = 'PublicInbox::XapHelper';
+                # ensure the child process has the same @INC we do:
+                $env = { PERL5LIB => join(':', @INC) };
+                $cmd = [$^X, ($^W ? ('-w') : ()), "-M$cls", '-e',
+                        $cls.'::start(@ARGV)', '--' ];
+        }
+        push @$cmd, @argv;
+        my $pid = spawn($cmd, $env, { 0 => $in });
+        my $self = bless { io => $sock, impl => $cls }, __PACKAGE__;
+        PublicInbox::IO::attach_pid($sock, $pid);
+        $self;
+}
+
+1;
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
new file mode 100644
index 00000000..c9957f64
--- /dev/null
+++ b/lib/PublicInbox/XapHelper.pm
@@ -0,0 +1,340 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Perl + SWIG||XS implementation if XapHelperCxx / xap_helper.h isn't usable.
+package PublicInbox::XapHelper;
+use v5.12;
+use Getopt::Long (); # good API even if we only use short options
+our $GLP = Getopt::Long::Parser->new;
+$GLP->configure(qw(require_order bundling no_ignore_case no_auto_abbrev));
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::CodeSearch;
+use PublicInbox::IPC;
+use PublicInbox::IO qw(read_all);
+use Socket qw(SOL_SOCKET SO_TYPE SOCK_SEQPACKET AF_UNIX);
+use PublicInbox::DS qw(awaitpid);
+use autodie qw(open getsockopt);
+use POSIX qw(:signal_h);
+use Fcntl qw(LOCK_UN LOCK_EX);
+use Carp qw(croak);
+my $X = \%PublicInbox::Search::X;
+our (%SRCH, %WORKERS, $nworker, $workerset, $in);
+our $stderr = \*STDERR;
+
+sub cmd_test_inspect {
+        my ($req) = @_;
+        print { $req->{0} } "pid=$$ has_threadid=",
+                ($req->{srch}->has_threadid ? 1 : 0)
+}
+
+sub cmd_test_sleep { select(undef, undef, undef, 0.01) while 1 }
+
+sub iter_retry_check ($) {
+        if (ref($@) =~ /\bDatabaseModifiedError\b/) {
+                $_[0]->{srch}->reopen;
+                undef; # retries
+        } elsif (ref($@) =~ /\bDocNotFoundError\b/) {
+                warn "doc not found: $@";
+                0; # continue to next doc
+        } else {
+                die;
+        }
+}
+
+sub term_length_extract ($) {
+        my ($req) = @_;
+        @{$req->{A_len}} = map {
+                my $len = s/([0-9]+)\z// ? ($1 + 0) : undef;
+                [ $_, $len ];
+        } @{$req->{A}};
+}
+
+sub dump_ibx_iter ($$$) {
+        my ($req, $ibx_id, $it) = @_;
+        my $out = $req->{0};
+        eval {
+                my $doc = $it->get_document;
+                for my $pair (@{$req->{A_len}}) {
+                        my ($pfx, $len) = @$pair;
+                        my @t = xap_terms($pfx, $doc);
+                        @t = grep { length == $len } @t if defined($len);
+                        for (@t) {
+                                print $out "$_ $ibx_id\n" or die "print: $!";
+                                ++$req->{nr_out};
+                        }
+                }
+        };
+        $@ ? iter_retry_check($req) : 0;
+}
+
+sub emit_mset_stats ($$) {
+        my ($req, $mset) = @_;
+        my $err = $req->{1} or croak "BUG: caller only passed 1 FD";
+        say $err 'mset.size='.$mset->size.' nr_out='.$req->{nr_out}
+}
+
+sub cmd_dump_ibx {
+        my ($req, $ibx_id, $qry_str) = @_;
+        $qry_str // die 'usage: dump_ibx [OPTIONS] IBX_ID QRY_STR';
+        $req->{A} or die 'dump_ibx requires -A PREFIX';
+        term_length_extract $req;
+        my $max = $req->{'m'} // $req->{srch}->{xdb}->get_doccount;
+        my $opt = { relevance => -1, limit => $max, offset => $req->{o} // 0 };
+        $opt->{eidx_key} = $req->{O} if defined $req->{O};
+        my $mset = $req->{srch}->mset($qry_str, $opt);
+        $req->{0}->autoflush(1);
+        for my $it ($mset->items) {
+                for (my $t = 10; $t > 0; --$t) {
+                        $t = dump_ibx_iter($req, $ibx_id, $it) // $t;
+                }
+        }
+        emit_mset_stats($req, $mset);
+}
+
+sub dump_roots_iter ($$$) {
+        my ($req, $root2off, $it) = @_;
+        eval {
+                my $doc = $it->get_document;
+                my $G = join(' ', map { $root2off->{$_} } xap_terms('G', $doc));
+                for my $pair (@{$req->{A_len}}) {
+                        my ($pfx, $len) = @$pair;
+                        my @t = xap_terms($pfx, $doc);
+                        @t = grep { length == $len } @t if defined($len);
+                        for (@t) {
+                                $req->{wbuf} .= "$_ $G\n";
+                                ++$req->{nr_out};
+                        }
+                }
+        };
+        $@ ? iter_retry_check($req) : 0;
+}
+
+sub dump_roots_flush ($$) {
+        my ($req, $fh) = @_;
+        if ($req->{wbuf} ne '') {
+                until (flock($fh, LOCK_EX)) { die "LOCK_EX: $!" if !$!{EINTR} }
+                print { $req->{0} } $req->{wbuf} or die "print: $!";
+                until (flock($fh, LOCK_UN)) { die "LOCK_UN: $!" if !$!{EINTR} }
+                $req->{wbuf} = '';
+        }
+}
+
+sub cmd_dump_roots {
+        my ($req, $root2off_file, $qry_str) = @_;
+        $qry_str // die 'usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR';
+        $req->{A} or die 'dump_roots requires -A PREFIX';
+        term_length_extract $req;
+        open my $fh, '<', $root2off_file;
+        my $root2off; # record format: $OIDHEX "\0" uint32_t
+        my @x = split(/\0/, read_all $fh);
+        while (defined(my $oidhex = shift @x)) {
+                $root2off->{$oidhex} = shift @x;
+        }
+        my $opt = { relevance => -1, limit => $req->{'m'},
+                        offset => $req->{o} // 0 };
+        my $mset = $req->{srch}->mset($qry_str, $opt);
+        $req->{0}->autoflush(1);
+        $req->{wbuf} = '';
+        for my $it ($mset->items) {
+                for (my $t = 10; $t > 0; --$t) {
+                        $t = dump_roots_iter($req, $root2off, $it) // $t;
+                }
+                if (!($req->{nr_out} & 0x3fff)) {
+                        dump_roots_flush($req, $fh);
+                }
+        }
+        dump_roots_flush($req, $fh);
+        emit_mset_stats($req, $mset);
+}
+
+sub mset_iter ($$) {
+        my ($req, $it) = @_;
+        say { $req->{0} } $it->get_docid, "\0",
+                        $it->get_percent, "\0", $it->get_rank;
+}
+
+sub cmd_mset { # to be used by WWW + IMAP
+        my ($req, $qry_str) = @_;
+        $qry_str // die 'usage: mset [OPTIONS] QRY_STR';
+        my $opt = { limit => $req->{'m'}, offset => $req->{o} // 0 };
+        $opt->{relevance} = 1 if $req->{r};
+        $opt->{threads} = 1 if defined $req->{t};
+        $opt->{git_dir} = $req->{g} if defined $req->{g};
+        $opt->{eidx_key} = $req->{O} if defined $req->{O};
+        $opt->{threadid} = $req->{T} if defined $req->{T};
+        my $mset = $req->{srch}->mset($qry_str, $opt);
+        say { $req->{0} } 'mset.size=', $mset->size,
+                ' .get_matches_estimated=', $mset->get_matches_estimated;
+        for my $it ($mset->items) {
+                for (my $t = 10; $t > 0; --$t) {
+                        $t = mset_iter($req, $it) // $t;
+                }
+        }
+}
+
+sub srch_init_extra ($) {
+        my ($req) = @_;
+        my $qp = $req->{srch}->{qp};
+        for (@{$req->{Q}}) {
+                my ($upfx, $m, $xpfx) = split /([:=])/;
+                $xpfx // die "E: bad -Q $_";
+                $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix';
+                $qp->$m($upfx, $xpfx);
+        }
+        $req->{srch}->{qp_extra_done} = 1;
+}
+
+sub dispatch {
+        my ($req, $cmd, @argv) = @_;
+        my $fn = $req->can("cmd_$cmd") or return;
+        $GLP->getoptionsfromarray(\@argv, $req, @PublicInbox::Search::XH_SPEC)
+                or return;
+        my $dirs = delete $req->{d} or die 'no -d args';
+        my $key = join("\0", @$dirs);
+        my $new;
+        $req->{srch} = $SRCH{$key} //= do {
+                $new = { qp_flags => $PublicInbox::Search::QP_FLAGS };
+                my $first = shift @$dirs;
+                my $slow_phrase = -f "$first/iamchert";
+                $new->{xdb} = $X->{Database}->new($first);
+                for (@$dirs) {
+                        $slow_phrase ||= -f "$_/iamchert";
+                        $new->{xdb}->add_database($X->{Database}->new($_));
+                }
+                $slow_phrase or
+                        $new->{qp_flags} |= PublicInbox::Search::FLAG_PHRASE();
+                bless $new, $req->{c} ? 'PublicInbox::CodeSearch' :
+                                        'PublicInbox::Search';
+                $new->{qp} = $new->qparse_new;
+                $new;
+        };
+        $req->{srch}->{xdb}->reopen unless $new;
+        $req->{Q} && !$req->{srch}->{qp_extra_done} and
+                srch_init_extra $req;
+        my $timeo = $req->{K};
+        alarm($timeo) if $timeo;
+        $fn->($req, @argv);
+        alarm(0) if $timeo;
+}
+
+sub recv_loop {
+        local $SIG{__WARN__} = sub { print $stderr @_ };
+        my $rbuf;
+        local $SIG{TERM} = sub { undef $in };
+        local $SIG{USR1} = \&reopen_logs;
+        while (defined($in)) {
+                PublicInbox::DS::sig_setmask($workerset);
+                my @fds = eval { # we undef $in in SIG{TERM}
+                        $PublicInbox::IPC::recv_cmd->($in, $rbuf, 4096*33)
+                };
+                if ($@) {
+                        exit if !$in; # hit by SIGTERM
+                        die;
+                }
+                scalar(@fds) or exit(66); # EX_NOINPUT
+                die "recvmsg: $!" if !defined($fds[0]);
+                PublicInbox::DS::block_signals(POSIX::SIGALRM);
+                my $req = bless {}, __PACKAGE__;
+                my $i = 0;
+                open($req->{$i++}, '+<&=', $_) for @fds;
+                local $stderr = $req->{1} // \*STDERR;
+                die "not NUL-terminated" if chop($rbuf) ne "\0";
+                my @argv = split(/\0/, $rbuf);
+                $req->{nr_out} = 0;
+                $req->dispatch(@argv) if @argv;
+        }
+}
+
+sub reap_worker { # awaitpid CB
+        my ($pid, $nr) = @_;
+        delete $WORKERS{$nr};
+        if (($? >> 8) == 66) { # EX_NOINPUT
+                undef $in;
+        } elsif ($?) {
+                warn "worker[$nr] died \$?=$?\n";
+        }
+        PublicInbox::DS::requeue(\&start_workers) if $in;
+}
+
+sub start_worker ($) {
+        my ($nr) = @_;
+        my $pid = eval { PublicInbox::DS::fork_persist } // return(warn($@));
+        if ($pid == 0) {
+                undef %WORKERS;
+                $SIG{TTIN} = $SIG{TTOU} = 'IGNORE';
+                $SIG{CHLD} = 'DEFAULT'; # Xapian may use this
+                recv_loop();
+                exit(0);
+        } else {
+                $WORKERS{$nr} = $pid;
+                awaitpid($pid, \&reap_worker, $nr);
+        }
+}
+
+sub start_workers {
+        for my $nr (grep { !defined($WORKERS{$_}) } (0..($nworker - 1))) {
+                start_worker($nr) if $in;
+        }
+}
+
+sub do_sigttou {
+        if ($in && $nworker > 1) {
+                --$nworker;
+                my @nr = grep { $_ >= $nworker } keys %WORKERS;
+                kill('TERM', @WORKERS{@nr});
+        }
+}
+
+sub reopen_logs {
+        my $p = $ENV{STDOUT_PATH};
+        defined($p) && open(STDOUT, '>>', $p) and STDOUT->autoflush(1);
+        $p = $ENV{STDERR_PATH};
+        defined($p) && open(STDERR, '>>', $p) and STDERR->autoflush(1);
+}
+
+sub parent_reopen_logs {
+        reopen_logs();
+        kill('USR1', values %WORKERS);
+}
+
+sub xh_alive { $in || scalar(keys %WORKERS) }
+
+sub start (@) {
+        my (@argv) = @_;
+        my $c = getsockopt(local $in = \*STDIN, SOL_SOCKET, SO_TYPE);
+        unpack('i', $c) == SOCK_SEQPACKET or die 'stdin is not SOCK_SEQPACKET';
+
+        local (%SRCH, %WORKERS);
+        PublicInbox::Search::load_xapian();
+        $GLP->getoptionsfromarray(\@argv, my $opt = { j => 1 }, 'j=i') or
+                die 'bad args';
+        local $workerset = POSIX::SigSet->new;
+        $workerset->fillset or die "fillset: $!";
+        for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) {
+                $workerset->delset($_) or die "delset($_): $!";
+        }
+
+        local $nworker = $opt->{j};
+        return recv_loop() if $nworker == 0;
+        die '-j must be >= 0' if $nworker < 0;
+        for (POSIX::SIGTERM, POSIX::SIGCHLD) {
+                $workerset->delset($_) or die "delset($_): $!";
+        }
+        my $sig = {
+                TTIN => sub {
+                        if ($in) {
+                                ++$nworker;
+                                PublicInbox::DS::requeue(\&start_workers)
+                        }
+                },
+                TTOU => \&do_sigttou,
+                CHLD => \&PublicInbox::DS::enqueue_reap,
+                USR1 => \&parent_reopen_logs,
+        };
+        PublicInbox::DS::block_signals();
+        start_workers();
+        @PublicInbox::DS::post_loop_do = \&xh_alive;
+        PublicInbox::DS::event_loop($sig);
+}
+
+1;
diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm
new file mode 100644
index 00000000..74852ad1
--- /dev/null
+++ b/lib/PublicInbox/XapHelperCxx.pm
@@ -0,0 +1,141 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Just-ahead-of-time builder for the lib/PublicInbox/xap_helper.h shim.
+# I never want users to be without source code for repairs, so this
+# aims to replicate the feel of a scripting language using C++.
+# The resulting executable is not linked to Perl in any way.
+package PublicInbox::XapHelperCxx;
+use v5.12;
+use PublicInbox::Spawn qw(run_die run_qx run_wait which);
+use PublicInbox::IO qw(try_cat write_file);
+use PublicInbox::Search;
+use Fcntl qw(SEEK_SET);
+use Config;
+use autodie;
+my $cxx = which($ENV{CXX} // 'c++') // which('clang') // die 'no C++ compiler';
+my $dir = substr("$cxx-$Config{archname}", 1); # drop leading '/'
+$dir =~ tr!/!-!;
+my $idir;
+if ((defined($ENV{XDG_CACHE_HOME}) && -d $ENV{XDG_CACHE_HOME}) ||
+                        (defined($ENV{HOME}) && -d $ENV{HOME})) {
+        $idir = ($ENV{XDG_CACHE_HOME} //
+                        (($ENV{HOME} // die('HOME unset')).'/.cache')
+                ).'/public-inbox/jaot';
+}
+$idir //= $ENV{PERL_INLINE_DIRECTORY} //
+        die 'HOME and PERL_INLINE_DIRECTORY unset';
+substr($dir, 0, 0) = "$idir/";
+my $bin = "$dir/xap_helper";
+my ($srcpfx) = (__FILE__ =~ m!\A(.+/)[^/]+\z!);
+my @srcs = map { $srcpfx.$_ } qw(xh_mset.h xh_cidx.h xap_helper.h);
+my @pm_dep = map { $srcpfx.$_ } qw(Search.pm CodeSearch.pm);
+my $ldflags = '-Wl,-O1';
+$ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd';
+my $xflags = ($ENV{CXXFLAGS} // '-Wall -ggdb3 -pipe') . ' ' .
+        ' -DTHREADID=' . PublicInbox::Search::THREADID .
+        ' -DXH_SPEC="'.join('',
+                map { s/=.*/:/; $_ } @PublicInbox::Search::XH_SPEC) . '" ' .
+        ($ENV{LDFLAGS} // $ldflags);
+substr($xflags, 0, 0, '-O2 ') if !defined($ENV{CXXFLAGS}) && !-w __FILE__;
+my $xap_modversion;
+
+sub xap_cfg (@) {
+        my $cmd = [ $ENV{PKG_CONFIG} // 'pkg-config', @_, 'xapian-core' ];
+        chomp(my $ret = run_qx($cmd, undef, { 2 => \(my $err) }));
+        return $ret if !$?;
+        die <<EOM;
+@$cmd failed: Xapian development files missing? (\$?=$?)
+$err
+EOM
+}
+
+sub needs_rebuild () {
+        my $prev = try_cat("$dir/XFLAGS") or return 1;
+        chomp $prev;
+        return 1 if $prev ne $xflags;
+
+        $prev = try_cat("$dir/xap_modversion") or return 1;
+        chomp $prev;
+
+        $xap_modversion = xap_cfg('--modversion');
+        $xap_modversion ne $prev;
+}
+
+sub build () {
+        if (!-d $dir) {
+                require File::Path;
+                eval { File::Path::make_path($dir) };
+                if (!-d $dir && defined($ENV{PERL_INLINE_DIRECTORY})) {
+                        $dir = $ENV{PERL_INLINE_DIRECTORY};
+                        File::Path::make_path($dir);
+                }
+        }
+        require PublicInbox::CodeSearch;
+        require PublicInbox::Lock;
+        my ($prog) = ($bin =~ m!/([^/]+)\z!);
+        my $lk = PublicInbox::Lock->new("$dir/$prog.lock")->lock_for_scope;
+        write_file '>', "$dir/$prog.cpp", qq{#include "xap_helper.h"\n},
+                        PublicInbox::Search::generate_cxx(),
+                        PublicInbox::CodeSearch::generate_cxx();
+
+        # xap_modversion may be set by needs_rebuild
+        $xap_modversion //= xap_cfg('--modversion');
+        my $fl = xap_cfg(qw(--libs --cflags));
+
+        # Using rpath seems acceptable/encouraged in the NetBSD packaging world
+        # since /usr/pkg/lib isn't searched by the dynamic loader by default.
+        # Not sure if other OSes need this, but rpath seems fine for JAOT
+        # binaries (like this one) even if other distros discourage it for
+        # distributed packages.
+        $^O eq 'netbsd' and $fl =~ s/(\A|[ \t])\-L([^ \t]+)([ \t]|\z)/
+                                "$1-L$2 -Wl,-rpath=$2$3"/egsx;
+        my @xflags = split(' ', "$fl $xflags"); # ' ' awk-mode eats leading WS
+        my @cflags = ('-I', $srcpfx, grep(!/\A-(?:Wl|l|L)/, @xflags));
+        run_die([$cxx, '-o', "$dir/$prog.o", '-c', "$dir/$prog.cpp", @cflags]);
+
+        # xapian on Alpine Linux (tested 3.19.0) is linked against libstdc++,
+        # and clang needs to be told to use it (rather than libc++):
+        my @try = rindex($cxx, 'clang') >= 0 ? qw(-lstdc++) : ();
+        my @cmd = ($cxx, '-o', "$dir/$prog.tmp", "$dir/$prog.o", @xflags);
+        while (run_wait(\@cmd) and @try) {
+                warn("# attempting to link again with $try[0]...\n");
+                push(@cmd, shift(@try));
+        }
+        die "# @cmd failed: \$?=$?" if $?;
+        unlink "$dir/$prog.cpp", "$dir/$prog.o";
+        write_file '>', "$dir/XFLAGS.tmp", $xflags, "\n";
+        write_file '>', "$dir/xap_modversion.tmp", $xap_modversion, "\n";
+        undef $xap_modversion; # do we ever build() twice?
+        # not quite atomic, but close enough :P
+        rename("$dir/$_.tmp", "$dir/$_") for ($prog, qw(XFLAGS xap_modversion));
+}
+
+sub check_build () {
+        use Time::HiRes qw(stat);
+        my $ctime = 0;
+        my @bin = stat($bin) or return build();
+        for (@srcs, @pm_dep) {
+                my @st = stat($_) or die "stat $_: $!";
+                if ($st[10] > $ctime) {
+                        $ctime = $st[10];
+                        return build() if $ctime > $bin[10];
+                }
+        }
+        needs_rebuild() ? build() : 0;
+}
+
+# returns spawn arg
+sub cmd {
+        die 'PI_NO_CXX set' if $ENV{PI_NO_CXX};
+        check_build();
+        my @cmd;
+        if (my $v = $ENV{VALGRIND}) {
+                $v = 'valgrind -v' if $v eq '1';
+                @cmd = split(/\s+/, $v);
+        }
+        push @cmd, $bin;
+        \@cmd;
+}
+
+1;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 6a74daf9..9a148ae4 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,15 +1,17 @@
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Xapcmd;
-use strict;
-use PublicInbox::Spawn qw(which popen_rd nodatacow_dir);
+use v5.12;
+use PublicInbox::Spawn qw(which popen_rd);
+use PublicInbox::Syscall;
 use PublicInbox::Admin qw(setup_signals);
 use PublicInbox::Over;
+use PublicInbox::Search qw(xap_terms);
 use PublicInbox::SearchIdx;
 use File::Temp 0.19 (); # ->newdir
 use File::Path qw(remove_tree);
-use File::Basename qw(dirname);
-use POSIX qw(WNOHANG);
+use POSIX qw(WNOHANG _exit);
+use PublicInbox::DS;
 
 # support testing with dev versions of Xapian which installs
 # commands with a version number suffix (e.g. "xapian-compact-1.5")
@@ -21,13 +23,25 @@ sub commit_changes ($$$$) {
         my $reshard = $opt->{reshard};
 
         $SIG{INT} or die 'BUG: $SIG{INT} not handled';
-        my @old_shard;
-        my $over_chg;
-
-        while (my ($old, $newdir) = each %$tmp) {
+        my (@old_shard, $over_chg);
+
+        # Sort shards highest-to-lowest, since ->xdb_shards_flat
+        # determines the number of shards to load based on the max;
+        # and we'd rather xdb_shards_flat to momentarily fail rather
+        # than load out-of-date shards
+        my @order = sort {
+                my ($x) = ($a =~ m!/([0-9]+)/*\z!);
+                my ($y) = ($b =~ m!/([0-9]+)/*\z!);
+                ($y // -1) <=> ($x // -1) # we may have non-shards
+        } keys %$tmp;
+
+        my ($dname) = ($order[0] =~ m!(.*/)[^/]+/*\z!);
+        my $mode = (stat($dname))[2];
+        for my $old (@order) {
                 next if $old eq ''; # no invalid paths
-                my @st = stat($old);
-                if (!@st && !defined($opt->{reshard})) {
+                my $newdir = $tmp->{$old};
+                my $have_old = -e $old;
+                if (!$have_old && !defined($opt->{reshard})) {
                         die "failed to stat($old): $!";
                 }
 
@@ -47,35 +61,31 @@ sub commit_changes ($$$$) {
                         next;
                 }
 
-                if (@st) {
-                        chmod($st[2] & 07777, $new) or die "chmod $old: $!\n";
+                chmod($mode & 07777, $new) or die "chmod($new): $!\n";
+                if ($have_old) {
                         rename($old, "$new/old") or
                                         die "rename $old => $new/old: $!\n";
                 }
                 rename($new, $old) or die "rename $new => $old: $!\n";
-                if (@st) {
-                        my $prev = "$old/old";
-                        remove_tree($prev) or
-                                die "failed to remove $prev: $!\n";
-                }
+                push @old_shard, "$old/old" if $have_old;
         }
 
         # trigger ->check_inodes in read-only daemons
-        syswrite($im->{lockfh}, '.') if $over_chg;
+        syswrite($im->{lockfh}, '.') if $over_chg && $im;
 
         remove_tree(@old_shard);
         $tmp = undef;
         if (!$opt->{-coarse_lock}) {
                 $opt->{-skip_lock} = 1;
-
-                if ($im->can('count_shards')) {
+                $im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
+                if ($im->can('count_shards')) { # v2w, eidx, cidx
                         my $pr = $opt->{-progress};
                         my $n = $im->count_shards;
                         if (defined $reshard && $n != $reshard) {
                                 die
 "BUG: counted $n shards after resharding to $reshard";
                         }
-                        my $prev = $im->{shards};
+                        my $prev = $im->{shards} // $ibx->{nshard};
                         if ($pr && $prev != $n) {
                                 $pr->("shard count changed: $prev => $n\n");
                                 $im->{shards} = $n;
@@ -83,16 +93,21 @@ sub commit_changes ($$$$) {
                 }
                 my $env = $opt->{-idx_env};
                 local %ENV = (%ENV, %$env) if $env;
-                PublicInbox::Admin::index_inbox($ibx, $im, $opt);
+                if ($ibx->can('eidx_sync')) {
+                        $ibx->eidx_sync($opt);
+                } elsif (!$ibx->can('cidx_run')) {
+                        PublicInbox::Admin::index_inbox($ibx, $im, $opt);
+                }
         }
 }
 
 sub cb_spawn {
         my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact()
-        defined(my $pid = fork) or die "fork: $!";
+        my $pid = PublicInbox::DS::fork_persist;
         return $pid if $pid > 0;
+        $SIG{__DIE__} = sub { warn @_; _exit(1) }; # don't jump up stack
         $cb->($args, $opt);
-        POSIX::_exit(0);
+        _exit(0);
 }
 
 sub runnable_or_die ($) {
@@ -100,17 +115,18 @@ sub runnable_or_die ($) {
         which($exe) or die "$exe not found in PATH\n";
 }
 
-sub prepare_reindex ($$$) {
-        my ($ibx, $im, $opt) = @_;
-        if ($ibx->version == 1) {
+sub prepare_reindex ($$) {
+        my ($ibx, $opt) = @_;
+        if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+                # no prep needed for ExtSearchIdx nor CodeSearchIdx
+        } elsif ($ibx->version == 1) {
                 my $dir = $ibx->search->xdir(1);
                 my $xdb = $PublicInbox::Search::X{Database}->new($dir);
                 if (my $lc = $xdb->get_metadata('last_commit')) {
                         $opt->{reindex}->{from} = $lc;
                 }
         } else { # v2
-                my $max;
-                $im->git_dir_latest(\$max) or return;
+                my $max = $ibx->max_git_epoch // return;
                 my $from = $opt->{reindex}->{from};
                 my $mm = $ibx->mm;
                 my $v = PublicInbox::Search::SCHEMA_VERSION();
@@ -132,8 +148,9 @@ sub kill_pids {
 }
 
 sub process_queue {
-        my ($queue, $cb, $opt) = @_;
+        my ($queue, $task, $opt) = @_;
         my $max = $opt->{jobs} // scalar(@$queue);
+        my $cb = \&$task;
         if ($max <= 1) {
                 while (defined(my $args = shift @$queue)) {
                         $cb->($args, $opt);
@@ -143,7 +160,7 @@ sub process_queue {
 
         # run in parallel:
         my %pids;
-        local %SIG = %SIG;
+        local @SIG{keys %SIG} = values %SIG;
         setup_signals(\&kill_pids, \%pids);
         while (@$queue) {
                 while (scalar(keys(%pids)) < $max && scalar(@$queue)) {
@@ -170,9 +187,16 @@ sub prepare_run {
         my ($ibx, $opt) = @_;
         my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
         my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
-        my $old;
-        if (my $srch = $ibx->search) {
+        my ($old, $misc_ok);
+        if ($ibx->can('cidx_run')) {
+                $old = $ibx->xdir(1);
+        } elsif ($ibx->can('eidx_sync')) {
+                $misc_ok = 1;
+                $old = $ibx->xdir(1);
+        } elsif (my $srch = $ibx->search) {
                 $old = $srch->xdir(1);
+        }
+        if (defined $old) {
                 -d $old or die "$old does not exist\n";
         }
         my $reshard = $opt->{reshard};
@@ -182,32 +206,33 @@ sub prepare_run {
 
         # we want temporary directories to be as deep as possible,
         # so v2 shards can keep "xap$SCHEMA_VERSION" on a separate FS.
-        if ($old && $ibx->version == 1) {
+        if (defined($old) && $ibx->can('version') && $ibx->version == 1) {
                 if (defined $reshard) {
                         warn
 "--reshard=$reshard ignored for v1 $ibx->{inboxdir}\n";
                 }
-                my $dir = dirname($old);
+                my ($dir) = ($old =~ m!(.*?/)[^/]+/*\z!);
                 same_fs_or_die($dir, $old);
                 my $v = PublicInbox::Search::SCHEMA_VERSION();
-                my $wip = File::Temp->newdir("xapian$v-XXXXXXXX", DIR => $dir);
+                my $wip = File::Temp->newdir("xapian$v-XXXX", DIR => $dir);
                 $tmp->{$old} = $wip;
-                nodatacow_dir($wip->dirname);
+                PublicInbox::Syscall::nodatacow_dir($wip->dirname);
                 push @queue, [ $old, $wip ];
-        } elsif ($old) {
+        } elsif (defined $old) {
                 opendir my $dh, $old or die "Failed to opendir $old: $!\n";
                 my @old_shards;
                 while (defined(my $dn = readdir($dh))) {
                         if ($dn =~ /\A[0-9]+\z/) {
-                                push @old_shards, $dn;
+                                push(@old_shards, $dn + 0);
                         } elsif ($dn eq '.' || $dn eq '..') {
                         } elsif ($dn =~ /\Aover\.sqlite3/) {
+                        } elsif ($dn eq 'misc' && $misc_ok) {
                         } else {
                                 warn "W: skipping unknown dir: $old/$dn\n"
                         }
                 }
                 die "No Xapian shards found in $old\n" unless @old_shards;
-
+                @old_shards = sort { $a <=> $b } @old_shards;
                 my ($src, $max_shard);
                 if (!defined($reshard) || $reshard == scalar(@old_shards)) {
                         # 1:1 copy
@@ -218,12 +243,11 @@ sub prepare_run {
                         $src = [ map { "$old/$_" } @old_shards ];
                 }
                 foreach my $dn (0..$max_shard) {
-                        my $tmpl = "$dn-XXXXXXXX";
-                        my $wip = File::Temp->newdir($tmpl, DIR => $old);
+                        my $wip = File::Temp->newdir("$dn-XXXX", DIR => $old);
                         same_fs_or_die($old, $wip->dirname);
                         my $cur = "$old/$dn";
                         push @queue, [ $src // $cur , $wip ];
-                        nodatacow_dir($wip->dirname);
+                        PublicInbox::Syscall::nodatacow_dir($wip->dirname);
                         $tmp->{$cur} = $wip;
                 }
                 # mark old shards to be unlinked
@@ -236,43 +260,49 @@ sub prepare_run {
 
 sub check_compact () { runnable_or_die($XAPIAN_COMPACT) }
 
-sub _run {
-        my ($ibx, $cb, $opt) = @_;
-        my $im = $ibx->importer(0);
-        $im->lock_acquire;
-        my ($tmp, $queue) = prepare_run($ibx, $opt);
-
-        # fine-grained locking if we prepare for reindex
-        if (!$opt->{-coarse_lock}) {
-                prepare_reindex($ibx, $im, $opt);
-                $im->lock_release;
-        }
-
-        $ibx->cleanup;
-        process_queue($queue, $cb, $opt);
-        $im->lock_acquire if !$opt->{-coarse_lock};
-        commit_changes($ibx, $im, $tmp, $opt);
-}
-
 sub run {
         my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact'
-        my $cb = \&$task;
         PublicInbox::Admin::progress_prepare($opt ||= {});
-        defined(my $dir = $ibx->{inboxdir}) or die "no inboxdir defined\n";
-        -d $dir or die "inboxdir=$dir does not exist\n";
-        check_compact() if $opt->{compact} && $ibx->search;
+        my $dir;
+        for my $fld (qw(inboxdir topdir cidx_dir)) {
+                my $d = $ibx->{$fld} // next;
+                -d $d or die "$fld=$d does not exist\n";
+                $dir = $d;
+                last;
+        }
+        check_compact() if $opt->{compact} &&
+                                ($ibx->can('cidx_run') || $ibx->search);
 
-        if (!$opt->{-coarse_lock}) {
+        if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+                                        !$opt->{-coarse_lock}) {
                 # per-epoch ranges for v2
                 # v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
                 $opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
                 PublicInbox::SearchIdx::load_xapian_writable();
         }
 
-        local %SIG = %SIG;
+        local @SIG{keys %SIG} = values %SIG;
         setup_signals();
-        $ibx->umask_prepare;
-        $ibx->with_umask(\&_run, $ibx, $cb, $opt);
+        my $restore = $ibx->with_umask;
+
+        my $im = $ibx->can('importer') ? $ibx->importer(0) : undef;
+        ($im // $ibx)->lock_acquire;
+        my ($tmp, $queue) = prepare_run($ibx, $opt);
+
+        # fine-grained locking if we prepare for reindex
+        if (!$opt->{-coarse_lock}) {
+                prepare_reindex($ibx, $opt);
+                ($im // $ibx)->lock_release;
+        }
+
+        $ibx->cleanup if $ibx->can('cleanup');
+        if ($task eq 'cpdb' && $opt->{reshard} && $ibx->can('cidx_run')) {
+                cidx_reshard($ibx, $queue, $opt);
+        } else {
+                process_queue($queue, $task, $opt);
+        }
+        ($im // $ibx)->lock_acquire if !$opt->{-coarse_lock};
+        commit_changes($ibx, $im, $tmp, $opt);
 }
 
 sub cpdb_retryable ($$) {
@@ -290,20 +320,21 @@ sub cpdb_retryable ($$) {
 }
 
 sub progress_pfx ($) {
-        my ($wip) = @_; # tempdir v2: ([0-9])+-XXXXXXXX
-        my @p = split('/', $wip);
+        my ($wip) = @_; # tempdir v2: ([0-9])+-XXXX
+        my @p = split(m'/', $wip);
 
-        # return "xap15/0" for v2, or "xapian15" for v1:
-        ($p[-1] =~ /\A([0-9]+)/) ? "$p[-2]/$1" : $p[-1];
+        # "basename(inboxdir)/xap15/0" for v2,
+        # "basename(inboxdir)/xapian15" for v1:
+        ($p[-1] =~ /\A([0-9]+)/) ? "$p[-3]/$p[-2]/$1" : "$p[-2]/$p[-1]";
 }
 
 sub kill_compact { # setup_signals callback
-        my ($sig, $pidref) = @_;
-        kill($sig, $$pidref) if defined($$pidref);
+        my ($sig, $ioref) = @_;
+        kill($sig, $$ioref->attached_pid // return) if defined($$ioref);
 }
 
 # xapian-compact wrapper
-sub compact ($$) {
+sub compact ($$) { # cb_spawn callback
         my ($args, $opt) = @_;
         my ($src, $newdir) = @$args;
         my $dst = ref($newdir) ? $newdir->dirname : $newdir;
@@ -327,18 +358,16 @@ sub compact ($$) {
         }
         $pr->("$pfx `".join(' ', @$cmd)."'\n") if $pr;
         push @$cmd, $src, $dst;
-        my ($rd, $pid);
-        local %SIG = %SIG;
-        setup_signals(\&kill_compact, \$pid);
-        ($rd, $pid) = popen_rd($cmd, undef, $rdr);
+        local @SIG{keys %SIG} = values %SIG;
+        setup_signals(\&kill_compact, \my $rd);
+        $rd = popen_rd($cmd, undef, $rdr);
         while (<$rd>) {
                 if ($pr) {
                         s/\r/\r$pfx /g;
                         $pr->("$pfx $_");
                 }
         }
-        waitpid($pid, 0);
-        die "@$cmd failed: \$?=$?\n" if $?;
+        $rd->close or die "@$cmd failed: \$?=$?\n";
 }
 
 sub cpdb_loop ($$$;$$) {
@@ -382,18 +411,96 @@ sub cpdb_loop ($$$;$$) {
         } while (cpdb_retryable($src, $pfx));
 }
 
+sub xapian_write_prep ($) {
+        my ($opt) = @_;
+        PublicInbox::SearchIdx::load_xapian_writable();
+        my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+        die if $@;
+        $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync};
+        (\%PublicInbox::Search::X, $flag);
+}
+
+sub compact_tmp_shard ($) {
+        my ($wip) = @_;
+        my $new = $wip->dirname;
+        my ($dir) = ($new =~ m!(.*?/)[^/]+/*\z!);
+        same_fs_or_die($dir, $new);
+        my $ft = File::Temp->newdir("$new.compact-XXXX", DIR => $dir);
+        PublicInbox::Syscall::nodatacow_dir($ft->dirname);
+        $ft;
+}
+
+sub cidx_reshard { # not docid based
+        my ($cidx, $queue, $opt) = @_;
+        my ($X, $flag) = xapian_write_prep($opt);
+        my $src = $cidx->xdb;
+        delete($cidx->{xdb}) == $src or die "BUG: xdb != $src";
+        my $pfx = $opt->{-progress_pfx} = progress_pfx($cidx->xdir.'/0');
+        my $pr = $opt->{-progress};
+        my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
+        local @SIG{keys %SIG} = values %SIG;
+
+        # like copydatabase(1), be sure we don't overwrite anything in case
+        # of other bugs:
+        setup_signals() if $opt->{compact};
+        my @tmp;
+        my @dst = map {
+                my $wip = $_->[1];
+                my $tmp = $opt->{compact} ? compact_tmp_shard($wip) : $wip;
+                push @tmp, $tmp;
+                $X->{WritableDatabase}->new($tmp->dirname, $flag);
+        } @$queue;
+        my $l = $src->get_metadata('indexlevel');
+        $dst[0]->set_metadata('indexlevel', $l) if $l eq 'medium';
+        my $fmt;
+        if ($pr_data) {
+                my $tot = $src->get_doccount;
+                $fmt = "$pfx % ".length($tot)."u/$tot\n";
+                $pr->("$pfx copying $tot documents\n");
+        }
+        my $cur = $src->postlist_begin('');
+        my $end = $src->postlist_end('');
+        my $git_dir_hash = $cidx->can('git_dir_hash');
+        my ($n, $nr);
+        for (; $cur != $end; $cur++) {
+                my $doc = $src->get_document($cur->get_docid);
+                if (my @cmt = xap_terms('Q', $doc)) {
+                        $n = hex(substr($cmt[0], 0, 8)) % scalar(@dst);
+                        warn "W: multi-commit: @cmt" if scalar(@cmt) != 1;
+                } elsif (my @P = xap_terms('P', $doc)) {
+                        $n = $git_dir_hash->($P[0]) % scalar(@dst);
+                        warn "W: multi-path @P " if scalar(@P) != 1;
+                } else {
+                        warn "W: skipped, no terms in ".$cur->get_docid;
+                        next;
+                }
+                $dst[$n]->add_document($doc);
+                $pr->(sprintf($fmt, $nr)) if $pr_data && !(++$nr & 1023);
+        }
+        return if !$opt->{compact};
+        $src = undef;
+        @dst = (); # flushes and closes
+        my @q;
+        for my $tmp (@tmp) {
+                my $arg = shift @$queue // die 'BUG: $queue empty';
+                my $wip = $arg->[1] // die 'BUG: no $wip';
+                push @q, [ "$tmp", $wip ];
+        }
+        delete $opt->{-progress_pfx};
+        process_queue(\@q, 'compact', $opt);
+}
+
 # Like copydatabase(1), this is horribly slow; and it doesn't seem due
 # to the overhead of Perl.
-sub cpdb ($$) {
+sub cpdb ($$) { # cb_spawn callback
         my ($args, $opt) = @_;
-        my ($old, $newdir) = @$args;
-        my $new = $newdir->dirname;
+        my ($old, $wip) = @$args;
         my ($src, $cur_shard);
         my $reshard;
-        PublicInbox::SearchIdx::load_xapian_writable() or die;
-        my $XapianDatabase = $PublicInbox::Search::X{Database};
+        my ($X, $flag) = xapian_write_prep($opt);
         if (ref($old) eq 'ARRAY') {
-                ($cur_shard) = ($new =~ m!xap[0-9]+/([0-9]+)\b!);
+                my $new = $wip->dirname;
+                ($cur_shard) = ($new =~ m!(?:xap|ei)[0-9]+/([0-9]+)\b!);
                 defined $cur_shard or
                         die "BUG: could not extract shard # from $new";
                 $reshard = $opt->{reshard};
@@ -402,36 +509,27 @@ sub cpdb ($$) {
                 # resharding, M:N copy means have full read access
                 foreach (@$old) {
                         if ($src) {
-                                my $sub = $XapianDatabase->new($_);
+                                my $sub = $X->{Database}->new($_);
                                 $src->add_database($sub);
                         } else {
-                                $src = $XapianDatabase->new($_);
+                                $src = $X->{Database}->new($_);
                         }
                 }
         } else {
-                $src = $XapianDatabase->new($old);
+                $src = $X->{Database}->new($old);
         }
 
-        my ($tmp, $ft);
-        local %SIG = %SIG;
+        my $tmp = $wip;
+        local @SIG{keys %SIG} = values %SIG;
         if ($opt->{compact}) {
-                my $dir = dirname($new);
-                same_fs_or_die($dir, $new);
-                $ft = File::Temp->newdir("$new.compact-XXXXXX", DIR => $dir);
+                $tmp = compact_tmp_shard($wip);
                 setup_signals();
-                $tmp = $ft->dirname;
-                nodatacow_dir($tmp);
-        } else {
-                $tmp = $new;
         }
 
         # like copydatabase(1), be sure we don't overwrite anything in case
         # of other bugs:
-        my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
-        die if $@;
-        my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
-        $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync};
-        my $dst = $XapianWritableDatabase->new($tmp, $flag);
+        my $new = $wip->dirname;
+        my $dst = $X->{WritableDatabase}->new($tmp->dirname, $flag);
         my $pr = $opt->{-progress};
         my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
         my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
@@ -443,11 +541,10 @@ sub cpdb ($$) {
                         $dst->set_metadata('last_commit', $lc) if $lc;
 
                         # only the first xapian shard (0) gets 'indexlevel'
-                        if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) {
+                        if ($new =~ m!/(?:xapian[0-9]+|(?:ei|xap)[0-9]+/0)\b!) {
                                 my $l = $src->get_metadata('indexlevel');
-                                if ($l eq 'medium') {
+                                $l eq 'medium' and
                                         $dst->set_metadata('indexlevel', $l);
-                                }
                         }
                         if ($pr_data) {
                                 my $tot = $src->get_doccount;
@@ -474,7 +571,7 @@ sub cpdb ($$) {
                 # individually.
                 $src = undef;
                 foreach (@$old) {
-                        my $old = $XapianDatabase->new($_);
+                        my $old = $X->{Database}->new($_);
                         cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard);
                 }
         } else {
@@ -489,7 +586,6 @@ sub cpdb ($$) {
         # this is probably the best place to do xapian-compact
         # since $dst isn't readable by HTTP or NNTP clients, yet:
         compact([ $tmp, $new ], $opt);
-        remove_tree($tmp) or die "failed to remove $tmp: $!\n";
 }
 
 1;
diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm
new file mode 100644
index 00000000..ac25eece
--- /dev/null
+++ b/lib/PublicInbox/XhcMset.pm
@@ -0,0 +1,51 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::Mset and allows slow queries from blocking the event loop
+package PublicInbox::XhcMset;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::XhcMsetIterator;
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub event_step {
+        my ($self) = @_;
+        my ($cb, @args) = @{delete $self->{cb_args} // return};
+        my $rd = $self->{sock};
+        eval {
+                my $hdr = <$rd> // die "E: reading mset header: $!";
+                for (split /\s+/, $hdr) { # read mset.size + estimated_matches
+                        my ($k, $v) = split /=/, $_, 2;
+                        $k =~ s/\A[^\.]*\.//; # s/(mset)?\./
+                        $self->{$k} = $v;
+                }
+                my $size = $self->{size} // die "E: bad xhc header: `$hdr'";
+                my @it = map { PublicInbox::XhcMsetIterator::make($_) } <$rd>;
+                $self->{items} = \@it;
+                scalar(@it) == $size or die
+                        'E: got ',scalar(@it),", expected mset.size=$size";
+        };
+        my $err = $@;
+        $self->close;
+        eval { $cb->(@args, $self, $err) };
+        warn "E: $@\n" if $@;
+}
+
+sub maybe_new {
+        my (undef, $rd, $srch, @cb_args) = @_;
+        my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__;
+        if ($PublicInbox::DS::in_loop) { # async
+                $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+        } else { # synchronous
+                $self->{sock} = $rd;
+                event_step($self);
+                undef;
+        }
+}
+
+eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size
+        get_matches_estimated)));
+
+sub items { @{$_[0]->{items}} }
+
+1;
diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm
new file mode 100644
index 00000000..dcfc61e4
--- /dev/null
+++ b/lib/PublicInbox/XhcMsetIterator.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::MsetIterator, there's many of these allocated at once
+package PublicInbox::XhcMsetIterator;
+use v5.12;
+
+sub make ($) {
+        chomp($_[0]);
+        my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank
+        # we don't store $xdb in self[4] since we avoid $it->get_document
+        # in favor of $xdb->get_document($it->get_docid)
+        bless \@self, __PACKAGE__;
+}
+
+sub get_docid { $_[0]->[0] }
+sub get_percent { $_[0]->[1] }
+sub get_rank { $_[0]->[2] }
+
+1;
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
index 800c6bad..e1f0ef39 100644
--- a/lib/PublicInbox/gcf2_libgit2.h
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020 all contributors <meta@public-inbox.org>
+ * Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
  * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
  *
  * libgit2 for Inline::C
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
new file mode 100644
index 00000000..a30a8768
--- /dev/null
+++ b/lib/PublicInbox/xap_helper.h
@@ -0,0 +1,1134 @@
+/*
+ * Copyright (C) all contributors <meta@public-inbox.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ * Note: GPL-2+ since it'll incorporate approxidate from git someday
+ *
+ * Standalone helper process using C and minimal C++ for Xapian,
+ * this is not linked to Perl in any way.
+ * C (not C++) is used as much as possible to lower the contribution
+ * barrier for hackers who mainly know C (this includes the maintainer).
+ * Yes, that means we use C stdlib stuff like hsearch and open_memstream
+ * instead their equivalents in the C++ stdlib :P
+ * Everything here is an unstable internal API of public-inbox and
+ * NOT intended for ordinary users; only public-inbox hackers
+ */
+#ifndef _ALL_SOURCE
+#        define _ALL_SOURCE
+#endif
+#if defined(__NetBSD__) && !defined(_OPENBSD_SOURCE) // for reallocarray(3)
+#        define _OPENBSD_SOURCE
+#endif
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <poll.h>
+
+#include <assert.h>
+#include <err.h> // BSD, glibc, and musl all have this
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <search.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <xapian.h> // our only reason for using C++
+
+#define MY_VER(maj,min,rev) ((maj) << 16 | (min) << 8 | (rev))
+#define XAP_VER \
+        MY_VER(XAPIAN_MAJOR_VERSION,XAPIAN_MINOR_VERSION,XAPIAN_REVISION)
+
+#if XAP_VER >= MY_VER(1,3,6)
+#        define NRP Xapian::NumberRangeProcessor
+#        define ADD_RP add_rangeprocessor
+#        define SET_MAX_EXPANSION set_max_expansion // technically 1.3.3
+#else
+#        define NRP Xapian::NumberValueRangeProcessor
+#        define ADD_RP add_valuerangeprocessor
+#        define SET_MAX_EXPANSION set_max_wildcard_expansion
+#endif
+
+#if defined(__GLIBC__)
+#        define MY_DO_OPTRESET() do { optind = 0; } while (0)
+#else /* FreeBSD, musl, dfly, NetBSD, OpenBSD */
+#        define MY_DO_OPTRESET() do { optind = optreset = 1; } while (0)
+#endif
+
+#if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__GLIBC__)
+#        define STDERR_ASSIGNABLE (1)
+#else
+#        define STDERR_ASSIGNABLE (0)
+#endif
+
+// assert functions are used correctly (e.g. ensure hackers don't
+// cause EINVAL/EFAULT).  Not for stuff that can fail due to HW
+// failures.
+# define CHECK(type, expect, expr) do { \
+        type ckvar______ = (expr); \
+        assert(ckvar______ == (expect) && "BUG" && __FILE__ && __LINE__); \
+} while (0)
+
+// coredump on most usage errors since our only users are internal
+#define ABORT(...) do { warnx(__VA_ARGS__); abort(); } while (0)
+#define EABORT(...) do { warn(__VA_ARGS__); abort(); } while (0)
+
+// sock_fd is modified in signal handler, yes, it's SOCK_SEQPACKET
+static volatile int sock_fd = STDIN_FILENO;
+static sigset_t fullset, workerset;
+static bool alive = true;
+#if STDERR_ASSIGNABLE
+static FILE *orig_err = stderr;
+#endif
+static int orig_err_fd = -1;
+static void *srch_tree; // tsearch + tdelete + twalk
+static pid_t *worker_pids; // nr => pid
+#define WORKER_MAX USHRT_MAX
+static unsigned long nworker, nworker_hwm;
+static int pipefds[2];
+static const char *stdout_path, *stderr_path; // for SIGUSR1
+static sig_atomic_t worker_needs_reopen;
+
+// PublicInbox::Search and PublicInbox::CodeSearch generate these:
+static void mail_nrp_init(void);
+static void code_nrp_init(void);
+static void qp_init_mail_search(Xapian::QueryParser *);
+static void qp_init_code_search(Xapian::QueryParser *);
+
+enum exc_iter {
+        ITER_OK = 0,
+        ITER_RETRY,
+        ITER_ABORT
+};
+
+struct srch {
+        int paths_len; // int for comparisons
+        unsigned qp_flags;
+        bool qp_extra_done;
+        Xapian::Database *db;
+        Xapian::QueryParser *qp;
+        char paths[]; // $shard_path0\0$shard_path1\0...
+};
+
+#define MY_ARG_MAX 256
+typedef bool (*cmd)(struct req *);
+
+// only one request per-process since we have RLIMIT_CPU timeout
+struct req { // argv and pfxv point into global rbuf
+        char *argv[MY_ARG_MAX];
+        char *pfxv[MY_ARG_MAX]; // -A <prefix>
+        char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX>
+        size_t *lenv; // -A <prefix>LENGTH
+        struct srch *srch;
+        char *Pgit_dir;
+        char *Oeidx_key;
+        cmd fn;
+        unsigned long long max;
+        unsigned long long off;
+        unsigned long long threadid;
+        unsigned long timeout_sec;
+        size_t nr_out;
+        long sort_col; // value column, negative means BoolWeight
+        int argc;
+        int pfxc;
+        int qpfxc;
+        FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
+        bool has_input; // fp[0] is bidirectional
+        bool collapse_threads;
+        bool code_search;
+        bool relevance; // sort by relevance before column
+        bool asc; // ascending sort
+};
+
+struct worker {
+        pid_t pid;
+        unsigned nr;
+};
+
+struct fbuf {
+        FILE *fp;
+        char *ptr;
+        size_t len;
+};
+
+#define SPLIT2ARGV(dst,buf,len) split2argv(dst,buf,len,MY_ARRAY_SIZE(dst))
+static size_t split2argv(char **dst, char *buf, size_t len, size_t limit)
+{
+        if (buf[0] == 0 || len == 0 || buf[len - 1] != 0)
+                ABORT("bogus argument given");
+        size_t nr = 0;
+        char *c = buf;
+        for (size_t i = 1; i < len; i++) {
+                if (!buf[i]) {
+                        dst[nr++] = c;
+                        c = buf + i + 1;
+                }
+                if (nr == limit)
+                        ABORT("too many args: %zu == %zu", nr, limit);
+        }
+        if (nr == 0) ABORT("no argument given");
+        if ((long)nr < 0) ABORT("too many args: %zu", nr);
+        return (long)nr;
+}
+
+static bool has_threadid(const struct srch *srch)
+{
+        return srch->db->get_metadata("has_threadid") == "1";
+}
+
+static Xapian::Enquire prep_enquire(const struct req *req)
+{
+        Xapian::Enquire enq(*req->srch->db);
+        if (req->sort_col < 0) {
+                enq.set_weighting_scheme(Xapian::BoolWeight());
+                enq.set_docid_order(req->asc ? Xapian::Enquire::ASCENDING
+                                        : Xapian::Enquire::DESCENDING);
+        } else if (req->relevance) {
+                enq.set_sort_by_relevance_then_value(req->sort_col, !req->asc);
+        } else {
+                enq.set_sort_by_value_then_relevance(req->sort_col, !req->asc);
+        }
+        return enq;
+}
+
+static Xapian::MSet enquire_mset(struct req *req, Xapian::Enquire *enq)
+{
+        if (!req->max) {
+                switch (sizeof(Xapian::doccount)) {
+                case 4: req->max = UINT_MAX; break;
+                default: req->max = ULLONG_MAX;
+                }
+        }
+        for (int i = 0; i < 9; i++) {
+                try {
+                        Xapian::MSet mset = enq->get_mset(req->off, req->max);
+                        return mset;
+                } catch (const Xapian::DatabaseModifiedError & e) {
+                        req->srch->db->reopen();
+                }
+        }
+        return enq->get_mset(req->off, req->max);
+}
+
+// for v1, v2, and extindex
+static Xapian::MSet mail_mset(struct req *req, const char *qry_str)
+{
+        struct srch *srch = req->srch;
+        Xapian::Query qry = srch->qp->parse_query(qry_str, srch->qp_flags);
+        if (req->Oeidx_key) {
+                req->Oeidx_key[0] = 'O'; // modifies static rbuf
+                qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
+                                        Xapian::Query(req->Oeidx_key));
+        }
+        // TODO: uid_range
+        if (req->threadid != ULLONG_MAX) {
+                std::string tid = Xapian::sortable_serialise(req->threadid);
+                qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
+                        Xapian::Query(Xapian::Query::OP_VALUE_RANGE, THREADID,
+                                        tid, tid));
+        }
+        Xapian::Enquire enq = prep_enquire(req);
+        enq.set_query(qry);
+        // THREADID is a CPP macro defined on CLI (see) XapHelperCxx.pm
+        if (req->collapse_threads && has_threadid(srch))
+                enq.set_collapse_key(THREADID);
+
+        return enquire_mset(req, &enq);
+}
+
+static bool starts_with(const std::string *s, const char *pfx, size_t pfx_len)
+{
+        return s->size() >= pfx_len && !memcmp(pfx, s->c_str(), pfx_len);
+}
+
+static void apply_roots_filter(struct req *req, Xapian::Query *qry)
+{
+        if (!req->Pgit_dir) return;
+        req->Pgit_dir[0] = 'P'; // modifies static rbuf
+        Xapian::Database *xdb = req->srch->db;
+        for (int i = 0; i < 9; i++) {
+                try {
+                        std::string P = req->Pgit_dir;
+                        Xapian::PostingIterator p = xdb->postlist_begin(P);
+                        if (p == xdb->postlist_end(P)) {
+                                warnx("W: %s not indexed?", req->Pgit_dir + 1);
+                                return;
+                        }
+                        Xapian::TermIterator cur = xdb->termlist_begin(*p);
+                        Xapian::TermIterator end = xdb->termlist_end(*p);
+                        cur.skip_to("G");
+                        if (cur == end) {
+                                warnx("W: %s has no root commits?",
+                                        req->Pgit_dir + 1);
+                                return;
+                        }
+                        Xapian::Query f = Xapian::Query(*cur);
+                        for (++cur; cur != end; ++cur) {
+                                std::string tn = *cur;
+                                if (!starts_with(&tn, "G", 1))
+                                        continue;
+                                f = Xapian::Query(Xapian::Query::OP_OR, f, tn);
+                        }
+                        *qry = Xapian::Query(Xapian::Query::OP_FILTER, *qry, f);
+                        return;
+                } catch (const Xapian::DatabaseModifiedError & e) {
+                        xdb->reopen();
+                }
+        }
+}
+
+// for cindex
+static Xapian::MSet commit_mset(struct req *req, const char *qry_str)
+{
+        struct srch *srch = req->srch;
+        Xapian::Query qry = srch->qp->parse_query(qry_str, srch->qp_flags);
+        apply_roots_filter(req, &qry);
+
+        // we only want commits:
+        qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
+                                Xapian::Query("T" "c"));
+        Xapian::Enquire enq = prep_enquire(req);
+        enq.set_query(qry);
+        return enquire_mset(req, &enq);
+}
+
+static void emit_mset_stats(struct req *req, const Xapian::MSet *mset)
+{
+        if (req->fp[1])
+                fprintf(req->fp[1], "mset.size=%llu nr_out=%zu\n",
+                        (unsigned long long)mset->size(), req->nr_out);
+        else
+                ABORT("BUG: %s caller only passed 1 FD", req->argv[0]);
+}
+
+static int my_setlinebuf(FILE *fp) // glibc setlinebuf(3) can't report errors
+{
+        return setvbuf(fp, NULL, _IOLBF, 0);
+}
+
+// n.b. __cleanup__ works fine with C++ exceptions, but not longjmp
+// Only clang and g++ are supported, as AFAIK there's no other
+// relevant Free(-as-in-speech) C++ compilers.
+#define CLEANUP_FBUF __attribute__((__cleanup__(fbuf_ensure)))
+static void fbuf_ensure(void *ptr)
+{
+        struct fbuf *fbuf = (struct fbuf *)ptr;
+        if (fbuf->fp && fclose(fbuf->fp))
+                err(EXIT_FAILURE, "fclose(fbuf->fp)"); // ENOMEM?
+        fbuf->fp = NULL;
+        free(fbuf->ptr);
+}
+
+static void fbuf_init(struct fbuf *fbuf)
+{
+        assert(!fbuf->ptr);
+        fbuf->fp = open_memstream(&fbuf->ptr, &fbuf->len);
+        if (!fbuf->fp) err(EXIT_FAILURE, "open_memstream(fbuf)");
+}
+
+static bool write_all(int fd, const struct fbuf *wbuf, size_t len)
+{
+        const char *p = wbuf->ptr;
+        assert(wbuf->len >= len);
+        do { // write to client FD
+                ssize_t n = write(fd, p, len);
+                if (n > 0) {
+                        len -= n;
+                        p += n;
+                } else {
+                        perror(n ? "write" : "write (zero bytes)");
+                        return false;
+                }
+        } while (len);
+        return true;
+}
+
+#define ERR_FLUSH(f) do { \
+        if (ferror(f) | fflush(f)) err(EXIT_FAILURE, "ferror|fflush "#f); \
+} while (0)
+
+#define ERR_CLOSE(f, e) do { \
+        if (ferror(f) | fclose(f)) \
+                e ? err(e, "ferror|fclose "#f) : perror("ferror|fclose "#f); \
+} while (0)
+
+static void xclose(int fd)
+{
+        if (close(fd) < 0 && errno != EINTR)
+                EABORT("BUG: close");
+}
+
+static size_t off2size(off_t n)
+{
+        if (n < 0 || (uintmax_t)n > SIZE_MAX)
+                ABORT("off_t out of size_t range: %lld\n", (long long)n);
+        return (size_t)n;
+}
+
+static char *hsearch_enter_key(char *s)
+{
+#if defined(__OpenBSD__) || defined(__DragonFly__)
+        // hdestroy frees each key on some platforms,
+        // so give it something to free:
+        char *ret = strdup(s);
+        if (!ret) err(EXIT_FAILURE, "strdup");
+        return ret;
+// AFAIK there's no way to detect musl, assume non-glibc Linux is musl:
+#elif defined(__GLIBC__) || defined(__linux__) || \
+        defined(__FreeBSD__) || defined(__NetBSD__)
+        // do nothing on these platforms
+#else
+#warning untested platform detected, unsure if hdestroy(3) frees keys
+#warning contact us at meta@public-inbox.org if you get segfaults
+#endif
+        return s;
+}
+
+// for test usage only, we need to ensure the compiler supports
+// __cleanup__ when exceptions are thrown
+struct inspect { struct req *req; };
+
+static void inspect_ensure(struct inspect *x)
+{
+        fprintf(x->req->fp[0], "pid=%d has_threadid=%d",
+                (int)getpid(), has_threadid(x->req->srch) ? 1 : 0);
+}
+
+static bool cmd_test_inspect(struct req *req)
+{
+        __attribute__((__cleanup__(inspect_ensure))) struct inspect x;
+        x.req = req;
+        try {
+                throw Xapian::InvalidArgumentError("test");
+        } catch (Xapian::InvalidArgumentError) {
+                return true;
+        }
+        fputs("this should not be printed", req->fp[0]);
+        return false;
+}
+
+static bool cmd_test_sleep(struct req *req)
+{
+        for (;;) poll(NULL, 0, 10);
+        return false;
+}
+#include "xh_mset.h" // read-only (WWW, IMAP, lei) stuff
+#include "xh_cidx.h" // CodeSearchIdx.pm stuff
+
+#define CMD(n) { .fn_len = sizeof(#n) - 1, .fn_name = #n, .fn = cmd_##n }
+static const struct cmd_entry {
+        size_t fn_len;
+        const char *fn_name;
+        cmd fn;
+} cmds[] = { // should be small enough to not need bsearch || gperf
+        // most common commands first
+        CMD(mset), // WWW and IMAP requests
+        CMD(dump_ibx), // many inboxes
+        CMD(dump_roots), // per-cidx shard
+        CMD(test_inspect), // least common commands last
+        CMD(test_sleep), // least common commands last
+};
+
+#define MY_ARRAY_SIZE(x)        (sizeof(x)/sizeof((x)[0]))
+#define RECV_FD_CAPA 2
+#define RECV_FD_SPACE        (RECV_FD_CAPA * sizeof(int))
+union my_cmsg {
+        struct cmsghdr hdr;
+        char pad[sizeof(struct cmsghdr) + 16 + RECV_FD_SPACE];
+};
+
+static bool recv_req(struct req *req, char *rbuf, size_t *len)
+{
+        union my_cmsg cmsg = {};
+        struct msghdr msg = {};
+        struct iovec iov;
+        ssize_t r;
+        iov.iov_base = rbuf;
+        iov.iov_len = *len;
+        msg.msg_iov = &iov;
+        msg.msg_iovlen = 1;
+        msg.msg_control = &cmsg.hdr;
+        msg.msg_controllen = CMSG_SPACE(RECV_FD_SPACE);
+
+        // allow SIGTERM to hit
+        CHECK(int, 0, sigprocmask(SIG_SETMASK, &workerset, NULL));
+
+again:
+        r = recvmsg(sock_fd, &msg, 0);
+        if (r == 0) {
+                exit(EX_NOINPUT); /* grandparent went away */
+        } else if (r < 0) {
+                switch (errno) {
+                case EINTR: goto again;
+                case EBADF: if (sock_fd < 0) exit(0);
+                        // fall-through
+                default: err(EXIT_FAILURE, "recvmsg");
+                }
+        }
+
+        // success! no signals for the rest of the request/response cycle
+        CHECK(int, 0, sigprocmask(SIG_SETMASK, &fullset, NULL));
+        if (r > 0 && msg.msg_flags)
+                ABORT("unexpected msg_flags");
+
+        *len = r;
+        if (cmsg.hdr.cmsg_level == SOL_SOCKET &&
+                        cmsg.hdr.cmsg_type == SCM_RIGHTS) {
+                size_t clen = cmsg.hdr.cmsg_len;
+                int *fdp = (int *)CMSG_DATA(&cmsg.hdr);
+                size_t i;
+                for (i = 0; CMSG_LEN((i + 1) * sizeof(int)) <= clen; i++) {
+                        int fd = *fdp++;
+                        const char *mode = NULL;
+                        int fl = fcntl(fd, F_GETFL);
+                        if (fl == -1) {
+                                errx(EXIT_FAILURE, "invalid fd=%d", fd);
+                        } else if (fl & O_WRONLY) {
+                                mode = "w";
+                        } else if (fl & O_RDWR) {
+                                mode = "r+";
+                                if (i == 0) req->has_input = true;
+                        } else {
+                                errx(EXIT_FAILURE,
+                                        "invalid mode from F_GETFL: 0x%x", fl);
+                        }
+                        req->fp[i] = fdopen(fd, mode);
+                        if (!req->fp[i])
+                                err(EXIT_FAILURE, "fdopen(fd=%d)", fd);
+                }
+                return true;
+        }
+        errx(EXIT_FAILURE, "no FD received in %zd-byte request", r);
+        return false;
+}
+
+static int srch_cmp(const void *pa, const void *pb) // for tfind|tsearch
+{
+        const struct srch *a = (const struct srch *)pa;
+        const struct srch *b = (const struct srch *)pb;
+        int diff = a->paths_len - b->paths_len;
+
+        return diff ? diff : memcmp(a->paths, b->paths, (size_t)a->paths_len);
+}
+
+static bool is_chert(const char *dir)
+{
+        char iamchert[PATH_MAX];
+        struct stat sb;
+        int rc = snprintf(iamchert, sizeof(iamchert), "%s/iamchert", dir);
+
+        if (rc <= 0 || rc >= (int)sizeof(iamchert))
+                err(EXIT_FAILURE, "BUG: snprintf(%s/iamchert)", dir);
+        if (stat(iamchert, &sb) == 0 && S_ISREG(sb.st_mode))
+                return true;
+        return false;
+}
+
+static bool srch_init(struct req *req)
+{
+        char *dirv[MY_ARG_MAX];
+        int i;
+        struct srch *srch = req->srch;
+        int dirc = (int)SPLIT2ARGV(dirv, srch->paths, (size_t)srch->paths_len);
+        const unsigned FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE;
+        srch->qp_flags = FLAG_PHRASE |
+                        Xapian::QueryParser::FLAG_BOOLEAN |
+                        Xapian::QueryParser::FLAG_LOVEHATE |
+                        Xapian::QueryParser::FLAG_WILDCARD;
+        if (is_chert(dirv[0]))
+                srch->qp_flags &= ~FLAG_PHRASE;
+        try {
+                srch->db = new Xapian::Database(dirv[0]);
+        } catch (...) {
+                warn("E: Xapian::Database(%s)", dirv[0]);
+                return false;
+        }
+        try {
+                for (i = 1; i < dirc; i++) {
+                        if (srch->qp_flags & FLAG_PHRASE && is_chert(dirv[i]))
+                                srch->qp_flags &= ~FLAG_PHRASE;
+                        srch->db->add_database(Xapian::Database(dirv[i]));
+                }
+        } catch (...) {
+                warn("E: add_database(%s)", dirv[i]);
+                return false;
+        }
+        try {
+                srch->qp = new Xapian::QueryParser;
+        } catch (...) {
+                perror("E: Xapian::QueryParser");
+                return false;
+        }
+        srch->qp->set_default_op(Xapian::Query::OP_AND);
+        srch->qp->set_database(*srch->db);
+        try {
+                srch->qp->set_stemmer(Xapian::Stem("english"));
+        } catch (...) {
+                perror("E: Xapian::Stem");
+                return false;
+        }
+        srch->qp->set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
+        srch->qp->SET_MAX_EXPANSION(100);
+
+        if (req->code_search)
+                qp_init_code_search(srch->qp); // CodeSearch.pm
+        else
+                qp_init_mail_search(srch->qp); // Search.pm
+        return true;
+}
+
+// setup query parser for altid and arbitrary headers
+static void srch_init_extra(struct req *req)
+{
+        const char *XPFX;
+        for (int i = 0; i < req->qpfxc; i++) {
+                size_t len = strlen(req->qpfxv[i]);
+                char *c = (char *)memchr(req->qpfxv[i], '=', len);
+
+                if (c) { // it's boolean "gmane=XGMANE"
+                        XPFX = c + 1;
+                        *c = 0;
+                        req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX);
+                        continue;
+                }
+                // maybe it's a non-boolean prefix "blob:XBLOBID"
+                c = (char *)memchr(req->qpfxv[i], ':', len);
+                if (!c)
+                        errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]);
+                XPFX = c + 1;
+                *c = 0;
+                req->srch->qp->add_prefix(req->qpfxv[i], XPFX);
+        }
+        req->srch->qp_extra_done = true;
+}
+
+static void free_srch(void *p) // tdestroy
+{
+        struct srch *srch = (struct srch *)p;
+        delete srch->qp;
+        delete srch->db;
+        free(srch);
+}
+
+static void dispatch(struct req *req)
+{
+        int c;
+        size_t size = strlen(req->argv[0]);
+        union {
+                struct srch *srch;
+                char *ptr;
+        } kbuf;
+        char *end;
+        FILE *kfp;
+        struct srch **s;
+        req->threadid = ULLONG_MAX;
+        for (c = 0; c < (int)MY_ARRAY_SIZE(cmds); c++) {
+                if (cmds[c].fn_len == size &&
+                        !memcmp(cmds[c].fn_name, req->argv[0], size)) {
+                        req->fn = cmds[c].fn;
+                        break;
+                }
+        }
+        if (!req->fn) ABORT("not handled: `%s'", req->argv[0]);
+
+        kfp = open_memstream(&kbuf.ptr, &size);
+        if (!kfp) err(EXIT_FAILURE, "open_memstream(kbuf)");
+        // write padding, first (contents don't matter)
+        fwrite(&req->argv[0], offsetof(struct srch, paths), 1, kfp);
+
+        // global getopt variables:
+        optopt = 0;
+        optarg = NULL;
+        MY_DO_OPTRESET();
+
+        // XH_SPEC is generated from @PublicInbox::Search::XH_SPEC
+        while ((c = getopt(req->argc, req->argv, XH_SPEC)) != -1) {
+                switch (c) {
+                case 'a': req->asc = true; break;
+                case 'c': req->code_search = true; break;
+                case 'd': fwrite(optarg, strlen(optarg) + 1, 1, kfp); break;
+                case 'g': req->Pgit_dir = optarg - 1; break; // pad "P" prefix
+                case 'k':
+                        req->sort_col = strtol(optarg, &end, 10);
+                        if (*end) ABORT("-k %s", optarg);
+                        switch (req->sort_col) {
+                        case LONG_MAX: case LONG_MIN: ABORT("-k %s", optarg);
+                        }
+                        break;
+                case 'm':
+                        req->max = strtoull(optarg, &end, 10);
+                        if (*end || req->max == ULLONG_MAX)
+                                ABORT("-m %s", optarg);
+                        break;
+                case 'o':
+                        req->off = strtoull(optarg, &end, 10);
+                        if (*end || req->off == ULLONG_MAX)
+                                ABORT("-o %s", optarg);
+                        break;
+                case 'r': req->relevance = true; break;
+                case 't': req->collapse_threads = true; break;
+                case 'A':
+                        req->pfxv[req->pfxc++] = optarg;
+                        if (MY_ARG_MAX == req->pfxc)
+                                ABORT("too many -A");
+                        break;
+                case 'K':
+                        req->timeout_sec = strtoul(optarg, &end, 10);
+                        if (*end || req->timeout_sec == ULONG_MAX)
+                                ABORT("-K %s", optarg);
+                        break;
+                case 'O': req->Oeidx_key = optarg - 1; break; // pad "O" prefix
+                case 'T':
+                        req->threadid = strtoull(optarg, &end, 10);
+                        if (*end || req->threadid == ULLONG_MAX)
+                                ABORT("-T %s", optarg);
+                        break;
+                case 'Q':
+                        req->qpfxv[req->qpfxc++] = optarg;
+                        if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+                        break;
+                default: ABORT("bad switch `-%c'", c);
+                }
+        }
+        ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch
+        kbuf.srch->db = NULL;
+        kbuf.srch->qp = NULL;
+        kbuf.srch->qp_extra_done = false;
+        kbuf.srch->paths_len = size - offsetof(struct srch, paths);
+        if (kbuf.srch->paths_len <= 0)
+                ABORT("no -d args");
+        s = (struct srch **)tsearch(kbuf.srch, &srch_tree, srch_cmp);
+        if (!s) err(EXIT_FAILURE, "tsearch"); // likely ENOMEM
+        req->srch = *s;
+        if (req->srch != kbuf.srch) { // reuse existing
+                free_srch(kbuf.srch);
+                req->srch->db->reopen();
+        } else if (!srch_init(req)) {
+                assert(kbuf.srch == *((struct srch **)tfind(
+                                        kbuf.srch, &srch_tree, srch_cmp)));
+                void *del = tdelete(kbuf.srch, &srch_tree, srch_cmp);
+                assert(del);
+                free_srch(kbuf.srch);
+                goto cmd_err; // srch_init already warned
+        }
+        if (req->qpfxc && !req->srch->qp_extra_done)
+                srch_init_extra(req);
+        if (req->timeout_sec)
+                alarm(req->timeout_sec > UINT_MAX ?
+                        UINT_MAX : (unsigned)req->timeout_sec);
+        try {
+                if (!req->fn(req))
+                        warnx("`%s' failed", req->argv[0]);
+        } catch (const Xapian::Error & e) {
+                warnx("Xapian::Error: %s", e.get_description().c_str());
+        } catch (...) {
+                warn("unhandled exception");
+        }
+        if (req->timeout_sec)
+                alarm(0);
+cmd_err:
+        return; // just be silent on errors, for now
+}
+
+static void cleanup_pids(void)
+{
+        free(worker_pids);
+        worker_pids = NULL;
+}
+
+static void stderr_set(FILE *tmp_err)
+{
+#if STDERR_ASSIGNABLE
+        if (my_setlinebuf(tmp_err))
+                perror("W: setlinebuf(tmp_err)");
+        stderr = tmp_err;
+        return;
+#endif
+        int fd = fileno(tmp_err);
+        if (fd < 0) err(EXIT_FAILURE, "BUG: fileno(tmp_err)");
+        while (dup2(fd, STDERR_FILENO) < 0) {
+                if (errno != EINTR)
+                        err(EXIT_FAILURE, "dup2(%d => 2)", fd);
+        }
+}
+
+static void stderr_restore(FILE *tmp_err)
+{
+#if STDERR_ASSIGNABLE
+        stderr = orig_err;
+        return;
+#endif
+        ERR_FLUSH(stderr);
+        while (dup2(orig_err_fd, STDERR_FILENO) < 0) {
+                if (errno != EINTR)
+                        err(EXIT_FAILURE, "dup2(%d => 2)", orig_err_fd);
+        }
+        clearerr(stderr);
+}
+
+static void sigw(int sig) // SIGTERM+SIGUSR1 handler for worker
+{
+        switch (sig) {
+        case SIGUSR1: worker_needs_reopen = 1; break;
+        default: sock_fd = -1; // break out of recv_loop
+        }
+}
+
+#define CLEANUP_REQ __attribute__((__cleanup__(req_cleanup)))
+static void req_cleanup(void *ptr)
+{
+        struct req *req = (struct req *)ptr;
+        free(req->lenv);
+}
+
+static void reopen_logs(void)
+{
+        if (stdout_path && *stdout_path && !freopen(stdout_path, "a", stdout))
+                err(EXIT_FAILURE, "freopen %s", stdout_path);
+        if (stderr_path && *stderr_path) {
+                if (!freopen(stderr_path, "a", stderr))
+                        err(EXIT_FAILURE, "freopen %s", stderr_path);
+                if (my_setlinebuf(stderr))
+                        err(EXIT_FAILURE, "setlinebuf(stderr)");
+        }
+}
+
+static void recv_loop(void) // worker process loop
+{
+        static char rbuf[4096 * 33]; // per-process
+        struct sigaction sa = {};
+        sa.sa_handler = sigw;
+
+        CHECK(int, 0, sigaction(SIGTERM, &sa, NULL));
+        CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL));
+
+        while (sock_fd == 0) {
+                size_t len = sizeof(rbuf);
+                CLEANUP_REQ struct req req = {};
+
+                if (!recv_req(&req, rbuf, &len))
+                        continue;
+                if (req.fp[1])
+                        stderr_set(req.fp[1]);
+                req.argc = (int)SPLIT2ARGV(req.argv, rbuf, len);
+                dispatch(&req);
+                ERR_CLOSE(req.fp[0], 0);
+                if (req.fp[1]) {
+                        stderr_restore(req.fp[1]);
+                        ERR_CLOSE(req.fp[1], 0);
+                }
+                if (worker_needs_reopen) {
+                        worker_needs_reopen = 0;
+                        reopen_logs();
+                }
+        }
+}
+
+static void insert_pid(pid_t pid, unsigned nr)
+{
+        assert(!worker_pids[nr]);
+        worker_pids[nr] = pid;
+}
+
+static void start_worker(unsigned nr)
+{
+        pid_t pid = fork();
+        if (pid < 0) {
+                warn("E: fork(worker=%u)", nr);
+        } else if (pid > 0) {
+                insert_pid(pid, nr);
+        } else {
+                cleanup_pids();
+                xclose(pipefds[0]);
+                xclose(pipefds[1]);
+                if (signal(SIGCHLD, SIG_DFL) == SIG_ERR)
+                        err(EXIT_FAILURE, "signal CHLD");
+                if (signal(SIGTTIN, SIG_IGN) == SIG_ERR)
+                        err(EXIT_FAILURE, "signal TTIN");
+                if (signal(SIGTTOU, SIG_IGN) == SIG_ERR)
+                        err(EXIT_FAILURE, "signal TTIN");
+                recv_loop();
+                exit(0);
+        }
+}
+
+static void start_workers(void)
+{
+        sigset_t old;
+
+        CHECK(int, 0, sigprocmask(SIG_SETMASK, &fullset, &old));
+        for (unsigned long nr = 0; nr < nworker; nr++) {
+                if (!worker_pids[nr])
+                        start_worker(nr);
+        }
+        CHECK(int, 0, sigprocmask(SIG_SETMASK, &old, NULL));
+}
+
+static void cleanup_all(void)
+{
+        cleanup_pids();
+#ifdef __GLIBC__
+        tdestroy(srch_tree, free_srch);
+        srch_tree = NULL;
+#endif
+}
+
+static void parent_reopen_logs(void)
+{
+        reopen_logs();
+        for (unsigned long nr = nworker; nr < nworker_hwm; nr++) {
+                pid_t pid = worker_pids[nr];
+                if (pid != 0 && kill(pid, SIGUSR1))
+                        warn("BUG?: kill(%d, SIGUSR1)", (int)pid);
+        }
+}
+
+static void sigp(int sig) // parent signal handler
+{
+        static const char eagain[] = "signals coming in too fast";
+        static const char bad_sig[] = "BUG: bad sig\n";
+        static const char write_errno[] = "BUG: sigp write (errno)";
+        static const char write_zero[] = "BUG: sigp write wrote zero bytes";
+        char c = 0;
+
+        switch (sig) {
+        case SIGCHLD: c = '.'; break;
+        case SIGTTOU: c = '-'; break;
+        case SIGTTIN: c = '+'; break;
+        case SIGUSR1: c = '#'; break;
+        default:
+                write(STDERR_FILENO, bad_sig, sizeof(bad_sig) - 1);
+                _exit(EXIT_FAILURE);
+        }
+        ssize_t w = write(pipefds[1], &c, 1);
+        if (w > 0) return;
+        if (w < 0 && errno == EAGAIN) {
+                write(STDERR_FILENO, eagain, sizeof(eagain) - 1);
+                return;
+        } else if (w == 0) {
+                write(STDERR_FILENO, write_zero, sizeof(write_zero) - 1);
+        } else {
+                // strerror isn't technically async-signal-safe, and
+                // strerrordesc_np+strerrorname_np isn't portable
+                write(STDERR_FILENO, write_errno, sizeof(write_errno) - 1);
+        }
+        _exit(EXIT_FAILURE);
+}
+
+static void reaped_worker(pid_t pid, int st)
+{
+        unsigned long nr = 0;
+        for (; nr < nworker_hwm; nr++) {
+                if (worker_pids[nr] == pid) {
+                        worker_pids[nr] = 0;
+                        break;
+                }
+        }
+        if (nr >= nworker_hwm) {
+                warnx("W: unknown pid=%d reaped $?=%d", (int)pid, st);
+                return;
+        }
+        if (WIFEXITED(st) && WEXITSTATUS(st) == EX_NOINPUT)
+                alive = false;
+        else if (st)
+                warnx("worker[%lu] died $?=%d alive=%d", nr, st, (int)alive);
+        if (alive)
+                start_workers();
+}
+
+static void do_sigchld(void)
+{
+        while (1) {
+                int st;
+                pid_t pid = waitpid(-1, &st, WNOHANG);
+                if (pid > 0) {
+                        reaped_worker(pid, st);
+                } else if (pid == 0) {
+                        return;
+                } else {
+                        switch (errno) {
+                        case ECHILD: return;
+                        case EINTR: break; // can it happen w/ WNOHANG?
+                        default: err(EXIT_FAILURE, "BUG: waitpid");
+                        }
+                }
+        }
+}
+
+static void do_sigttin(void)
+{
+        if (!alive) return;
+        if (nworker >= WORKER_MAX) {
+                warnx("workers cannot exceed %zu", (size_t)WORKER_MAX);
+                return;
+        }
+        void *p = realloc(worker_pids, (nworker + 1) * sizeof(pid_t));
+        if (!p) {
+                warn("realloc worker_pids");
+        } else {
+                worker_pids = (pid_t *)p;
+                worker_pids[nworker++] = 0;
+                if (nworker_hwm < nworker)
+                        nworker_hwm = nworker;
+                start_workers();
+        }
+}
+
+static void do_sigttou(void)
+{
+        if (!alive || nworker <= 1) return;
+
+        // worker_pids array does not shrink
+        --nworker;
+        for (unsigned long nr = nworker; nr < nworker_hwm; nr++) {
+                pid_t pid = worker_pids[nr];
+                if (pid != 0 && kill(pid, SIGTERM))
+                        warn("BUG?: kill(%d, SIGTERM)", (int)pid);
+        }
+}
+
+static size_t living_workers(void)
+{
+        size_t ret = 0;
+
+        for (unsigned long nr = 0; nr < nworker_hwm; nr++) {
+                if (worker_pids[nr])
+                        ret++;
+        }
+        return ret;
+}
+
+int main(int argc, char *argv[])
+{
+        int c;
+        socklen_t slen = (socklen_t)sizeof(c);
+        stdout_path = getenv("STDOUT_PATH");
+        stderr_path = getenv("STDERR_PATH");
+
+        if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen))
+                err(EXIT_FAILURE, "getsockopt");
+        if (c != SOCK_SEQPACKET)
+                errx(EXIT_FAILURE, "stdin is not SOCK_SEQPACKET");
+
+        mail_nrp_init();
+        code_nrp_init();
+        atexit(cleanup_all);
+
+        if (!STDERR_ASSIGNABLE) {
+                orig_err_fd = dup(STDERR_FILENO);
+                if (orig_err_fd < 0)
+                        err(EXIT_FAILURE, "dup(2)");
+        }
+
+        nworker = 1;
+        // make warn/warnx/err multi-process friendly:
+        if (my_setlinebuf(stderr))
+                err(EXIT_FAILURE, "setlinebuf(stderr)");
+        // not using -W<workers> like Daemon.pm, since -W is reserved (glibc)
+        while ((c = getopt(argc, argv, "j:")) != -1) {
+                char *end;
+
+                switch (c) {
+                case 'j':
+                        nworker = strtoul(optarg, &end, 10);
+                        if (*end != 0 || nworker > WORKER_MAX)
+                                errx(EXIT_FAILURE, "-j %s invalid", optarg);
+                        break;
+                case ':':
+                        errx(EXIT_FAILURE, "missing argument: `-%c'", optopt);
+                case '?':
+                        errx(EXIT_FAILURE, "unrecognized: `-%c'", optopt);
+                default:
+                        errx(EXIT_FAILURE, "BUG: `-%c'", c);
+                }
+        }
+        sigset_t pset; // parent-only
+        CHECK(int, 0, sigfillset(&pset));
+
+        // global sigsets:
+        CHECK(int, 0, sigfillset(&fullset));
+        CHECK(int, 0, sigfillset(&workerset));
+
+#define DELSET(sig) do { \
+        CHECK(int, 0, sigdelset(&fullset, sig)); \
+        CHECK(int, 0, sigdelset(&workerset, sig)); \
+        CHECK(int, 0, sigdelset(&pset, sig)); \
+} while (0)
+        DELSET(SIGABRT);
+        DELSET(SIGBUS);
+        DELSET(SIGFPE);
+        DELSET(SIGILL);
+        DELSET(SIGSEGV);
+        DELSET(SIGXCPU);
+        DELSET(SIGXFSZ);
+#undef DELSET
+        CHECK(int, 0, sigdelset(&workerset, SIGUSR1));
+        CHECK(int, 0, sigdelset(&fullset, SIGALRM));
+
+        if (nworker == 0) { // no SIGTERM handling w/o workers
+                recv_loop();
+                return 0;
+        }
+        CHECK(int, 0, sigdelset(&workerset, SIGTERM));
+        CHECK(int, 0, sigdelset(&workerset, SIGCHLD));
+        nworker_hwm = nworker;
+        worker_pids = (pid_t *)calloc(nworker, sizeof(pid_t));
+        if (!worker_pids) err(EXIT_FAILURE, "calloc");
+
+        if (pipe(pipefds)) err(EXIT_FAILURE, "pipe");
+        int fl = fcntl(pipefds[1], F_GETFL);
+        if (fl == -1) err(EXIT_FAILURE, "F_GETFL");
+        if (fcntl(pipefds[1], F_SETFL, fl | O_NONBLOCK))
+                err(EXIT_FAILURE, "F_SETFL");
+
+        CHECK(int, 0, sigdelset(&pset, SIGCHLD));
+        CHECK(int, 0, sigdelset(&pset, SIGTTIN));
+        CHECK(int, 0, sigdelset(&pset, SIGTTOU));
+        CHECK(int, 0, sigdelset(&pset, SIGUSR1));
+
+        struct sigaction sa = {};
+        sa.sa_handler = sigp;
+
+        CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL));
+        CHECK(int, 0, sigaction(SIGTTIN, &sa, NULL));
+        CHECK(int, 0, sigaction(SIGTTOU, &sa, NULL));
+        sa.sa_flags = SA_NOCLDSTOP;
+        CHECK(int, 0, sigaction(SIGCHLD, &sa, NULL));
+
+        CHECK(int, 0, sigprocmask(SIG_SETMASK, &pset, NULL));
+
+        start_workers();
+
+        char sbuf[64];
+        while (alive || living_workers()) {
+                ssize_t n = read(pipefds[0], &sbuf, sizeof(sbuf));
+                if (n < 0) {
+                        if (errno == EINTR) continue;
+                        err(EXIT_FAILURE, "read");
+                } else if (n == 0) {
+                        errx(EXIT_FAILURE, "read EOF");
+                }
+                do_sigchld();
+                for (ssize_t i = 0; i < n; i++) {
+                        switch (sbuf[i]) {
+                        case '.': break; // do_sigchld already called
+                        case '-': do_sigttou(); break;
+                        case '+': do_sigttin(); break;
+                        case '#': parent_reopen_logs(); break;
+                        default: errx(EXIT_FAILURE, "BUG: c=%c", sbuf[i]);
+                        }
+                }
+        }
+
+        return 0;
+}
diff --git a/lib/PublicInbox/xh_cidx.h b/lib/PublicInbox/xh_cidx.h
new file mode 100644
index 00000000..311ca05f
--- /dev/null
+++ b/lib/PublicInbox/xh_cidx.h
@@ -0,0 +1,277 @@
+// Copyright (C) all contributors <meta@public-inbox.org>
+// License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+// This file is only intended to be included by xap_helper.h
+// it implements pieces used by CodeSearchIdx.pm
+
+static void term_length_extract(struct req *req)
+{
+        req->lenv = (size_t *)calloc(req->pfxc, sizeof(size_t));
+        if (!req->lenv)
+                EABORT("lenv = calloc(%d %zu)", req->pfxc, sizeof(size_t));
+        for (int i = 0; i < req->pfxc; i++) {
+                char *pfx = req->pfxv[i];
+                // extract trailing digits as length:
+                // $len = s/([0-9]+)\z// ? ($1+0) : 0
+                for (size_t j = 0; pfx[j]; j++) {
+                        if (pfx[j] < '0' || pfx[j] > '9')
+                                continue;
+                        if (j == 0) {
+                                warnx("W: `%s' not a valid prefix", pfx);
+                                continue;
+                        }
+                        char *end;
+                        unsigned long long tmp = strtoull(pfx + j, &end, 10);
+                        if (*end || tmp >= (unsigned long long)SIZE_MAX) {
+                                warnx("W: `%s' not recognized", pfx);
+                        } else {
+                                req->lenv[i] = (size_t)tmp;
+                                pfx[j] = 0;
+                                break;
+                        }
+                }
+        }
+}
+
+static void dump_ibx_term(struct req *req, int p,
+                        Xapian::Document *doc, const char *ibx_id)
+{
+        Xapian::TermIterator cur = doc->termlist_begin();
+        Xapian::TermIterator end = doc->termlist_end();
+        const char *pfx = req->pfxv[p];
+        size_t pfx_len = strlen(pfx);
+        size_t term_len = req->lenv[p];
+
+        for (cur.skip_to(pfx); cur != end; cur++) {
+                std::string tn = *cur;
+                if (!starts_with(&tn, pfx, pfx_len)) break;
+                if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+                        continue;
+                fprintf(req->fp[0], "%s %s\n", tn.c_str() + pfx_len, ibx_id);
+                ++req->nr_out;
+        }
+}
+
+static enum exc_iter dump_ibx_iter(struct req *req, const char *ibx_id,
+                                Xapian::MSetIterator *i)
+{
+        try {
+                Xapian::Document doc = i->get_document();
+                for (int p = 0; p < req->pfxc; p++)
+                        dump_ibx_term(req, p, &doc, ibx_id);
+        } catch (const Xapian::DatabaseModifiedError & e) {
+                req->srch->db->reopen();
+                return ITER_RETRY;
+        } catch (const Xapian::DocNotFoundError & e) { // oh well...
+                warnx("doc not found: %s", e.get_description().c_str());
+        }
+        return ITER_OK;
+}
+
+static bool cmd_dump_ibx(struct req *req)
+{
+        if ((optind + 1) >= req->argc)
+                ABORT("usage: dump_ibx [OPTIONS] IBX_ID QRY_STR");
+        if (!req->pfxc)
+                ABORT("dump_ibx requires -A PREFIX");
+
+        const char *ibx_id = req->argv[optind];
+        if (my_setlinebuf(req->fp[0])) // for sort(1) pipe
+                EABORT("setlinebuf(fp[0])"); // WTF?
+        req->asc = true;
+        req->sort_col = -1;
+        term_length_extract(req);
+        Xapian::MSet mset = mail_mset(req, req->argv[optind + 1]);
+
+        // @UNIQ_FOLD in CodeSearchIdx.pm can handle duplicate lines fine
+        // in case we need to retry on DB reopens
+        for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
+                for (int t = 10; t > 0; --t)
+                        switch (dump_ibx_iter(req, ibx_id, &i)) {
+                        case ITER_OK: t = 0; break; // leave inner loop
+                        case ITER_RETRY: break; // continue for-loop
+                        case ITER_ABORT: return false; // error
+                        }
+        }
+        emit_mset_stats(req, &mset);
+        return true;
+}
+
+struct dump_roots_tmp {
+        struct stat sb;
+        void *mm_ptr;
+        char **entries;
+        struct fbuf wbuf;
+        int root2off_fd;
+};
+
+#define CLEANUP_DUMP_ROOTS __attribute__((__cleanup__(dump_roots_ensure)))
+static void dump_roots_ensure(void *ptr)
+{
+        struct dump_roots_tmp *drt = (struct dump_roots_tmp *)ptr;
+        if (drt->root2off_fd >= 0)
+                xclose(drt->root2off_fd);
+        hdestroy(); // idempotent
+        size_t size = off2size(drt->sb.st_size);
+        if (drt->mm_ptr && munmap(drt->mm_ptr, size))
+                EABORT("BUG: munmap(%p, %zu)", drt->mm_ptr, size);
+        free(drt->entries);
+        fbuf_ensure(&drt->wbuf);
+}
+
+static bool root2offs_str(struct fbuf *root_offs, Xapian::Document *doc)
+{
+        Xapian::TermIterator cur = doc->termlist_begin();
+        Xapian::TermIterator end = doc->termlist_end();
+        ENTRY e, *ep;
+        fbuf_init(root_offs);
+        for (cur.skip_to("G"); cur != end; cur++) {
+                std::string tn = *cur;
+                if (!starts_with(&tn, "G", 1)) break;
+                union { const char *in; char *out; } u;
+                u.in = tn.c_str() + 1;
+                e.key = u.out;
+                ep = hsearch(e, FIND);
+                if (!ep) ABORT("hsearch miss `%s'", e.key);
+                // ep->data is a NUL-terminated string matching /[0-9]+/
+                fputc(' ', root_offs->fp);
+                fputs((const char *)ep->data, root_offs->fp);
+        }
+        fputc('\n', root_offs->fp);
+        ERR_CLOSE(root_offs->fp, EXIT_FAILURE); // ENOMEM
+        root_offs->fp = NULL;
+        return true;
+}
+
+// writes term values matching @pfx for a given @doc, ending the line
+// with the contents of @root_offs
+static void dump_roots_term(struct req *req, int p,
+                                struct dump_roots_tmp *drt,
+                                struct fbuf *root_offs,
+                                Xapian::Document *doc)
+{
+        Xapian::TermIterator cur = doc->termlist_begin();
+        Xapian::TermIterator end = doc->termlist_end();
+        const char *pfx = req->pfxv[p];
+        size_t pfx_len = strlen(pfx);
+        size_t term_len = req->lenv[p];
+
+        for (cur.skip_to(pfx); cur != end; cur++) {
+                std::string tn = *cur;
+                if (!starts_with(&tn, pfx, pfx_len)) break;
+                if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+                        continue;
+                fputs(tn.c_str() + pfx_len, drt->wbuf.fp);
+                fwrite(root_offs->ptr, root_offs->len, 1, drt->wbuf.fp);
+                ++req->nr_out;
+        }
+}
+
+// we may have lines which exceed PIPE_BUF, so we do our own
+// buffering and rely on flock(2), here
+static bool dump_roots_flush(struct req *req, struct dump_roots_tmp *drt)
+{
+        bool ok = true;
+        off_t off = ftello(drt->wbuf.fp);
+        if (off < 0) EABORT("ftello");
+        if (!off) return ok;
+
+        ERR_FLUSH(drt->wbuf.fp); // ENOMEM
+        int fd = fileno(req->fp[0]);
+
+        while (flock(drt->root2off_fd, LOCK_EX)) {
+                if (errno == EINTR) continue;
+                err(EXIT_FAILURE, "LOCK_EX"); // ENOLCK?
+        }
+        ok = write_all(fd, &drt->wbuf, (size_t)off);
+        while (flock(drt->root2off_fd, LOCK_UN)) {
+                if (errno == EINTR) continue;
+                err(EXIT_FAILURE, "LOCK_UN"); // ENOLCK?
+        }
+        if (fseeko(drt->wbuf.fp, 0, SEEK_SET)) EABORT("fseeko");
+        return ok;
+}
+
+static enum exc_iter dump_roots_iter(struct req *req,
+                                struct dump_roots_tmp *drt,
+                                Xapian::MSetIterator *i)
+{
+        CLEANUP_FBUF struct fbuf root_offs = {}; // " $ID0 $ID1 $IDx..\n"
+        try {
+                Xapian::Document doc = i->get_document();
+                if (!root2offs_str(&root_offs, &doc))
+                        return ITER_ABORT; // bad request, abort
+                for (int p = 0; p < req->pfxc; p++)
+                        dump_roots_term(req, p, drt, &root_offs, &doc);
+        } catch (const Xapian::DatabaseModifiedError & e) {
+                req->srch->db->reopen();
+                return ITER_RETRY;
+        } catch (const Xapian::DocNotFoundError & e) { // oh well...
+                warnx("doc not found: %s", e.get_description().c_str());
+        }
+        return ITER_OK;
+}
+
+static bool cmd_dump_roots(struct req *req)
+{
+        CLEANUP_DUMP_ROOTS struct dump_roots_tmp drt = {};
+        drt.root2off_fd = -1;
+        if ((optind + 1) >= req->argc)
+                ABORT("usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR");
+        if (!req->pfxc)
+                ABORT("dump_roots requires -A PREFIX");
+        const char *root2off_file = req->argv[optind];
+        drt.root2off_fd = open(root2off_file, O_RDONLY);
+        if (drt.root2off_fd < 0)
+                EABORT("open(%s)", root2off_file);
+        if (fstat(drt.root2off_fd, &drt.sb)) // ENOMEM?
+                err(EXIT_FAILURE, "fstat(%s)", root2off_file);
+        // each entry is at least 43 bytes ({OIDHEX}\0{INT}\0),
+        // so /32 overestimates the number of expected entries by
+        // ~%25 (as recommended by Linux hcreate(3) manpage)
+        size_t size = off2size(drt.sb.st_size);
+        size_t est = (size / 32) + 1; //+1 for "\0" termination
+        drt.mm_ptr = mmap(NULL, size, PROT_READ,
+                                MAP_PRIVATE, drt.root2off_fd, 0);
+        if (drt.mm_ptr == MAP_FAILED)
+                err(EXIT_FAILURE, "mmap(%zu, %s)", size, root2off_file);
+        size_t asize = est * 2;
+        if (asize < est) ABORT("too many entries: %zu", est);
+        drt.entries = (char **)calloc(asize, sizeof(char *));
+        if (!drt.entries)
+                err(EXIT_FAILURE, "calloc(%zu * 2, %zu)", est, sizeof(char *));
+        size_t tot = split2argv(drt.entries, (char *)drt.mm_ptr, size, asize);
+        if (tot <= 0) return false; // split2argv already warned on error
+        if (!hcreate(est))
+                err(EXIT_FAILURE, "hcreate(%zu)", est);
+        for (size_t i = 0; i < tot; ) {
+                ENTRY e;
+                e.key = hsearch_enter_key(drt.entries[i++]); // dies on ENOMEM
+                e.data = drt.entries[i++];
+                if (!hsearch(e, ENTER))
+                        err(EXIT_FAILURE, "hsearch(%s => %s, ENTER)", e.key,
+                                        (const char *)e.data);
+        }
+        req->asc = true;
+        req->sort_col = -1;
+        Xapian::MSet mset = commit_mset(req, req->argv[optind + 1]);
+        term_length_extract(req);
+
+        fbuf_init(&drt.wbuf);
+
+        // @UNIQ_FOLD in CodeSearchIdx.pm can handle duplicate lines fine
+        // in case we need to retry on DB reopens
+        for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
+                for (int t = 10; t > 0; --t)
+                        switch (dump_roots_iter(req, &drt, &i)) {
+                        case ITER_OK: t = 0; break; // leave inner loop
+                        case ITER_RETRY: break; // continue for-loop
+                        case ITER_ABORT: return false; // error
+                        }
+                if (!(req->nr_out & 0x3fff) && !dump_roots_flush(req, &drt))
+                        return false;
+        }
+        if (!dump_roots_flush(req, &drt))
+                return false;
+        emit_mset_stats(req, &mset);
+        return true;
+}
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
new file mode 100644
index 00000000..db2692c9
--- /dev/null
+++ b/lib/PublicInbox/xh_mset.h
@@ -0,0 +1,53 @@
+// Copyright (C) all contributors <meta@public-inbox.org>
+// License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+// This file is only intended to be included by xap_helper.h
+// it implements pieces used by WWW, IMAP and lei
+
+#ifndef WBUF_FLUSH_THRESHOLD
+#        define WBUF_FLUSH_THRESHOLD (BUFSIZ - 1000)
+#endif
+#if WBUF_FLUSH_THRESHOLD < 0
+#        undef WBUF_FLUSH_THRESHOLD
+#        define WBUF_FLUSH_THRESHOLD BUFSIZ
+#endif
+
+static bool cmd_mset(struct req *req)
+{
+        if (optind >= req->argc) ABORT("usage: mset [OPTIONS] WANT QRY_STR");
+        if (req->fp[1]) ABORT("mset only accepts 1 FD");
+        const char *qry_str = req->argv[optind];
+        CLEANUP_FBUF struct fbuf wbuf = {};
+        Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) :
+                                                mail_mset(req, qry_str);
+        fbuf_init(&wbuf);
+        fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n",
+                (unsigned long long)mset.size(),
+                (unsigned long long)mset.get_matches_estimated());
+        int fd = fileno(req->fp[0]);
+        for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
+                off_t off = ftello(wbuf.fp);
+                if (off < 0) EABORT("ftello");
+                /*
+                 * TODO verify our fflush + fseeko use isn't affected by a
+                 * glibc <2.25 bug:
+                 * https://sourceware.org/bugzilla/show_bug.cgi?id=20181
+                 * CentOS 7.x only has glibc 2.17.  In any case, bug #20181
+                 * shouldn't affect us since our use of fseeko is used to
+                 * effectively discard data.
+                 */
+                if (off > WBUF_FLUSH_THRESHOLD) {
+                        ERR_FLUSH(wbuf.fp);
+                        if (!write_all(fd, &wbuf, (size_t)off)) return false;
+                        if (fseeko(wbuf.fp, 0, SEEK_SET)) EABORT("fseeko");
+                        off = 0;
+                }
+                fprintf(wbuf.fp, "%llu" "%c" "%d" "%c" "%llu\n",
+                        (unsigned long long)(*i), // get_docid
+                        0, i.get_percent(),
+                        0, (unsigned long long)i.get_rank());
+        }
+        off_t off = ftello(wbuf.fp);
+        if (off < 0) EABORT("ftello");
+        ERR_FLUSH(wbuf.fp);
+        return off > 0 ? write_all(fd, &wbuf, (size_t)off) : true;
+}