about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Admin.pm 122
-rw-r--r-- lib/PublicInbox/Cgit.pm 24
-rw-r--r-- lib/PublicInbox/Config.pm 114
-rw-r--r-- lib/PublicInbox/DS.pm 49
-rw-r--r-- lib/PublicInbox/DSKQXS.pm 2
-rw-r--r-- lib/PublicInbox/DSPoll.pm 3
-rw-r--r-- lib/PublicInbox/Daemon.pm 28
-rw-r--r-- lib/PublicInbox/DummyInbox.pm 6
-rw-r--r-- lib/PublicInbox/ExtMsg.pm 51
-rw-r--r-- lib/PublicInbox/ExtSearch.pm 129
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm 1105
-rw-r--r-- lib/PublicInbox/Feed.pm 8
-rw-r--r-- lib/PublicInbox/Filter/RubyLang.pm 2
-rw-r--r-- lib/PublicInbox/Gcf2.pm 110
-rw-r--r-- lib/PublicInbox/Gcf2Client.pm 69
-rw-r--r-- lib/PublicInbox/Git.pm 183
-rw-r--r-- lib/PublicInbox/GitAsyncCat.pm 92
-rw-r--r-- lib/PublicInbox/GzipFilter.pm 4
-rw-r--r-- lib/PublicInbox/IMAP.pm 47
-rw-r--r-- lib/PublicInbox/IMAPD.pm 49
-rw-r--r-- lib/PublicInbox/IdxStack.pm 18
-rw-r--r-- lib/PublicInbox/Import.pm 9
-rw-r--r-- lib/PublicInbox/Inbox.pm 147
-rw-r--r-- lib/PublicInbox/InboxIdle.pm 22
-rw-r--r-- lib/PublicInbox/InboxWritable.pm 22
-rw-r--r-- lib/PublicInbox/Isearch.pm 127
-rw-r--r-- lib/PublicInbox/MDA.pm 4
-rw-r--r-- lib/PublicInbox/ManifestJsGz.pm 112
-rw-r--r-- lib/PublicInbox/Mbox.pm 75
-rw-r--r-- lib/PublicInbox/MboxGz.pm 6
-rw-r--r-- lib/PublicInbox/MiscIdx.pm 151
-rw-r--r-- lib/PublicInbox/MiscSearch.pm 191
-rw-r--r-- lib/PublicInbox/Msgmap.pm 7
-rw-r--r-- lib/PublicInbox/NNTP.pm 432
-rw-r--r-- lib/PublicInbox/NNTPD.pm 54
-rw-r--r-- lib/PublicInbox/NewsWWW.pm 39
-rw-r--r-- lib/PublicInbox/Over.pm 21
-rw-r--r-- lib/PublicInbox/OverIdx.pm 188
-rw-r--r-- lib/PublicInbox/Qspawn.pm 4
-rw-r--r-- lib/PublicInbox/Search.pm 124
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 318
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm 113
-rw-r--r-- lib/PublicInbox/SearchThread.pm 4
-rw-r--r-- lib/PublicInbox/SearchView.pm 27
-rw-r--r-- lib/PublicInbox/Smsg.pm 18
-rw-r--r-- lib/PublicInbox/SolverGit.pm 2
-rw-r--r-- lib/PublicInbox/Spamcheck.pm 4
-rw-r--r-- lib/PublicInbox/Syscall.pm 66
-rw-r--r-- lib/PublicInbox/Tmpfile.pm 7
-rw-r--r-- lib/PublicInbox/Unsubscribe.pm 15
-rw-r--r-- lib/PublicInbox/V2Writable.pm 508
-rw-r--r-- lib/PublicInbox/View.pm 22
-rw-r--r-- lib/PublicInbox/ViewVCS.pm 2
-rw-r--r-- lib/PublicInbox/WWW.pm 50
-rw-r--r-- lib/PublicInbox/Watch.pm 26
-rw-r--r-- lib/PublicInbox/WwwAltId.pm 2
-rw-r--r-- lib/PublicInbox/WwwAtomStream.pm 10
-rw-r--r-- lib/PublicInbox/WwwAttach.pm 10
-rw-r--r-- lib/PublicInbox/WwwListing.pm 7
-rw-r--r-- lib/PublicInbox/WwwStream.pm 46
-rw-r--r-- lib/PublicInbox/WwwText.pm 14
-rw-r--r-- lib/PublicInbox/Xapcmd.pm 4
-rw-r--r-- lib/PublicInbox/gcf2_libgit2.h 142
63 files changed, 4095 insertions, 1272 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index fb88e621..d414e4e2 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -6,15 +6,15 @@
 package PublicInbox::Admin;
 use strict;
 use parent qw(Exporter);
-use Cwd qw(abs_path);
-use POSIX ();
-our @EXPORT_OK = qw(resolve_repo_dir setup_signals);
+our @EXPORT_OK = qw(setup_signals);
 use PublicInbox::Config;
 use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
+*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
 
 sub setup_signals {
         my ($cb, $arg) = @_; # optional
+        require POSIX;
 
         # we call exit() here instead of _exit() so DESTROY methods
         # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
@@ -27,21 +27,34 @@ sub setup_signals {
         };
 }
 
-sub resolve_repo_dir {
+sub resolve_inboxdir {
         my ($cd, $ver) = @_;
-        my $prefix = defined $cd ? $cd : './';
-        if (-d $prefix && -f "$prefix/inbox.lock") { # v2
-                $$ver = 2 if $ver;
-                return abs_path($prefix);
+        my $try = $cd // '.';
+        my $root_dev_ino;
+        while (1) { # favor v2, first
+                if (-f "$try/inbox.lock") {
+                        $$ver = 2 if $ver;
+                        return rel2abs_collapsed($try);
+                } elsif (-d $try) {
+                        my @try = stat _;
+                        $root_dev_ino //= do {
+                                my @root = stat('/') or die "stat /: $!\n";
+                                "$root[0]\0$root[1]";
+                        };
+                        last if "$try[0]\0$try[1]" eq $root_dev_ino;
+                        $try .= '/..'; # continue, cd up
+                } else {
+                        die "`$try' is not a directory\n";
+                }
         }
+        # try v1 bare git dirs
         my $cmd = [ qw(git rev-parse --git-dir) ];
         my $fh = popen_rd($cmd, undef, {-C => $cd});
         my $dir = do { local $/; <$fh> };
-        close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
+        close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n";
         chomp $dir;
         $$ver = 1 if $ver;
-        return abs_path($cd) if ($dir eq '.' && defined $cd);
-        abs_path($dir);
+        rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir);
 }
 
 # for unconfigured inboxes
@@ -78,8 +91,8 @@ sub unconfigured_ibx ($$) {
                 name => $name,
                 address => [ "$name\@example.com" ],
                 inboxdir => $dir,
-                # TODO: consumers may want to warn on this:
-                #-unconfigured => 1,
+                # consumers (-convert) warn on this:
+                -unconfigured => 1,
         });
 }
 
@@ -95,40 +108,53 @@ sub resolve_inboxes ($;$$) {
         }
 
         my $min_ver = $opt->{-min_inbox_version} || 0;
+        # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+        # pathnames are not unique due to symlinks and bind mounts
         my (@old, @ibxs);
-        my %dir2ibx;
-        if ($cfg) {
+        if ($opt->{all}) {
                 $cfg->each_inbox(sub {
                         my ($ibx) = @_;
-                        my $path = abs_path($ibx->{inboxdir});
-                        if (defined($path)) {
-                                $dir2ibx{$path} = $ibx;
+                        if (-e $ibx->{inboxdir}) {
+                                push(@ibxs, $ibx) if $ibx->version >= $min_ver;
                         } else {
-                                warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+                                warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
                         }
                 });
-        }
-        if ($opt->{all}) {
-                my @all = values %dir2ibx;
-                @all = grep { $_->version >= $min_ver } @all;
-                push @ibxs, @all;
         } else { # directories specified on the command-line
-                my $i = 0;
                 my @dirs = @$argv;
-                push @dirs, '.' unless @dirs;
-                foreach (@dirs) {
-                        my $v;
-                        my $dir = resolve_repo_dir($_, \$v);
-                        if ($v < $min_ver) {
+                push @dirs, '.' if !@dirs && $opt->{-use_cwd};
+                my %s2i; # "st_dev\0st_ino" => array index
+                for (my $i = 0; $i <= $#dirs; $i++) {
+                        my $dir = $dirs[$i];
+                        my @st = stat($dir) or die "stat($dir): $!\n";
+                        $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver));
+                        if ($ver >= $min_ver) {
+                                $s2i{"$st[0]\0$st[1]"} //= $i;
+                        } else {
                                 push @old, $dir;
-                                next;
                         }
-                        my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
-                        $i++;
-                        push @ibxs, $ibx;
                 }
+                my $done = \'done';
+                eval {
+                        $cfg->each_inbox(sub {
+                                my ($ibx) = @_;
+                                return if $ibx->version < $min_ver;
+                                my $dir = $ibx->{inboxdir};
+                                if (my @s = stat $dir) {
+                                        my $i = delete($s2i{"$s[0]\0$s[1]"})
+                                                // return;
+                                        $ibxs[$i] = $ibx;
+                                        die $done if !keys(%s2i);
+                                } else {
+                                        warn "W: $ibx->{name} $dir: $!\n";
+                                }
+                        });
+                };
+                die $@ if $@ && $@ ne $done;
+                for my $i (sort { $a <=> $b } values %s2i) {
+                        $ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+                }
+                @ibxs = grep { defined } @ibxs; # duplicates are undef
         }
         if (@old) {
                 die "-V$min_ver inboxes not supported by $0\n\t",
@@ -208,12 +234,20 @@ sub index_terminate {
 
 sub index_inbox {
         my ($ibx, $im, $opt) = @_;
+        require PublicInbox::InboxWritable;
         my $jobs = delete $opt->{jobs} if $opt;
         if (my $pr = $opt->{-progress}) {
                 $pr->("indexing $ibx->{inboxdir} ...\n");
         }
         local %SIG = %SIG;
         setup_signals(\&index_terminate, $ibx);
+        my $warn_cb = $SIG{__WARN__} // \&CORE::warn;
+        my $idx = { current_info => $ibx->{inboxdir} };
+        my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore');
+        local $SIG{__WARN__} = sub {
+                return if $warn_ignore->(@_);
+                $warn_cb->($idx->{current_info}, ': ', @_);
+        };
         if (ref($ibx) && $ibx->version == 2) {
                 eval { require PublicInbox::V2Writable };
                 die "v2 requirements not met: $@\n" if $@;
@@ -225,21 +259,19 @@ sub index_inbox {
                         } else {
                                 my $n = $v2w->{shards};
                                 if ($jobs < ($n + 1) && !$opt->{reshard}) {
-                                        warn
-"Unable to respect --jobs=$jobs on index, inbox was created with $n shards\n";
+                                        warn <<EOM;
+Unable to respect --jobs=$jobs on index, inbox was created with $n shards
+EOM
                                 }
                         }
                 }
-                my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
-                local $SIG{__WARN__} = sub {
-                        $warn_cb->($v2w->{current_info}, ': ', @_);
-                };
-                $v2w->index_sync($opt);
+                $idx = $v2w;
         } else {
                 require PublicInbox::SearchIdx;
-                my $s = PublicInbox::SearchIdx->new($ibx, 1);
-                $s->index_sync($opt);
+                $idx = PublicInbox::SearchIdx->new($ibx, 1);
         }
+        $idx->index_sync($opt);
+        $idx->{nidx} // 0; # returns number processed
 }
 
 sub progress_prepare ($) {
diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index fb0d0e60..472509a8 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -16,9 +16,9 @@ use PublicInbox::Qspawn;
 use PublicInbox::WwwStatic qw(r);
 
 sub locate_cgit ($) {
-        my ($pi_config) = @_;
-        my $cgit_bin = $pi_config->{'publicinbox.cgitbin'};
-        my $cgit_data = $pi_config->{'publicinbox.cgitdata'};
+        my ($pi_cfg) = @_;
+        my $cgit_bin = $pi_cfg->{'publicinbox.cgitbin'};
+        my $cgit_data = $pi_cfg->{'publicinbox.cgitdata'};
 
         # /var/www/htdocs/cgit is the default install path from cgit.git
         # /usr/{lib,share}/cgit is where Debian puts cgit
@@ -51,28 +51,28 @@ sub locate_cgit ($) {
 }
 
 sub new {
-        my ($class, $pi_config) = @_;
-        my ($cgit_bin, $cgit_data) = locate_cgit($pi_config);
+        my ($class, $pi_cfg) = @_;
+        my ($cgit_bin, $cgit_data) = locate_cgit($pi_cfg);
 
         my $self = bless {
                 cmd => [ $cgit_bin ],
                 cgit_data => $cgit_data,
-                pi_config => $pi_config,
+                pi_cfg => $pi_cfg,
         }, $class;
 
-        $pi_config->fill_all; # fill in -code_repos mapped to inboxes
+        $pi_cfg->fill_all; # fill in -code_repos mapped to inboxes
 
         # some cgit repos may not be mapped to inboxes, so ensure those exist:
-        my $code_repos = $pi_config->{-code_repos};
-        foreach my $k (keys %$pi_config) {
+        my $code_repos = $pi_cfg->{-code_repos};
+        foreach my $k (keys %$pi_cfg) {
                 $k =~ /\Acoderepo\.(.+)\.dir\z/ or next;
-                my $dir = $pi_config->{$k};
+                my $dir = $pi_cfg->{$k};
                 $code_repos->{$1} ||= PublicInbox::Git->new($dir);
         }
         while (my ($nick, $repo) = each %$code_repos) {
                 $self->{"\0$nick"} = $repo;
         }
-        my $cgit_static = $pi_config->{-cgit_static};
+        my $cgit_static = $pi_cfg->{-cgit_static};
         my $static = join('|', map { quotemeta $_ } keys %$cgit_static);
         $self->{static} = qr/\A($static)\z/;
         $self;
@@ -120,7 +120,7 @@ sub call {
 
         my $rdr = input_prepare($env) or return r(500);
         my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr);
-        my $limiter = $self->{pi_config}->limiter('-cgit');
+        my $limiter = $self->{pi_cfg}->limiter('-cgit');
         $qsp->psgi_return($env, $limiter, $parse_cgi_headers);
 }
 
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index d57c361a..21f2161a 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -33,6 +33,7 @@ sub new {
         $self->{-by_list_id} = {};
         $self->{-by_name} = {};
         $self->{-by_newsgroup} = {};
+        $self->{-by_eidx_key} = {};
         $self->{-no_obfuscate} = {};
         $self->{-limiters} = {};
         $self->{-code_repos} = {}; # nick => PublicInbox::Git object
@@ -89,6 +90,14 @@ sub lookup_name ($$) {
         $self->{-by_name}->{$name} // _fill($self, "publicinbox.$name");
 }
 
+sub lookup_ei {
+        my ($self, $name) = @_;
+        $self->{-ei_by_name}->{$name} //= _fill_ei($self, "extindex.$name");
+}
+
+# special case for [extindex "all"]
+sub ALL { lookup_ei($_[0], 'all') }
+
 sub each_inbox {
         my ($self, $cb, @arg) = @_;
         # may auto-vivify if config file is non-existent:
@@ -123,20 +132,16 @@ sub default_file {
 
 sub config_fh_parse ($$$) {
         my ($fh, $rs, $fs) = @_;
-        my %rv;
-        my (%section_seen, @section_order);
+        my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i);
         local $/ = $rs;
-        while (defined(my $line = <$fh>)) {
-                chomp $line;
-                my ($k, $v) = split($fs, $line, 2);
-                my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
-                unless (defined $section_seen{$section}) {
-                        $section_seen{$section} = 1;
-                        push @section_order, $section;
-                }
-
-                my $cur = $rv{$k};
-                if (defined $cur) {
+        while (defined($line = <$fh>)) { # perf critical with giant configs
+                $i = index($line, $fs);
+                $k = substr($line, 0, $i);
+                $v = substr($line, $i + 1, -1); # skip $fs, drop trailing $rs
+                $section = substr($k, 0, rindex($k, '.'));
+                $seen{$section} //= push(@section_order, $section);
+
+                if (defined($cur = $rv{$k})) {
                         if (ref($cur) eq "ARRAY") {
                                 push @$cur, $v;
                         } else {
@@ -154,11 +159,10 @@ sub config_fh_parse ($$$) {
 sub git_config_dump {
         my ($file) = @_;
         return {} unless -e $file;
-        my @cmd = (qw/git config -z -l --includes/, "--file=$file");
-        my $cmd = join(' ', @cmd);
-        my $fh = popen_rd(\@cmd);
+        my $cmd = [ qw(git config -z -l --includes), "--file=$file" ];
+        my $fh = popen_rd($cmd);
         my $rv = config_fh_parse($fh, "\0", "\n");
-        close $fh or die "failed to close ($cmd) pipe: $?";
+        close $fh or die "failed to close (@$cmd) pipe: $?";
         $rv;
 }
 
@@ -360,6 +364,16 @@ sub git_bool {
         }
 }
 
+# abs_path resolves symlinks, so we want to avoid it if rel2abs
+# is sufficient and doesn't leave "/.." or "/../"
+sub rel2abs_collapsed {
+        require File::Spec;
+        my $p = File::Spec->rel2abs($_[-1]);
+        return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0;
+        require Cwd;
+        Cwd::abs_path($p);
+}
+
 sub _fill {
         my ($self, $pfx) = @_;
         my $ibx = {};
@@ -382,10 +396,10 @@ EOF
                 }
         }
 
-        # backwards compatibility:
-        $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"};
-        if (($ibx->{inboxdir} // '') =~ /\n/s) {
-                warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n";
+        # "mainrepo" is backwards compatibility:
+        my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return;
+        if (index($dir, "\n") >= 0) {
+                warn "E: `$dir' must not contain `\\n'\n";
                 return;
         }
         foreach my $k (qw(obfuscate)) {
@@ -406,17 +420,14 @@ EOF
                 }
         }
 
-        return unless defined($ibx->{inboxdir});
-        my $name = $pfx;
-        $name =~ s/\Apublicinbox\.//;
-
+        my $name = substr($pfx, length('publicinbox.'));
         if (!valid_inbox_name($name)) {
                 warn "invalid inbox name: '$name'\n";
                 return;
         }
 
         $ibx->{name} = $name;
-        $ibx->{-pi_config} = $self;
+        $ibx->{-pi_cfg} = $self;
         $ibx = PublicInbox::Inbox->new($ibx);
         foreach (@{$ibx->{address}}) {
                 my $lc_addr = lc($_);
@@ -429,8 +440,31 @@ EOF
                         $self->{-by_list_id}->{lc($list_id)} = $ibx;
                 }
         }
-        if (my $ng = $ibx->{newsgroup}) {
-                $self->{-by_newsgroup}->{$ng} = $ibx;
+        if (defined(my $ngname = $ibx->{newsgroup})) {
+                if (ref($ngname)) {
+                        delete $ibx->{newsgroup};
+                        warn 'multiple newsgroups not supported: '.
+                                join(', ', @$ngname). "\n";
+                # Newsgroup name needs to be compatible with RFC 3977
+                # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
+                # Leave out a few chars likely to cause problems or conflicts:
+                # '|', '<', '>', ';', '#', '$', '&',
+                } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! ||
+                                $ngname eq '') {
+                        delete $ibx->{newsgroup};
+                        warn "newsgroup name invalid: `$ngname'\n";
+                } else {
+                        # PublicInbox::NNTPD does stricter ->nntp_usable
+                        # checks, keep this lean for startup speed
+                        $self->{-by_newsgroup}->{$ngname} = $ibx;
+                }
+        }
+        unless (defined $ibx->{newsgroup}) { # for ->eidx_key
+                my $abs = rel2abs_collapsed($dir);
+                if ($abs ne $dir) {
+                        warn "W: `$dir' canonicalized to `$abs'\n";
+                        $ibx->{inboxdir} = $abs;
+                }
         }
         $self->{-by_name}->{$name} = $ibx;
         if ($ibx->{obfuscate}) {
@@ -453,8 +487,18 @@ EOF
                         push @$repo_objs, $repo if $repo;
                 }
         }
+        if (my $es = ALL($self)) {
+                require PublicInbox::Isearch;
+                $ibx->{isrch} = PublicInbox::Isearch->new($ibx, $es);
+        }
+        $self->{-by_eidx_key}->{$ibx->eidx_key} = $ibx;
+}
 
-        $ibx
+sub _fill_ei ($$) {
+        my ($self, $pfx) = @_;
+        require PublicInbox::ExtSearch;
+        my $d = $self->{"$pfx.topdir"};
+        defined($d) && -d $d ? PublicInbox::ExtSearch->new($d) : undef;
 }
 
 sub urlmatch {
@@ -476,4 +520,16 @@ sub urlmatch {
         }
 }
 
+sub json {
+        state $json;
+        $json //= do {
+                for my $mod (qw(Cpanel::JSON::XS JSON::MaybeXS JSON JSON::PP)) {
+                        eval "require $mod" or next;
+                        # ->ascii encodes non-ASCII to "\uXXXX"
+                        $json = $mod->new->ascii(1) and last;
+                }
+                $json;
+        };
+}
+
 1;
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index a02b3bb7..97a6f6ef 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -50,7 +50,6 @@ our (
      $PostLoopCallback,          # subref to call at the end of each loop, if defined (global)
 
      $LoopTimeout,               # timeout of event loop in milliseconds
-     $DoneInit,                  # if we've done the one-time module init yet
      @Timers,                    # timers
      $in_loop,
      );
@@ -75,12 +74,9 @@ sub Reset {
     @Timers = ();
 
     $PostLoopCallback = undef;
-    $DoneInit = 0;
 
     $_io = undef; # closes real $Epoll FD
     $Epoll = undef; # may call DSKQXS::DESTROY
-
-    *EventLoop = *FirstTimeEventLoop;
 }
 
 =head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
@@ -91,9 +87,7 @@ A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return
 immediately.
 
 =cut
-sub SetLoopTimeout {
-    return $LoopTimeout = $_[1] + 0;
-}
+sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 }
 
 =head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >>
 
@@ -137,14 +131,13 @@ sub set_cloexec ($) {
     fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
 }
 
+# caller sets return value to $Epoll
 sub _InitPoller
 {
-    return if $DoneInit;
-    $DoneInit = 1;
-
     if (PublicInbox::Syscall::epoll_defined())  {
-        $Epoll = epoll_create();
-        set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
+        my $fd = epoll_create();
+        set_cloexec($fd) if (defined($fd) && $fd >= 0);
+        $fd;
     } else {
         my $cls;
         for (qw(DSKQXS DSPoll)) {
@@ -152,9 +145,8 @@ sub _InitPoller
             last if eval "require $cls";
         }
         $cls->import(qw(epoll_ctl epoll_wait));
-        $Epoll = $cls->new;
+        $cls->new;
     }
-    *EventLoop = *EpollEventLoop;
 }
 
 =head2 C<< CLASS->EventLoop() >>
@@ -163,13 +155,6 @@ Start processing IO events. In most daemon programs this never exits. See
 C<PostLoopCallback> below for how to exit the loop.
 
 =cut
-sub FirstTimeEventLoop {
-    my $class = shift;
-
-    _InitPoller();
-
-    EventLoop($class);
-}
 
 sub now () { clock_gettime(CLOCK_MONOTONIC) }
 
@@ -213,12 +198,7 @@ sub RunTimers {
     my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
 
     # -1 is an infinite timeout, so prefer a real timeout
-    return $timeout     if $LoopTimeout == -1;
-
-    # otherwise pick the lower of our regular timeout and time until
-    # the next timer
-    return $LoopTimeout if $LoopTimeout < $timeout;
-    return $timeout;
+    ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout;
 }
 
 # We can't use waitpid(-1) safely here since it can hit ``, system(),
@@ -271,21 +251,21 @@ sub PostEventLoop () {
         $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1;
 }
 
-sub EpollEventLoop {
+sub EventLoop {
+    $Epoll //= _InitPoller();
     local $in_loop = 1;
+    my @events;
     do {
-        my @events;
-        my $i;
         my $timeout = RunTimers();
 
         # get up to 1000 events
-        my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
-        for ($i=0; $i<$evcount; $i++) {
+        epoll_wait($Epoll, 1000, $timeout, \@events);
+        for my $fd (@events) {
             # it's possible epoll_wait returned many events, including some at the end
             # that ones in the front triggered unregister-interest actions.  if we
             # can't find the %sock entry, it's because we're no longer interested
             # in that event.
-            $DescriptorMap{$events[$i]->[0]}->event_step;
+            $DescriptorMap{$fd}->event_step;
         }
     } while (PostEventLoop());
     _run_later();
@@ -330,8 +310,7 @@ sub new {
     $self->{sock} = $sock;
     my $fd = fileno($sock);
 
-    _InitPoller();
-
+    $Epoll //= _InitPoller();
 retry:
     if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
         if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm
index d1d3fe60..aa2c9168 100644
--- a/lib/PublicInbox/DSKQXS.pm
+++ b/lib/PublicInbox/DSKQXS.pm
@@ -134,7 +134,7 @@ sub epoll_wait {
                 }
         }
         # caller only cares for $events[$i]->[0]
-        scalar(@$events);
+        $_ = $_->[0] for @$events;
 }
 
 # kqueue is close-on-fork (not exec), so we must not close it
diff --git a/lib/PublicInbox/DSPoll.pm b/lib/PublicInbox/DSPoll.pm
index 1d9b51d9..a218f695 100644
--- a/lib/PublicInbox/DSPoll.pm
+++ b/lib/PublicInbox/DSPoll.pm
@@ -45,14 +45,13 @@ sub epoll_wait {
                         my $fd = $pset[$i++];
                         my $revents = $pset[$i++] or next;
                         delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT;
-                        push @$events, [ $fd ];
+                        push @$events, $fd;
                 }
                 my $nevents = scalar @$events;
                 if ($n != $nevents) {
                         warn "BUG? poll() returned $n, but got $nevents";
                 }
         }
-        $n;
 }
 
 1;
diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm
index 5fdcba14..1762be0b 100644
--- a/lib/PublicInbox/Daemon.pm
+++ b/lib/PublicInbox/Daemon.pm
@@ -11,7 +11,6 @@ use IO::Socket;
 use POSIX qw(WNOHANG :signal_h);
 use Socket qw(IPPROTO_TCP SOL_SOCKET);
 sub SO_ACCEPTFILTER () { 0x1000 }
-use Cwd qw/abs_path/;
 STDOUT->autoflush(1);
 STDERR->autoflush(1);
 use PublicInbox::DS qw(now);
@@ -19,6 +18,7 @@ use PublicInbox::Syscall qw($SFD_NONBLOCK);
 require PublicInbox::Listener;
 use PublicInbox::EOFpipe;
 use PublicInbox::Sigfd;
+use PublicInbox::GitAsyncCat;
 my @CMD;
 my ($set_user, $oldset);
 my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize);
@@ -201,10 +201,11 @@ sub check_absolute ($$) {
 
 sub daemonize () {
         if ($daemonize) {
+                require Cwd;
                 foreach my $i (0..$#ARGV) {
                         my $arg = $ARGV[$i];
                         next unless -e $arg;
-                        $ARGV[$i] = abs_path($arg);
+                        $ARGV[$i] = Cwd::abs_path($arg);
                 }
                 check_absolute('stdout', $stdout);
                 check_absolute('stderr', $stderr);
@@ -236,8 +237,7 @@ EOF
         };
 
         if ($daemonize) {
-                my $pid = fork;
-                die "could not fork: $!\n" unless defined $pid;
+                my $pid = fork // die "fork: $!";
                 exit if $pid;
 
                 open(STDIN, '+<', '/dev/null') or
@@ -245,8 +245,7 @@ EOF
                 open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n";
                 open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
                 POSIX::setsid();
-                $pid = fork;
-                die "could not fork: $!\n" unless defined $pid;
+                $pid = fork // die "fork: $!";
                 exit if $pid;
         }
         return unless defined $pid_file;
@@ -368,14 +367,12 @@ sub inherit ($) {
         foreach my $fd (3..$end) {
                 my $s = IO::Handle->new_from_fd($fd, 'r');
                 if (my $k = sockname($s)) {
-                        if ($s->blocking) {
-                                $s->blocking(0);
-                                warn <<"";
+                        my $prev_was_blocking = $s->blocking(0);
+                        warn <<"" if $prev_was_blocking;
 Inherited socket (fd=$fd) is blocking, making it non-blocking.
 Set 'NonBlocking = true' in the systemd.service unit to avoid stalled
 processes when multiple service instances start.
 
-                        }
                         $listener_names->{$k} = $s;
                         push @rv, $s;
                 } else {
@@ -422,11 +419,8 @@ sub upgrade { # $_[0] = signal name or number (unused)
 }
 
 sub kill_workers ($) {
-        my ($s) = @_;
-
-        while (my ($pid, $id) = each %pids) {
-                kill $s, $pid;
-        }
+        my ($sig) = @_;
+        kill $sig, keys(%pids);
 }
 
 sub upgrade_aborted ($) {
@@ -648,6 +642,10 @@ sub run ($$$;$) {
         daemon_prepare($default);
         my $af_default = $default =~ /:8080\z/ ? 'httpready' : undef;
         my $for_destroy = daemonize();
+
+        # localize GCF2C for tests:
+        local $PublicInbox::GitAsyncCat::GCF2C;
+
         daemon_loop($refresh, $post_accept, $tlsd, $af_default);
         PublicInbox::DS->Reset;
         # ->DESTROY runs when $for_destroy goes out-of-scope
diff --git a/lib/PublicInbox/DummyInbox.pm b/lib/PublicInbox/DummyInbox.pm
index 69b0b683..981043ce 100644
--- a/lib/PublicInbox/DummyInbox.pm
+++ b/lib/PublicInbox/DummyInbox.pm
@@ -7,16 +7,16 @@
 package PublicInbox::DummyInbox;
 use strict;
 
-sub created_at { 0 } # Msgmap::created_at
+sub uidvalidity { 0 } # Msgmap::created_at
 sub mm { shift }
 sub uid_range { [] } # Over::uid_range
 sub subscribe_unlock { undef };
 
 no warnings 'once';
-*max = \&created_at;
+*max = \&uidvalidity;
 *query_xover = \&uid_range;
 *over = \&mm;
-*search = *unsubscribe_unlock =
+*isrch = *search = *unsubscribe_unlock =
         *get_art = *description = *base_url = \&subscribe_unlock;
 
 1;
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 03faf3a1..4df885ab 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -32,8 +32,8 @@ sub PARTIAL_MAX () { 100 }
 sub search_partial ($$) {
         my ($ibx, $mid) = @_;
         return if length($mid) < $MIN_PARTIAL_LEN;
-        my $srch = $ibx->search or return;
-        my $opt = { limit => PARTIAL_MAX, mset => 2 };
+        my $srch = $ibx->search or return; # NOT ->isrch, we already try ->ALL
+        my $opt = { limit => PARTIAL_MAX, relevance => -1 };
         my @try = ("m:$mid*");
         my $chop = $mid;
         if ($chop =~ s/(\W+)(\w*)\z//) {
@@ -76,7 +76,7 @@ sub search_partial ($$) {
 sub ext_msg_i {
         my ($other, $ctx) = @_;
 
-        return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url;
+        return if $other->{name} eq $ctx->{ibx}->{name} || !$other->base_url;
 
         my $mm = $other->mm or return;
 
@@ -103,19 +103,48 @@ sub ext_msg_step {
         }
 }
 
+sub ext_msg_ALL ($) {
+        my ($ctx) = @_;
+        my $ALL = $ctx->{www}->{pi_cfg}->ALL or return;
+        my $by_eidx_key = $ctx->{www}->{pi_cfg}->{-by_eidx_key};
+        my $cur_key = eval { $ctx->{ibx}->eidx_key } //
+                        return partial_response($ctx); # $ctx->{ibx} == $ALL
+        my %seen = ($cur_key => 1);
+        my ($id, $prev);
+        while (my $x = $ALL->over->next_by_mid($ctx->{mid}, \$id, \$prev)) {
+                my $xr3 = $ALL->over->get_xref3($x->{num});
+                for my $k (@$xr3) {
+                        $k =~ s/:[0-9]+:$x->{blob}\z// or next;
+                        next if $k eq $cur_key;
+                        my $ibx = $by_eidx_key->{$k} // next;
+                        my $url = $ibx->base_url or next;
+                        push(@{$ctx->{found}}, $ibx) unless $seen{$k}++;
+                }
+        }
+        return exact($ctx) if $ctx->{found};
+
+        # fall back to partial MID matching
+        for my $ibxish ($ctx->{ibx}, $ALL) {
+                my $mids = search_partial($ibxish, $ctx->{mid}) or next;
+                push @{$ctx->{partial}}, [ $ibxish, $mids ];
+                last if ($ctx->{n_partial} += scalar(@$mids)) >= PARTIAL_MAX;
+        }
+        partial_response($ctx);
+}
+
 sub ext_msg {
         my ($ctx) = @_;
-        sub {
+        ext_msg_ALL($ctx) // sub {
                 $ctx->{-wcb} = $_[0]; # HTTP server write callback
 
                 if ($ctx->{env}->{'pi-httpd.async'}) {
                         require PublicInbox::ConfigIter;
                         my $iter = PublicInbox::ConfigIter->new(
-                                                $ctx->{www}->{pi_config},
+                                                $ctx->{www}->{pi_cfg},
                                                 \&ext_msg_step, $ctx);
                         $iter->event_step;
                 } else {
-                        $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx);
+                        $ctx->{www}->{pi_cfg}->each_inbox(\&ext_msg_i, $ctx);
                         finalize_exact($ctx);
                 }
         };
@@ -141,7 +170,7 @@ sub finalize_exact {
 
         # fall back to partial MID matching
         my $mid = $ctx->{mid};
-        my $cur = $ctx->{-inbox};
+        my $cur = $ctx->{ibx};
         my $mids = search_partial($cur, $mid);
         if ($mids) {
                 $ctx->{n_partial} = scalar(@$mids);
@@ -159,7 +188,7 @@ sub finalize_exact {
         finalize_partial($ctx);
 }
 
-sub finalize_partial {
+sub partial_response ($) {
         my ($ctx) = @_;
         my $mid = $ctx->{mid};
         my $code = 404;
@@ -172,7 +201,7 @@ sub finalize_partial {
                 my $es = $n_partial == 1 ? '' : 'es';
                 $n_partial .= '+' if ($n_partial == PARTIAL_MAX);
                 $s .= "\n$n_partial partial match$es found:\n\n";
-                my $cur_name = $ctx->{-inbox}->{name};
+                my $cur_name = $ctx->{ibx}->{name};
                 foreach my $pair (@{$ctx->{partial}}) {
                         my ($ibx, $res) = @$pair;
                         my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
@@ -192,9 +221,11 @@ sub finalize_partial {
         $ctx->{-html_tip} = $s .= '</pre>';
         $ctx->{-title_html} = $title;
         $ctx->{-upfx} = '../';
-        $ctx->{-wcb}->(html_oneshot($ctx, $code));
+        html_oneshot($ctx, $code);
 }
 
+# passes the result of partial_response to the write callback in {-wcb}
+sub finalize_partial ($) {
+        my ($ctx) = @_;
+        $ctx->{-wcb}->(partial_response($ctx));
+}
+
 sub ext_urls {
         my ($ctx, $mid, $href, $html) = @_;
 
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
new file mode 100644
index 00000000..7c9586a6
--- /dev/null
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -0,0 +1,129 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Read-only external (detached) index for cross inbox search.
+# This is a read-only counterpart to PublicInbox::ExtSearchIdx
+# and behaves like PublicInbox::Inbox AND PublicInbox::Search
+package PublicInbox::ExtSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Over;
+use PublicInbox::Inbox;
+use PublicInbox::MiscSearch;
+use DBI qw(:sql_types); # SQL_BLOB
+
+# for ->reopen, ->mset, ->mset_to_artnums
+use parent qw(PublicInbox::Search);
+
+# constructor, the class name argument is ignored.  $topdir is stored
+# as {topdir}; {xpfx} is "$topdir/ei" followed by the Search schema
+# version (e.g. "ei15")
+sub new {
+        my $topdir = $_[1];
+        my $xpfx = "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION;
+        my %self = (topdir => $topdir, xpfx => $xpfx);
+        bless \%self, __PACKAGE__;
+}
+
+# returns the MiscSearch object for "$xpfx/misc", creating and caching
+# it in {misc} on first use
+sub misc {
+        my ($self) = @_;
+        return $self->{misc} if defined $self->{misc};
+        $self->{misc} = PublicInbox::MiscSearch->new("$self->{xpfx}/misc");
+}
+
+# overrides PublicInbox::Search::_xdb, delegating to ->xdb_sharded
+sub _xdb { $_[0]->xdb_sharded }
+
+# same as per-inbox ->over, for now...
+# (creates and caches a PublicInbox::Over for "$xpfx/over.sqlite3")
+sub over {
+        my ($self) = @_;
+        my $over = $self->{over};
+        unless (defined $over) {
+                my $path = "$self->{xpfx}/over.sqlite3";
+                $over = $self->{over} = PublicInbox::Over->new($path);
+        }
+        $over;
+}
+
+# returns the PublicInbox::Git object for "$topdir/ALL.git", creating
+# and caching it in {git} on first use
+sub git {
+        my ($self) = @_;
+        my $git = $self->{git};
+        return $git if defined $git;
+        $self->{git} = PublicInbox::Git->new("$self->{topdir}/ALL.git");
+}
+
+# returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table
+# $xibx is the inbox being served, its {newsgroup} is looked up as the
+# eidx_key in the `inboxes' table.  $xsmsg is the message within $xibx;
+# {blob}, {num} and (if {num} is false) {-orig_num} are read from it.
+# Warns and returns nothing if the inbox or the message cannot be found
+# in this index.
+sub nntp_xref_for { # NNTP only
+        my ($self, $xibx, $xsmsg) = @_;
+        my $dbh = over($self)->dbh;
+
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+        $sth->execute($xibx->{newsgroup});
+        my $xibx_id = $sth->fetchrow_array // do {
+                warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n";
+                return;
+        };
+
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1
+
+        $sth->bind_param(1, pack('H*', $xsmsg->{blob}), SQL_BLOB);
+
+        # NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2
+        my $xnum = $xsmsg->{num} || $xsmsg->{-orig_num};
+        $sth->bind_param(2, $xnum);
+        $sth->bind_param(3, $xibx_id);
+        $sth->execute;
+        my $docid = $sth->fetchrow_array // do {
+                # report the article number which was actually looked up
+                warn "W: `$xibx->{newsgroup}:$xnum' ",
+                        "not found in $self->{topdir}\n";
+                return;
+        };
+
+        # every other inbox referencing the same document (no LIMIT,
+        # a message may be cross-posted to any number of inboxes)
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?
+
+        $sth->execute($docid, $xibx_id);
+        my $rows = $sth->fetchall_arrayref;
+
+        my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1
+
+        my %xref = map {
+                my ($ibx_id, $n) = @$_;
+
+                $eidx_key_sth->execute($ibx_id);
+                my $eidx_key = $eidx_key_sth->fetchrow_array;
+
+                # only include if there's a newsgroup name: skip rows
+                # without a usable key (undef or empty) and keys
+                # containing '/'
+                (defined($eidx_key) && $eidx_key ne '' &&
+                                index($eidx_key, '/') < 0) ?
+                        ($eidx_key => $n) : ();
+        } @$rows;
+        $xref{$xibx->{newsgroup}} = $xsmsg->{num};
+        \%xref;
+}
+
+sub mm { undef } # always undef; also aliased as max_git_epoch, nntp_usable and msg_by_path
+
+sub altid_map { {} } # always a new, empty hashref
+
+# returns the contents of "$topdir/description" as read by
+# PublicInbox::Inbox::cat_desc (cached in {description}), or a
+# placeholder string when that yields undef
+sub description {
+        my ($self) = @_;
+        my $desc = $self->{description};
+        if (!defined $desc) {
+                my $path = "$self->{topdir}/description";
+                $desc = $self->{description} =
+                        PublicInbox::Inbox::cat_desc($path);
+        }
+        defined($desc) ? $desc : '$EXTINDEX_DIR/description missing';
+}
+
+sub cloneurl { [] } # TODO (always an empty arrayref for now)
+
+sub base_url { 'https://example.com/TODO/' } # hard-coded placeholder URL
+sub nntp_url { [] } # always an empty arrayref
+
+no warnings 'once'; # the globs below are only mentioned once in this file
+*smsg_eml = \&PublicInbox::Inbox::smsg_eml; # this and the next four reuse PublicInbox::Inbox subs as-is
+*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
+*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
+*modified = \&PublicInbox::Inbox::modified;
+*recent = \&PublicInbox::Inbox::recent;
+
+*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
+*isrch = *search = \&PublicInbox::Search::reopen; # ->isrch and ->search both call Search::reopen
+
+1;
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
new file mode 100644
index 00000000..07e64698
--- /dev/null
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -0,0 +1,1105 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Detached/external index cross inbox search indexing support
+# read-write counterpart to PublicInbox::ExtSearch
+#
+# It's based on the same ideas as public-inbox-v2-format(5) using
+# over.sqlite3 for dedupe and sharded Xapian.  msgmap.sqlite3 is
+# missing, so there is no Message-ID conflict resolution, meaning
+# no NNTP support for now.
+#
+# v2 has a 1:1 mapping of index:inbox or msgmap for NNTP support.
+# This is intended to be an M:N index:inbox mapping, but it'll likely
+# be 1:N in common practice (M==1)
+
+package PublicInbox::ExtSearchIdx;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
+use Carp qw(croak carp);
+use Sys::Hostname qw(hostname);
+use POSIX qw(strftime);
+use PublicInbox::Search;
+use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
+        is_bad_blob);
+use PublicInbox::OverIdx;
+use PublicInbox::MiscIdx;
+use PublicInbox::MID qw(mids);
+use PublicInbox::V2Writable;
+use PublicInbox::InboxWritable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::Eml;
+use PublicInbox::DS qw(now);
+use DBI qw(:sql_types); # SQL_BLOB
+
+# constructor, the class name argument is ignored.
+# $dir is the top-level directory of the external index, $opt is a
+# hashref from which {indexlevel}, {creat} and {fsync} are read.
+# Dies on an invalid or unsupported (basic) indexlevel.
+sub new {
+        my (undef, $dir, $opt) = @_;
+        my $level = $opt->{indexlevel} // 'full';
+        if ($level !~ $PublicInbox::SearchIdx::INDEXLEVELS) {
+                die "invalid indexlevel=$level\n";
+        }
+        die "E: indexlevel=basic not yet supported\n" if $level eq 'basic';
+        my $xpfx = "$dir/ei".PublicInbox::Search::SCHEMA_VERSION;
+        my %self = (
+                xpfx => $xpfx,
+                topdir => $dir,
+                creat => $opt->{creat},
+                ibx_map => {}, # (newsgroup//inboxdir) => $ibx
+                ibx_list => [],
+                indexlevel => $level,
+                transact_bytes => 0,
+                total_bytes => 0,
+                current_info => '',
+                parallel => 1,
+                lock_path => "$dir/ei.lock",
+        );
+        my $self = bless \%self, __PACKAGE__;
+        $self->{shards} = $self->count_shards || nproc_shards($opt->{creat});
+        my $oidx = PublicInbox::OverIdx->new("$xpfx/over.sqlite3");
+        unless ($opt->{fsync}) { # fsync is off unless explicitly requested
+                $oidx->{-no_fsync} = 1;
+                $self->{-no_fsync} = 1;
+        }
+        $self->{oidx} = $oidx;
+        $self;
+}
+
+# registers $ibx under its ->eidx_key in {ibx_map} and appends it to
+# {ibx_list}, unless an inbox is already registered under that key.
+# Returns the inbox registered for the key.
+sub attach_inbox {
+        my ($self, $ibx) = @_;
+        my $key = $ibx->eidx_key;
+        my $known = $self->{ibx_map}->{$key};
+        return $known if defined $known;
+        push @{$self->{ibx_list}}, $ibx;
+        $self->{ibx_map}->{$key} = $ibx;
+}
+
+# each_inbox callback: arguments arrive as ($ibx, $self), so swap them
+# for attach_inbox
+sub _ibx_attach { attach_inbox($_[1], $_[0]) }
+
+sub attach_config { # remembers $cfg in {cfg} and attaches every inbox it yields
+        my ($self, $cfg) = @_;
+        $self->{cfg} = $cfg; # eidx_gc refuses to run unless {cfg} is set
+        $cfg->each_inbox(\&_ibx_attach, $self);
+}
+
+# adds the size of $req->{new_smsg} to {transact_bytes} of $req->{self}
+# and raises the flag referenced by $req->{need_checkpoint} once the
+# total reaches {batch_bytes}
+sub check_batch_limit ($) {
+        my ($req) = @_;
+        my ($self, $new_smsg) = @$req{qw(self new_smsg)};
+
+        # {raw_bytes} may be unset, so just use {bytes}
+        $self->{transact_bytes} += $new_smsg->{bytes};
+
+        # set flag for PublicInbox::V2Writable::index_todo:
+        if ($self->{transact_bytes} >= $self->{batch_bytes}) {
+                ${$req->{need_checkpoint}} = 1;
+        }
+}
+
+sub do_xpost ($$) { # adds or removes the current inbox's xref3 entry for an already-indexed $smsg
+        my ($req, $smsg) = @_;
+        my $self = $req->{self};
+        my $docid = $smsg->{num};
+        my $idx = $self->idx_shard($docid);
+        my $oid = $req->{oid};
+        my $xibx = $req->{ibx};
+        my $eml = $req->{eml};
+        my $eidx_key = $xibx->eidx_key;
+        if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
+                my $xnum = $req->{xnum};
+                $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
+                $idx->shard_add_eidx_info($docid, $eidx_key, $eml);
+                check_batch_limit($req);
+        } else { # 'd'
+                my $rm_eidx_info; # set by remove_xref3 through the reference passed below
+                my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key,
+                                                        \$rm_eidx_info);
+                if ($nr == 0) { # presumably nothing references $docid anymore (TODO confirm in OverIdx)
+                        $self->{oidx}->eidxq_del($docid);
+                        $idx->shard_remove($docid);
+                } elsif ($rm_eidx_info) {
+                        $idx->shard_remove_eidx_info($docid, $eidx_key, $eml);
+                        $self->{oidx}->eidxq_add($docid); # yes, add
+                }
+        }
+}
+
+# called by V2Writable::sync_prepare, returns ->eidx_max of {oidx}
+sub artnum_max {
+        my ($self) = @_;
+        $self->{oidx}->eidx_max;
+}
+
+sub index_unseen ($) { # indexes $req->{new_smsg} as a new document under a fresh docid
+        my ($req) = @_;
+        my $new_smsg = $req->{new_smsg} or die 'BUG: {new_smsg} unset';
+        my $eml = delete $req->{eml};
+        $new_smsg->populate($eml, $req);
+        my $self = $req->{self};
+        my $docid = $self->{oidx}->adj_counter('eidx_docid', '+'); # presumably increments the counter and returns the new value
+        $new_smsg->{num} = $docid;
+        my $idx = $self->idx_shard($docid);
+        $self->{oidx}->add_overview($eml, $new_smsg);
+        my $oid = $new_smsg->{blob};
+        my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
+        $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
+        $idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key);
+        check_batch_limit($req);
+}
+
+sub do_finalize ($) { # called by do_step once every Message-ID was checked
+        my ($req) = @_;
+        if (my $indexed = $req->{indexed}) { # filled by ck_existing on content hash match
+                do_xpost($req, $_) for @$indexed;
+        } elsif (exists $req->{new_smsg}) { # totally unseen message
+                index_unseen($req);
+        } else {
+                # `d' message was already unindexed in the v1/v2 inboxes,
+                # so it's too noisy to warn, here.
+        }
+        # cur_cmt may be undef for unindex_oid, set by V2Writable::index_todo
+        if (defined(my $cur_cmt = $req->{cur_cmt})) {
+                ${$req->{latest_cmt}} = $cur_cmt;
+        }
+}
+
+sub do_step ($) { # main iterator for adding messages to the index
+        my ($req) = @_;
+        my $self = $req->{self} // die 'BUG: {self} missing';
+        while (1) {
+                if (my $next_arg = $req->{next_arg}) { # [ $mid, \$id, \$prev ] iteration state for next_by_mid
+                        if (my $smsg = $self->{oidx}->next_by_mid(@$next_arg)) {
+                                $req->{cur_smsg} = $smsg;
+                                $self->git->cat_async($smsg->{blob},
+                                                        \&ck_existing, $req);
+                                return; # ck_existing calls do_step
+                        }
+                        delete $req->{cur_smsg}; # no more candidates for this Message-ID
+                        delete $req->{next_arg};
+                }
+                my $mid = shift(@{$req->{mids}});
+                last unless defined $mid; # every Message-ID was checked
+                my ($id, $prev);
+                $req->{next_arg} = [ $mid, \$id, \$prev ];
+                # loop again
+        }
+        do_finalize($req);
+}
+
+# called when req->{cur_smsg}->{blob} is bad
+# Drops the xref3 entries for the missing blob.  If other xref3 entries
+# remain for the document, the overview entry is switched to the OID of
+# the first remaining entry; otherwise the document is deleted from
+# over.
+sub _blob_missing ($) {
+        my ($req) = @_;
+        my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+        my $oidx = $req->{self}->{oidx};
+        my $xref3 = $oidx->get_xref3($smsg->{num});
+        my @keep = grep(!/:$smsg->{blob}\z/, @$xref3);
+        if (@keep) {
+                $keep[0] =~ /:([a-f0-9]{40,}+)\z/ or
+                        die "BUG: xref $keep[0] has no OID";
+                my $oidhex = $1;
+                $oidx->remove_xref3($smsg->{num}, $smsg->{blob});
+                $oidx->update_blob($smsg, $oidhex);
+        } else {
+                $oidx->delete_by_num($smsg->{num});
+        }
+}
+
+sub ck_existing { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+        if ($type eq 'missing') {
+                _blob_missing($req);
+        } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
+                my $self = $req->{self} // die 'BUG: {self} missing';
+                local $self->{current_info} = "$self->{current_info} $oid";
+                my $cur = PublicInbox::Eml->new($bref);
+                if (content_hash($cur) eq $req->{chash}) { # same content as the message being processed
+                        push @{$req->{indexed}}, $smsg; # for do_xpost
+                } # else { index_unseen later }
+        }
+        do_step($req); # continue with the next candidate
+}
+
+# is the message visible in the inbox currently being indexed?
+# return the number if so
+sub cur_ibx_xnum ($$) {
+        my ($req, $bref) = @_;
+        my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing';
+
+        $req->{eml} = PublicInbox::Eml->new($bref); # {eml}, {chash} and {mids} are left in $req for the callers
+        $req->{chash} = content_hash($req->{eml});
+        $req->{mids} = mids($req->{eml});
+        my @q = @{$req->{mids}}; # copy
+        while (defined(my $mid = shift @q)) {
+                my ($id, $prev);
+                while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
+                        return $x->{num} if $x->{blob} eq $req->{oid};
+                }
+        }
+        undef; # no entry in this inbox has the blob $req->{oid}
+}
+
+sub index_oid { # git->cat_async callback for 'm'
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $self = $req->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        return if is_bad_blob($oid, $type, $size, $req->{oid});
+        my $new_smsg = $req->{new_smsg} = bless {
+                blob => $oid,
+        }, 'PublicInbox::Smsg';
+        $new_smsg->{bytes} = $size + crlf_adjust($$bref);
+        defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return; # not visible in the current inbox, nothing to do
+        ++${$req->{nr}};
+        do_step($req);
+}
+
+sub unindex_oid { # git->cat_async callback for 'd'
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $self = $req->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        return if is_bad_blob($oid, $type, $size, $req->{oid});
+        return if defined(cur_ibx_xnum($req, $bref)); # was re-added
+        do_step($req); # {new_smsg} is not set here, so do_xpost takes its 'd' branch
+}
+
+# overrides V2Writable::last_commits, called by sync_ranges via sync_prepare
+# returns an arrayref with one eidx_meta value ("lc-v2:..." keys) for
+# each epoch from 0 to $sync->{epoch_max}
+sub last_commits {
+        my ($self, $sync) = @_;
+        my $ibx = $sync->{ibx};
+        my $pfx = 'lc-v2:'.$ibx->eidx_key.'//'.$ibx->uidvalidity.';';
+        my @heads;
+        for my $epoch (0..$sync->{epoch_max}) {
+                push @heads, scalar($self->{oidx}->eidx_meta($pfx.$epoch));
+        }
+        \@heads;
+}
+
+# returns a string describing why $ibx cannot be indexed, or undef if
+# ->mm, ->uidvalidity and ->over are all defined
+sub _ibx_index_reject ($) {
+        my ($ibx) = @_;
+        defined($ibx->mm) or return 'unindexed, no msgmap.sqlite3';
+        defined($ibx->uidvalidity) or return 'no UIDVALIDITY';
+        defined($ibx->over) or return 'unindexed, no over.sqlite3';
+        undef;
+}
+
+sub _sync_inbox ($$$) { # indexes one inbox; returns a warning/error string, or undef/nothing otherwise
+        my ($self, $sync, $ibx) = @_;
+        my $ekey = $ibx->eidx_key;
+        if (defined(my $err = _ibx_index_reject($ibx))) {
+                return "W: skipping $ekey ($err)";
+        }
+        $sync->{ibx} = $ibx;
+        $sync->{nr} = \(my $nr = 0); # reference to a fresh counter for this inbox
+        my $v = $ibx->version;
+        if ($v == 2) {
+                $sync->{epoch_max} = $ibx->max_git_epoch // return; # no epochs: return without a message
+                sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
+        } elsif ($v == 1) {
+                my $uv = $ibx->uidvalidity;
+                my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv");
+                my $head = $ibx->mm->last_commit //
+                        return "E: $ibx->{inboxdir} is not indexed";
+                my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head); # only the range since $lc when one is recorded
+                my $unit = { stack => $stk, git => $ibx->git };
+                push @{$sync->{todo}}, $unit;
+        } else {
+                return "E: $ekey unsupported inbox version (v$v)";
+        }
+        for my $unit (@{delete($sync->{todo}) // []}) {
+                last if $sync->{quit};
+                index_todo($self, $sync, $unit);
+        }
+        $self->{midx}->index_ibx($ibx) unless $sync->{quit};
+        $ibx->git->cleanup; # done with this inbox, now
+        undef;
+}
+
+sub gc_unref_doc ($$$$) { # drops all xref3 rows of ($docid, $ibx_id), called by eidx_gc
+        my ($self, $ibx_id, $eidx_key, $docid) = @_;
+        my $dbh = $self->{oidx}->dbh;
+
+        # for debug/info purposes, oids may no longer be accessible
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+        $sth->execute($docid, $ibx_id);
+        my @oid = map { unpack('H*', $_->[0]) } @{$sth->fetchall_arrayref}; # binary OIDs to hex
+
+        $dbh->prepare_cached(<<'')->execute($docid, $ibx_id);
+DELETE FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+        my $remain = $self->{oidx}->get_xref3($docid);
+        if (scalar(@$remain)) { # still referenced by other xref3 rows
+                $self->{oidx}->eidxq_add($docid); # enqueue for reindex
+                for my $oid (@oid) {
+                        warn "I: unref #$docid $eidx_key $oid\n";
+                }
+        } else {
+                warn "I: remove #$docid $eidx_key @oid\n";
+                $self->idx_shard($docid)->shard_remove($docid);
+        }
+}
+
+# Garbage collection: for every row of the `inboxes' table whose
+# eidx_key is not in {ibx_map}, this removes its xref3 rows (via
+# gc_unref_doc), its `inboxes' row and its "lc-*" rows in eidx_meta.
+# Afterwards every shard is checked against over and xref3 rows
+# without a matching `over' row are deleted.
+# Dies unless ->attach_config was called first.  $opt is passed to
+# ->idx_init after setting {-idx_gc}.
+sub eidx_gc {
+        my ($self, $opt) = @_;
+        $self->{cfg} or die "E: GC requires ->attach_config\n";
+        $opt->{-idx_gc} = 1;
+        $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+
+        my $dbh = $self->{oidx}->dbh;
+        my $x3_doc = $dbh->prepare('SELECT docid FROM xref3 WHERE ibx_id = ?');
+        my $ibx_ck = $dbh->prepare('SELECT ibx_id,eidx_key FROM inboxes');
+
+        # SQLite LIKE has no escape character unless ESCAPE is given,
+        # so the backslashes added to $pat below need this clause
+        my $lc_i = $dbh->prepare(<<'');
+SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ?
+
+        $ibx_ck->execute;
+        while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) {
+                next if $self->{ibx_map}->{$eidx_key};
+                $self->{midx}->remove_eidx_key($eidx_key);
+                warn "I: deleting messages for $eidx_key...\n";
+                $x3_doc->execute($ibx_id);
+                while (defined(my $docid = $x3_doc->fetchrow_array)) {
+                        gc_unref_doc($self, $ibx_id, $eidx_key, $docid);
+                }
+                $dbh->prepare_cached(<<'')->execute($ibx_id);
+DELETE FROM inboxes WHERE ibx_id = ?
+
+                # drop last_commit info
+                my $pat = $eidx_key;
+                $pat =~ s/([_%\\])/\\$1/g; # escape LIKE wildcards and the escape char
+                $lc_i->execute("lc-%:$pat//%", '\\');
+                while (my ($key) = $lc_i->fetchrow_array) {
+                        # LIKE is only a prefilter, match the exact key here
+                        next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!;
+                        warn "I: removing $key\n";
+                        $dbh->prepare_cached(<<'')->execute($key);
+DELETE FROM eidx_meta WHERE key = ?
+
+                }
+
+                warn "I: $eidx_key removed\n";
+        }
+
+        # it's not real unless it's in `over', we use parallelism here,
+        # shards will be reading directly from over, so commit
+        $self->{oidx}->commit_lazy;
+        $self->{oidx}->begin_lazy;
+
+        for my $idx (@{$self->{idx_shards}}) {
+                warn "I: cleaning up shard #$idx->{shard}\n";
+                $idx->shard_over_check($self->{oidx});
+        }
+        my $nr = $dbh->do(<<'');
+DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over)
+
+        warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0;
+
+        done($self);
+}
+
+# removes {ibx_id} from $smsg and returns the matching inbox from
+# {ibx_list}, located through $sync->{id2pos}.  Dies if any step of
+# the lookup yields undef.
+sub _ibx_for ($$$) {
+        my ($self, $sync, $smsg) = @_;
+        my $ibx_id = delete $smsg->{ibx_id};
+        defined($ibx_id) or die '{ibx_id} unset';
+        my $pos = $sync->{id2pos}->{$ibx_id};
+        defined($pos) or die "$ibx_id no pos";
+        my $ibx = $self->{ibx_list}->[$pos];
+        defined($ibx) or die "BUG: ibx for $smsg->{blob} not mapped";
+        $ibx;
+}
+
+# returns true if the soft RLIMIT_NOFILE looks too low for the number
+# of attached inboxes (or cannot be determined); the result is cached
+# in {-fd_constrained}
+sub _fd_constrained ($) {
+        my ($self) = @_;
+        my $cached = $self->{-fd_constrained};
+        return $cached if defined $cached;
+
+        my $soft;
+        if (eval { require BSD::Resource; 1 }) {
+                my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+                ($soft) = BSD::Resource::getrlimit($NOFILE);
+        } else { # fall back to asking the shell
+                $soft = `sh -c 'ulimit -n'`;
+                chomp($soft);
+        }
+        unless (defined $soft) {
+                warn "Unable to determine RLIMIT_NOFILE: $@\n";
+                return $self->{-fd_constrained} = 1;
+        }
+        my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate
+        my $constrained = $want > $soft;
+        warn <<EOF if $constrained;
+RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early
+EOF
+        $self->{-fd_constrained} = $constrained;
+}
+
+sub _reindex_finalize ($$$) { # called by _reindex_oid after the last xref3 blob of a docid was read
+        my ($req, $smsg, $eml) = @_;
+        my $sync = $req->{sync};
+        my $self = $sync->{self};
+        my $by_chash = delete $req->{by_chash} or die 'BUG: no {by_chash}'; # content hash => [ smsgs ], filled by _reindex_oid
+        my $nr = scalar(keys(%$by_chash)) or die 'BUG: no content hashes';
+        my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+        my $docid = $smsg->{num} = $orig_smsg->{num};
+        $self->{oidx}->add_overview($eml, $smsg); # may rethread
+        check_batch_limit({ %$sync, new_smsg => $smsg });
+        my $chash0 = $smsg->{chash} // die "BUG: $smsg->{blob} no {chash}";
+        my $stable = delete($by_chash->{$chash0}) //
+                                die "BUG: $smsg->{blob} chash missing";
+        my $idx = $self->idx_shard($docid);
+        my $top_smsg = pop @$stable;
+        $top_smsg == $smsg or die 'BUG: top_smsg != smsg';
+        my $ibx = _ibx_for($self, $sync, $smsg);
+        $idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key);
+        for my $x (reverse @$stable) { # remaining entries with the same content hash
+                $ibx = _ibx_for($self, $sync, $x);
+                my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
+                $idx->shard_add_eidx_info($docid, $ibx->eidx_key, $hdr);
+        }
+        return if $nr == 1; # likely, all good
+
+        warn "W: #$docid split into $nr due to deduplication change\n";
+        my @todo; # flat list of ($ibx, $e) pairs for reindex_unseen
+        for my $ary (values %$by_chash) {
+                for my $x (reverse @$ary) {
+                        warn "removing #$docid xref3 $x->{blob}\n";
+                        my $n = $self->{oidx}->remove_xref3($docid, $x->{blob});
+                        die "BUG: $x->{blob} invalidated #$docid" if $n == 0;
+                }
+                my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty";
+                $x->{num} = delete($x->{xnum}) // die '{xnum} unset';
+                $ibx = _ibx_for($self, $sync, $x);
+                if (my $over = $ibx->over) {
+                        my $e = $over->get_art($x->{num});
+                        $e->{blob} eq $x->{blob} or die <<EOF;
+$x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
+EOF
+                        push @todo, $ibx, $e;
+                        $over->dbh_close if _fd_constrained($self);
+                } else {
+                        die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n";
+                }
+        }
+        undef $by_chash;
+        while (my ($ibx, $e) = splice(@todo, 0, 2)) {
+                reindex_unseen($self, $sync, $ibx, $e);
+        }
+}
+
+sub _reindex_oid { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        my $sync = $req->{sync};
+        my $self = $sync->{self};
+        my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+        my $expect_oid = $req->{xr3r}->[$req->{ix}]->[2]; # {xr3r} rows are [ ibx_id, xnum, hex OID ], see _reindex_smsg
+        my $docid = $orig_smsg->{num};
+        if (is_bad_blob($oid, $type, $size, $expect_oid)) {
+                my $remain = $self->{oidx}->remove_xref3($docid, $expect_oid);
+                if ($remain == 0) {
+                        warn "W: #$docid gone or corrupted\n";
+                        $self->idx_shard($docid)->shard_remove($docid);
+                } elsif (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) { # try the next xref3 row
+                        $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+                } else {
+                        warn "BUG: #$docid gone (UNEXPECTED)\n";
+                        $self->idx_shard($docid)->shard_remove($docid);
+                }
+                return;
+        }
+        my $ci = $self->{current_info};
+        local $self->{current_info} = "$ci #$docid $oid";
+        my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+        $re_smsg->{bytes} = $size + crlf_adjust($$bref);
+        my $eml = PublicInbox::Eml->new($bref);
+        $re_smsg->populate($eml, { autime => $orig_smsg->{ds},
+                                cotime => $orig_smsg->{ts} });
+        my $chash = content_hash($eml);
+        $re_smsg->{chash} = $chash;
+        $re_smsg->{xnum} = $req->{xr3r}->[$req->{ix}]->[1];
+        $re_smsg->{ibx_id} = $req->{xr3r}->[$req->{ix}]->[0];
+        $re_smsg->{hdr} = $eml->header_obj;
+        push @{$req->{by_chash}->{$chash}}, $re_smsg; # grouped by content hash for _reindex_finalize
+        if (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
+                $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+        } else { # last $re_smsg is the highest priority xref3
+                local $self->{current_info} = "$ci #$docid";
+                _reindex_finalize($req, $re_smsg, $eml);
+        }
+}
+
+sub _reindex_smsg ($$$) { # starts the async reindex of one document, called by eidxq_process
+        my ($self, $sync, $smsg) = @_;
+        my $docid = $smsg->{num};
+        my $xr3 = $self->{oidx}->get_xref3($docid, 1); # rows are used below as [ ibx_id, xnum, binary OID ]
+        if (scalar(@$xr3) == 0) { # _reindex_check_stale should've covered this
+                warn <<"";
+BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex
+
+                $self->{oidx}->delete_by_num($docid);
+                $self->idx_shard($docid)->shard_remove($docid);
+                return;
+        }
+
+        # we sort {xr3r} in the reverse order of {ibx_list} so we can
+        # hit the common case in _reindex_finalize without rereading
+        # from git (or holding multiple messages in memory).
+        my $id2pos = $sync->{id2pos}; # index in {ibx_list}
+        @$xr3 = sort {
+                $id2pos->{$b->[0]} <=> $id2pos->{$a->[0]}
+                                ||
+                $b->[1] <=> $a->[1] # break ties with {xnum}
+        } @$xr3;
+        @$xr3 = map { [ $_->[0], $_->[1], unpack('H*', $_->[2]) ] } @$xr3; # binary OID to hex
+        my $req = { orig_smsg => $smsg, sync => $sync, xr3r => $xr3, ix => 0 };
+        $self->git->cat_async($xr3->[$req->{ix}]->[2], \&_reindex_oid, $req);
+}
+
+# true if the flag referenced by {need_checkpoint} is set, or if the
+# current time (now()) is past {next_check}
+sub checkpoint_due ($) {
+        my ($sync) = @_;
+        my $flagged = ${$sync->{need_checkpoint}};
+        return $flagged if $flagged;
+        now() > $sync->{next_check};
+}
+
+sub host_ident () { # returns "HOSTNAME-MACHINE_OR_HOST_ID", computed once per process
+        # I've copied FS images and only changed the hostname before,
+        # so prepend hostname.  Use `state' since a BOFH can change
+        # these while this process is running and we always want to be
+        # able to release locks taken by this process.
+        state $retval = hostname . '-' . do {
+                my $m; # machine-id(5) is systemd
+                if (open(my $fh, '<', '/etc/machine-id')) { $m = <$fh> }
+                # (g)hostid(1) is in GNU coreutils, kern.hostid is most BSDs
+                chomp($m ||= `{ sysctl -n kern.hostid ||
+                                hostid || ghostid; } 2>/dev/null`
+                        || "no-machine-id-or-hostid-on-$^O");
+                $m;
+        };
+}
+
+sub eidxq_release { # clears `eidxq_lock' if this process still holds it; returns 1 on success, undef otherwise
+        my ($self) = @_;
+        my $expect = delete($self->{-eidxq_locked}) or return; # nothing taken by this object
+        my ($owner_pid, undef) = split(/-/, $expect); # the lock value starts with the PID, see _eidxq_take
+        return if $owner_pid != $$; # shards may fork
+        my $oidx = $self->{oidx};
+        $oidx->begin_lazy;
+        my $cur = $oidx->eidx_meta('eidxq_lock') // '';
+        if ($cur eq $expect) {
+                $oidx->eidx_meta('eidxq_lock', ''); # an empty value means unlocked
+                return 1;
+        } elsif ($cur ne '') {
+                warn "E: eidxq_lock($expect) stolen by $cur\n";
+        } else {
+                warn "E: eidxq_lock($expect) released by another process\n";
+        }
+        undef;
+}
+
+# releases the eidxq lock (if held) and commits when that succeeded
+sub DESTROY {
+        my ($self) = @_;
+        $self->{oidx}->commit_lazy if eidxq_release($self);
+}
+
+# stores "PID-TIME-EUID-HOST_IDENT" as `eidxq_lock' in eidx_meta and
+# remembers it in {-eidxq_locked}; returns that value
+sub _eidxq_take ($) {
+        my ($self) = @_;
+        my $val = join('-', $$, time, $>, host_ident());
+        $self->{oidx}->eidx_meta('eidxq_lock', $val);
+        $self->{-eidxq_locked} = $val;
+}
+
+sub eidxq_lock_acquire ($) { # returns the lock value if we hold (or took) `eidxq_lock', false otherwise
+        my ($self) = @_;
+        my $oidx = $self->{oidx};
+        $oidx->begin_lazy;
+        my $cur = $oidx->eidx_meta('eidxq_lock') || return _eidxq_take($self); # unset or empty: free to take
+        if (my $locked = $self->{-eidxq_locked}) { # be lazy
+                return $locked if $locked eq $cur;
+        }
+        my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4); # limit of 4 keeps any '-' inside $ident
+        my $t = strftime('%Y-%m-%d %k:%M:%S', gmtime($time));
+        if ($euid == $> && $ident eq host_ident) { # same user on the same host, so kill(0) is meaningful
+                if (kill(0, $pid)) {
+                        warn <<EOM; return;
+I: PID:$pid (re)indexing Xapian since $t, it will continue our work
+EOM
+                }
+                if ($!{ESRCH}) {
+                        warn "I: eidxq_lock is stale ($cur), clobbering\n";
+                        return _eidxq_take($self);
+                }
+                warn "E: kill(0, $pid) failed: $!\n"; # fall-through:
+        }
+        my $fn = $oidx->dbh->sqlite_db_filename;
+        warn <<EOF;
+W: PID:$pid, UID:$euid on $ident is indexing Xapian since $t
+W: If this is unexpected, delete `eidxq_lock' from the `eidx_meta' table:
+W:        sqlite3 $fn 'DELETE FROM eidx_meta WHERE key = "eidxq_lock"'
+EOF
+        undef;
+}
+
+sub eidxq_process ($$) { # for reindexing
+        my ($self, $sync) = @_;
+
+        return unless eidxq_lock_acquire($self);
+        my $dbh = $self->{oidx}->dbh;
+        my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return; # empty queue, nothing to do
+        ${$sync->{nr}} = 0;
+        local $sync->{-regen_fmt} = "%u/$tot\n";
+        my $pr = $sync->{-opt}->{-progress};
+        if ($pr) {
+                my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq');
+                my $max = $dbh->selectrow_array('SELECT MAX(docid) FROM eidxq');
+                $pr->("Xapian indexing $min..$max (total=$tot)\n");
+        }
+        $sync->{id2pos} //= do { # maps each inbox's {-ibx_id} to its index in {ibx_list}
+                my %id2pos;
+                my $pos = 0;
+                $id2pos{$_->{-ibx_id}} = $pos++ for @{$self->{ibx_list}};
+                \%id2pos;
+        };
+        my ($del, $iter);
+restart:
+        $del = $dbh->prepare('DELETE FROM eidxq WHERE docid = ?');
+        $iter = $dbh->prepare('SELECT docid FROM eidxq ORDER BY docid ASC');
+        $iter->execute;
+        while (defined(my $docid = $iter->fetchrow_array)) {
+                last if $sync->{quit};
+                if (my $smsg = $self->{oidx}->get_art($docid)) {
+                        _reindex_smsg($self, $sync, $smsg);
+                } else {
+                        warn "E: #$docid does not exist in over\n";
+                }
+                $del->execute($docid); # dequeue, whether or not it existed in over
+                ++${$sync->{nr}};
+
+                if (checkpoint_due($sync)) {
+                        $dbh = $del = $iter = undef; # drop all handles before the checkpoint
+                        reindex_checkpoint($self, $sync); # release lock
+                        $dbh = $self->{oidx}->dbh;
+                        goto restart; # re-prepare the statements on the new handle
+                }
+        }
+        $self->git->async_wait_all;
+        $pr->("reindexed ${$sync->{nr}}/$tot\n") if $pr;
+}
+
+# Completion callback for the cat_async request issued by reindex_unseen.
+# Builds the Smsg and Eml for the blob which was just read, stores them
+# (plus the content hash and Message-IDs) in $req and passes $req on to
+# do_step.  Blobs rejected by is_bad_blob are skipped.
+sub _reindex_unseen { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $req) = @_;
+        is_bad_blob($oid, $type, $size, $req->{oid}) and return;
+        my $self = $req->{self};
+        defined($self) or die 'BUG: {self} unset';
+        local $self->{current_info} = "$self->{current_info} $oid";
+        # the byte count is taken before the buffer is given to Eml->new
+        my $bytes = $size + crlf_adjust($$bref);
+        my $smsg = bless { blob => $oid, bytes => $bytes },
+                        'PublicInbox::Smsg';
+        my $eml = PublicInbox::Eml->new($bref);
+        $req->{eml} = $eml;
+        $req->{new_smsg} = $smsg;
+        $req->{chash} = content_hash($eml);
+        $req->{mids} = mids($eml); # do_step walks through these
+        do_step($req); # continue with the regular indexing flow
+}
+
+# --reindex may catch totally unseen messages, this handles them
+# Queue an asynchronous blob read for a message which --reindex found
+# in $ibx but which has no entry of its own yet.  The request starts
+# out as a copy of $sync; _reindex_unseen adds {mids} and {chash} once
+# the blob has been read.
+sub reindex_unseen ($$$$) {
+        my ($self, $sync, $ibx, $xsmsg) = @_;
+        my $blob = $xsmsg->{blob};
+        my %req = (
+                %$sync, # carries {self}
+                autime => $xsmsg->{ds},
+                cotime => $xsmsg->{ts},
+                oid => $blob,
+                ibx => $ibx,
+                xnum => $xsmsg->{num},
+        );
+        my $ekey = $ibx->eidx_key;
+        warn "I: reindex_unseen $ekey:$req{xnum}:$req{oid}\n";
+        $self->git->cat_async($blob, \&_reindex_unseen, \%req);
+}
+
+# Walk the over DB of $ibx in slices of article numbers and compare
+# every message against the xref3 table of the external index.
+# Messages without a matching (ibx_id, xnum, oidbin) row are handed to
+# reindex_unseen immediately; for messages which are already known,
+# all matching docids are queued with eidxq_add.  Stops early once
+# $sync->{quit} is set.
+sub _reindex_check_unseen ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $ibx_id = $ibx->{-ibx_id};
+        my $slice = 1000;
+        my ($beg, $end) = (1, $slice);
+
+        # first, check if we missed any messages in target $ibx
+        my $msgs;
+        my $ekey = $ibx->eidx_key;
+        local $sync->{-regen_fmt} =
+                        "$ekey checking unseen %u/".$ibx->over->max."\n";
+        ${$sync->{nr}} = 0;
+
+        while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
+                ${$sync->{nr}} = $beg;
+                # the next slice starts right after the last article seen
+                $beg = $msgs->[-1]->{num} + 1;
+                $end = $beg + $slice;
+                if (checkpoint_due($sync)) {
+                        reindex_checkpoint($self, $sync); # release lock
+                }
+
+                # (re)fetched on every slice, after the possible checkpoint
+                my $inx3 = $self->{oidx}->dbh->prepare_cached(<<'', undef, 1);
+SELECT DISTINCT(docid) FROM xref3 WHERE
+ibx_id = ? AND xnum = ? AND oidbin = ?
+
+                for my $xsmsg (@$msgs) {
+                        my $oidbin = pack('H*', $xsmsg->{blob});
+                        $inx3->bind_param(1, $ibx_id);
+                        $inx3->bind_param(2, $xsmsg->{num});
+                        $inx3->bind_param(3, $oidbin, SQL_BLOB);
+                        $inx3->execute;
+                        my $docids = $inx3->fetchall_arrayref;
+                        # index messages which were totally missed
+                        # the first time around ASAP:
+                        if (scalar(@$docids) == 0) {
+                                reindex_unseen($self, $sync, $ibx, $xsmsg);
+                        } else { # already seen, reindex later
+                                for my $r (@$docids) {
+                                        $self->{oidx}->eidxq_add($r->[0]);
+                                }
+                        }
+                        last if $sync->{quit};
+                }
+                last if $sync->{quit};
+        }
+}
+
+# Look for xref3 rows of $ibx which no longer match the inbox itself.
+# Rows are read in batches of up to 10000; each (xnum, oidbin) pair is
+# compared with $ibx->over->get_art($xnum).  A row whose article is
+# gone ("stale") or whose blob differs ("mismatch") is deleted from
+# xref3.  Afterwards the docid is either removed completely (no xref3
+# rows left) or queued for reindexing via eidxq_add.  Returns as soon
+# as $sync->{quit} is set.
+sub _reindex_check_stale ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $min = 0;
+        my $fetching; # stays undef when a batch has no rows, ending the loop
+        my $ekey = $ibx->eidx_key;
+        local $sync->{-regen_fmt} =
+                        "$ekey check stale/missing %u/".$ibx->over->max."\n";
+        ${$sync->{nr}} = 0;
+        do {
+                if (checkpoint_due($sync)) {
+                        reindex_checkpoint($self, $sync); # release lock
+                }
+                # now, check if there's stale xrefs
+                # NOTE(review): the next batch resumes at docid > $min, so
+                # a docid with several rows which straddles the LIMIT
+                # boundary looks like it could be partially skipped --
+                # confirm whether that can happen in practice
+                my $iter = $self->{oidx}->dbh->prepare_cached(<<'', undef, 1);
+SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ? AND docid > ?
+ORDER BY docid,xnum ASC LIMIT 10000
+
+                $iter->execute($ibx->{-ibx_id}, $min);
+                $fetching = undef;
+
+                while (my ($docid, $xnum, $oidbin) = $iter->fetchrow_array) {
+                        return if $sync->{quit};
+                        ${$sync->{nr}} = $xnum;
+
+                        $fetching = $min = $docid;
+                        my $smsg = $ibx->over->get_art($xnum);
+                        my $oidhex = unpack('H*', $oidbin);
+                        my $err;
+                        if (!$smsg) {
+                                $err = 'stale';
+                        } elsif ($smsg->{blob} ne $oidhex) {
+                                $err = "mismatch (!= $smsg->{blob})";
+                        } else {
+                                next; # likely, all good
+                        }
+                        # current_info already has eidx_key
+                        warn "$xnum:$oidhex (#$docid): $err\n";
+                        my $del = $self->{oidx}->dbh->prepare_cached(<<'');
+DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ?
+
+                        $del->bind_param(1, $ibx->{-ibx_id});
+                        $del->bind_param(2, $xnum);
+                        $del->bind_param(3, $oidbin, SQL_BLOB);
+                        $del->execute;
+
+                        # get_xref3 over-fetches, but this is a rare path:
+                        my $xr3 = $self->{oidx}->get_xref3($docid);
+                        my $idx = $self->idx_shard($docid);
+                        if (scalar(@$xr3) == 0) { # all gone
+                                $self->{oidx}->delete_by_num($docid);
+                                $self->{oidx}->eidxq_del($docid);
+                                $idx->shard_remove($docid);
+                        } else { # enqueue for reindex of remaining messages
+                                $idx->shard_remove_eidx_info($docid,
+                                                        $ibx->eidx_key);
+                                $self->{oidx}->eidxq_add($docid); # yes, add
+                        }
+                }
+        } while (defined $fetching);
+}
+
+# Run both reindex checks (unseen first, then stale) for one inbox,
+# unless _ibx_index_reject gives a reason not to index it.  The cached
+# over/mm/search/git members of $ibx are dropped afterwards either way.
+sub _reindex_inbox ($$$) {
+        my ($self, $sync, $ibx) = @_;
+        my $ekey = $ibx->eidx_key;
+        local $self->{current_info} = $ekey;
+        my $err = _ibx_index_reject($ibx);
+        if (!defined($err)) {
+                _reindex_check_unseen($self, $sync, $ibx);
+                $sync->{quit} or _reindex_check_stale($self, $sync, $ibx);
+        } else {
+                warn "W: cannot reindex $ekey ($err)\n";
+        }
+        delete @$ibx{qw(over mm search git)}; # not needed again for a while
+}
+
+# Full --reindex: check every inbox in {ibx_list}, wait for pending
+# git reads, then process the eidxq queue.  Aborts with a warning when
+# the eidxq lock cannot be acquired.
+sub eidx_reindex {
+        my ($self, $sync) = @_;
+
+        # a full reindex takes forever and incremental -extindex
+        # processes may run during our checkpoints, so grab the
+        # eidxq_lock before doing anything else
+        unless (eidxq_lock_acquire($self)) {
+                warn "E: aborting --reindex\n";
+                return;
+        }
+        foreach my $ibx (@{$self->{ibx_list}}) {
+                _reindex_inbox($self, $sync, $ibx);
+                $sync->{quit} and last;
+        }
+        $self->git->async_wait_all; # make sure eidxq is completely filled
+        $sync->{quit} or eidxq_process($self, $sync);
+}
+
+# Sync a single inbox via _sync_inbox, drop its cached mm/over members
+# and warn about the error _sync_inbox returned, if any.
+sub sync_inbox {
+        my ($self, $sync, $ibx) = @_;
+        my $err = _sync_inbox($self, $sync, $ibx);
+        delete @$ibx{qw(mm over)};
+        defined($err) and warn $err, "\n";
+}
+
+# Main entry point for a sync run: initializes the index, optionally
+# performs a full reindex (when $opt->{reindex} is set), scans every
+# inbox in {ibx_list} (unless $opt->{scan} is defined and false),
+# processes the eidxq queue and finally releases the eidxq lock.
+# Returns the $sync hashref, which eidx_watch reuses.
+sub eidx_sync { # main entry point
+        my ($self, $opt) = @_;
+
+        # prefix every warning with {current_info} while we run
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
+        local $self->{current_info} = '';
+        local $SIG{__WARN__} = sub {
+                $warn_cb->($self->{current_info}, ': ', @_);
+        };
+        $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+        $self->{oidx}->rethread_prepare($opt);
+        my $sync = {
+                need_checkpoint => \(my $need_checkpoint = 0),
+                check_intvl => 10,
+                next_check => now() + 10,
+                -opt => $opt,
+                # DO NOT SET {reindex} here, it's incompatible with reused
+                # V2Writable code, reindex is totally different here
+                # compared to v1/v2 inboxes because we have multiple histories
+                self => $self,
+                -regen_fmt => "%u/?\n",
+        };
+        # SIGUSR1 sets the flag referenced by $sync->{need_checkpoint}
+        local $SIG{USR1} = sub { $need_checkpoint = 1 };
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
+        # make sure every inbox has its -ibx_id before anything uses it
+        for my $ibx (@{$self->{ibx_list}}) {
+                $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
+        }
+        # {reindex} is removed from $opt so it is only acted upon once
+        if (delete($opt->{reindex})) {
+                local $sync->{checkpoint_unlocks} = 1;
+                eidx_reindex($self, $sync);
+        }
+
+        # don't use $_ here, it'll get clobbered by reindex_checkpoint
+        if ($opt->{scan} // 1) {
+                for my $ibx (@{$self->{ibx_list}}) {
+                        last if $sync->{quit};
+                        sync_inbox($self, $sync, $ibx);
+                }
+        }
+        $self->{oidx}->rethread_done($opt) unless $sync->{quit};
+        eidxq_process($self, $sync) unless $sync->{quit};
+
+        eidxq_release($self);
+        done($self);
+        $sync; # for eidx_watch
+}
+
+# Record the latest indexed commit of the current unit in eidx_meta.
+# The meta key is "lc-v1:$ekey//$uv" for v1 inboxes and
+# "lc-v2:$ekey//$uv;$epoch" for v2 inboxes.  Nothing is written when
+# there is no unit or no commit, or when the stored commit is an
+# ancestor of the new one and `rev-list --count' reports 0 commits
+# between the two.
+sub update_last_commit { # overrides V2Writable
+        my ($self, $sync, $stk) = @_;
+        my $unit = $sync->{unit} // return;
+        my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        return unless defined($latest_cmt);
+        my $ibx = $sync->{ibx} or die 'BUG: {ibx} missing';
+        my $ekey = $ibx->eidx_key;
+        my $uv = $ibx->uidvalidity;
+        my $epoch = $unit->{epoch};
+        my $v = $ibx->version;
+        my $meta_key;
+        if ($v == 1) {
+                defined($epoch) and die 'Unexpected {epoch} for v1 unit';
+                $meta_key = "lc-v1:$ekey//$uv";
+        } elsif ($v == 2) {
+                defined($epoch) or die 'No {epoch} for v2 unit';
+                $meta_key = "lc-v2:$ekey//$uv;$epoch";
+        } else {
+                die "Unsupported inbox version: $v";
+        }
+        my $oidx = $self->{oidx};
+        my $last = $oidx->eidx_meta($meta_key);
+        if (defined($last) && is_ancestor($self->git, $last, $latest_cmt)) {
+                my $range = "$last..$latest_cmt";
+                chomp(my $n = $unit->{git}->qx(qw(rev-list --count), $range));
+                # nothing new between the stored and the latest commit
+                return if ($n ne '' && $n == 0);
+        }
+        $oidx->eidx_meta($meta_key, $latest_cmt);
+}
+
+# Callback run by with_umask from idx_init: performs the V2Writable
+# initialization, then creates the MiscIdx and stores it in {midx}.
+sub _idx_init { # with_umask callback
+        my ($self, $opt) = @_;
+        PublicInbox::V2Writable::_idx_init($self, $opt);
+        my $midx = PublicInbox::MiscIdx->new($self);
+        $self->{midx} = $midx;
+}
+
+# Prepare the external index for writing.  Does nothing when
+# {idx_shards} is already set.  Otherwise: creates ALL.git if needed,
+# merges the object directories of every inbox in {ibx_list} into
+# ALL.git/objects/info/alternates (entries are de-duplicated by device
+# and inode number; the file is only rewritten when there are new
+# entries), then sets up the shards and starts the over/misc
+# transactions.
+sub idx_init { # similar to V2Writable
+        my ($self, $opt) = @_;
+        return if $self->{idx_shards};
+
+        $self->git->cleanup;
+
+        my $ALL = $self->git->{git_dir}; # ALL.git
+        PublicInbox::Import::init_bare($ALL) unless -d $ALL;
+        my $info_dir = "$ALL/objects/info";
+        my $alt = "$info_dir/alternates";
+        my $mode = 0644; # used unless an existing alternates file says otherwise
+        my (@old, @new, %seen); # seen: st_dev + st_ino
+        if (-e $alt) {
+                open(my $fh, '<', $alt) or die "open $alt: $!";
+                # keep the permission bits of the existing file
+                $mode = (stat($fh))[2] & 07777;
+                while (my $line = <$fh>) {
+                        chomp(my $d = $line);
+                        if (my @st = stat($d)) {
+                                next if $seen{"$st[0]\0$st[1]"}++;
+                        } else {
+                                warn "W: stat($d) failed (from $alt): $!\n";
+                                # unreachable entries are only dropped
+                                # when -idx_gc is set
+                                next if $opt->{-idx_gc};
+                        }
+                        push @old, $line;
+                }
+        }
+        for my $ibx (@{$self->{ibx_list}}) {
+                my $line = $ibx->git->{git_dir} . "/objects\n";
+                chomp(my $d = $line);
+                if (my @st = stat($d)) {
+                        next if $seen{"$st[0]\0$st[1]"}++;
+                } else {
+                        warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n";
+                        next if $opt->{-idx_gc};
+                }
+                push @new, $line;
+        }
+        # only touch the alternates file when something was added
+        if (scalar @new) {
+                push @old, @new;
+                my $o = \@old;
+                PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o);
+        }
+        $self->parallel_init($self->{indexlevel});
+        $self->with_umask(\&_idx_init, $self, $opt);
+        $self->{oidx}->begin_lazy;
+        $self->{oidx}->eidx_prep;
+        $self->{midx}->begin_txn;
+}
+
+# Timer callback armed by on_inbox_unlock (also called directly from
+# event_step): processes the eidxq queue, releases the eidxq lock and
+# checkpoints with {-regen_fmt} temporarily removed from the watch
+# sync state.
+sub _watch_commit { # PublicInbox::DS::add_timer callback
+        my ($self) = @_;
+        delete $self->{-commit_timer};
+        my $sync = $self->{-watch_sync};
+        eidxq_process($self, $sync);
+        eidxq_release($self);
+        delete local $sync->{-regen_fmt};
+        reindex_checkpoint($self, $sync);
+
+        # our event_step calls done() unless commit_timer got armed again
+        PublicInbox::DS::requeue($self);
+}
+
+# Invoked by PublicInbox::InboxIdle when an inbox is unlocked (and by
+# event_step during a resync): syncs that inbox and arms the commit
+# timer unless one is already pending.  The timer interval is
+# $opt->{'commit-interval'}, defaulting to 10.
+sub on_inbox_unlock { # called by PublicInbox::InboxIdle
+        my ($self, $ibx) = @_;
+        my $sync = $self->{-watch_sync};
+        my $opt = $sync->{-opt};
+        my $pr = $opt->{-progress};
+        my $ekey = $ibx->eidx_key;
+        local $0 = "sync $ekey";
+        $pr and $pr->("indexing $ekey\n");
+        $self->idx_init($opt);
+        sync_inbox($self, $sync, $ibx);
+        my $interval = $opt->{'commit-interval'} // 10;
+        $self->{-commit_timer} //= PublicInbox::DS::add_timer($interval,
+                                        \&_watch_commit, $self);
+}
+
+# SIGHUP handler for --watch: forgets the current inbox list/map and
+# any pending resync, loads a fresh PublicInbox::Config, attaches it
+# and refreshes $idler with it.  Only possible when {cfg} is set;
+# otherwise a warning is emitted and nothing changes.
+sub eidx_reload { # -extindex --watch SIGHUP handler
+        my ($self, $idler) = @_;
+        if (!$self->{cfg}) {
+                warn "reload not supported without --all\n";
+        } else {
+                my $pr = $self->{-watch_sync}->{-opt}->{-progress};
+                $pr->('reloading ...') if $pr;
+                delete $self->{-resync_queue};
+                # empty the containers in place, keeping the references
+                @{$self->{ibx_list}} = ();
+                %{$self->{ibx_map}} = ();
+                delete $self->{-watch_sync}->{id2pos};
+                my $cfg = PublicInbox::Config->new;
+                attach_config($self, $cfg);
+                $idler->refresh($cfg);
+                $pr->(" done\n") if $pr;
+        }
+}
+
+# SIGUSR1 handler for --watch: unless a resync is already queued,
+# queue a copy of the current inbox list, then requeue ourselves so
+# event_step works through it.
+sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
+        my ($self) = @_;
+        defined($self->{-resync_queue}) or
+                $self->{-resync_queue} = [ @{$self->{ibx_list}} ];
+        PublicInbox::DS::requeue($self); # makes DS call our ->event_step
+}
+
+# Runs after PublicInbox::DS::requeue.  While a resync queue exists,
+# one inbox is synced per invocation and we requeue ourselves; once
+# the queue is empty it is removed and _watch_commit runs.  Without a
+# resync queue, done() is called unless a commit timer is pending.
+sub event_step { # PublicInbox::DS::requeue callback
+        my ($self) = @_;
+        my $resync_queue = $self->{-resync_queue};
+        if (!$resync_queue) {
+                done($self) unless $self->{-commit_timer};
+        } elsif (my $ibx = shift(@$resync_queue)) {
+                on_inbox_unlock($self, $ibx);
+                PublicInbox::DS::requeue($self);
+        } else { # queue drained
+                delete $self->{-resync_queue};
+                _watch_commit($self);
+        }
+}
+
+# Main loop of `public-inbox-extindex --watch': subscribes to unlock
+# notifications of every inbox, performs an initial eidx_sync and,
+# unless that sync was told to quit, installs the HUP/USR1/TSTP and
+# quit signal handlers and enters the PublicInbox::DS event loop.
+sub eidx_watch { # public-inbox-extindex --watch main loop
+        my ($self, $opt) = @_;
+        local %SIG = %SIG;
+        # these signals only produce a warning until the real handlers
+        # are installed further below
+        for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) {
+                $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" };
+        }
+        require PublicInbox::InboxIdle;
+        require PublicInbox::DS;
+        require PublicInbox::Syscall;
+        require PublicInbox::Sigfd;
+        my $idler = PublicInbox::InboxIdle->new($self->{cfg});
+        # without a config, each inbox is registered with the idler by hand
+        if (!$self->{cfg}) {
+                $idler->watch_inbox($_) for @{$self->{ibx_list}};
+        }
+        $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}};
+        my $pr = $opt->{-progress};
+        $pr->("performing initial scan ...\n") if $pr;
+        my $sync = eidx_sync($self, $opt); # initial sync
+        return if $sync->{quit};
+        my $oldset = PublicInbox::Sigfd::block_signals();
+        # prefix every warning with {current_info}, as eidx_sync does
+        local $self->{current_info} = '';
+        my $cb = $SIG{__WARN__} || \&CORE::warn;
+        local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) };
+        my $sig = {
+                HUP => sub { eidx_reload($self, $idler) },
+                USR1 => sub { eidx_resync_start($self) },
+                TSTP => sub { kill('STOP', $$) },
+        };
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit;
+        my $sigfd = PublicInbox::Sigfd->new($sig,
+                                        $PublicInbox::Syscall::SFD_NONBLOCK);
+        # fall back to ordinary %SIG handlers when no Sigfd is available
+        %SIG = (%SIG, %$sig) if !$sigfd;
+        local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock
+        if (!$sigfd) {
+                # wake up every second to accept signals if we don't
+                # have signalfd or IO::KQueue:
+                PublicInbox::Sigfd::sig_setmask($oldset);
+                PublicInbox::DS->SetLoopTimeout(1000);
+        }
+        # the callback turns false once a quit signal set $sync->{quit}
+        PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} });
+        $pr->("initial scan complete, entering event loop\n") if $pr;
+        PublicInbox::DS->EventLoop; # calls InboxIdle->event_step
+        done($self);
+}
+
+no warnings 'once';
+*done = \&PublicInbox::V2Writable::done;
+*with_umask = \&PublicInbox::InboxWritable::with_umask;
+*parallel_init = \&PublicInbox::V2Writable::parallel_init;
+*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
+*sync_prepare = \&PublicInbox::V2Writable::sync_prepare;
+*index_todo = \&PublicInbox::V2Writable::index_todo;
+*count_shards = \&PublicInbox::V2Writable::count_shards;
+*atfork_child = \&PublicInbox::V2Writable::atfork_child;
+*idx_shard = \&PublicInbox::V2Writable::idx_shard;
+*reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint;
+
+1;
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 805076f0..f570a25d 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -24,7 +24,7 @@ sub generate {
 
 sub generate_thread_atom {
         my ($ctx) = @_;
-        my $msgs = $ctx->{msgs} = $ctx->{-inbox}->over->get_thread($ctx->{mid});
+        my $msgs = $ctx->{msgs} = $ctx->{ibx}->over->get_thread($ctx->{mid});
         return _no_thread() unless @$msgs;
         PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
 }
@@ -34,7 +34,7 @@ sub generate_html_index {
         # if the 'r' query parameter is given, it is a legacy permalink
         # which we must continue supporting:
         my $qp = $ctx->{qp};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if ($qp && !$qp->{r} && $ibx->over) {
                 return PublicInbox::View::index_topics($ctx);
         }
@@ -79,8 +79,8 @@ sub _no_thread () {
 
 sub recent_msgs {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
-        my $max = $ibx->{feedmax};
+        my $ibx = $ctx->{ibx};
+        my $max = $ibx->{feedmax} // 25;
         return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over;
 
         # only for rare v1 inboxes which aren't indexed at all
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
index 06e4ea75..62cf5d20 100644
--- a/lib/PublicInbox/Filter/RubyLang.pm
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -16,7 +16,7 @@ sub new {
         my ($class, %opts) = @_;
         my $altid = delete $opts{-altid};
         my $self = $class->SUPER::new(%opts);
-        my $ibx = $self->{-inbox};
+        my $ibx = $self->{ibx};
         # altid = serial:ruby-core:file=msgmap.sqlite3
         if (!$altid && $ibx && $ibx->{altid}) {
                 $altid ||= $ibx->{altid}->[0];
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
new file mode 100644
index 00000000..fe6afef2
--- /dev/null
+++ b/lib/PublicInbox/Gcf2.pm
@@ -0,0 +1,110 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# backend for a git-cat-file-workalike based on libgit2,
+# other libgit2 stuff may go here, too.
+package PublicInbox::Gcf2;
+use strict;
+use PublicInbox::Spawn qw(which popen_rd);
+use Fcntl qw(LOCK_EX);
+use IO::Handle; # autoflush
+my (%CFG, $c_src, $lockfh);
+# Compile-time setup for Inline::C: opens the lock file inside
+# PERL_INLINE_DIRECTORY, asks pkg-config for the libgit2 compiler and
+# linker flags, loads the C source shipped next to this file and takes
+# an exclusive lock for the duration of the build.  Dies when any of
+# these prerequisites is missing.
+BEGIN {
+        # PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
+        # to ~/.cache/public-inbox/inline-c if it exists
+        my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
+                die 'PERL_INLINE_DIRECTORY not defined';
+        my $f = "$inline_dir/.public-inbox.lock";
+        open $lockfh, '>', $f or die "failed to open $f: $!\n";
+        # bail out with a clear message rather than attempting to spawn
+        # an undefined command when pkg-config cannot be found
+        my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
+                die "E: pkg-config missing for libgit2\n";
+        my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
+        my $rdr = {};
+        open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+        for my $x (qw(libgit2)) {
+                my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
+                $l = do { local $/; <$l> };
+                next if $?;
+                my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
+                $c = do { local $/; <$c> };
+                next if $?;
+
+                # note: we name C source files .h to prevent
+                # ExtUtils::MakeMaker from automatically trying to
+                # build them.
+                my $src = "$dir/gcf2_$x.h";
+                if (open(my $fh, '<', $src)) {
+                        chomp($l, $c);
+                        local $/;
+                        defined($c_src = <$fh>) or die "read $src: $!\n";
+                        $CFG{LIBS} = $l;
+                        $CFG{CCFLAGSEX} = $c;
+                        last;
+                } else {
+                        die "E: $src: $!\n";
+                }
+        }
+        die "E: libgit2 not installed\n" unless $c_src;
+
+        # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+        flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+}
+
+# we use Capitalized and ALLCAPS for compatibility with old Inline::C
+use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
+use Inline C => $c_src;
+undef $c_src;
+undef %CFG;
+undef $lockfh;
+
+# Register $objdir with $gcf2, preceded by every absolute path listed
+# in $objdir/info/alternates (if that file can be opened).
+sub add_alt ($$) {
+        my ($gcf2, $objdir) = @_;
+
+        # libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
+        # in Debian) mishandles relative alternates which are nested
+        # several levels deep.  As a workaround, every absolute path is
+        # added explicitly, because
+        # $EXTINDEX_DIR/ALL.git/objects/info/alternates points to
+        # $V2INBOX_DIR/all.git/objects using absolute paths, while
+        # $V2INBOX_DIR/all.git/objects/info/alternates points to
+        # $V2INBOX_DIR/git/$EPOCH.git/objects using relative paths.
+        #
+        # See https://bugs.debian.org/975607
+        if (open(my $fh, '<', "$objdir/info/alternates")) {
+                while (my $line = <$fh>) {
+                        next unless $line =~ m!^/!; # absolute paths only
+                        chomp($line);
+                        $gcf2->add_alternate($line);
+                }
+        }
+        $gcf2->add_alternate($objdir);
+}
+
+# Usage: $^X -MPublicInbox::Gcf2 -e 'PublicInbox::Gcf2::loop()'
+# (see lib/PublicInbox/Gcf2Client.pm)
+# Request loop of the helper process: every line on STDIN has the form
+# "$oid $git_dir".  The object directory of each git_dir is registered
+# the first time it is seen, then the object is written out via
+# cat_oid.  A miss is retried once with a brand new Gcf2 object before
+# "$oid missing" is printed.
+sub loop {
+        my $gcf2 = new();
+        my %seen;
+        STDERR->autoflush(1);
+        STDOUT->autoflush(1);
+
+        while (<STDIN>) {
+                chomp;
+                my ($oid, $git_dir) = split(/ /, $_, 2);
+                add_alt($gcf2, "$git_dir/objects") unless $seen{$git_dir}++;
+                next if $gcf2->cat_oid(1, $oid);
+
+                # retry once if missing.  We only get unabbreviated OIDs
+                # from SQLite or Xapian DBs, here, so malicious clients
+                # can't trigger excessive retries:
+                warn "I: $$ $oid missing, retrying in $git_dir\n";
+
+                # start over with a fresh object which only knows $git_dir
+                $gcf2 = new();
+                %seen = ($git_dir => 1);
+                add_alt($gcf2, "$git_dir/objects");
+
+                if (!$gcf2->cat_oid(1, $oid)) {
+                        warn "W: $$ $oid missing after retry\n";
+                        print "$oid missing\n"; # mimic git-cat-file
+                } else {
+                        warn "I: $$ $oid found after retry\n";
+                }
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
new file mode 100644
index 00000000..ab486de5
--- /dev/null
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -0,0 +1,69 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# connects public-inbox processes to PublicInbox::Gcf2::loop()
+package PublicInbox::Gcf2Client;
+use strict;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Git;
+use PublicInbox::Spawn qw(popen_rd);
+use IO::Handle ();
+use PublicInbox::Syscall qw(EPOLLONESHOT);
+# fields:
+#        async_cat => GitAsyncCat ref (read-only pipe)
+#        sock => writable pipe to Gcf2::loop
+
+
+# Spawn a perl child running PublicInbox::Gcf2::loop() and return the
+# client object wrapping the write end of the pipe which feeds the
+# child's stdin.  $rdr is an optional hashref of redirects handed to
+# popen_rd; its {0} entry is always replaced by our pipe.
+sub new  {
+        my ($rdr) = @_;
+        $rdr //= {};
+        my ($out_r, $out_w);
+        pipe($out_r, $out_w) or die "pipe failed: $!";
+        $rdr->{0} = $out_r;
+        my $self = bless {}, __PACKAGE__;
+        # the child must see the same @INC as we do:
+        my %env = (PERL5LIB => join(':', @INC));
+        my @cmd = ($^X, '-MPublicInbox::Gcf2', '-e',
+                        'PublicInbox::Gcf2::loop()');
+        @$self{qw(in pid)} = popen_rd(\@cmd, \%env, $rdr);
+        if ($^O eq 'linux') {
+                fcntl($out_w, 1031, 4096); # 1031: F_SETPIPE_SZ
+        }
+        $out_w->autoflush(1);
+        $out_w->blocking(0);
+        $self->{inflight} = [];
+        $self->SUPER::new($out_w, EPOLLONESHOT); # detect errors once
+}
+
+# Close our end of the pipe, then let PublicInbox::Git::fail deal with
+# the error arguments.
+sub fail {
+        my ($self, @msg) = @_;
+        $self->close; # PublicInbox::DS::close
+        PublicInbox::Git::fail($self, @msg);
+}
+
+# Send "$req\n" to the Gcf2 child and remember ($req, $cb, $arg) in
+# {inflight}.  When a write buffer exists, one cat_async_step is taken
+# first.  ->fail is called when the write fails and {sock} is gone.
+sub cat_async ($$$;$) {
+        my ($self, $req, $cb, $arg) = @_;
+        my $inflight = $self->{inflight};
+
+        # hopefully {wbuf} is a rare sight:
+        $self->{wbuf} and cat_async_step($self, $inflight);
+
+        unless ($self->write(\"$req\n")) {
+                $self->{sock} or $self->fail("gcf2c write: $!");
+        }
+        push(@$inflight, $req, $cb, $arg);
+}
+
+# ensure PublicInbox::Git::cat_async_step never calls cat_async_retry:
+# this override has an empty body, so it always returns a false value
+sub alternates_changed {}
+
+# this is the write-only end of a pipe, DS->EventLoop will call this
+# Called for the write-only pipe end: flush whatever is buffered, and
+# close once {in} is gone.
+sub event_step {
+        my ($self) = @_;
+        $self->flush_write;
+        !$self->{in} and $self->close; # process died
+}
+
+no warnings 'once';
+
+# used by GitAsyncCat
+*cat_async_step = \&PublicInbox::Git::cat_async_step;
+
+1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a7ba57f9..73dc7d3e 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -12,15 +12,19 @@ use v5.10.1;
 use parent qw(Exporter);
 use POSIX ();
 use IO::Handle; # ->autoflush
-use Errno qw(EINTR);
+use Errno qw(EINTR EAGAIN);
 use File::Glob qw(bsd_glob GLOB_NOSORT);
+use File::Spec ();
 use Time::HiRes qw(stat);
 use PublicInbox::Spawn qw(popen_rd);
 use PublicInbox::Tmpfile;
+use IO::Poll qw(POLLIN);
 use Carp qw(croak);
+use Digest::SHA ();
 our @EXPORT_OK = qw(git_unquote git_quote);
 our $PIPE_BUFSIZ = 65536; # Linux default
 our $in_cleanup;
+our $RDTIMEO = 60_000; # milliseconds
 
 use constant MAX_INFLIGHT =>
         (($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3)
@@ -92,9 +96,9 @@ sub alternates_changed {
 sub last_check_err {
         my ($self) = @_;
         my $fh = $self->{err_c} or return;
-        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+        sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
         defined(sysread($fh, my $buf, -s $fh)) or
-                        fail($self, "sysread failed: $!");
+                        $self->fail("sysread failed: $!");
         $buf;
 }
 
@@ -103,19 +107,19 @@ sub _bidi_pipe {
         if ($self->{$pid}) {
                 if (defined $err) { # "err_c"
                         my $fh = $self->{$err};
-                        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
-                        truncate($fh, 0) or fail($self, "truncate failed: $!");
+                        sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
+                        truncate($fh, 0) or $self->fail("truncate failed: $!");
                 }
                 return;
         }
         my ($out_r, $out_w);
-        pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
+        pipe($out_r, $out_w) or $self->fail("pipe failed: $!");
         my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
                         qw(-c core.abbrev=40 cat-file), $batch);
         my $redir = { 0 => $out_r };
         if ($err) {
                 my $id = "git.$self->{git_dir}$batch.err";
-                my $fh = tmpfile($id) or fail($self, "tmpfile($id): $!");
+                my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!");
                 $self->{$err} = $fh;
                 $redir->{2} = $fh;
         }
@@ -130,6 +134,8 @@ sub _bidi_pipe {
         $self->{$in} = $in_r;
 }
 
+sub poll_in ($) { IO::Poll::_poll($RDTIMEO, fileno($_[0]), my $ev = POLLIN) }
+
 sub my_read ($$$) {
         my ($fh, $rbuf, $len) = @_;
         my $left = $len - length($$rbuf);
@@ -138,9 +144,12 @@ sub my_read ($$$) {
                 $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
                 if ($r) {
                         $left -= $r;
+                } elsif (defined($r)) { # EOF
+                        return 0;
                 } else {
-                        next if (!defined($r) && $! == EINTR);
-                        return $r;
+                        next if ($! == EAGAIN and poll_in($fh));
+                        next if $! == EINTR; # may be set by sysread or poll_in
+                        return; # unrecoverable error
                 }
         }
         \substr($$rbuf, 0, $len, '');
@@ -152,9 +161,15 @@ sub my_readline ($$) {
                 if ((my $n = index($$rbuf, "\n")) >= 0) {
                         return substr($$rbuf, 0, $n + 1, '');
                 }
-                my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
-                next if $r || (!defined($r) && $! == EINTR);
-                return defined($r) ? '' : undef; # EOF or error
+                my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf))
+                                                                and next;
+
+                # return whatever's left on EOF
+                return substr($$rbuf, 0, length($$rbuf)+1, '') if defined($r);
+
+                next if ($! == EAGAIN and poll_in($fh));
+                next if $! == EINTR; # may be set by sysread or poll_in
+                return; # unrecoverable error
         }
 }
 
@@ -172,7 +187,7 @@ sub cat_async_retry ($$$$$) {
         for (my $i = 0; $i < @$inflight; $i += 3) {
                 $buf .= "$inflight->[$i]\n";
         }
-        print { $self->{out} } $buf or fail($self, "write error: $!");
+        print { $self->{out} } $buf or $self->fail("write error: $!");
         unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried
 
         cat_async_step($self, $inflight); # take one step
@@ -185,30 +200,34 @@ sub cat_async_step ($$) {
         my $rbuf = delete($self->{cat_rbuf}) // \(my $new = '');
         my ($bref, $oid, $type, $size);
         my $head = my_readline($self->{in}, $rbuf);
+        # ->fail may be called via Gcf2Client.pm
         if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
                 ($oid, $type, $size) = ($1, $2, $3 + 0);
                 $bref = my_read($self->{in}, $rbuf, $size + 1) or
-                        fail($self, defined($bref) ? 'read EOF' : "read: $!");
-                chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
-        } elsif ($head =~ / missing$/) {
+                        $self->fail(defined($bref) ? 'read EOF' : "read: $!");
+                chop($$bref) eq "\n" or $self->fail('LF missing after blob');
+        } elsif ($head =~ s/ missing\n//s) {
+                $oid = $head;
                 # ref($req) indicates it's already been retried
-                if (!ref($req) && !$in_cleanup && alternates_changed($self)) {
+                # -gcf2 retries internally, so it never hits this path:
+                if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
                         return cat_async_retry($self, $inflight,
                                                 $req, $cb, $arg);
                 }
                 $type = 'missing';
-                $oid = ref($req) ? $$req : $req;
+                $oid = ref($req) ? $$req : $req if $oid eq '';
         } else {
-                fail($self, "Unexpected result from async git cat-file: $head");
+                my $err = $! ? " ($!)" : '';
+                $self->fail("bad result from async cat-file: $head$err");
         }
-        eval { $cb->($bref, $oid, $type, $size, $arg) };
         $self->{cat_rbuf} = $rbuf if $$rbuf ne '';
+        eval { $cb->($bref, $oid, $type, $size, $arg) };
         warn "E: $oid: $@\n" if $@;
 }
 
 sub cat_async_wait ($) {
         my ($self) = @_;
-        my $inflight = delete $self->{inflight} or return;
+        my $inflight = $self->{inflight} or return;
         while (scalar(@$inflight)) {
                 cat_async_step($self, $inflight);
         }
@@ -236,7 +255,7 @@ sub check_async_step ($$) {
         my ($self, $inflight_c) = @_;
         die 'BUG: inflight empty or odd' if scalar(@$inflight_c) < 3;
         my ($req, $cb, $arg) = splice(@$inflight_c, 0, 3);
-        my $rbuf = delete($self->{rbuf_c}) // \(my $new = '');
+        my $rbuf = delete($self->{chk_rbuf}) // \(my $new = '');
         chomp(my $line = my_readline($self->{in_c}, $rbuf));
         my ($hex, $type, $size) = split(/ /, $line);
 
@@ -246,16 +265,16 @@ sub check_async_step ($$) {
         # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
         if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
                 my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
-                fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret;
+                $self->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret;
         }
+        $self->{chk_rbuf} = $rbuf if $$rbuf ne '';
         eval { $cb->($hex, $type, $size, $arg, $self) };
         warn "E: check($req) $@\n" if $@;
-        $self->{rbuf_c} = $rbuf if $$rbuf ne '';
 }
 
 sub check_async_wait ($) {
         my ($self) = @_;
-        my $inflight_c = delete $self->{inflight_c} or return;
+        my $inflight_c = $self->{inflight_c} or return;
         while (scalar(@$inflight_c)) {
                 check_async_step($self, $inflight_c);
         }
@@ -272,10 +291,10 @@ sub check_async_begin ($) {
 sub check_async ($$$$) {
         my ($self, $oid, $cb, $arg) = @_;
         my $inflight_c = $self->{inflight_c} // check_async_begin($self);
-        if (scalar(@$inflight_c) >= MAX_INFLIGHT) {
+        while (scalar(@$inflight_c) >= MAX_INFLIGHT) {
                 check_async_step($self, $inflight_c);
         }
-        print { $self->{out_c} } $oid, "\n" or fail($self, "write error: $!");
+        print { $self->{out_c} } $oid, "\n" or $self->fail("write error: $!");
         push(@$inflight_c, $oid, $cb, $arg);
 }
 
@@ -302,10 +321,12 @@ sub check {
 
 sub _destroy {
         my ($self, $rbuf, $in, $out, $pid, $err) = @_;
-        my $p = delete $self->{$pid} or return;
         delete @$self{($rbuf, $in, $out)};
         delete $self->{$err} if $err; # `err_c'
 
+        # GitAsyncCat::event_step may delete {pid}
+        my $p = delete $self->{$pid} or return;
+
         # PublicInbox::DS may not be loaded
         eval { PublicInbox::DS::dwaitpid($p, undef, undef) };
         waitpid($p, 0) if $@; # wait synchronously if not in event loop
@@ -313,14 +334,23 @@ sub _destroy {
 
 sub cat_async_abort ($) {
         my ($self) = @_;
-        my $inflight = delete $self->{inflight} or die 'BUG: not in async';
+        if (my $inflight = $self->{inflight}) {
+                while (@$inflight) {
+                        my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
+                        $req =~ s/ .*//; # drop git_dir for Gcf2Client
+                        eval { $cb->(undef, $req, undef, undef, $arg) };
+                        warn "E: $req: $@ (in abort)\n" if $@;
+                }
+                delete $self->{cat_rbuf};
+                delete $self->{inflight};
+        }
         cleanup($self);
 }
 
-sub fail {
+sub fail { # may be augmented in subclasses
         my ($self, $msg) = @_;
-        $self->{inflight} ? cat_async_abort($self) : cleanup($self);
-        croak("git $self->{git_dir}: $msg");
+        cat_async_abort($self);
+        croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
 }
 
 sub popen {
@@ -332,10 +362,19 @@ sub popen {
 sub qx {
         my ($self, @cmd) = @_;
         my $fh = $self->popen(@cmd);
-        local $/ = "\n";
-        return <$fh> if wantarray;
-        local $/;
-        <$fh>
+        local $/ = wantarray ? "\n" : undef;
+        <$fh>;
+}
+
+# check_async and cat_async may trigger the other, so ensure they're
+# both completely done by using this:
+sub async_wait_all ($) {
+        my ($self) = @_;
+        while (scalar(@{$self->{inflight_c} // []}) ||
+                        scalar(@{$self->{inflight} // []})) {
+                $self->check_async_wait;
+                $self->cat_async_wait;
+        }
 }
 
 # returns true if there are pending "git cat-file" processes
@@ -343,13 +382,15 @@ sub cleanup {
         my ($self) = @_;
         local $in_cleanup = 1;
         delete $self->{async_cat};
-        check_async_wait($self);
-        cat_async_wait($self);
+        async_wait_all($self);
+        delete $self->{inflight};
+        delete $self->{inflight_c};
         _destroy($self, qw(cat_rbuf in out pid));
         _destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
         !!($self->{pid} || $self->{pid_c});
 }
 
+
 # assuming a well-maintained repo, this should be a somewhat
 # accurate estimation of its size
 # TODO: show this in the WWW UI as a hint to potential cloners
@@ -394,8 +435,8 @@ sub pub_urls {
 
 sub cat_async_begin {
         my ($self) = @_;
-        cleanup($self) if alternates_changed($self);
-        batch_prepare($self);
+        cleanup($self) if $self->alternates_changed;
+        $self->batch_prepare;
         die 'BUG: already in async' if $self->{inflight};
         $self->{inflight} = [];
 }
@@ -403,24 +444,21 @@ sub cat_async_begin {
 sub cat_async ($$$;$) {
         my ($self, $oid, $cb, $arg) = @_;
         my $inflight = $self->{inflight} // cat_async_begin($self);
-        if (scalar(@$inflight) >= MAX_INFLIGHT) {
+        while (scalar(@$inflight) >= MAX_INFLIGHT) {
                 cat_async_step($self, $inflight);
         }
-
-        print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
+        print { $self->{out} } $oid, "\n" or $self->fail("write error: $!");
         push(@$inflight, $oid, $cb, $arg);
 }
 
-# this is safe to call inside $cb, but not guaranteed to enqueue
-# returns true if successful, undef if not.
 sub async_prefetch {
         my ($self, $oid, $cb, $arg) = @_;
-        if (defined($self->{async_cat}) && (my $inflight = $self->{inflight})) {
+        if (my $inflight = $self->{inflight}) {
                 # we could use MAX_INFLIGHT here w/o the halving,
                 # but lets not allow one client to monopolize a git process
                 if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) {
                         print { $self->{out} } $oid, "\n" or
-                                                fail($self, "write error: $!");
+                                                $self->fail("write error: $!");
                         return push(@$inflight, $oid, $cb, $arg);
                 }
         }
@@ -451,6 +489,57 @@ sub modified ($) {
         $modified || time;
 }
 
+# for grokmirror, which doesn't read gitweb.description
+# templates/hooks--update.sample and git-multimail in git.git
+# only match "Unnamed repository", not the full contents of
+# templates/this--description in git.git
+sub manifest_entry {
+        my ($self, $epoch, $default_desc) = @_;
+        my ($fh, $pid) = $self->popen('show-ref');
+        my $dig = Digest::SHA->new(1);
+        while (read($fh, my $buf, 65536)) {
+                $dig->add($buf);
+        }
+        close $fh;
+        waitpid($pid, 0);
+        return if $?; # empty, uninitialized git repo
+        my $git_dir = $self->{git_dir};
+        my $ent = {
+                fingerprint => $dig->hexdigest,
+                reference => undef,
+                modified => modified($self),
+        };
+        chomp(my $owner = $self->qx('config', 'gitweb.owner'));
+        utf8::decode($owner);
+        $ent->{owner} = $owner eq '' ? undef : $owner;
+        my $desc = '';
+        if (open($fh, '<', "$git_dir/description")) {
+                local $/ = "\n";
+                chomp($desc = <$fh>);
+                utf8::decode($desc);
+        }
+        $desc = 'Unnamed repository' if $desc eq '';
+        if (defined $epoch && $desc =~ /\AUnnamed repository/) {
+                $desc = "$default_desc [epoch $epoch]";
+        }
+        $ent->{description} = $desc;
+        if (open($fh, '<', "$git_dir/objects/info/alternates")) {
+                # n.b.: GitPython doesn't seem to handle comments or C-quoted
+                # strings like native git does; and we don't for now, either.
+                local $/ = "\n";
+                chomp(my @alt = <$fh>);
+
+                # grokmirror only supports 1 alternate for "reference",
+                if (scalar(@alt) == 1) {
+                        my $objdir = "$git_dir/objects";
+                        my $ref = File::Spec->rel2abs($alt[0], $objdir);
+                        $ref =~ s!/[^/]+/?\z!!; # drop last path component
+                        $ent->{reference} = $ref;
+                }
+        }
+        $ent;
+}
+
 1;
 __END__
 =pod
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 5f785df7..dc97af16 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -3,40 +3,92 @@
 #
 # internal class used by PublicInbox::Git + PublicInbox::DS
 # This parses the output pipe of "git cat-file --batch"
-#
-# Note: this does NOT set the non-blocking flag, we expect `git cat-file'
-# to be a local process, and git won't start writing a blob until it's
-# fully read.  So minimize context switching and read as much as possible
-# and avoid holding a buffer in our heap any longer than it has to live.
 package PublicInbox::GitAsyncCat;
 use strict;
 use parent qw(PublicInbox::DS Exporter);
+use POSIX qw(WNOHANG);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat);
-
-sub _add {
-        my ($class, $git) = @_;
-        $git->batch_prepare;
-        my $self = bless { git => $git }, $class;
-        $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
-        \undef; # this is a true ref()
+our @EXPORT = qw(git_async_cat git_async_prefetch);
+use PublicInbox::Git ();
+
+our $GCF2C; # singleton PublicInbox::Gcf2Client
+
+sub close {
+        my ($self) = @_;
+
+        if (my $gitish = delete $self->{gitish}) {
+                PublicInbox::Git::cat_async_abort($gitish);
+        }
+        $self->SUPER::close; # PublicInbox::DS::close
 }
 
 sub event_step {
         my ($self) = @_;
-        my $git = $self->{git};
-        return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
-        my $inflight = $git->{inflight};
+        my $gitish = $self->{gitish} or return;
+        return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1);
+        my $inflight = $gitish->{inflight};
         if ($inflight && @$inflight) {
-                $git->cat_async_step($inflight);
-                $self->requeue if @$inflight || exists $git->{cat_rbuf};
+                $gitish->cat_async_step($inflight);
+
+                # child death?
+                if (($gitish->{in} // 0) != ($self->{sock} // 1)) {
+                        $self->close;
+                } elsif (@$inflight || exists $gitish->{cat_rbuf}) {
+                        # ok, more to do, requeue for fairness
+                        $self->requeue;
+                }
+        } elsif ((my $pid = waitpid($gitish->{pid}, WNOHANG)) > 0) {
+                # May happen if the child process is killed by a BOFH
+                # (or segfaults)
+                delete $gitish->{pid};
+                warn "E: gitish $pid exited with \$?=$?\n";
+                $self->close;
         }
 }
 
 sub git_async_cat ($$$$) {
         my ($git, $oid, $cb, $arg) = @_;
-        $git->cat_async($oid, $cb, $arg);
-        $git->{async_cat} //= _add(__PACKAGE__, $git);
+        my $gitish = $GCF2C //= eval {
+                require PublicInbox::Gcf2;
+                require PublicInbox::Gcf2Client;
+                PublicInbox::Gcf2Client::new();
+        } // 0; # 0: do not retry if libgit2 or Inline::C are missing
+        if ($gitish) { # Gcf2 active, {inflight} may be unset due to errors
+                $GCF2C->{inflight} or
+                        $gitish = $GCF2C = PublicInbox::Gcf2Client::new();
+                $oid .= " $git->{git_dir}";
+        } else {
+                $gitish = $git;
+        }
+        $gitish->cat_async($oid, $cb, $arg);
+        $gitish->{async_cat} //= do {
+                # read-only end of pipe (Gcf2Client is write-only end)
+                my $self = bless { gitish => $gitish }, __PACKAGE__;
+                $gitish->{in}->blocking(0);
+                $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
+                \undef; # this is a true ref()
+        };
+}
+
+# this is safe to call inside $cb, but not guaranteed to enqueue
+# returns true if successful, undef if not.
+sub git_async_prefetch {
+        my ($git, $oid, $cb, $arg) = @_;
+        if ($GCF2C) {
+                if ($GCF2C->{async_cat} && !$GCF2C->{wbuf}) {
+                        $oid .= " $git->{git_dir}";
+                        return $GCF2C->cat_async($oid, $cb, $arg);
+                }
+        } elsif ($git->{async_cat} && (my $inflight = $git->{inflight})) {
+                # we could use MAX_INFLIGHT here w/o the halving,
+                # but let's not allow one client to monopolize a git process
+                if (@$inflight < int(PublicInbox::Git::MAX_INFLIGHT/2)) {
+                        print { $git->{out} } $oid, "\n" or
+                                                $git->fail("write error: $!");
+                        return push(@$inflight, $oid, $cb, $arg);
+                }
+        }
+        undef;
 }
 
 1;
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 20030433..5f701673 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -169,7 +169,7 @@ sub async_blob_cb { # git->cat_async callback
         if (!defined($oid)) {
                 # it's possible to have TOCTOU if an admin runs
                 # public-inbox-(edit|purge), just move onto the next message
-                warn "E: $smsg->{blob} missing in $self->{-inbox}->{inboxdir}\n";
+                warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
                 return $http->next_step($self->can('async_next'));
         }
         $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
@@ -180,7 +180,7 @@ sub async_blob_cb { # git->cat_async callback
 
 sub smsg_blob {
         my ($self, $smsg) = @_;
-        git_async_cat($self->{-inbox}->git, $smsg->{blob},
+        git_async_cat($self->{ibx}->git, $smsg->{blob},
                         \&async_blob_cb, $self);
 }
 
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index c9a024d6..2af5ab0c 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -627,7 +627,7 @@ sub fetch_blob_cb { # called by git->cat_async via git_async_cat
         }
         my $pre;
         if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
-                $pre = $ibx->git->async_prefetch($nxt->{blob},
+                $pre = git_async_prefetch($ibx->git, $nxt->{blob},
                                                 \&fetch_blob_cb, $fetch_arg);
         }
         fetch_run_ops($self, $smsg, $bref, $ops, $partial);
@@ -1110,7 +1110,7 @@ sub search_uid_range { # long_response
         1; # more
 }
 
-sub parse_query ($$) {
+sub parse_imap_query ($$) {
         my ($self, $query) = @_;
         my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
         if (ref($q)) {
@@ -1122,37 +1122,10 @@ sub parse_query ($$) {
         $q;
 }
 
-sub refill_xap ($$$$) {
-        my ($self, $uids, $range_info, $q) = @_;
-        my ($beg, $end) = @$range_info;
-        my $srch = $self->{ibx}->search;
-        my $opt = { mset => 2, limit => 1000 };
-        my $mset = $srch->mset("$q uid:$beg..$end", $opt);
-        @$uids = @{$srch->mset_to_artnums($mset)};
-        if (@$uids) {
-                $range_info->[0] = $uids->[-1] + 1; # update $beg
-                return; # possibly more
-        }
-        0; # all done
-}
-
-sub search_xap_range { # long_response
-        my ($self, $tag, $q, $range_info, $want_msn) = @_;
-        my $uids = [];
-        if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
-                $err ||= 'OK Search done';
-                $self->write("\r\n$tag $err\r\n");
-                return;
-        }
-        msn_convert($self, $uids) if $want_msn;
-        $self->msg_more(join(' ', '', @$uids));
-        1; # more
-}
-
 sub search_common {
         my ($self, $tag, $query, $want_msn) = @_;
         my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
-        my $q = parse_query($self, $query);
+        my $q = parse_imap_query($self, $query);
         return "$tag $q\r\n" if !ref($q);
         my ($sql, $range_info) = delete @$q{qw(sql range_info)};
         if (!scalar(keys %$q)) { # overview.sqlite3
@@ -1160,11 +1133,17 @@ sub search_common {
                 long_response($self, \&search_uid_range,
                                 $tag, $sql, $range_info, $want_msn);
         } elsif ($q = $q->{xap}) {
-                $self->{ibx}->search or
+                my $srch = $self->{ibx}->isrch or
                         return "$tag BAD search not available for mailbox\r\n";
-                $self->msg_more('* SEARCH');
-                long_response($self, \&search_xap_range,
-                                $tag, $q, $range_info, $want_msn);
+                my $opt = {
+                        relevance => -1,
+                        limit => UID_SLICE,
+                        uid_range => $range_info
+                };
+                my $mset = $srch->mset($q, $opt);
+                my $uids = $srch->mset_to_artnums($mset, $opt);
+                msn_convert($self, $uids) if $want_msn;
+                "* SEARCH @$uids\r\n$tag OK Search done\r\n";
         } else {
                 "$tag BAD Error\r\n";
         }
diff --git a/lib/PublicInbox/IMAPD.pm b/lib/PublicInbox/IMAPD.pm
index 3c211ee1..fb945847 100644
--- a/lib/PublicInbox/IMAPD.pm
+++ b/lib/PublicInbox/IMAPD.pm
@@ -19,33 +19,34 @@ sub new {
                 err => \*STDERR,
                 out => \*STDOUT,
                 # accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
-                # pi_config => PublicInbox::Config
+                # pi_cfg => PublicInbox::Config
                 # idler => PublicInbox::InboxIdle
         }, $class;
 }
 
-sub imapd_refresh_ibx { # pi_config->each_inbox cb
+sub imapd_refresh_ibx { # pi_cfg->each_inbox cb
         my ($ibx, $imapd) = @_;
         my $ngname = $ibx->{newsgroup} or return;
-        if (ref $ngname) {
-                warn 'multiple newsgroups not supported: '.
-                        join(', ', @$ngname). "\n";
-                return;
-        } elsif ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
-                 $ngname =~ /\.[0-9]+\z/) {
+
+        # We require lower-case since IMAP mailbox names are
+        # case-insensitive (but -nntpd matches INN in being
+        # case-sensitive)
+        if ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
+                        # don't confuse with 50K slices
+                        $ngname =~ /\.[0-9]+\z/) {
                 warn "mailbox name invalid: newsgroup=`$ngname'\n";
                 return;
         }
         $ibx->over or return;
         $ibx->{over} = undef;
-        my $mm = $ibx->mm or return;
-        $ibx->{mm} = undef;
 
         # RFC 3501 2.3.1.1 -  "A good UIDVALIDITY value to use in
         # this case is a 32-bit representation of the creation
         # date/time of the mailbox"
-        defined($ibx->{uidvalidity} = $mm->created_at) or return;
-        PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max // 0);
+        eval { $ibx->uidvalidity };
+        my $mm = delete($ibx->{mm}) or return;
+        defined($ibx->{uidvalidity}) or return;
+        PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max);
 
         # preload to avoid fragmentation:
         $ibx->description;
@@ -59,7 +60,7 @@ sub imapd_refresh_ibx { # pi_config->each_inbox cb
 }
 
 sub imapd_refresh_finalize {
-        my ($imapd, $pi_config) = @_;
+        my ($imapd, $pi_cfg) = @_;
         my $mailboxes;
         if (my $next = delete $imapd->{imapd_next}) {
                 $imapd->{mailboxes} = delete $next->{mailboxes};
@@ -77,40 +78,40 @@ sub imapd_refresh_finalize {
                         qq[* LIST (\\Has${no}Children) "." $u\r\n]
                 } keys %$mailboxes
         ];
-        $imapd->{pi_config} = $pi_config;
+        $imapd->{pi_cfg} = $pi_cfg;
         if (my $idler = $imapd->{idler}) {
-                $idler->refresh($pi_config);
+                $idler->refresh($pi_cfg);
         }
 }
 
-sub imapd_refresh_step { # pi_config->iterate_start cb
-        my ($pi_config, $section, $imapd) = @_;
+sub imapd_refresh_step { # pi_cfg->iterate_start cb
+        my ($pi_cfg, $section, $imapd) = @_;
         if (defined($section)) {
                 return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
-                my $ibx = $pi_config->lookup_name($1) or return;
+                my $ibx = $pi_cfg->lookup_name($1) or return;
                 imapd_refresh_ibx($ibx, $imapd->{imapd_next});
         } else { # undef == "EOF"
-                imapd_refresh_finalize($imapd, $pi_config);
+                imapd_refresh_finalize($imapd, $pi_cfg);
         }
 }
 
 sub refresh_groups {
         my ($self, $sig) = @_;
-        my $pi_config = PublicInbox::Config->new;
+        my $pi_cfg = PublicInbox::Config->new;
         if ($sig) { # SIGHUP is handled through the event loop
                 $self->{imapd_next} = { dummies => {}, mailboxes => {} };
-                my $iter = PublicInbox::ConfigIter->new($pi_config,
+                my $iter = PublicInbox::ConfigIter->new($pi_cfg,
                                                 \&imapd_refresh_step, $self);
                 $iter->event_step;
         } else { # initial start is synchronous
                 $self->{dummies} = {};
-                $pi_config->each_inbox(\&imapd_refresh_ibx, $self);
-                imapd_refresh_finalize($self, $pi_config);
+                $pi_cfg->each_inbox(\&imapd_refresh_ibx, $self);
+                imapd_refresh_finalize($self, $pi_cfg);
         }
 }
 
 sub idler_start {
-        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
 }
 
 1;
diff --git a/lib/PublicInbox/IdxStack.pm b/lib/PublicInbox/IdxStack.pm
index ce75b46a..c55c5c36 100644
--- a/lib/PublicInbox/IdxStack.pm
+++ b/lib/PublicInbox/IdxStack.pm
@@ -6,19 +6,27 @@ package PublicInbox::IdxStack;
 use v5.10.1;
 use strict;
 use Fcntl qw(:seek);
-use constant FMT => eval { pack('Q', 1) } ? 'A1QQH*' : 'A1IIH*';
+use constant PACK_FMT => eval { pack('Q', 1) } ? 'A1QQH*H*' : 'A1IIH*H*';
 
 # start off in write-only mode
 sub new {
         open(my $io, '+>', undef) or die "open: $!";
+        # latest_cmt is still useful when the newest revision is a `d'(elete),
+        # otherwise we favor $sync->{latest_cmt} for checkpoints and {quit}
         bless { wr => $io, latest_cmt => $_[1] }, __PACKAGE__
 }
 
 # file_char = [d|m]
 sub push_rec {
-        my ($self, $file_char, $at, $ct, $blob_oid) = @_;
-        my $rec = pack(FMT, $file_char, $at, $ct, $blob_oid);
-        $self->{rec_size} //= length($rec);
+        my ($self, $file_char, $at, $ct, $blob_oid, $cmt_oid) = @_;
+        my $rec = pack(PACK_FMT, $file_char, $at, $ct, $blob_oid, $cmt_oid);
+        $self->{unpack_fmt} //= do {
+                my $len = length($cmt_oid);
+                my $fmt = PACK_FMT;
+                $fmt =~ s/H\*/H$len/g;
+                $self->{rec_size} = length($rec);
+                $fmt;
+        };
         print { $self->{wr} } $rec or die "print: $!";
         $self->{tot_size} += length($rec);
 }
@@ -46,7 +54,7 @@ sub pop_rec {
         my $r = read($io, my $buf, $sz);
         defined($r) or die "read: $!";
         $r == $sz or die "read($r != $sz)";
-        unpack(FMT, $buf);
+        unpack($self->{unpack_fmt}, $buf);
 }
 
 1;
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 2cb4896a..e0a84bfd 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -48,7 +48,7 @@ sub gfi_start {
 
         return ($self->{in}, $self->{out}) if $self->{pid};
 
-        my (@ret, $out_r, $out_w);
+        my ($in_r, $pid, $out_r, $out_w);
         pipe($out_r, $out_w) or die "pipe failed: $!";
 
         $self->lock_acquire;
@@ -56,27 +56,28 @@ sub gfi_start {
                 my ($git, $ref) = @$self{qw(git ref)};
                 local $/ = "\n";
                 chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
+                die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?;
                 if ($self->{path_type} ne '2/38' && $self->{tip}) {
                         local $/ = "\0";
                         my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
+                        die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?;
                         chomp @t;
                         $self->{-tree} = { map { $_ => 1 } @t };
                 }
                 my @cmd = ('git', "--git-dir=$git->{git_dir}",
                         qw(fast-import --quiet --done --date-format=raw));
-                my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
+                ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
                 $out_w->autoflush(1);
                 $self->{in} = $in_r;
                 $self->{out} = $out_w;
                 $self->{pid} = $pid;
                 $self->{nchg} = 0;
-                @ret = ($in_r, $out_w);
         };
         if ($@) {
                 $self->lock_release;
                 die $@;
         }
-        @ret;
+        ($in_r, $out_w);
 }
 
 sub wfail () { die "write to fast-import failed: $!" }
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index e9efd29d..af6380a7 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -4,10 +4,10 @@
 # Represents a public-inbox (which may have multiple mailing addresses)
 package PublicInbox::Inbox;
 use strict;
-use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
 use PublicInbox::Eml;
+use List::Util qw(max);
 
 # Long-running "git-cat-file --batch" processes won't notice
 # unlinked packs, so we need to restart those processes occasionally.
@@ -74,18 +74,8 @@ sub _cleanup_later ($) {
         $CLEANUP->{"$self"} = $self;
 }
 
-sub _set_uint ($$$) {
-        my ($opts, $field, $default) = @_;
-        my $val = $opts->{$field};
-        if (defined $val) {
-                $val = $val->[-1] if ref($val) eq 'ARRAY';
-                $val = undef if $val !~ /\A[0-9]+\z/;
-        }
-        $opts->{$field} = $val || $default;
-}
-
 sub _set_limiter ($$$) {
-        my ($self, $pi_config, $pfx) = @_;
+        my ($self, $pi_cfg, $pfx) = @_;
         my $lkey = "-${pfx}_limiter";
         $self->{$lkey} ||= do {
                 # full key is: publicinbox.$NAME.httpbackendmax
@@ -96,7 +86,7 @@ sub _set_limiter ($$$) {
                         require PublicInbox::Qspawn;
                         $lim = PublicInbox::Qspawn::Limiter->new($val);
                 } elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
-                        $lim = $pi_config->limiter($val);
+                        $lim = $pi_cfg->limiter($val);
                         warn "$mkey limiter=$val not found\n" if !$lim;
                 } else {
                         warn "$mkey limiter=$val not understood\n";
@@ -110,14 +100,15 @@ sub new {
         my $v = $opts->{address} ||= [ 'public-inbox@example.com' ];
         my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
         $opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
-        my $pi_config = delete $opts->{-pi_config};
-        _set_limiter($opts, $pi_config, 'httpbackend');
-        _set_uint($opts, 'feedmax', 25);
-        $opts->{nntpserver} ||= $pi_config->{'publicinbox.nntpserver'};
-        my $dir = $opts->{inboxdir};
-        if (defined $dir && -f "$dir/inbox.lock") {
-                $opts->{version} = 2;
+        my $pi_cfg = delete $opts->{-pi_cfg};
+        _set_limiter($opts, $pi_cfg, 'httpbackend');
+        my $fmax = $opts->{feedmax};
+        if (defined($fmax) && $fmax =~ /\A[0-9]+\z/) {
+                $opts->{feedmax} += 0;
+        } else {
+                delete $opts->{feedmax};
         }
+        $opts->{nntpserver} ||= $pi_cfg->{'publicinbox.nntpserver'};
 
         # allow any combination of multi-line or comma-delimited hide entries
         my $hide = {};
@@ -130,16 +121,18 @@ sub new {
         bless $opts, $class;
 }
 
-sub version { $_[0]->{version} // 1 }
+sub version {
+        $_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1
+}
 
 sub git_epoch {
-        my ($self, $epoch) = @_;
-        $self->version == 2 or return;
+        my ($self, $epoch) = @_; # v2-only, callers always supply $epoch
         $self->{"$epoch.git"} ||= do {
                 my $git_dir = "$self->{inboxdir}/git/$epoch.git";
+                return unless -d $git_dir;
                 my $g = PublicInbox::Git->new($git_dir);
                 $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
-                # no cleanup needed, we never cat-file off this, only clone
+                # caller must manually cleanup when done
                 $g;
         };
 }
@@ -160,19 +153,15 @@ sub max_git_epoch {
         my ($self) = @_;
         return if $self->version < 2;
         my $cur = $self->{-max_git_epoch};
-        my $changed = git($self)->alternates_changed;
-        if (!defined($cur) || $changed) {
+        my $changed;
+        if (!defined($cur) || ($changed = git($self)->alternates_changed)) {
                 git_cleanup($self) if $changed;
                 my $gits = "$self->{inboxdir}/git";
                 if (opendir my $dh, $gits) {
-                        my $max = -1;
-                        while (defined(my $git_dir = readdir($dh))) {
-                                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
-                                $max = $1 if $1 > $max;
-                        }
-                        $cur = $self->{-max_git_epoch} = $max if $max >= 0;
-                } else {
-                        warn "opendir $gits failed: $!\n";
+                        my $max = max(map {
+                                substr($_, 0, -4) + 0; # drop ".git" suffix
+                        } grep(/\A[0-9]+\.git\z/, readdir($dh))) // return;
+                        $cur = $self->{-max_git_epoch} = $max;
                 }
         }
         $cur;
@@ -191,50 +180,54 @@ sub mm {
         };
 }
 
-sub search ($;$$) {
-        my ($self, $over_only, $ctx) = @_;
-        my $srch = $self->{search} ||= eval {
+sub search {
+        my ($self) = @_;
+        my $srch = $self->{search} //= eval {
                 _cleanup_later($self);
                 require PublicInbox::Search;
                 PublicInbox::Search->new($self);
         };
-        ($over_only || eval { $srch->xdb }) ? $srch : do {
-                $ctx and $ctx->{env}->{'psgi.errors'}->print(<<EOF);
-`$self->{name}' search went away unexpectedly
-EOF
-                undef;
-        };
+        (eval { $srch->xdb }) ? $srch : undef;
 }
 
+# isrch is preferred for read-only interfaces if available since it
+# reduces kernel cache and FD overhead
+sub isrch { $_[0]->{isrch} // search($_[0]) } # else per-inbox search()
+
 sub over {
         $_[0]->{over} //= eval {
-                my $srch = search($_[0], 1) or return;
+                my $srch = $_[0]->{search} //= eval {
+                        _cleanup_later($_[0]);
+                        require PublicInbox::Search;
+                        PublicInbox::Search->new($_[0]);
+                };
                 my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
                 $over->dbh; # may fail
                 $over;
         };
 }
 
+
 sub try_cat {
         my ($path) = @_;
-        my $rv = '';
-        if (open(my $fh, '<', $path)) {
-                local $/;
-                $rv = <$fh>;
-        }
-        $rv;
+        open(my $fh, '<', $path) or return '';
+        local $/;
+        <$fh> // '';
+}
+
+sub cat_desc ($) { # ($path): single-line description text, or undef
+        my $desc = try_cat($_[0]); # '' if the file cannot be opened
+        local $/ = "\n"; # chomp strips $/, so pin it to a newline
+        chomp $desc;
+        utf8::decode($desc); # in-place, bytes => characters
+        $desc =~ s/\s+/ /smg; # collapse whitespace runs, incl. newlines
+        $desc eq '' ? undef : $desc; # empty/missing => undef
 }
 
 sub description {
         my ($self) = @_;
-        ($self->{description} //= do {
-                my $desc = try_cat("$self->{inboxdir}/description");
-                local $/ = "\n";
-                chomp $desc;
-                utf8::decode($desc);
-                $desc =~ s/\s+/ /smg;
-                $desc eq '' ? undef : $desc;
-        }) // '($INBOX_DIR/description missing)';
+        ($self->{description} //= cat_desc("$self->{inboxdir}/description")) //
+                '($INBOX_DIR/description missing)';
 }
 
 sub cloneurl {
@@ -331,7 +324,7 @@ sub msg_by_smsg ($$) {
         return unless defined $smsg;
         defined(my $blob = $smsg->{blob}) or return;
 
-        git($self)->cat_file($blob);
+        $self->git->cat_file($blob);
 }
 
 sub smsg_eml {
@@ -342,39 +335,35 @@ sub smsg_eml {
         $eml;
 }
 
-sub mid2num($$) {
-        my ($self, $mid) = @_;
-        my $mm = mm($self) or return;
-        $mm->num_for($mid);
-}
-
 sub smsg_by_mid ($$) {
         my ($self, $mid) = @_;
-        my $over = over($self) or return;
-        # favor the Message-ID we used for the NNTP article number:
-        defined(my $num = mid2num($self, $mid)) or return;
-        my $smsg = $over->get_art($num) or return;
-        PublicInbox::Smsg::psgi_cull($smsg);
+        my $over = $self->over or return;
+        my $smsg;
+        if (my $mm = $self->mm) {
+                # favor the Message-ID we used for the NNTP article number:
+                defined(my $num = $mm->num_for($mid)) or return;
+                $smsg = $over->get_art($num);
+        } else {
+                my ($id, $prev);
+                $smsg = $over->next_by_mid($mid, \$id, \$prev);
+        }
+        $smsg ? PublicInbox::Smsg::psgi_cull($smsg) : undef;
 }
 
 sub msg_by_mid ($$) {
         my ($self, $mid) = @_;
-
-        over($self) or
-                return msg_by_path($self, mid2path($mid));
-
         my $smsg = smsg_by_mid($self, $mid);
-        $smsg ? msg_by_smsg($self, $smsg) : undef;
+        $smsg ? msg_by_smsg($self, $smsg) : msg_by_path($self, mid2path($mid));
 }
 
 sub recent {
         my ($self, $opts, $after, $before) = @_;
-        over($self)->recent($opts, $after, $before);
+        $self->over->recent($opts, $after, $before);
 }
 
 sub modified {
         my ($self) = @_;
-        if (my $over = over($self)) {
+        if (my $over = $self->over) {
                 my $msgs = $over->recent({limit => 1});
                 if (my $smsg = $msgs->[0]) {
                         return $smsg->{ts};
@@ -428,4 +417,8 @@ sub on_unlock {
         }
 }
 
+sub uidvalidity { $_[0]->{uidvalidity} //= eval { $_[0]->mm->created_at } }
+
+sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
+
 1;
diff --git a/lib/PublicInbox/InboxIdle.pm b/lib/PublicInbox/InboxIdle.pm
index 60948bea..35aed696 100644
--- a/lib/PublicInbox/InboxIdle.pm
+++ b/lib/PublicInbox/InboxIdle.pm
@@ -2,13 +2,11 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # fields:
-# pi_config: PublicInbox::Config ref
 # inot: Linux::Inotify2-like object
 # pathmap => { inboxdir => [ ibx, watch1, watch2, watch3... ] } mapping
 package PublicInbox::InboxIdle;
 use strict;
 use parent qw(PublicInbox::DS);
-use Cwd qw(abs_path);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
 my $IN_MODIFY = 0x02; # match Linux inotify
 my $ino_cls;
@@ -23,11 +21,7 @@ require PublicInbox::In2Tie if $ino_cls;
 
 sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
         my ($ibx, $self) = @_;
-        my $dir = abs_path($ibx->{inboxdir});
-        if (!defined($dir)) {
-                warn "W: $ibx->{inboxdir} not watched: $!\n";
-                return;
-        }
+        my $dir = $ibx->{inboxdir};
         my $inot = $self->{inot};
         my $cur = $self->{pathmap}->{$dir} //= [];
         my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
@@ -65,12 +59,15 @@ I: consider increasing /proc/sys/fs/inotify/max_user_watches
 }
 
 sub refresh {
-        my ($self, $pi_config) = @_;
-        $pi_config->each_inbox(\&in2_arm, $self);
+        my ($self, $pi_cfg) = @_;
+        $pi_cfg->each_inbox(\&in2_arm, $self);
 }
 
+# internal API for ease-of-use
+sub watch_inbox { in2_arm($_[1], $_[0]) }; # ($self, $ibx), args swapped
+
 sub new {
-        my ($class, $pi_config) = @_;
+        my ($class, $pi_cfg) = @_;
         my $self = bless {}, $class;
         my $inot;
         if ($ino_cls) {
@@ -84,7 +81,7 @@ sub new {
         $self->{inot} = $inot;
         $self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
         $self->{on_unlock} = {}; # lock path => ibx
-        refresh($self, $pi_config);
+        refresh($self, $pi_cfg) if $pi_cfg;
         PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
         $self;
 }
@@ -95,7 +92,8 @@ sub event_step {
                 my @events = $self->{inot}->read; # Linux::Inotify2::read
                 my $on_unlock = $self->{on_unlock};
                 for my $ev (@events) {
-                        if (my $ibx = $on_unlock->{$ev->fullname}) {
+                        my $fn = $ev->fullname // next; # cancelled
+                        if (my $ibx = $on_unlock->{$fn}) {
                                 $ibx->on_unlock;
                         }
                 }
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 752f1997..b1d5caf5 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -46,12 +46,13 @@ sub _init_v1 {
                 require PublicInbox::Msgmap;
                 my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
                 $sidx->begin_txn_lazy;
+                my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
                 if (defined $skip_artnum) {
-                        my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
                         $mm->{dbh}->begin_work;
                         $mm->skip_artnum($skip_artnum);
                         $mm->{dbh}->commit;
                 }
+                undef $mm; # ->created_at set
                 $sidx->commit_txn_lazy;
         } else {
                 open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
@@ -64,7 +65,6 @@ sub init_inbox {
         if ($self->version == 1) {
                 my $dir = assert_usable_dir($self);
                 PublicInbox::Import::init_bare($dir);
-                $self->umask_prepare;
                 $self->with_umask(\&_init_v1, $self, $skip_artnum);
         } else {
                 my $v2w = importer($self);
@@ -102,7 +102,7 @@ sub filter {
                         $im->done;
                 }
 
-                my @args = (-inbox => $self);
+                my @args = (ibx => $self);
                 # basic line splitting, only
                 # Perhaps we can have proper quote splitting one day...
                 ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
@@ -259,7 +259,7 @@ sub _umask_for {
 
 sub with_umask {
         my ($self, $cb, @arg) = @_;
-        my $old = umask $self->{umask};
+        my $old = umask($self->{umask} //= umask_prepare($self));
         my $rv = eval { $cb->(@arg) };
         my $err = $@;
         umask $old;
@@ -270,8 +270,7 @@ sub with_umask {
 sub umask_prepare {
         my ($self) = @_;
         my $perm = _git_config_perm($self);
-        my $umask = _umask_for($perm);
-        $self->{umask} = $umask;
+        _umask_for($perm);
 }
 
 sub cleanup ($) {
@@ -287,15 +286,24 @@ sub warn_ignore {
         # PublicInbox::MsgTime
         || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
         || $s =~ /^bad Date: .+? in /
+        # Encode::Unicode::UTF7
+        || $s =~ /^Bad UTF7 data escape at /
 }
 
 # this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
 sub warn_ignore_cb {
-        my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
+        my $cb = $SIG{__WARN__} // \&CORE::warn;
         sub {
                 return if warn_ignore(@_);
                 $cb->(@_);
         }
 }
 
+# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
+sub git_dir_latest { # => path of epoch ->max_git_epoch, undef if none
+        my ($self, $max) = @_; # $max: scalar ref, receives the epoch number
+        defined($$max = $self->max_git_epoch) ?
+                "$self->{inboxdir}/git/$$max.git" : undef;
+}
+
 1;
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
new file mode 100644
index 00000000..7ca2f9e4
--- /dev/null
+++ b/lib/PublicInbox/Isearch.pm
@@ -0,0 +1,127 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Provides everything the PublicInbox::Search object does,
+# but queries the global ExtSearch (->ALL) with an eidx_key
+# filter to emulate per-Inbox search.
+package PublicInbox::Isearch;
+use strict;
+use v5.10.1;
+use PublicInbox::ExtSearch;
+use PublicInbox::Search;
+
+sub new { # (undef, $ibx, $es): $es is presumably the ->ALL ExtSearch
+        my (undef, $ibx, $es) = @_; # class arg unused, see __PACKAGE__ below
+        bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
+}
+
+sub _ibx_id ($) { # => ibx_id row for ->{eidx_key}; dies if there is none
+        my ($self) = @_;
+        my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+        $sth->execute($self->{eidx_key});
+        $sth->fetchrow_array //
+                die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
+}
+
+# run query $str via ->{es}, restricted to our eidx_key; $opt is optional
+sub mset {
+        my ($self, $str, $opt) = @_;
+        my %opt = $opt ? %$opt : (); # shallow copy, caller's hash untouched
+        $opt{eidx_key} = $self->{eidx_key};
+        if (my $uid_range = $opt{uid_range}) { # map xnum range => docid range
+                my ($beg, $end) = @$uid_range;
+                my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+                my $dbh = $self->{es}->over->dbh; # accessor, not raw ->{over}
+                my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+                $sth->execute($ibx_id, $beg, $end);
+                my @r = ($sth->fetchrow_array);
+
+                $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+                $sth->execute($ibx_id, $beg, $end);
+                $r[1] = $sth->fetchrow_array;
+                if (defined($r[1]) && defined($r[0])) {
+                        $opt{limit} = $r[1] - $r[0] + 1; # docid span size
+                } else { # MIN/MAX gave NULL: fall back to an open range
+                        $r[1] //= 0xffffffff;
+                        $r[0] //= 0;
+                }
+                $opt{uid_range} = \@r; # now [ min docid, max docid ]
+        }
+        $self->{es}->mset($str, \%opt);
+}
+
+sub mset_to_artnums { # => arrayref of xnum values for this inbox
+        my ($self, $mset, $opt) = @_;
+        my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
+        my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+        my $qmarks = join(',', map { '?' } @$docids); # "?,?,..." for IN ()
+        if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
+                my $range = '';
+                my @r;
+                if (my $r = $opt->{uid_range}) { # extra xnum bounds
+                        $range = 'AND xnum >= ? AND xnum <= ?';
+                        @r = @$r;
+                }
+                my $rows = $self->{es}->over->dbh->
+                        selectall_arrayref(<<"", undef, $ibx_id, @$docids, @r);
+SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range
+ORDER BY xnum ASC
+
+                return [ map { $_->[0] } @$rows ]; # SQL did the ordering
+        }
+
+        my $rows = $self->{es}->over->dbh->
+                        selectall_arrayref(<<"", undef, $ibx_id, @$docids);
+SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks)
+
+        my $i = -1;
+        my %order = map { $_ => ++$i } @$docids; # docid => position in $mset
+        my @xnums;
+        for my $row (@$rows) { # @row = ($docid, $xnum)
+                my $idx = delete($order{$row->[0]}) // next;
+                $xnums[$idx] = $row->[1];
+        }
+        if (scalar keys %order) { # docids with no xref3 row for this inbox
+                warn "W: $self->{es}->{topdir} #",
+                        join(', ', sort { $a <=> $b } keys %order),
+                        " not mapped to `$self->{eidx_key}'\n";
+                warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+                @xnums = grep { defined } @xnums; # drop the resulting holes
+        }
+        \@xnums;
+}
+
+sub mset_to_smsg { # list ctx: (estimated total, \@msgs); scalar: \@msgs
+        my ($self, $ibx, $mset) = @_; # $ibx is a real inbox, not eidx
+        my $xnums = mset_to_artnums($self, $mset);
+        my $i = -1;
+        my %order = map { $_ => ++$i } @$xnums; # xnum => position in $mset
+        my $unordered = $ibx->over->get_all(@$xnums);
+        my @msgs;
+        for my $smsg (@$unordered) {
+                my $idx = delete($order{$smsg->{num}}) // do {
+                        warn "W: $ibx->{inboxdir} #$smsg->{num}\n";
+                        next; # not requested (or a duplicate), skip it
+                };
+                $msgs[$idx] = $smsg;
+        }
+        if (scalar keys %order) { # xnums which get_all did not return
+                warn "W: $ibx->{inboxdir} #",
+                        join(', ', sort { $a <=> $b } keys %order),
+                        " no longer valid\n";
+                warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+        } # NOTE(review): unlike mset_to_artnums, undef holes stay in @msgs
+        wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
+}
+
+sub has_threadid { 1 } # unconditionally true for Isearch
+
+sub help { $_[0]->{es}->help } # delegates to the wrapped ->{es}
+
+1;
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index fa4a2ad8..0377a877 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -83,7 +83,7 @@ sub set_list_headers {
 }
 
 sub inboxes_for_list_id ($$) {
-        my ($klass, $config, $simple) = @_;
+        my ($klass, $pi_cfg, $simple) = @_;
 
         # newer Email::Simple allows header_raw, as does Email::MIME:
         my @list_ids = $simple->can('header_raw') ?
@@ -92,7 +92,7 @@ sub inboxes_for_list_id ($$) {
         my @dests;
         for my $list_id (@list_ids) {
                 $list_id =~ /<[ \t]*(.+)?[ \t]*>/ or next;
-                if (my $ibx = $config->lookup_list_id($1)) {
+                if (my $ibx = $pi_cfg->lookup_list_id($1)) {
                         push @dests, $ibx;
                 }
         }
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 74820fb5..e02450fa 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -6,21 +6,12 @@ package PublicInbox::ManifestJsGz;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::WwwListing);
-use Digest::SHA ();
-use File::Spec ();
 use bytes (); # length
-use PublicInbox::Inbox;
-use PublicInbox::Git;
+use PublicInbox::Config;
 use IO::Compress::Gzip qw(gzip);
 use HTTP::Date qw(time2str);
-*try_cat = \&PublicInbox::Inbox::try_cat;
 
-our $json;
-for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
-        eval "require $mod" or next;
-        # ->ascii encodes non-ASCII to "\uXXXX"
-        $json = $mod->new->ascii(1) and last;
-}
+our $json = PublicInbox::Config::json();
 
 # called by WwwListing
 sub url_regexp {
@@ -30,76 +21,29 @@ sub url_regexp {
         $ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
 }
 
-sub fingerprint ($) {
-        my ($git) = @_;
-        # TODO: convert to qspawn for fairness when there's
-        # thousands of repos
-        my ($fh, $pid) = $git->popen('show-ref');
-        my $dig = Digest::SHA->new(1);
-        while (read($fh, my $buf, 65536)) {
-                $dig->add($buf);
-        }
-        close $fh;
-        waitpid($pid, 0);
-        return if $?; # empty, uninitialized git repo
-        $dig->hexdigest;
+sub inject_entry ($$$;$) { # store $ent in $ctx->{manifest} at $url_path
+        my ($ctx, $url_path, $ent, $git_dir) = @_; # $git_dir is optional
+        $ctx->{-abs2urlpath}->{$git_dir // delete $ent->{git_dir}} = $url_path;
+        my $modified = $ent->{modified};
+        $ctx->{-mtime} = $modified if $modified > ($ctx->{-mtime} // 0); # newest
+        $ctx->{manifest}->{$url_path} = $ent;
 }
 
 sub manifest_add ($$;$$) {
         my ($ctx, $ibx, $epoch, $default_desc) = @_;
         my $url_path = "/$ibx->{name}";
-        my $git_dir = $ibx->{inboxdir};
+        my $git;
         if (defined $epoch) {
-                $git_dir .= "/git/$epoch.git";
                 $url_path .= "/git/$epoch.git";
+                $git = $ibx->git_epoch($epoch) or return;
+        } else {
+                $git = $ibx->git;
         }
-        return unless -d $git_dir;
-        my $git = PublicInbox::Git->new($git_dir);
-        my $fingerprint = fingerprint($git) or return; # no empty repos
-
-        chomp(my $owner = $git->qx('config', 'gitweb.owner'));
-        chomp(my $desc = try_cat("$git_dir/description"));
-        utf8::decode($owner);
-        utf8::decode($desc);
-        $owner = undef if $owner eq '';
-        $desc = 'Unnamed repository' if $desc eq '';
-
-        # templates/hooks--update.sample and git-multimail in git.git
-        # only match "Unnamed repository", not the full contents of
-        # templates/this--description in git.git
-        if ($desc =~ /\AUnnamed repository/) {
-                $desc = "$default_desc [epoch $epoch]" if defined($epoch);
-        }
-
-        my $reference;
-        chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
-        if ($alt) {
-                # n.b.: GitPython doesn't seem to handle comments or C-quoted
-                # strings like native git does; and we don't for now, either.
-                my @alt = split(/\n+/, $alt);
-
-                # grokmirror only supports 1 alternate for "reference",
-                if (scalar(@alt) == 1) {
-                        my $objdir = "$git_dir/objects";
-                        $reference = File::Spec->rel2abs($alt[0], $objdir);
-                        $reference =~ s!/[^/]+/?\z!!; # basename
-                }
-        }
-        $ctx->{-abs2urlpath}->{$git_dir} = $url_path;
-        my $modified = $git->modified;
-        if ($modified > ($ctx->{-mtime} // 0)) {
-                $ctx->{-mtime} = $modified;
-        }
-        $ctx->{manifest}->{$url_path} = {
-                owner => $owner,
-                reference => $reference,
-                description => $desc,
-                modified => $modified,
-                fingerprint => $fingerprint,
-        };
+        my $ent = $git->manifest_entry($epoch, $default_desc) or return;
+        inject_entry($ctx, $url_path, $ent, $git->{git_dir});
 }
 
-sub ibx_entry {
+sub slow_manifest_add ($$) {
         my ($ctx, $ibx) = @_;
         eval {
                 if (defined(my $max = $ibx->max_git_epoch)) {
@@ -111,6 +55,29 @@ sub ibx_entry {
                         manifest_add($ctx, $ibx);
                 }
         };
+}
+
+sub eidx_manifest_add ($$$) { # add $ibx entries from JSON kept in $ALL->misc
+        my ($ctx, $ALL, $ibx) = @_;
+        if (my $data = $ALL->misc->inbox_data($ibx)) {
+                $data = $json->decode($data); # => { $url_path => $ent, ... }
+                delete $data->{''}; # private
+                while (my ($url_path, $ent) = each %$data) {
+                        inject_entry($ctx, $url_path, $ent); # uses $ent->{git_dir}
+                }
+        } else {
+                warn "E: `${\$ibx->eidx_key}' not indexed by $ALL->{topdir}\n";
+        }
+}
+
+sub ibx_entry {
+        my ($ctx, $ibx) = @_;
+        my $ALL = $ctx->{www}->{pi_cfg}->ALL;
+        if ($ALL) {
+                eidx_manifest_add($ctx, $ALL, $ibx);
+        } else {
+                slow_manifest_add($ctx, $ibx);
+        }
         warn "E: $@" if $@;
 }
 
@@ -134,7 +101,8 @@ sub psgi_triple {
 
 sub per_inbox {
         my ($ctx) = @_;
-        ibx_entry($ctx, $ctx->{-inbox});
+        # only one inbox, slow is probably OK
+        slow_manifest_add($ctx, $ctx->{ibx});
         psgi_triple($ctx);
 }
 
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 47025891..83fa7d8a 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -17,10 +17,10 @@ use PublicInbox::Eml;
 sub getline {
         my ($ctx) = @_; # ctx
         my $smsg = $ctx->{smsg} or return;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $eml = $ibx->smsg_eml($smsg) or return;
         my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
-        $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+        $ctx->zmore(msg_hdr($ctx, $eml));
         if ($n) {
                 $ctx->translate(msg_body($eml));
         } else { # last message
@@ -44,9 +44,9 @@ sub async_eml { # for async_blob_cb
         my ($ctx, $eml) = @_;
         my $smsg = delete $ctx->{smsg};
         # next message
-        $ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});
+        $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}});
 
-        $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+        $ctx->zmore(msg_hdr($ctx, $eml));
         $ctx->{http_out}->write($ctx->translate(msg_body($eml)));
 }
 
@@ -56,7 +56,7 @@ sub res_hdr ($$) {
         $fn =~ s/^re:\s+//i;
         $fn = to_filename($fn) // 'no-subject';
         my @hdr = ('Content-Type');
-        if ($ctx->{-inbox}->{obfuscate}) {
+        if ($ctx->{ibx}->{obfuscate}) {
                 # obfuscation is stupid, but maybe scrapers are, too...
                 push @hdr, 'application/mbox';
                 $fn .= '.mbox';
@@ -71,17 +71,17 @@ sub res_hdr ($$) {
 # for rare cases where v1 inboxes aren't indexed w/ ->over at all
 sub no_over_raw ($) {
         my ($ctx) = @_;
-        my $mref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return;
+        my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return;
         my $eml = PublicInbox::Eml->new($mref);
         [ 200, res_hdr($ctx, $eml->header_str('Subject')),
-                [ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
+                [ msg_hdr($ctx, $eml) . msg_body($eml) ] ]
 }
 
 # /$INBOX/$MESSAGE_ID/raw
 sub emit_raw {
         my ($ctx) = @_;
-        $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-        my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
+        $ctx->{base_url} = $ctx->{ibx}->base_url($ctx->{env});
+        my $over = $ctx->{ibx}->over or return no_over_raw($ctx);
         my ($id, $prev);
         my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
         my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
@@ -90,8 +90,8 @@ sub emit_raw {
         $ctx->psgi_response(200, $res_hdr);
 }
 
-sub msg_hdr ($$;$) {
-        my ($ctx, $eml, $mid) = @_;
+sub msg_hdr ($$) {
+        my ($ctx, $eml) = @_;
         my $header_obj = $eml->header_obj;
 
         # drop potentially confusing headers, ssoma already should've dropped
@@ -99,34 +99,11 @@ sub msg_hdr ($$;$) {
         foreach my $d (qw(Lines Bytes Content-Length Status)) {
                 $header_obj->header_set($d);
         }
-        my $ibx = $ctx->{-inbox};
-        my $base = $ctx->{base_url};
-        $mid = $ctx->{mid} unless defined $mid;
-        $mid = mid_escape($mid);
-        my @append = (
-                'Archived-At', "<$base$mid/>",
-                'List-Archive', "<$base>",
-                'List-Post', "<mailto:$ibx->{-primary_address}>",
-        );
         my $crlf = $header_obj->crlf;
         my $buf = $header_obj->as_string;
         # fixup old bug from import (pre-a0c07cba0e5d8b6a)
         $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-        $buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf;
-
-        for (my $i = 0; $i < @append; $i += 2) {
-                my $k = $append[$i];
-                my $v = $append[$i + 1];
-                my @v = $header_obj->header_raw($k);
-                foreach (@v) {
-                        if ($v eq $_) {
-                                $v = undef;
-                                last;
-                        }
-                }
-                $buf .= "$k: $v$crlf" if defined $v;
-        }
-        $buf .= $crlf;
+        "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf . $crlf;
 }
 
 sub msg_body ($) {
@@ -190,7 +167,7 @@ sub all_ids_cb {
 
 sub mbox_all_ids {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $prev = 0;
         my $mm = $ctx->{mm} = $ibx->mm;
         my $ids = $mm->ids_after(\$prev) or return
@@ -203,27 +180,33 @@ sub mbox_all_ids {
         PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
 }
 
+sub gone ($$) { # warn that $what (e.g. 'over') vanished; always undef
+        my ($ctx, $what) = @_;
+        warn "W: `$ctx->{ibx}->{inboxdir}' $what went away unexpectedly\n";
+        undef; # lets callers write "or return gone(...)"
+}
+
 sub results_cb {
         my ($ctx) = @_;
-        my $over = $ctx->{-inbox}->over or return;
+        my $over = $ctx->{ibx}->over or return gone($ctx, 'over');
         while (1) {
                 while (defined(my $num = shift(@{$ctx->{ids}}))) {
                         my $smsg = $over->get_art($num) or next;
                         return $smsg;
                 }
                 # refill result set
-                my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
+                my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
                 my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
                 my $size = $mset->size or return;
                 $ctx->{qopts}->{offset} += $size;
-                $ctx->{ids} = $srch->mset_to_artnums($mset);
+                $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
         }
 }
 
 sub results_thread_cb {
         my ($ctx) = @_;
 
-        my $over = $ctx->{-inbox}->over or return;
+        my $over = $ctx->{ibx}->over or return gone($ctx, 'over');
         while (1) {
                 while (defined(my $num = shift(@{$ctx->{xids}}))) {
                         my $smsg = $over->get_art($num) or next;
@@ -234,11 +217,11 @@ sub results_thread_cb {
                 next if $over->expand_thread($ctx);
 
                 # refill result set
-                my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
+                my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
                 my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
                 my $size = $mset->size or return;
                 $ctx->{qopts}->{offset} += $size;
-                $ctx->{ids} = $srch->mset_to_artnums($mset);
+                $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
         }
 
 }
@@ -247,19 +230,19 @@ sub mbox_all {
         my ($ctx, $q) = @_;
         my $q_string = $q->{'q'};
         return mbox_all_ids($ctx) if $q_string !~ /\S/;
-        my $srch = $ctx->{-inbox}->search or
+        my $srch = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
-        my $over = $ctx->{-inbox}->over or
+        my $over = $ctx->{ibx}->over or
                 return PublicInbox::WWW::need($ctx, 'Overview');
 
-        my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
+        my $qopts = $ctx->{qopts} = { relevance => -1 }; # ORDER BY docid ASC
         $qopts->{thread} = 1 if $q->{t};
         my $mset = $srch->mset($q_string, $qopts);
         $qopts->{offset} = $mset->size or
                         return [404, [qw(Content-Type text/plain)],
                                 ["No results found\n"]];
         $ctx->{query} = $q_string;
-        $ctx->{ids} = $srch->mset_to_artnums($mset);
+        $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts);
         require PublicInbox::MboxGz;
         my $fn;
         if ($q->{t} && $srch->has_threadid) {
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 913be6e4..7b054845 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -22,7 +22,7 @@ sub async_next ($) {
 sub mbox_gz {
         my ($self, $cb, $fn) = @_;
         $self->{cb} = $cb;
-        $self->{base_url} = $self->{-inbox}->base_url($self->{env});
+        $self->{base_url} = $self->{ibx}->base_url($self->{env});
         $self->{gz} = PublicInbox::GzipFilter::gzip_or_die();
         $fn = to_filename($fn // '') // 'no-subject';
         # http://www.iana.org/assignments/media-types/application/gzip
@@ -37,8 +37,8 @@ sub getline {
         my ($self) = @_;
         my $cb = $self->{cb} or return;
         while (my $smsg = $cb->($self)) {
-                my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
-                $self->zmore(msg_hdr($self, $eml, $smsg->{mid}));
+                my $eml = $self->{ibx}->smsg_eml($smsg) or next;
+                $self->zmore(msg_hdr($self, $eml));
                 return $self->translate(msg_body($eml));
         }
         # signal that we're done and can return undef next call:
diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm
new file mode 100644
index 00000000..a04dd1c5
--- /dev/null
+++ b/lib/PublicInbox/MiscIdx.pm
@@ -0,0 +1,151 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# like PublicInbox::SearchIdx, but for searching for non-mail messages.
+# Things indexed include:
+# * inboxes themselves
+# * epoch information
+# * (maybe) git code repository information
+# Expect ~100K-1M documents with no parallelism opportunities,
+# so no sharding, here.
+#
+# See MiscSearch for read-only counterpart
+package PublicInbox::MiscIdx;
+use strict;
+use v5.10.1;
+use PublicInbox::InboxWritable;
+use PublicInbox::Search; # for SWIG Xapian and Search::Xapian compat
+use PublicInbox::SearchIdx qw(index_text term_generator add_val);
+use PublicInbox::Spawn qw(nodatacow_dir);
+use Carp qw(croak);
+use File::Path ();
+use PublicInbox::MiscSearch;
+use PublicInbox::Config;
+my $json;
+
+sub new { # ($class, $eidx) => writer for the DB under "$eidx->{xpfx}/misc"
+        my ($class, $eidx) = @_;
+        PublicInbox::SearchIdx::load_xapian_writable();
+        my $dir = "$eidx->{xpfx}/misc";
+        File::Path::mkpath($dir);
+        nodatacow_dir($dir);
+        my $self = bless {
+                mi_dir => $dir,
+                flags => $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN,
+                indexlevel => 'full', # small DB, no point in medium?
+        }, $class;
+        $self->{flags} |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
+        $json //= PublicInbox::Config::json(); # encoder used by index_ibx
+        $self;
+}
+
+sub begin_txn { # opens the writable DB and starts a Xapian transaction
+        my ($self) = @_;
+        $self->{xdb} and croak 'BUG: already in txn'; # XXX make lazy?
+        my ($dir, $flags) = @$self{qw(mi_dir flags)};
+        my $cls = $PublicInbox::Search::X{WritableDatabase};
+        my $xdb = eval { $cls->new($dir, $flags) };
+        $@ and croak "Failed opening $dir: $@";
+        ($self->{xdb} = $xdb)->begin_transaction;
+}
+
+sub commit_txn { # commits the open transaction and forgets the DB handle
+        my $self = shift;
+        $self->{xdb} or croak 'BUG: not in txn'; # XXX make lazy?
+        delete($self->{xdb})->commit_transaction;
+}
+
+sub remove_eidx_key { # deletes every document carrying the Q$eidx_key term
+        my ($self, $eidx_key) = @_;
+        my ($xdb, $term) = ($self->{xdb}, 'Q'.$eidx_key);
+        my $it = $xdb->postlist_begin($term);
+        my $end = $xdb->postlist_end($term);
+        my @docids; # only one, unless we had bugs
+        for (; $it != $end; $it++) {
+                push @docids, $it->get_docid;
+        }
+        for my $docid (@docids) { # delete only after iteration is over
+                $xdb->delete_document($docid);
+                warn "I: remove inbox docid #$docid ($eidx_key)\n";
+        }
+}
+
+# adds or updates according to $eidx_key; uses the {xdb} set by begin_txn
+sub index_ibx {
+        my ($self, $ibx) = @_;
+        my $eidx_key = $ibx->eidx_key;
+        my $xdb = $self->{xdb};
+        # Q = uniQue in Xapian terminology
+        my $head = $xdb->postlist_begin('Q'.$eidx_key);
+        my $tail = $xdb->postlist_end('Q'.$eidx_key);
+        my ($docid, @drop); # docid to reuse, duplicate docids to delete
+        for (; $head != $tail; $head++) {
+                if (defined $docid) { # 2nd and later postings are duplicates
+                        my $i = $head->get_docid;
+                        push @drop, $i;
+                        warn <<EOF;
+W: multiple inboxes keyed to `$eidx_key', deleting #$i
+EOF
+                } else {
+                        $docid = $head->get_docid;
+                }
+        }
+        $xdb->delete_document($_) for @drop; # just in case
+
+        my $doc = $PublicInbox::Search::X{Document}->new;
+        term_generator($self)->set_document($doc);
+
+        # allow sorting by modified and uidvalidity (created at)
+        add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
+        add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
+
+        $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
+        $doc->add_boolean_term('T'.'inbox'); # Type
+
+        if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
+                $doc->add_boolean_term('T'.'newsgroup'); # additional Type
+        }
+
+        # force reread from disk, {description} could be loaded from {misc}
+        delete $ibx->{description};
+        my $desc = $ibx->description;
+
+        # description = S/Subject (or title)
+        # address = A/Author
+        index_text($self, $desc, 1, 'S');
+        index_text($self, $ibx->{name}, 1, 'XNAME');
+        my %map = ( # multi-valued $ibx field => term prefix
+                address => 'A',
+                listid => 'XLISTID',
+                infourl => 'XINFOURL',
+                url => 'XURL'
+        );
+        while (my ($f, $pfx) = each %map) {
+                for my $v (@{$ibx->{$f} // []}) { # a missing field indexes nothing
+                        index_text($self, $v, 1, $pfx);
+                }
+        }
+        my $data = {}; # path => manifest entry, stored as JSON document data
+        if (defined(my $max = $ibx->max_git_epoch)) { # v2
+                my $pfx = "/$ibx->{name}/git/";
+                for my $epoch (0..$max) {
+                        # NOTE: a false git_epoch returns before $doc is stored
+                        my $git = $ibx->git_epoch($epoch) or return;
+                        if (my $ent = $git->manifest_entry($epoch, $desc)) {
+                                $data->{"$pfx$epoch.git"} = $ent;
+                                $ent->{git_dir} = $git->{git_dir};
+                        }
+                        $git->cleanup; # ->modified starts cat-file --batch
+                }
+        } elsif (my $ent = $ibx->git->manifest_entry) { # v1
+                $ent->{git_dir} = $ibx->{inboxdir};
+                $data->{"/$ibx->{name}"} = $ent;
+        }
+        $doc->set_data($json->encode($data));
+        if (defined $docid) { # keep the docid of the existing document
+                $xdb->replace_document($docid, $doc);
+        } else {
+                $xdb->add_document($doc);
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
new file mode 100644
index 00000000..6683d564
--- /dev/null
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -0,0 +1,191 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# read-only counterpart to MiscIdx
+package PublicInbox::MiscSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Search qw(retry_reopen int_val);
+my $json;
+
+# Xapian value columns:
+our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
+
+# avoid conflicting with message Search::prob_prefix for UI/UX reasons
+my %PROB_PREFIX = (
+        description => 'S', # $INBOX_DIR/description
+        address => 'A',
+        listid => 'XLISTID',
+        url => 'XURL',
+        infourl => 'XINFOURL',
+        name => 'XNAME',
+        '' => 'S A XLISTID XNAME XURL XINFOURL'
+);
+
+sub new { # ($class, $dir) => read-only handle on the Xapian DB at $dir
+        my ($class, $dir) = @_;
+        PublicInbox::Search::load_xapian();
+        # this file has no `use PublicInbox::Config', so load it on demand
+        require PublicInbox::Config;
+        $json //= PublicInbox::Config::json(); # decoder for ibx_cache_load
+        bless { xdb => $PublicInbox::Search::X{Database}->new($dir) }, $class;
+}
+
+# read-only; builds the QueryParser used by ->mset and ->newsgroup_matches:
+# terms are ANDed by default and wildcard expansion is capped at 100
+sub mi_qp_new ($) {
+        my ($self) = @_;
+        my $qp = $PublicInbox::Search::X{QueryParser}->new;
+        $qp->set_default_op(PublicInbox::Search::OP_AND());
+        $qp->set_database($self->{xdb});
+        $qp->set_stemmer(PublicInbox::Search::stemmer($self));
+        $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
+        my $cb = $qp->can('set_max_wildcard_expansion') //
+                $qp->can('set_max_expansion'); # Xapian 1.5.0+
+        $cb->($qp, 100);
+        # note: no (value)range processors are registered on this parser
+        # keys() rather than each(): %PROB_PREFIX is shared, file-scoped state
+        for my $name (keys %PROB_PREFIX) {
+                $qp->add_prefix($name, $_) for split(/ /, $PROB_PREFIX{$name});
+        }
+        $qp->add_boolean_prefix('type', 'T');
+        $qp;
+}
+
+sub misc_enquire_once { # retry_reopen callback
+        my ($self, $qr, $opt) = @_;
+        my $X = \%PublicInbox::Search::X;
+        my $enq = $X->{Enquire}->new($self->{xdb});
+        $enq->set_query($qr);
+        my $mode = $opt->{relevance} // 0;
+        if ($mode == -1) { # ORDER BY docid/UID
+                $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+                $enq->set_weighting_scheme($X->{BoolWeight}->new);
+        } else { # sort on the MODIFIED column, reversed unless {asc} is set
+                my $sorter = $mode ? 'set_sort_by_relevance_then_value' :
+                                'set_sort_by_value_then_relevance';
+                $enq->$sorter($MODIFIED, !$opt->{asc});
+        }
+        $enq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
+}
+
+sub mset { # ($self, $qs, \%opt); an empty $qs is treated as 'type:inbox'
+        my ($self, $qs, $opt) = @_;
+        $opt ||= {};
+        reopen($self);
+        $self->{qp} //= mi_qp_new($self);
+        $qs eq '' and $qs = 'type:inbox';
+        my $qr = $self->{qp}->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
+        exists $opt->{relevance} or $opt->{relevance} = 1; # caller may override
+        retry_reopen($self, \&misc_enquire_once, $qr, $opt);
+}
+
+sub ibx_matches_once { # retry_reopen callback
+        my ($self, $qr, $by_newsgroup) = @_;
+        # page size; never 0, or `$nr < $limit' below could not end the loop
+        my $limit = (scalar(keys %$by_newsgroup) * 2) || 200;
+        my $opt = { limit => $limit, offset => 0, relevance => -1 };
+        my $ret = {}; # newsgroup => $ibx of matches
+        while (1) { # one page of results per iteration
+                my $mset = misc_enquire_once($self, $qr, $opt);
+                for my $mi ($mset->items) {
+                        my $doc = $mi->get_document;
+                        my $end = $doc->termlist_end;
+                        my $cur = $doc->termlist_begin;
+                        $cur->skip_to('Q'); # the Q (uniQue) term is the eidx_key
+                        if ($cur != $end) {
+                                my $ng = $cur->get_termname; # eidx_key
+                                $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
+                                if (my $ibx = $by_newsgroup->{$ng}) {
+                                        $ret->{$ng} = $ibx;
+                                }
+                        } else {
+                                warn <<EOF;
+W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
+EOF
+                        }
+                }
+                my $nr = $mset->size;
+                return $ret if $nr < $limit; # short page: nothing left to fetch
+                $opt->{offset} += $nr;
+        }
+}
+
+# returns a newsgroup => PublicInbox::Inbox mapping of inboxes matching $qs
+sub newsgroup_matches {
+        my ($self, $qs, $pi_cfg) = @_;
+        $qs .= ' type:inbox'; # restrict matches to inbox documents
+        $self->{qp} //= mi_qp_new($self);
+        my $qr = $self->{qp}->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
+        retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
+}
+
+sub ibx_data_once { # retry_reopen callback
+        my ($self, $ibx) = @_;
+        my $xdb = $self->{xdb};
+        my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+        my $it = $xdb->postlist_begin($term);
+        my $end = $xdb->postlist_end($term);
+        # explicit undef (not an empty list) when $ibx has no document
+        $it != $end or return undef;
+
+        # first posting wins; a uidvalidity already set on $ibx is kept
+        my $doc = $xdb->get_document($it->get_docid);
+        $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+        $ibx->{-modified} = int_val($doc, $MODIFIED);
+        $doc->get_data;
+}
+
+sub inbox_data { # runs ibx_data_once for $ibx through retry_reopen
+        my ($self, $ibx) = @_;
+        retry_reopen($self, \&ibx_data_once, $ibx);
+}
+
+sub ibx_cache_load { # ($doc, \%cache): adds one entry keyed by eidx_key
+        my ($doc, $cache) = @_;
+        my $end = $doc->termlist_end;
+        my $cur = $doc->termlist_begin;
+        $cur->skip_to('Q'); # Q-prefixed term holds the eidx_key
+        return if $cur == $end;
+        my $eidx_key = $cur->get_termname;
+        $eidx_key =~ s/\AQ// or return; # expired
+        my $ce = $cache->{$eidx_key} = {};
+        $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
+        $ce->{-modified} = int_val($doc, $MODIFIED);
+        $ce->{description} = do {
+                # extract description from manifest.js.gz epoch description
+                my $d;
+                my $data = $json->decode($doc->get_data);
+                for (values %$data) {
+                        $d = $_->{description} // next; # $d unchanged if undef
+                        $d =~ s/ \[epoch [0-9]+\]\z// or next; # no suffix: keep looking
+                        last; # suffix stripped, use this one
+                }
+                $d; # may be undef or an unsuffixed description
+        }
+}
+
+sub _nntpd_cache_load { # retry_reopen callback
+        my ($self) = @_;
+        # limit is 10x the document count, presumably to get it all in one mset
+        my $max = $self->{xdb}->get_doccount * 10;
+        my $mset = mset($self, 'type:newsgroup type:inbox',
+                        { limit => $max, relevance => -1 });
+        my %cache; # filled in by ibx_cache_load
+        ibx_cache_load($_->get_document, \%cache) for $mset->items;
+        \%cache;
+}
+
+# returns { newsgroup => $cache_entry } mapping, $cache_entry contains
+# anything which may trigger seeks at startup, currently: description,
+# -modified, and uidvalidity.  Keys are the eidx_key of each document.
+sub nntpd_cache_load {
+        my ($self) = @_;
+        retry_reopen($self, \&_nntpd_cache_load);
+}
+
+no warnings 'once';
+*reopen = \&PublicInbox::Search::reopen;
+
+1;
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index f15875e3..a8c874af 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -36,8 +36,7 @@ sub new_file {
                 create_tables($dbh);
                 $self->created_at(time) unless $self->created_at;
 
-                my $max = $self->max // 0;
-                $self->num_highwater($max);
+                $self->num_highwater(max($self));
                 $dbh->commit;
         }
         $self;
@@ -144,7 +143,7 @@ sub max {
         my $sth = $_[0]->{dbh}->prepare_cached('SELECT MAX(num) FROM msgmap',
                                                 undef, 1);
         $sth->execute;
-        $sth->fetchrow_array;
+        $sth->fetchrow_array // 0;
 }
 
 sub minmax {
@@ -153,7 +152,7 @@ sub minmax {
         my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap',
                                                 undef, 1);
         $sth->execute;
-        ($sth->fetchrow_array, max($_[0]));
+        ($sth->fetchrow_array // 0, max($_[0]));
 }
 
 sub mid_delete {
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 2f821fa6..11a7ffb8 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -5,7 +5,7 @@
 # fields:
 # nntpd: PublicInbox::NNTPD ref
 # article: per-session current article number
-# ng: PublicInbox::Inbox ref
+# ibx: PublicInbox::Inbox ref
 # long_cb: long_response private data
 package PublicInbox::NNTP;
 use strict;
@@ -17,6 +17,8 @@ use PublicInbox::DS qw(now);
 use Digest::SHA qw(sha1_hex);
 use Time::Local qw(timegm timelocal);
 use PublicInbox::GitAsyncCat;
+use PublicInbox::Address;
+
 use constant {
         LINE_MAX => 512, # RFC 977 section 2.3
         r501 => '501 command syntax error',
@@ -31,9 +33,9 @@ use Errno qw(EAGAIN);
 my $ONE_MSGID = qr/\A$MID_EXTRACT\z/;
 my @OVERVIEW = qw(Subject From Date Message-ID References);
 my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') .
-                "Xref:full\r\n";
+                "Xref:full\r\n.";
 my $LIST_HEADERS = join("\r\n", @OVERVIEW,
-                        qw(:bytes :lines Xref To Cc)) . "\r\n";
+                        qw(:bytes :lines Xref To Cc)) . "\r\n.";
 my $CAPABILITIES = <<"";
 101 Capability list:\r
 VERSION 2\r
@@ -92,8 +94,7 @@ sub process_line ($$) {
                 err($self, 'error from: %s (%s)', $l, $err);
                 $res = '503 program fault - command not performed';
         }
-        return 0 unless defined $res;
-        res($self, $res);
+        defined($res) ? res($self, $res) : 0;
 }
 
 # The keyword argument is not used (rfc3977 5.2.2)
@@ -109,9 +110,7 @@ sub cmd_capabilities ($;$) {
 
 sub cmd_mode ($$) {
         my ($self, $arg) = @_;
-        $arg = uc $arg;
-        return r501 unless $arg eq 'READER';
-        '201 Posting prohibited';
+        uc($arg) eq 'READER' ? '201 Posting prohibited' : r501;
 }
 
 sub cmd_slave ($) { '202 slave status noted' }
@@ -120,46 +119,66 @@ sub cmd_xgtitle ($;$) {
         my ($self, $wildmat) = @_;
         more($self, '282 list of groups and descriptions follows');
         list_newsgroups($self, $wildmat);
-        '.'
 }
 
-sub list_overview_fmt ($) {
-        my ($self) = @_;
-        $self->msg_more($OVERVIEW_FMT);
-}
+sub list_overview_fmt ($) { $OVERVIEW_FMT }
 
-sub list_headers ($;$) {
-        my ($self) = @_;
-        $self->msg_more($LIST_HEADERS);
+sub list_headers ($;$) { $LIST_HEADERS }
+
+sub list_active_i { # "LIST ACTIVE" and also just "LIST" (no args)
+        my ($self, $groupnames) = @_;
+        my @window = splice(@$groupnames, 0, 100) or return 0;
+        my $ibx;
+        my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+        for my $ngname (@window) {
+                $ibx = $groups->{$ngname} and group_line($self, $ibx);
+        }
+        scalar(@$groupnames); # continue if there's more
 }
 
-sub list_active ($;$) {
+sub list_active ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                group_line($self, $ng);
+        long_response($self, \&list_active_i, [
+                grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
+}
+
+sub list_active_times_i {
+        my ($self, $groupnames) = @_;
+        my @window = splice(@$groupnames, 0, 100) or return 0;
+        my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+        for my $ngname (@window) {
+                my $ibx = $groups->{$ngname} or next;
+                my $c = eval { $ibx->uidvalidity } // time;
+                more($self, "$ngname $c <$ibx->{-primary_address}>");
         }
+        scalar(@$groupnames); # continue if there's more
 }
 
-sub list_active_times ($;$) {
+sub list_active_times ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                my $c = eval { $ng->mm->created_at } || time;
-                more($self, "$ng->{newsgroup} $c $ng->{-primary_address}");
+        long_response($self, \&list_active_times_i, [
+                grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
+}
+
+sub list_newsgroups_i {
+        my ($self, $groupnames) = @_;
+        my @window = splice(@$groupnames, 0, 100) or return 0;
+        my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+        my $ibx;
+        for my $ngname (@window) {
+                $ibx = $groups->{$ngname} and
+                        more($self, "$ngname ".$ibx->description);
         }
+        scalar(@$groupnames); # continue if there's more
 }
 
-sub list_newsgroups ($;$) {
+sub list_newsgroups ($;$) { # called by cmd_list
         my ($self, $wildmat) = @_;
         wildmat2re($wildmat);
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $wildmat or next;
-                my $d = $ng->description;
-                more($self, "$ng->{newsgroup} $d");
-        }
+        long_response($self, \&list_newsgroups_i, [
+                grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
 }
 
 # LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported
@@ -168,6 +187,7 @@ sub cmd_list ($;$$) {
         if (scalar @args) {
                 my $arg = shift @args;
                 $arg =~ tr/A-Z./a-z_/;
+                my $ret = $arg eq 'active';
                 $arg = "list_$arg";
                 $arg = $self->can($arg);
                 return r501 unless $arg && args_ok($arg, scalar @args);
@@ -175,24 +195,22 @@ sub cmd_list ($;$$) {
                 $arg->($self, @args);
         } else {
                 more($self, '215 list of newsgroups follows');
-                foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                        group_line($self, $ng);
-                }
+                long_response($self, \&list_active_i, [ # copy array
+                        @{$self->{nntpd}->{groupnames}} ]);
         }
-        '.'
 }
 
 sub listgroup_range_i {
         my ($self, $beg, $end) = @_;
-        my $r = $self->{ng}->mm->msg_range($beg, $end, 'num');
+        my $r = $self->{ibx}->mm->msg_range($beg, $end, 'num');
         scalar(@$r) or return;
-        more($self, join("\r\n", map { $_->[0] } @$r));
+        $self->msg_more(join('', map { "$_->[0]\r\n" } @$r));
         1;
 }
 
 sub listgroup_all_i {
         my ($self, $num) = @_;
-        my $ary = $self->{ng}->mm->ids_after($num);
+        my $ary = $self->{ibx}->mm->ids_after($num);
         scalar(@$ary) or return;
         more($self, join("\r\n", @$ary));
         1;
@@ -205,7 +223,7 @@ sub cmd_listgroup ($;$$) {
                 return $res if ($res !~ /\A211 /);
                 more($self, $res);
         }
-        $self->{ng} or return '412 no newsgroup selected';
+        $self->{ibx} or return '412 no newsgroup selected';
         if (defined $range) {
                 my $r = get_range($self, $range);
                 return $r unless ref $r;
@@ -242,9 +260,22 @@ sub parse_time ($$;$) {
 }
 
 sub group_line ($$) {
-        my ($self, $ng) = @_;
-        my ($min, $max) = $ng->mm->minmax;
-        more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max;
+        my ($self, $ibx) = @_;
+        my ($min, $max) = $ibx->mm->minmax;
+        more($self, "$ibx->{newsgroup} $max $min n");
+}
+
+sub newgroups_i {
+        my ($self, $ts, $i, $groupnames) = @_;
+        my $end = $$i + 100;
+        my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+        while ($$i < $end) {
+                my $ngname = $groupnames->[$$i++] // return;
+                my $ibx = $groups->{$ngname} or next; # expired on reload
+                next unless (eval { $ibx->uidvalidity } // 0) > $ts;
+                group_line($self, $ibx);
+        }
+        1;
 }
 
 sub cmd_newgroups ($$$;$$) {
@@ -254,12 +285,8 @@ sub cmd_newgroups ($$$;$$) {
 
         # TODO dists
         more($self, '231 list of new newsgroups follows');
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                my $c = eval { $ng->mm->created_at } || 0;
-                next unless $c > $ts;
-                group_line($self, $ng);
-        }
-        '.'
+        long_response($self, \&newgroups_i, $ts, \(my $i = 0),
+                                $self->{nntpd}->{groupnames});
 }
 
 sub wildmat2re (;$) {
@@ -294,23 +321,27 @@ sub ngpat2re (;$) {
 }
 
 sub newnews_i {
-        my ($self, $overs, $ts, $prev) = @_;
-        my $over = $overs->[0];
-        my $msgs = $over->query_ts($ts, $$prev);
-        if (scalar @$msgs) {
-                more($self, '<' .
-                        join(">\r\n<", map { $_->{mid} } @$msgs ).
-                        '>');
-                $$prev = $msgs->[-1]->{num};
-        } else {
-                shift @$overs;
-                if (@$overs) { # continue onto next newsgroup
-                        $$prev = 0;
-                        return 1;
-                } else { # break out of the long response.
-                        return;
+        my ($self, $names, $ts, $prev) = @_;
+        my $ngname = $names->[0];
+        if (my $ibx = $self->{nntpd}->{pi_cfg}->{-by_newsgroup}->{$ngname}) {
+                if (my $over = $ibx->over) {
+                        my $msgs = $over->query_ts($ts, $$prev);
+                        if (scalar @$msgs) {
+                                $self->msg_more(join('', map {
+                                                        "<$_->{mid}>\r\n";
+                                                } @$msgs));
+                                $$prev = $msgs->[-1]->{num};
+                                return 1; # continue on current group
+                        }
                 }
         }
+        shift @$names;
+        if (@$names) { # continue onto next newsgroup
+                $$prev = 0;
+                1;
+        } else { # all done, break out of the long_response
+                undef;
+        }
 }
 
 sub cmd_newnews ($$$$;$$) {
@@ -321,30 +352,22 @@ sub cmd_newnews ($$$$;$$) {
         my ($keep, $skip) = split('!', $newsgroups, 2);
         ngpat2re($keep);
         ngpat2re($skip);
-        my @overs;
-        foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
-                $ng->{newsgroup} =~ $keep or next;
-                $ng->{newsgroup} =~ $skip and next;
-                my $over = $ng->over or next;
-                push @overs, $over;
-        };
-        return '.' unless @overs;
-
+        my @names = grep(!/$skip/, grep(/$keep/,
+                                @{$self->{nntpd}->{groupnames}}));
+        return '.' unless scalar(@names);
         my $prev = 0;
-        long_response($self, \&newnews_i, \@overs, $ts, \$prev);
+        long_response($self, \&newnews_i, \@names, $ts, \$prev);
 }
 
 sub cmd_group ($$) {
         my ($self, $group) = @_;
-        my $no_such = '411 no such news group';
         my $nntpd = $self->{nntpd};
-        my $ng = $nntpd->{groups}->{$group} or return $no_such;
+        my $ibx = $nntpd->{pi_cfg}->{-by_newsgroup}->{$group} or
+                return '411 no such news group';
         $nntpd->idler_start;
 
-        $self->{ng} = $ng;
-        my ($min, $max) = $ng->mm->minmax;
-        $min ||= 0;
-        $max ||= 0;
+        $self->{ibx} = $ibx;
+        my ($min, $max) = $ibx->mm->minmax;
         $self->{article} = $min;
         my $est_size = $max - $min;
         "211 $est_size $min $max $group";
@@ -352,13 +375,13 @@ sub cmd_group ($$) {
 
 sub article_adj ($$) {
         my ($self, $off) = @_;
-        my $ng = $self->{ng} or return '412 no newsgroup selected';
+        my $ibx = $self->{ibx} or return '412 no newsgroup selected';
 
         my $n = $self->{article};
         defined $n or return '420 no current article has been selected';
 
         $n += $off;
-        my $mid = $ng->mm->mid_for($n);
+        my $mid = $ibx->mm->mid_for($n);
         unless ($mid) {
                 $n = $off > 0 ? 'next' : 'previous';
                 return "421 no $n article in this group";
@@ -374,8 +397,8 @@ sub cmd_last ($) { article_adj($_[0], -1) }
 # the single-point-of-failure a single server provides.
 sub cmd_post ($) {
         my ($self) = @_;
-        my $ng = $self->{ng};
-        $ng ? "440 mailto:$ng->{-primary_address} to post"
+        my $ibx = $self->{ibx};
+        $ibx ? "440 mailto:$ibx->{-primary_address} to post"
                 : '440 posting not allowed'
 }
 
@@ -395,19 +418,41 @@ sub header_append ($$$) {
         $hdr->header_set($k, @v, $v);
 }
 
-sub xref ($$$$) {
-        my ($self, $ng, $n, $mid) = @_;
-        my $ret = $self->{nntpd}->{servername} . " $ng->{newsgroup}:$n";
+sub xref_by_tc ($$$) { # add To:/Cc: inboxes which have $smsg->{mid} to $xref
+        my ($xref, $pi_cfg, $smsg) = @_;
+        my ($by_addr, $mid) = ($pi_cfg->{-by_addr}, $smsg->{mid});
+        for my $f (qw(to cc)) {
+                my @ibxs = map {
+                        $by_addr->{lc($_)} // ()
+                } (PublicInbox::Address::emails($smsg->{$f} // ''));
+                for my $ibx (@ibxs) {
+                        my $ngname = $ibx->{newsgroup} // next;
+                        next if defined $xref->{$ngname};
+                        my $n = eval { $ibx->mm->num_for($mid) } // next;
+                        $xref->{$ngname} = $n; # never undef: no bogus "ng:"
+                }
+        }
+}
 
-        # num_for is pretty cheap and sometimes we'll lookup the existence
-        # of an article without getting even the OVER info.  In other words,
-        # I'm not sure if its worth optimizing by scanning To:/Cc: and
-        # PublicInbox::ExtMsg on the PSGI end is just as expensive
-        foreach my $other (@{$self->{nntpd}->{grouplist}}) {
-                next if $ng eq $other;
-                my $num = eval { $other->mm->num_for($mid) } or next;
-                $ret .= " $other->{newsgroup}:$num";
+sub xref ($$$) {
+        my ($self, $cur_ibx, $smsg) = @_;
+        my $nntpd = $self->{nntpd};
+        my $cur_ng = $cur_ibx->{newsgroup};
+        my $xref;
+        if (my $ALL = $nntpd->{pi_cfg}->ALL) {
+                $xref = $ALL->nntp_xref_for($cur_ibx, $smsg);
+                xref_by_tc($xref, $nntpd->{pi_cfg}, $smsg);
+        } else { # slow path
+                $xref = { $cur_ng => $smsg->{num} };
+                my $mid = $smsg->{mid};
+                for my $ibx (values %{$nntpd->{pi_cfg}->{-by_newsgroup}}) {
+                        next if defined($xref->{$ibx->{newsgroup}});
+                        my $num = eval { $ibx->mm->num_for($mid) } // next;
+                        $xref->{$ibx->{newsgroup}} = $num;
+                }
         }
+        my $ret = "$nntpd->{servername} $cur_ng:".delete($xref->{$cur_ng});
+        $ret .= " $_:$xref->{$_}" for (sort keys %$xref);
         $ret;
 }
 
@@ -430,7 +475,7 @@ sub set_nntp_headers ($$) {
 
         # clobber some existing headers
         my $ibx = $smsg->{-ibx};
-        my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid);
+        my $xref = xref($smsg->{nntp}, $ibx, $smsg);
         $hdr->header_set('Xref', $xref);
 
         # RFC 5536 3.1.4
@@ -442,53 +487,34 @@ sub set_nntp_headers ($$) {
         # *something* here is required for leafnode, try to follow
         # RFC 5536 3.1.5...
         $hdr->header_set('Path', $server_name . '!not-for-mail');
-
-        header_append($hdr, 'List-Post', "<mailto:$ibx->{-primary_address}>");
-        if (my $url = $ibx->base_url) {
-                $mid = mid_escape($mid);
-                header_append($hdr, 'Archived-At', "<$url$mid/>");
-                header_append($hdr, 'List-Archive', "<$url>");
-        }
 }
 
 sub art_lookup ($$$) {
         my ($self, $art, $code) = @_;
-        my $ng = $self->{ng};
-        my ($n, $mid);
+        my ($ibx, $n);
         my $err;
         if (defined $art) {
                 if ($art =~ /\A[0-9]+\z/) {
                         $err = '423 no such article number in this group';
                         $n = int($art);
-                        goto find_mid;
+                        goto find_ibx;
                 } elsif ($art =~ $ONE_MSGID) {
-                        $mid = $1;
-                        $err = r430;
-                        $n = $ng->mm->num_for($mid) if $ng;
-                        goto found if defined $n;
-                        foreach my $g (values %{$self->{nntpd}->{groups}}) {
-                                $n = $g->mm->num_for($mid);
-                                if (defined $n) {
-                                        $ng = $g;
-                                        goto found;
-                                }
-                        }
-                        return $err;
+                        ($ibx, $n) = mid_lookup($self, $1);
+                        goto found if $ibx;
+                        return r430;
                 } else {
                         return r501;
                 }
         } else {
                 $err = '420 no current article has been selected';
-                $n = $self->{article};
-                defined $n or return $err;
-find_mid:
-                $ng or return '412 no newsgroup has been selected';
-                $mid = $ng->mm->mid_for($n);
-                defined $mid or return $err;
+                $n = $self->{article} // return $err;
+find_ibx:
+                $ibx = $self->{ibx} or
+                                return '412 no newsgroup has been selected';
         }
 found:
-        my $smsg = $ng->over->get_art($n) or return $err;
-        $smsg->{-ibx} = $ng;
+        my $smsg = $ibx->over->get_art($n) or return $err;
+        $smsg->{-ibx} = $ibx;
         if ($code == 223) { # STAT
                 set_art($self, $n);
                 "223 $n <$smsg->{mid}> article retrieved - " .
@@ -498,7 +524,7 @@ found:
                 $smsg->{nntp_code} = $code;
                 set_art($self, $art);
                 # this dereferences to `undef'
-                ${git_async_cat($ng->git, $smsg->{blob}, \&blob_cb, $smsg)};
+                ${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
         }
 }
 
@@ -598,10 +624,10 @@ sub cmd_help ($) {
 
 sub get_range ($$) {
         my ($self, $range) = @_;
-        my $ng = $self->{ng} or return '412 no news group has been selected';
+        my $ibx = $self->{ibx} or return '412 no news group has been selected';
         defined $range or return '420 No article(s) selected';
         my ($beg, $end);
-        my ($min, $max) = $ng->mm->minmax;
+        my ($min, $max) = $ibx->mm->minmax;
         if ($range =~ /\A([0-9]+)\z/) {
                 $beg = $end = $1;
         } elsif ($range =~ /\A([0-9]+)-\z/) {
@@ -671,9 +697,9 @@ sub long_response ($$;@) {
 
 sub hdr_msgid_range_i {
         my ($self, $beg, $end) = @_;
-        my $r = $self->{ng}->mm->msg_range($beg, $end);
+        my $r = $self->{ibx}->mm->msg_range($beg, $end);
         @$r or return;
-        more($self, join("\r\n", map { "$_->[0] <$_->[1]>" } @$r));
+        $self->msg_more(join('', map { "$_->[0] <$_->[1]>\r\n" } @$r));
         1;
 }
 
@@ -681,9 +707,9 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull.
         my ($self, $xhdr, $range) = @_;
 
         if (defined $range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 return r430 unless $n;
-                hdr_mid_response($self, $xhdr, $ng, $n, $range, $range);
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range, $range);
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
@@ -695,28 +721,54 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull.
 
 sub mid_lookup ($$) {
         my ($self, $mid) = @_;
-        my $self_ng = $self->{ng};
-        if ($self_ng) {
-                my $n = $self_ng->mm->num_for($mid);
-                return ($self_ng, $n) if defined $n;
+        my $cur_ibx = $self->{ibx};
+        if ($cur_ibx) {
+                my $n = $cur_ibx->mm->num_for($mid);
+                return ($cur_ibx, $n) if defined $n;
         }
-        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
-                next if defined $self_ng && $ng eq $self_ng;
-                my $n = $ng->mm->num_for($mid);
-                return ($ng, $n) if defined $n;
+        my $pi_cfg = $self->{nntpd}->{pi_cfg};
+        if (my $ALL = $pi_cfg->ALL) {
+                my ($id, $prev);
+                while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+                        my $xr3 = $ALL->over->get_xref3($smsg->{num});
+                        if (my @x = grep(/:$smsg->{blob}\z/, @$xr3)) {
+                                my ($ngname, $xnum) = split(/:/, $x[0]);
+                                my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+                                return ($ibx, $xnum) if $ibx;
+                                # fall through to trying all xref3s
+                        } else {
+                                warn <<EOF;
+W: xref3 missing for <$mid> ($smsg->{blob}) in $ALL->{topdir}, -extindex bug?
+EOF
+                        }
+                        # try all xref3s
+                        for my $x (@$xr3) {
+                                my ($ngname, $xnum) = split(/:/, $x);
+                                my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+                                return ($ibx, $xnum) if $ibx;
+                                warn "W: `$ngname' does not exist for #$xnum\n";
+                        }
+                }
+                # no warning here, $mid is just invalid
+        } else { # slow path for non-ALL users
+                for my $ibx (values %{$pi_cfg->{-by_newsgroup}}) {
+                        next if defined $cur_ibx && $ibx eq $cur_ibx;
+                        my $n = $ibx->mm->num_for($mid);
+                        return ($ibx, $n) if defined $n;
+                }
         }
         (undef, undef);
 }
 
 sub xref_range_i {
         my ($self, $beg, $end) = @_;
-        my $ng = $self->{ng};
-        my $r = $ng->mm->msg_range($beg, $end);
-        @$r or return;
-        more($self, join("\r\n", map {
-                my $num = $_->[0];
-                "$num ".xref($self, $ng, $num, $_->[1]);
-        } @$r));
+        my $ibx = $self->{ibx};
+        my $msgs = $ibx->over->query_xover($$beg, $end);
+        scalar(@$msgs) or return;
+        $$beg = $msgs->[-1]->{num} + 1;
+        $self->msg_more(join('', map {
+                "$_->{num} ".xref($self, $ibx, $_) . "\r\n";
+        } @$msgs));
         1;
 }
 
@@ -725,10 +777,11 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin
 
         if (defined $range && $range =~ $ONE_MSGID) {
                 my $mid = $1;
-                my ($ng, $n) = mid_lookup($self, $mid);
+                my ($ibx, $n) = mid_lookup($self, $mid);
                 return r430 unless $n;
-                hdr_mid_response($self, $xhdr, $ng, $n, $range,
-                                xref($self, $ng, $n, $mid));
+                my $smsg = $ibx->over->get_art($n) or return;
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range,
+                                xref($self, $ibx, $smsg));
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
@@ -747,7 +800,7 @@ sub over_header_for {
 
 sub smsg_range_i {
         my ($self, $beg, $end, $field) = @_;
-        my $over = $self->{ng}->over;
+        my $over = $self->{ibx}->over;
         my $msgs = $over->query_xover($$beg, $end);
         scalar(@$msgs) or return;
         my $tmp = '';
@@ -770,10 +823,10 @@ sub smsg_range_i {
 sub hdr_smsg ($$$$) {
         my ($self, $xhdr, $field, $range) = @_;
         if (defined $range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 return r430 unless defined $n;
-                my $v = over_header_for($ng->over, $n, $field);
-                hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
+                my $v = over_header_for($ibx->over, $n, $field);
+                hdr_mid_response($self, $xhdr, $ibx, $n, $range, $v);
         } else { # numeric range
                 $range = $self->{article} unless defined $range;
                 my $r = get_range($self, $range);
@@ -813,26 +866,26 @@ sub cmd_xhdr ($$;$) {
 }
 
 sub hdr_mid_prefix ($$$$$) {
-        my ($self, $xhdr, $ng, $n, $mid) = @_;
+        my ($self, $xhdr, $ibx, $n, $mid) = @_;
         return $mid if $xhdr;
 
         # HDR for RFC 3977 users
-        if (my $self_ng = $self->{ng}) {
-                ($self_ng eq $ng) ? $n : '0';
+        if (my $cur_ibx = $self->{ibx}) {
+                ($cur_ibx eq $ibx) ? $n : '0';
         } else {
                 '0';
         }
 }
 
 sub hdr_mid_response ($$$$$$) {
-        my ($self, $xhdr, $ng, $n, $mid, $v) = @_;
+        my ($self, $xhdr, $ibx, $n, $mid, $v) = @_;
         my $res = '';
         if ($xhdr) {
                 $res .= r221 . "\r\n";
                 $res .= "$mid $v\r\n";
         } else {
                 $res .= r225 . "\r\n";
-                my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
+                my $pfx = hdr_mid_prefix($self, $xhdr, $ibx, $n, $mid);
                 $res .= "$pfx $v\r\n";
         }
         res($self, $res .= '.');
@@ -841,14 +894,14 @@ sub hdr_mid_response ($$$$$$) {
 
 sub xrover_i {
         my ($self, $beg, $end) = @_;
-        my $h = over_header_for($self->{ng}->over, $$beg, 'references');
+        my $h = over_header_for($self->{ibx}->over, $$beg, 'references');
         more($self, "$$beg $h") if defined($h);
         $$beg++ < $end;
 }
 
 sub cmd_xrover ($;$) {
         my ($self, $range) = @_;
-        my $ng = $self->{ng} or return '412 no newsgroup selected';
+        my $ibx = $self->{ibx} or return '412 no newsgroup selected';
         (defined $range && $range =~ /[<>]/) and
                 return '420 No article(s) selected'; # no message IDs
 
@@ -859,11 +912,11 @@ sub cmd_xrover ($;$) {
         long_response($self, \&xrover_i, @$r);
 }
 
-sub over_line ($$$$) {
-        my ($self, $ng, $num, $smsg) = @_;
+sub over_line ($$$) {
+        my ($self, $ibx, $smsg) = @_;
         # n.b. field access and procedural calls can be
         # 10%-15% faster than OO method calls:
-        my $s = join("\t", $num,
+        my $s = join("\t", $smsg->{num},
                 $smsg->{subject},
                 $smsg->{from},
                 PublicInbox::Smsg::date($smsg),
@@ -871,23 +924,28 @@ sub over_line ($$$$) {
                 $smsg->{references},
                 $smsg->{bytes},
                 $smsg->{lines},
-                "Xref: " . xref($self, $ng, $num, $smsg->{mid}));
+                "Xref: " . xref($self, $ibx, $smsg));
         utf8::encode($s);
-        $s
+        $s .= "\r\n";
 }
 
 sub cmd_over ($;$) {
         my ($self, $range) = @_;
         if ($range && $range =~ $ONE_MSGID) {
-                my ($ng, $n) = mid_lookup($self, $1);
+                my ($ibx, $n) = mid_lookup($self, $1);
                 defined $n or return r430;
-                my $smsg = $ng->over->get_art($n) or return r430;
+                my $smsg = $ibx->over->get_art($n) or return r430;
                 more($self, '224 Overview information follows (multi-line)');
 
                 # Only set article number column if it's the current group
-                my $self_ng = $self->{ng};
-                $n = 0 if (!$self_ng || $self_ng ne $ng);
-                more($self, over_line($self, $ng, $n, $smsg));
+                # (RFC 3977 8.3.2)
+                my $cur_ibx = $self->{ibx};
+                if (!$cur_ibx || $cur_ibx ne $ibx) {
+                        # set {-orig_num} for nntp_xref_for
+                        $smsg->{-orig_num} = $smsg->{num};
+                        $smsg->{num} = 0;
+                }
+                $self->msg_more(over_line($self, $ibx, $smsg));
                 '.';
         } else {
                 cmd_xover($self, $range);
@@ -896,13 +954,13 @@ sub cmd_over ($;$) {
 
 sub xover_i {
         my ($self, $beg, $end) = @_;
-        my $ng = $self->{ng};
-        my $msgs = $ng->over->query_xover($$beg, $end);
+        my $ibx = $self->{ibx};
+        my $msgs = $ibx->over->query_xover($$beg, $end);
         my $nr = scalar @$msgs or return;
 
         # OVERVIEW.FMT
-        more($self, join("\r\n", map {
-                over_line($self, $ng, $_->{num}, $_);
+        $self->msg_more(join('', map {
+                over_line($self, $ibx, $_);
                 } @$msgs));
         $$beg = $msgs->[-1]->{num} + 1;
 }
@@ -949,12 +1007,28 @@ sub cmd_xpath ($$) {
         return r501 unless $mid =~ $ONE_MSGID;
         $mid = $1;
         my @paths;
-        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
-                my $n = $ng->mm->num_for($mid);
-                push @paths, "$ng->{newsgroup}/$n" if defined $n;
+        my $pi_cfg = $self->{nntpd}->{pi_cfg};
+        my $groups = $pi_cfg->{-by_newsgroup};
+        if (my $ALL = $pi_cfg->ALL) {
+                my ($id, $prev, %seen);
+                while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+                        my $xr3 = $ALL->over->get_xref3($smsg->{num});
+                        for my $x (@$xr3) {
+                                my ($ngname, $n) = split(/:/, $x);
+                                $x = "$ngname/$n";
+                                if ($groups->{$ngname} && !$seen{$x}++) {
+                                        push(@paths, $x);
+                                }
+                        }
+                }
+        } else { # slow path, no point in using long_response
+                for my $ibx (values %$groups) {
+                        my $n = $ibx->mm->num_for($mid) // next;
+                        push @paths, "$ibx->{newsgroup}/$n";
+                }
         }
         return '430 no such article on server' unless @paths;
-        '223 '.join(' ', @paths);
+        '223 '.join(' ', sort(@paths));
 }
 
 sub res ($$) { do_write($_[0], $_[1] . "\r\n") }
diff --git a/lib/PublicInbox/NNTPD.pm b/lib/PublicInbox/NNTPD.pm
index 6b762d89..6907a03c 100644
--- a/lib/PublicInbox/NNTPD.pm
+++ b/lib/PublicInbox/NNTPD.pm
@@ -12,8 +12,8 @@ use PublicInbox::InboxIdle;
 
 sub new {
         my ($class) = @_;
-        my $pi_config = PublicInbox::Config->new;
-        my $name = $pi_config->{'publicinbox.nntpserver'};
+        my $pi_cfg = PublicInbox::Config->new;
+        my $name = $pi_cfg->{'publicinbox.nntpserver'};
         if (!defined($name) or $name eq '') {
                 $name = hostname;
         } elsif (ref($name) eq 'ARRAY') {
@@ -24,8 +24,7 @@ sub new {
                 groups => {},
                 err => \*STDERR,
                 out => \*STDOUT,
-                grouplist => [],
-                pi_config => $pi_config,
+                pi_cfg => $pi_cfg,
                 servername => $name,
                 greet => \"201 $name ready - post via email\r\n",
                 # accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
@@ -35,40 +34,33 @@ sub new {
 
 sub refresh_groups {
         my ($self, $sig) = @_;
-        my $pi_config = $sig ? PublicInbox::Config->new : $self->{pi_config};
-        my $new = {};
-        my @list;
-        $pi_config->each_inbox(sub {
-                my ($ng) = @_;
-                my $ngname = $ng->{newsgroup} or return;
-                if (ref $ngname) {
-                        warn 'multiple newsgroups not supported: '.
-                                join(', ', @$ngname). "\n";
-                # Newsgroup name needs to be compatible with RFC 3977
-                # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
-                # Leave out a few chars likely to cause problems or conflicts:
-                # '|', '<', '>', ';', '#', '$', '&',
-                } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
-                        warn "newsgroup name invalid: `$ngname'\n";
-                } elsif ($ng->nntp_usable) {
-                        # Only valid if msgmap and search works
-                        $new->{$ngname} = $ng;
-                        push @list, $ng;
-
+        my $pi_cfg = $sig ? PublicInbox::Config->new : $self->{pi_cfg};
+        my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox
+        my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
+        $pi_cfg->each_inbox(sub {
+                my ($ibx) = @_;
+                my $ngname = $ibx->{newsgroup} // return;
+                my $ce = $cache->{$ngname};
+                if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) {
+                        # only valid if msgmap and over works
                         # preload to avoid fragmentation:
-                        $ng->description;
-                        $ng->base_url;
+                        $ibx->description;
+                        $ibx->base_url;
+                } else {
+                        delete $groups->{$ngname};
+                        delete $ibx->{newsgroup};
+                        # Note: don't be tempted to delete more for memory
+                        # savings just yet: NNTP, IMAP, and WWW may all
+                        # run in the same process someday.
                 }
         });
-        @list =        sort { $a->{newsgroup} cmp $b->{newsgroup} } @list;
-        $self->{grouplist} = \@list;
-        $self->{pi_config} = $pi_config;
+        $self->{groupnames} = [ sort(keys %$groups) ];
         # this will destroy old groups that got deleted
-        %{$self->{groups}} = %$new;
+        $self->{pi_cfg} = $pi_cfg;
 }
 
 sub idler_start {
-        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+        $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
 }
 
 1;
diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm
index 6bed0103..61d9ae7c 100644
--- a/lib/PublicInbox/NewsWWW.pm
+++ b/lib/PublicInbox/NewsWWW.pm
@@ -13,9 +13,8 @@ use PublicInbox::MID qw(mid_escape);
 use PublicInbox::Hval qw(prurl);
 
 sub new {
-        my ($class, $pi_config) = @_;
-        $pi_config ||= PublicInbox::Config->new;
-        bless { pi_config => $pi_config }, $class;
+        my ($class, $pi_cfg) = @_;
+        bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
 }
 
 sub redirect ($$) {
@@ -47,8 +46,8 @@ sub call {
         # /inbox.foo.bar/123456
         my (undef, @parts) = split(m!/!, $env->{PATH_INFO});
         my ($ng, $article) = @parts;
-        my $pi_config = $self->{pi_config};
-        if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
+        my $pi_cfg = $self->{pi_cfg};
+        if (my $ibx = $pi_cfg->lookup_newsgroup($ng)) {
                 my $url = prurl($env, $ibx->{url});
                 my $code = 301;
                 if (defined $article && $article =~ /\A[0-9]+\z/) {
@@ -63,7 +62,6 @@ sub call {
                 return redirect($code, $url);
         }
 
-        my $res;
         my @try = (join('/', @parts));
 
         # trailing slash is in the rest of our WWW, so maybe some users
@@ -72,13 +70,30 @@ sub call {
                 pop @parts;
                 push @try, join('/', @parts);
         }
-
-        foreach my $mid (@try) {
-                my $arg = [ $mid ];
-                $pi_config->each_inbox(\&try_inbox, $arg);
-                defined($res = $arg->[1]) and last;
+        my $ALL = $pi_cfg->ALL;
+        if (my $over = $ALL ? $ALL->over : undef) {
+                my $by_eidx_key = $pi_cfg->{-by_eidx_key};
+                for my $mid (@try) {
+                        my ($id, $prev);
+                        while (my $x = $over->next_by_mid($mid, \$id, \$prev)) {
+                                my $xr3 = $over->get_xref3($x->{num});
+                                for (@$xr3) {
+                                        s/:[0-9]+:$x->{blob}\z// or next;
+                                        my $ibx = $by_eidx_key->{$_} // next;
+                                        my $url = $ibx->base_url or next;
+                                        $url .= mid_escape($mid) . '/';
+                                        return redirect(302, $url);
+                                }
+                        }
+                }
+        } else { # slow path, scan every inbox
+                for my $mid (@try) {
+                        my $arg = [ $mid ]; # [1] => result
+                        $pi_cfg->each_inbox(\&try_inbox, $arg);
+                        return $arg->[1] if $arg->[1];
+                }
         }
-        $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
+        [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
 }
 
 1;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 08112386..49ba180b 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -260,6 +260,27 @@ SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1
         $smsg ? load_from_row($smsg) : undef;
 }
 
+sub get_xref3 {
+        my ($self, $num, $raw) = @_;
+        my $dbh = dbh($self);
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id,xnum ASC
+
+        $sth->execute($num);
+        my $rows = $sth->fetchall_arrayref;
+        return $rows if $raw;
+        my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ?
+
+        [ map {
+                my $r = $_;
+                $eidx_key_sth->execute($r->[0]);
+                my $eidx_key = $eidx_key_sth->fetchrow_array;
+                $eidx_key //= "missing://ibx_id=$r->[0]";
+                "$eidx_key:$r->[1]:".unpack('H*', $r->[2]);
+        } @$rows ];
+}
+
 sub next_by_mid {
         my ($self, $mid, $id, $prev) = @_;
         my $dbh = dbh($self);
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 840e2c2a..dcc2cff3 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -79,6 +79,11 @@ SELECT $id_col FROM $tbl WHERE $val_col = ? LIMIT 1
         }
 }
 
+sub ibx_id {
+        my ($self, $eidx_key) = @_;
+        id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key);
+}
+
 sub sid {
         my ($self, $path) = @_;
         return unless defined $path && $path ne '';
@@ -267,6 +272,13 @@ sub subject_path ($) {
         lc($subj);
 }
 
+sub ddd_for ($) {
+        my ($smsg) = @_;
+        my $dd = $smsg->to_doc_data;
+        utf8::encode($dd);
+        compress($dd);
+}
+
 sub add_overview {
         my ($self, $eml, $smsg) = @_;
         $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
@@ -278,10 +290,7 @@ sub add_overview {
                 $xpath = subject_path($subj);
                 $xpath = id_compress($xpath);
         }
-        my $dd = $smsg->to_doc_data;
-        utf8::encode($dd);
-        $dd = compress($dd);
-        add_over($self, $smsg, $mids, $refs, $xpath, $dd);
+        add_over($self, $smsg, $mids, $refs, $xpath, ddd_for($smsg));
 }
 
 sub _add_over {
@@ -385,13 +394,12 @@ sub create_tables {
 
         $dbh->do(<<'');
 CREATE TABLE IF NOT EXISTS over (
-        num INTEGER NOT NULL, /* NNTP article number == IMAP UID */
+        num INTEGER PRIMARY KEY NOT NULL, /* NNTP article number == IMAP UID */
         tid INTEGER NOT NULL, /* THREADID (IMAP REFERENCES threading, JMAP) */
         sid INTEGER, /* Subject ID (IMAP ORDEREDSUBJECT "threading") */
         ts INTEGER, /* IMAP INTERNALDATE (Received: header, git commit time) */
         ds INTEGER, /* RFC-2822 sent Date: header, git author time */
-        ddd VARBINARY, /* doc-data-deflated (->to_doc_data, ->load_from_data) */
-        UNIQUE (num)
+        ddd VARBINARY /* doc-data-deflated (->to_doc_data, ->load_from_data) */
 )
 
         $dbh->do('CREATE INDEX IF NOT EXISTS idx_tid ON over (tid)');
@@ -465,10 +473,14 @@ sub dbh_close {
 
 sub create {
         my ($self) = @_;
-        unless (-r $self->{filename}) {
+        my $fn = $self->{filename} // do {
+                Carp::confess('BUG: no {filename}') unless $self->{dbh};
+                return;
+        };
+        unless (-r $fn) {
                 require File::Path;
                 require File::Basename;
-                File::Path::mkpath(File::Basename::dirname($self->{filename}));
+                File::Path::mkpath(File::Basename::dirname($fn));
         }
         # create the DB:
         PublicInbox::Over::dbh($self);
@@ -518,4 +530,162 @@ EOM
         $pr->("I: rethread culled $total ghosts\n") if $pr && $total;
 }
 
+# used for cross-inbox search
+sub eidx_prep ($) {
+        my ($self) = @_;
+        $self->{-eidx_prep} //= do {
+                my $dbh = $self->dbh;
+                $dbh->do(<<"");
+INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid')
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS inboxes (
+        ibx_id INTEGER PRIMARY KEY AUTOINCREMENT,
+        eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */
+        UNIQUE (eidx_key)
+)
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS xref3 (
+        docid INTEGER NOT NULL, /* <=> over.num */
+        ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */
+        xnum INTEGER NOT NULL, /* NNTP article number in ibx */
+        oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */
+        UNIQUE (docid, ibx_id, xnum, oidbin)
+)
+
+        $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)');
+
+        # performance critical, this is not UNIQUE since we may need to
+        # tolerate some old bugs from indexing mirrors
+        $dbh->do('CREATE INDEX IF NOT EXISTS idx_nntp ON '.
+                'xref3 (oidbin,xnum,ibx_id)');
+
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS eidx_meta (
+        key VARCHAR(255) PRIMARY KEY,
+        val VARCHAR(255) NOT NULL
+)
+
+                # A queue of current docids which need reindexing.
+                # eidxq persists across aborted -extindex invocations
+                # Currently used for "-extindex --reindex" for Xapian
+                # data, but may be used in more places down the line.
+                $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS eidxq (
+        docid INTEGER PRIMARY KEY NOT NULL
+)
+
+                $dbh;
+        };
+}
+
+sub eidx_meta { # requires transaction
+        my ($self, $key, $val) = @_;
+
+        my $sql = 'SELECT val FROM eidx_meta WHERE key = ? LIMIT 1';
+        my $dbh = $self->{dbh};
+        defined($val) or return $dbh->selectrow_array($sql, undef, $key);
+
+        my $prev = $dbh->selectrow_array($sql, undef, $key);
+        if (defined $prev) {
+                $sql = 'UPDATE eidx_meta SET val = ? WHERE key = ?';
+                $dbh->do($sql, undef, $val, $key);
+        } else {
+                $sql = 'INSERT INTO eidx_meta (key,val) VALUES (?,?)';
+                $dbh->do($sql, undef, $key, $val);
+        }
+        $prev;
+}
+
+sub eidx_max {
+        my ($self) = @_;
+        get_counter($self->{dbh}, 'eidx_docid');
+}
+
+sub add_xref3 {
+        my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_;
+        begin_lazy($self);
+        my $ibx_id = ibx_id($self, $eidx_key);
+        my $oidbin = pack('H*', $oidhex);
+        my $sth = $self->{dbh}->prepare_cached(<<'');
+INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?)
+
+        $sth->bind_param(1, $docid);
+        $sth->bind_param(2, $ibx_id);
+        $sth->bind_param(3, $xnum);
+        $sth->bind_param(4, $oidbin, SQL_BLOB);
+        $sth->execute;
+}
+
+# returns remaining reference count to $docid
+sub remove_xref3 {
+        my ($self, $docid, $oidhex, $eidx_key, $rm_eidx_info) = @_;
+        begin_lazy($self);
+        my $oidbin = pack('H*', $oidhex);
+        my ($sth, $ibx_id);
+        if (defined $eidx_key) {
+                $ibx_id = ibx_id($self, $eidx_key);
+                $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ?
+
+                $sth->bind_param(1, $docid);
+                $sth->bind_param(2, $ibx_id);
+                $sth->bind_param(3, $oidbin, SQL_BLOB);
+        } else {
+                $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND oidbin = ?
+
+                $sth->bind_param(1, $docid);
+                $sth->bind_param(2, $oidbin, SQL_BLOB);
+        }
+        $sth->execute;
+        $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ?
+
+        $sth->execute($docid);
+        my $nr = $sth->fetchrow_array;
+        if ($nr == 0) {
+                delete_by_num($self, $docid);
+        } elsif (defined($ibx_id) && $rm_eidx_info) {
+                # if deduplication rules in ContentHash change, it's
+                # possible a docid can have multiple rows with the
+                # same ibx_id.  This governs whether or not we call
+                # ->shard_remove_eidx_info in ExtSearchIdx.
+                $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+                $sth->execute($docid, $ibx_id);
+                my $count = $sth->fetchrow_array;
+                $$rm_eidx_info = ($count == 0);
+        }
+        $nr;
+}
+
+# for when an xref3 goes missing, this does NOT update {ts}
+sub update_blob {
+        my ($self, $smsg, $oidhex) = @_;
+        my $sth = $self->{dbh}->prepare(<<'');
+UPDATE over SET ddd = ? WHERE num = ?
+
+        $smsg->{blob} = $oidhex;
+        $sth->bind_param(1, ddd_for($smsg), SQL_BLOB);
+        $sth->bind_param(2, $smsg->{num});
+        $sth->execute;
+}
+
+sub eidxq_add {
+        my ($self, $docid) = @_;
+        $self->dbh->prepare_cached(<<'')->execute($docid);
+INSERT OR IGNORE INTO eidxq (docid) VALUES (?)
+
+}
+
+sub eidxq_del {
+        my ($self, $docid) = @_;
+        $self->dbh->prepare_cached(<<'')->execute($docid);
+DELETE FROM eidxq WHERE docid = ?
+
+}
+
 1;
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 88b6d390..2aa2042a 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -359,12 +359,12 @@ sub new {
 }
 
 sub setup_rlimit {
-        my ($self, $name, $config) = @_;
+        my ($self, $name, $cfg) = @_;
         foreach my $rlim (@PublicInbox::Spawn::RLIMITS) {
                 my $k = lc($rlim);
                 $k =~ tr/_//d;
                 $k = "publicinboxlimiter.$name.$k";
-                defined(my $v = $config->{$k}) or next;
+                defined(my $v = $cfg->{$k}) or next;
                 my @rlimit = split(/\s*,\s*/, $v);
                 if (scalar(@rlimit) == 1) {
                         push @rlimit, $rlimit[0];
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fb35b747..fb3e9975 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -6,7 +6,7 @@
 package PublicInbox::Search;
 use strict;
 use parent qw(Exporter);
-our @EXPORT_OK = qw(mdocid);
+our @EXPORT_OK = qw(retry_reopen int_val);
 use List::Util qw(max);
 
 # values for searching, changing the numeric value breaks
@@ -54,11 +54,15 @@ use constant {
 
 use PublicInbox::Smsg;
 use PublicInbox::Over;
-my $QP_FLAGS;
-our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem);
+our $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query);
 our $Xap; # 'Search::Xapian' or 'Xapian'
-my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
-my $ENQ_ASCENDING;
+our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
+
+# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16,
+# let's hope the ABI is stable
+our $ENQ_DESCENDING = 0;
+our $ENQ_ASCENDING = 1;
 
 sub load_xapian () {
         return 1 if defined $Xap;
@@ -84,15 +88,8 @@ sub load_xapian () {
                         'NumberRangeProcessor' : 'NumberValueRangeProcessor');
                 $X{$_} = $Xap.'::'.$_ for (keys %X);
 
-                # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
-                # so lets hope this part of the ABI is stable because it's
-                # just an integer:
-                $ENQ_ASCENDING = $x eq 'Xapian' ?
-                                1 : Search::Xapian::ENQ_ASCENDING();
-
-                # for Smsg:
-                *PublicInbox::Smsg::sortable_unserialise =
-                                                $Xap.'::sortable_unserialise';
+                *sortable_serialise = $x.'::sortable_serialise';
+                *sortable_unserialise = $x.'::sortable_unserialise';
                 # n.b. FLAG_PURE_NOT is expensive not suitable for a public
                 # website as it could become a denial-of-service vector
                 # FLAG_PHRASE also seems to cause performance problems chert
@@ -193,38 +190,41 @@ sub xdir ($;$) {
         }
 }
 
-sub _xdb ($) {
+sub xdb_sharded {
+        my ($self) = @_;
+        opendir(my $dh, $self->{xpfx}) or return; # not initialized yet
+
+        # We need numeric sorting so shard[0] is first for reading
+        # Xapian metadata, if needed
+        my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return;
+        my (@xdb, $slow_phrase);
+        for (0..$last) {
+                my $shard_dir = "$self->{xpfx}/$_";
+                if (-d $shard_dir && -r _) {
+                        push @xdb, $X{Database}->new($shard_dir);
+                        $slow_phrase ||= -f "$shard_dir/iamchert";
+                } else { # gaps from missing epochs throw off mdocid()
+                        warn "E: $shard_dir missing or unreadable\n";
+                        return;
+                }
+        }
+        $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
+        $self->{nshard} = scalar(@xdb);
+        my $xdb = shift @xdb;
+        $xdb->add_database($_) for @xdb;
+        $xdb;
+}
+
+sub _xdb {
         my ($self) = @_;
         my $dir = xdir($self, 1);
-        my ($xdb, $slow_phrase);
-        my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
+        $self->{qp_flags} //= $QP_FLAGS;
         if ($self->{ibx_ver} >= 2) {
-                my @xdb;
-                opendir(my $dh, $dir) or return; # not initialized yet
-
-                # We need numeric sorting so shard[0] is first for reading
-                # Xapian metadata, if needed
-                my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
-                return if !defined($last);
-                for (0..$last) {
-                        my $shard_dir = "$dir/$_";
-                        if (-d $shard_dir && -r _) {
-                                push @xdb, $X{Database}->new($shard_dir);
-                                $slow_phrase ||= -f "$shard_dir/iamchert";
-                        } else { # gaps from missing epochs throw off mdocid()
-                                warn "E: $shard_dir missing or unreadable\n";
-                                return;
-                        }
-                }
-                $self->{nshard} = scalar(@xdb);
-                $xdb = shift @xdb;
-                $xdb->add_database($_) for @xdb;
+                xdb_sharded($self);
         } else {
-                $slow_phrase = -f "$dir/iamchert";
-                $xdb = $X{Database}->new($dir);
+                $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert";
+                $X{Database}->new($dir);
         }
-        $$qpf |= FLAG_PHRASE() unless $slow_phrase;
-        $xdb;
 }
 
 # v2 Xapian docids don't conflict, so they're identical to
@@ -244,9 +244,9 @@ sub mset_to_artnums {
 
 sub xdb ($) {
         my ($self) = @_;
-        $self->{xdb} ||= do {
+        $self->{xdb} //= do {
                 load_xapian();
-                _xdb($self);
+                $self->_xdb;
         };
 }
 
@@ -285,20 +285,19 @@ sub mset {
         $opts ||= {};
         my $qp = $self->{qp} //= qparse_new($self);
         my $query = $qp->parse_query($query_string, $self->{qp_flags});
-        $opts->{relevance} = 1 unless exists $opts->{relevance};
         _do_enquire($self, $query, $opts);
 }
 
 sub retry_reopen {
-        my ($self, $cb, $arg) = @_;
+        my ($self, $cb, @arg) = @_;
         for my $i (1..10) {
                 if (wantarray) {
                         my @ret;
-                        eval { @ret = $cb->($arg) };
+                        eval { @ret = $cb->($self, @arg) };
                         return @ret unless $@;
                 } else {
                         my $ret;
-                        eval { $ret = $cb->($arg) };
+                        eval { $ret = $cb->($self, @arg) };
                         return $ret unless $@;
                 }
                 # Exception: The revision being read has been discarded -
@@ -318,7 +317,7 @@ sub retry_reopen {
 
 sub _do_enquire {
         my ($self, $query, $opts) = @_;
-        retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
+        retry_reopen($self, \&_enquire_once, $query, $opts);
 }
 
 # returns true if all docs have the THREADID value
@@ -328,19 +327,32 @@ sub has_threadid ($) {
 }
 
 sub _enquire_once { # retry_reopen callback
-        my ($self, $query, $opts) = @{$_[0]};
+        my ($self, $query, $opts) = @_;
         my $xdb = xdb($self);
+        if (defined(my $eidx_key = $opts->{eidx_key})) {
+                $query = $X{Query}->new(OP_FILTER(), $query, 'O'.$eidx_key);
+        }
+        if (defined(my $uid_range = $opts->{uid_range})) {
+                my $range = $X{Query}->new(OP_VALUE_RANGE(), UID,
+                                        sortable_serialise($uid_range->[0]),
+                                        sortable_serialise($uid_range->[1]));
+                $query = $X{Query}->new(OP_FILTER(), $query, $range);
+        }
         my $enquire = $X{Enquire}->new($xdb);
         $enquire->set_query($query);
         $opts ||= {};
         my $desc = !$opts->{asc};
-        if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID
+        my $rel = $opts->{relevance} // 0;
+        if ($rel == -1) { # ORDER BY docid/UID
+                $enquire->set_weighting_scheme($X{BoolWeight}->new);
                 $enquire->set_docid_order($ENQ_ASCENDING);
+        } elsif ($rel == 0) {
+                $enquire->set_sort_by_value_then_relevance(TS, $desc);
+        } elsif ($rel == -2) {
                 $enquire->set_weighting_scheme($X{BoolWeight}->new);
-        } elsif ($opts->{relevance}) {
+                $enquire->set_docid_order($ENQ_DESCENDING);
+        } else { # rel > 0
                 $enquire->set_sort_by_relevance_then_value(TS, $desc);
-        } else {
-                $enquire->set_sort_by_value_then_relevance(TS, $desc);
         }
 
         # `mairix -t / --threads' or JMAP collapseThreads
@@ -426,4 +438,10 @@ sub help {
         \@ret;
 }
 
+sub int_val ($$) {
+        my ($doc, $col) = @_;
+        my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
+        sortable_unserialise($val) + 0; # PV => IV conversion
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c36fc6c7..b3361e05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -15,15 +15,17 @@ use PublicInbox::InboxWritable;
 use PublicInbox::MID qw(mids_for_index mids);
 use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
-use Carp qw(croak);
+use Carp qw(croak carp);
 use POSIX qw(strftime);
+use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn nodatacow_dir);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size);
+our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
+        index_text term_generator add_val is_bad_blob);
 my $X = \%PublicInbox::Search::X;
-my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
 our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
 use constant DEBUG => !!$ENV{DEBUG};
@@ -31,11 +33,11 @@ use constant DEBUG => !!$ENV{DEBUG};
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
+our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
 
 sub new {
         my ($class, $ibx, $creat, $shard) = @_;
         ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
-        my $levels = qr/\A(?:full|medium|basic)\z/;
         my $inboxdir = $ibx->{inboxdir};
         my $version = $ibx->version;
         my $indexlevel = 'full';
@@ -45,7 +47,7 @@ sub new {
                 $altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ];
         }
         if ($ibx->{indexlevel}) {
-                if ($ibx->{indexlevel} =~ $levels) {
+                if ($ibx->{indexlevel} =~ $INDEXLEVELS) {
                         $indexlevel = $ibx->{indexlevel};
                 } else {
                         die("Invalid indexlevel $ibx->{indexlevel}\n");
@@ -65,7 +67,6 @@ sub new {
                 $self->{-set_skip_docdata_once} = 1;
                 $self->{-skip_docdata} = 1;
         }
-        $ibx->umask_prepare;
         if ($version == 1) {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
@@ -135,7 +136,7 @@ sub idx_acquire {
                 }
         }
         return unless defined $flag;
-        $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_fsync};
+        $flag |= $DB_NO_SYNC if ($self->{ibx} // $self->{eidx})->{-no_fsync};
         my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         croak "Failed opening $dir: $@" if $@;
         $self->{xdb} = $xdb;
@@ -152,7 +153,7 @@ sub term_generator ($) { # write-only
 
         $self->{term_generator} //= do {
                 my $tg = $X->{TermGenerator}->new;
-                $tg->set_stemmer($self->stemmer);
+                $tg->set_stemmer(PublicInbox::Search::stemmer($self));
                 $tg;
         }
 }
@@ -323,6 +324,16 @@ sub index_xapian { # msg_iter callback
         }
 }
 
+sub index_list_id ($$$) {
+        my ($self, $doc, $hdr) = @_;
+        for my $l ($hdr->header_raw('List-Id')) {
+                $l =~ /<([^>]+)>/ or next;
+                my $lid = lc $1;
+                $doc->add_boolean_term('G' . $lid);
+                index_text($self, $lid, 1, 'XL'); # probabilistic
+        }
+}
+
 sub index_ids ($$$$) {
         my ($self, $doc, $hdr, $mids) = @_;
         for my $mid (@$mids) {
@@ -336,16 +347,12 @@ sub index_ids ($$$$) {
                 }
         }
         $doc->add_boolean_term('Q' . $_) for @$mids;
-        for my $l ($hdr->header_raw('List-Id')) {
-                $l =~ /<([^>]+)>/ or next;
-                my $lid = lc $1;
-                $doc->add_boolean_term('G' . $lid);
-                index_text($self, $lid, 1, 'XL'); # probabilistic
-        }
+        index_list_id($self, $doc, $hdr);
 }
 
-sub add_xapian ($$$$) {
+sub eml2doc ($$$;$) {
         my ($self, $eml, $smsg, $mids) = @_;
+        $mids //= mids_for_index($eml);
         my $doc = $X->{Document}->new;
         add_val($doc, PublicInbox::Search::TS(), $smsg->{ts});
         my @ds = gmtime($smsg->{ds});
@@ -361,6 +368,9 @@ sub add_xapian ($$$$) {
         $tg->set_document($doc);
         index_headers($self, $smsg);
 
+        if (defined(my $eidx_key = $smsg->{eidx_key})) {
+                $doc->add_boolean_term('O'.$eidx_key);
+        }
         msg_iter($eml, \&index_xapian, [ $self, $doc ]);
         index_ids($self, $doc, $eml, $mids);
 
@@ -385,6 +395,12 @@ sub add_xapian ($$$$) {
                         }
                 }
         }
+        $doc;
+}
+
+sub add_xapian ($$$$) {
+        my ($self, $eml, $smsg, $mids) = @_;
+        my $doc = eml2doc($self, $eml, $smsg, $mids);
         $self->{xdb}->replace_document($smsg->{num}, $doc);
 }
 
@@ -434,32 +450,81 @@ sub add_message {
         $smsg->{num};
 }
 
+sub _get_doc ($$) {
+        my ($self, $docid) = @_;
+        my $doc = eval { $self->{xdb}->get_document($docid) };
+        $doc // do {
+                warn "E: $@\n" if $@;
+                warn "E: #$docid missing in Xapian\n";
+                undef;
+        }
+}
+
+sub add_eidx_info {
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        term_generator($self)->set_document($doc);
+        $doc->add_boolean_term('O'.$eidx_key);
+        index_list_id($self, $doc, $eml);
+        $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub remove_eidx_info {
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        eval { $doc->remove_term('O'.$eidx_key) };
+        warn "W: ->remove_term O$eidx_key: $@\n" if $@;
+        for my $l ($eml ? $eml->header_raw('List-Id') : ()) {
+                $l =~ /<([^>]+)>/ or next;
+                my $lid = lc $1;
+                eval { $doc->remove_term('G' . $lid) };
+                warn "W: ->remove_term G$lid: $@\n" if $@;
+
+                # nb: we don't remove the XL probabilistic terms
+                # since terms may overlap if cross-posted.
+                #
+                # IOW, a message which has both <foo.example.com>
+                # and <bar.example.com> would have overlapping
+                # "XLexample" and "XLcom" as terms, and we
+                # wouldn't know if they're safe to remove if we just
+                # unindex <foo.example.com> while preserving
+                # <bar.example.com>.
+                #
+                # In any case, this entire sub will likely never
+                # be needed and users using the "l:" prefix are probably
+                # rarer.
+        }
+        $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub smsg_from_doc ($) {
+        my ($doc) = @_;
+        my $data = $doc->get_data or return;
+        my $smsg = bless {}, 'PublicInbox::Smsg';
+        $smsg->{ts} = int_val($doc, PublicInbox::Search::TS());
+        my $dt = int_val($doc, PublicInbox::Search::DT());
+        my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
+        $smsg->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
+        $smsg->load_from_data($data);
+        $smsg;
+}
+
 sub xdb_remove {
-        my ($self, $oid, @removed) = @_;
+        my ($self, @docids) = @_;
         my $xdb = $self->{xdb} or return;
-        for my $num (@removed) {
-                my $doc = eval { $xdb->get_document($num) };
-                unless ($doc) {
-                        warn "E: $@\n" if $@;
-                        warn "E: #$num $oid missing in Xapian\n";
-                        next;
-                }
-                my $smsg = bless {}, 'PublicInbox::Smsg';
-                $smsg->load_expand($doc);
-                my $blob = $smsg->{blob} // '(unset)';
-                if ($blob eq $oid) {
-                        $xdb->delete_document($num);
-                } else {
-                        warn "E: #$num $oid != $blob in Xapian\n";
-                }
+        for my $docid (@docids) {
+                eval { $xdb->delete_document($docid) };
+                warn "E: #$docid not in in Xapian? $@\n" if $@;
         }
 }
 
-sub remove_by_oid {
-        my ($self, $oid, $num) = @_;
-        die "BUG: remove_by_oid is v2-only\n" if $self->{oidx};
+sub remove_by_docid {
+        my ($self, $num) = @_;
+        die "BUG: remove_by_docid is v2-only\n" if $self->{oidx};
         $self->begin_txn_lazy;
-        xdb_remove($self, $oid, $num) if need_xapian($self);
+        xdb_remove($self, $num) if need_xapian($self);
 }
 
 sub index_git_blob_id {
@@ -484,8 +549,8 @@ sub unindex_eml {
                 $tmp{$_}++ for @removed;
         }
         if (!$nr) {
-                $mids = join('> <', @$mids);
-                warn "W: <$mids> missing for removal from overview\n";
+                my $m = join('> <', @$mids);
+                warn "W: <$m> missing for removal from overview\n";
         }
         while (my ($num, $nr) = each %tmp) {
                 warn "BUG: $num appears >1 times ($nr) for $oid\n" if $nr != 1;
@@ -495,7 +560,7 @@ sub unindex_eml {
         } else { # just in case msgmap and over.sqlite3 become desynched:
                 $self->{mm}->mid_delete($mids->[0]);
         }
-        xdb_remove($self, $oid, keys %tmp) if need_xapian($self);
+        xdb_remove($self, keys %tmp) if need_xapian($self);
 }
 
 sub index_mm {
@@ -526,34 +591,63 @@ sub crlf_adjust ($) {
         }
 }
 
+sub is_bad_blob ($$$$) {
+        my ($oid, $type, $size, $expect_oid) = @_;
+        if ($type ne 'blob') {
+                carp "W: $expect_oid is not a blob (type=$type)";
+                return 1;
+        }
+        croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
+        $size == 0 ? 1 : 0; # size == 0 means purged
+}
+
 sub index_both { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
         my ($nr, $max) = @$sync{qw(nr max)};
         ++$$nr;
         $$max -= $size;
         $size += crlf_adjust($$bref);
         my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg';
         my $self = $sync->{sidx};
+        local $self->{current_info} = "$self->{current_info}: $oid";
         my $eml = PublicInbox::Eml->new($bref);
         $smsg->{num} = index_mm($self, $eml, $oid, $sync) or
                 die "E: could not generate NNTP article number for $oid";
         add_message($self, $eml, $smsg, $sync);
+        ++$self->{nidx};
+        my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+        ${$sync->{latest_cmt}} = $cur_cmt;
 }
 
 sub unindex_both { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $self) = @_;
+        my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
+        my $self = $sync->{sidx};
+        local $self->{current_info} = "$self->{current_info}: $oid";
         unindex_eml($self, $oid, PublicInbox::Eml->new($bref));
+        # may be undef if leftover
+        if (defined(my $cur_cmt = $sync->{cur_cmt})) {
+                ${$sync->{latest_cmt}} = $cur_cmt;
+        }
+        ++$self->{nidx};
+}
+
+sub with_umask {
+        my $self = shift;
+        ($self->{ibx} // $self->{eidx})->with_umask(@_);
 }
 
 # called by public-inbox-index
 sub index_sync {
         my ($self, $opt) = @_;
         delete $self->{lock_path} if $opt->{-skip_lock};
-        $self->{ibx}->with_umask(\&_index_sync, $self, $opt);
-        if ($opt->{reindex}) {
+        $self->with_umask(\&_index_sync, $self, $opt);
+        if ($opt->{reindex} && !$opt->{quit}) {
                 my %again = %$opt;
                 delete @again{qw(rethread reindex)};
                 index_sync($self, \%again);
+                $opt->{quit} = $again{quit}; # propagate to caller
         }
 }
 
@@ -569,46 +663,44 @@ sub check_size { # check_async cb for -index --max-size=...
 
 sub v1_checkpoint ($$;$) {
         my ($self, $sync, $stk) = @_;
-        $self->{ibx}->git->check_async_wait;
-        $self->{ibx}->git->cat_async_wait;
+        $self->{ibx}->git->async_wait_all;
 
-        # latest_cmt may be undef
-        my $newest = $stk ? $stk->{latest_cmt} : undef;
-        if ($newest) {
+        # $newest may be undef
+        my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        if (defined($newest)) {
                 my $cur = $self->{mm}->last_commit || '';
                 if (need_update($self, $cur, $newest)) {
                         $self->{mm}->last_commit($newest);
                 }
-        } else {
-                ${$sync->{max}} = $self->{batch_bytes};
         }
+        ${$sync->{max}} = $self->{batch_bytes};
 
         $self->{mm}->{dbh}->commit;
-        if ($newest && need_xapian($self)) {
-                my $xdb = $self->{xdb};
+        my $xdb = need_xapian($self) ? $self->{xdb} : undef;
+        if ($newest && $xdb) {
                 my $cur = $xdb->get_metadata('last_commit');
                 if (need_update($self, $cur, $newest)) {
                         $xdb->set_metadata('last_commit', $newest);
                 }
-
+        }
+        if ($stk) { # all done if $stk is passed
                 # let SearchView know a full --reindex was done so it can
                 # generate ->has_threadid-dependent links
-                if ($sync->{reindex} && !ref($sync->{reindex})) {
+                if ($xdb && $sync->{reindex} && !ref($sync->{reindex})) {
                         my $n = $xdb->get_metadata('has_threadid');
                         $xdb->set_metadata('has_threadid', '1') if $n ne '1';
                 }
+                $self->{oidx}->rethread_done($sync->{-opt}); # all done
         }
-
-        $self->{oidx}->rethread_done($sync->{-opt}) if $newest; # all done
         commit_txn_lazy($self);
-        $self->{ibx}->git->cleanup;
+        $sync->{ibx}->git->cleanup;
         my $nr = ${$sync->{nr}};
         idx_release($self, $nr);
         # let another process do some work...
         if (my $pr = $sync->{-opt}->{-progress}) {
                 $pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
         }
-        if (!$stk) { # more to come
+        if (!$stk && !$sync->{quit}) { # more to come
                 begin_txn_lazy($self);
                 $self->{mm}->{dbh}->begin_work;
         }
@@ -617,27 +709,32 @@ sub v1_checkpoint ($$;$) {
 # only for v1
 sub process_stack {
         my ($self, $sync, $stk) = @_;
-        my $git = $self->{ibx}->git;
+        my $git = $sync->{ibx}->git;
         my $max = $self->{batch_bytes};
         my $nr = 0;
         $sync->{nr} = \$nr;
         $sync->{max} = \$max;
         $sync->{sidx} = $self;
+        $sync->{latest_cmt} = \(my $latest_cmt);
 
         $self->{mm}->{dbh}->begin_work;
         if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
                 warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
                 for my $oid (@leftovers) {
+                        last if $sync->{quit};
                         $oid = unpack('H*', $oid);
-                        $git->cat_async($oid, \&unindex_both, $self);
+                        $git->cat_async($oid, \&unindex_both, $sync);
                 }
         }
         if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
                 $sync->{index_oid} = \&index_both;
         }
-        while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
+        while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
+                my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid };
+                last if $sync->{quit};
                 if ($f eq 'm') {
-                        my $arg = { %$sync, autime => $at, cotime => $ct };
+                        $arg->{autime} = $at;
+                        $arg->{cotime} = $ct;
                         if ($sync->{max_size}) {
                                 $git->check_async($oid, \&check_size, $arg);
                         } else {
@@ -645,17 +742,17 @@ sub process_stack {
                         }
                         v1_checkpoint($self, $sync) if $max <= 0;
                 } elsif ($f eq 'd') {
-                        $git->cat_async($oid, \&unindex_both, $self);
+                        $git->cat_async($oid, \&unindex_both, $arg);
                 }
         }
-        v1_checkpoint($self, $sync, $stk);
+        v1_checkpoint($self, $sync, $sync->{quit} ? undef : $stk);
 }
 
-sub log2stack ($$$$) {
-        my ($sync, $git, $range, $ibx) = @_;
+sub log2stack ($$$) {
+        my ($sync, $git, $range) = @_;
         my $D = $sync->{D}; # OID_BIN => NR (if reindexing, undef otherwise)
         my ($add, $del);
-        if ($ibx->version == 1) {
+        if ($sync->{ibx}->version == 1) {
                 my $path = $hex.'{2}/'.$hex.'{38}';
                 $add = qr!\A:000000 100644 \S+ ($OID) A\t$path$!;
                 $del = qr!\A:100644 000000 ($OID) \S+ D\t$path$!;
@@ -669,17 +766,18 @@ sub log2stack ($$$$) {
         my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H
                                 --no-notes --no-color --no-renames --no-abbrev),
                                 $range);
-        my ($at, $ct, $stk);
+        my ($at, $ct, $stk, $cmt);
         while (<$fh>) {
+                return if $sync->{quit};
                 if (/\A([0-9]+)-([0-9]+)-($OID)$/o) {
-                        ($at, $ct) = ($1 + 0, $2 + 0);
-                        $stk //= PublicInbox::IdxStack->new($3);
+                        ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3);
+                        $stk //= PublicInbox::IdxStack->new($cmt);
                 } elsif (/$del/) {
                         my $oid = $1;
                         if ($D) { # reindex case
                                 $D->{pack('H*', $oid)}++;
                         } else { # non-reindex case:
-                                $stk->push_rec('d', $at, $ct, $oid);
+                                $stk->push_rec('d', $at, $ct, $oid, $cmt);
                         }
                 } elsif (/$add/) {
                         my $oid = $1;
@@ -687,12 +785,10 @@ sub log2stack ($$$$) {
                                 my $oid_bin = pack('H*', $oid);
                                 my $nr = --$D->{$oid_bin};
                                 delete($D->{$oid_bin}) if $nr <= 0;
-
                                 # nr < 0 (-1) means it never existed
-                                $stk->push_rec('m', $at, $ct, $oid) if $nr < 0;
-                        } else {
-                                $stk->push_rec('m', $at, $ct, $oid);
+                                next if $nr >= 0;
                         }
+                        $stk->push_rec('m', $at, $ct, $oid, $cmt);
                 }
         }
         close $fh or die "git log failed: \$?=$?";
@@ -700,9 +796,9 @@ sub log2stack ($$$$) {
         $stk->read_prepare;
 }
 
-sub prepare_stack ($$$) {
-        my ($self, $sync, $range) = @_;
-        my $git = $self->{ibx}->git;
+sub prepare_stack ($$) {
+        my ($sync, $range) = @_;
+        my $git = $sync->{ibx}->git;
 
         if (index($range, '..') < 0) {
                 # don't show annoying git errors to users who run -index
@@ -711,7 +807,7 @@ sub prepare_stack ($$$) {
                 return PublicInbox::IdxStack->new->read_prepare if $?;
         }
         $sync->{D} = $sync->{reindex} ? {} : undef; # OID_BIN => NR
-        log2stack($sync, $git, $range, $self->{ibx});
+        log2stack($sync, $git, $range);
 }
 
 # --is-ancestor requires git 1.8.0+
@@ -759,15 +855,30 @@ sub reindex_from ($$) {
         ref($reindex) eq 'HASH' ? $reindex->{from} : '';
 }
 
+sub quit_cb ($) {
+        my ($sync) = @_;
+        sub {
+                # we set {-opt}->{quit} too, so ->index_sync callers
+                # can abort multi-inbox loops this way
+                $sync->{quit} = $sync->{-opt}->{quit} = 1;
+                warn "gracefully quitting\n";
+        }
+}
+
 # indexes all unindexed messages (v1 only)
 sub _index_sync {
         my ($self, $opt) = @_;
         my $tip = $opt->{ref} || 'HEAD';
-        my $git = $self->{ibx}->git;
+        my $ibx = $self->{ibx};
+        local $self->{current_info} = "$ibx->{inboxdir}";
         $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES;
-        $git->batch_prepare;
+        $ibx->git->batch_prepare;
         my $pr = $opt->{-progress};
-        my $sync = { reindex => $opt->{reindex}, -opt => $opt };
+        my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx };
+        my $quit = quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
         my $xdb = $self->begin_txn_lazy;
         $self->{oidx}->rethread_prepare($opt);
         my $mm = _msgmap_init($self);
@@ -785,10 +896,10 @@ sub _index_sync {
         my $lx = reindex_from($sync->{reindex}, $last_commit);
         my $range = $lx eq '' ? $tip : "$lx..$tip";
         $pr->("counting changes\n\t$range ... ") if $pr;
-        my $stk = prepare_stack($self, $sync, $range);
+        my $stk = prepare_stack($sync, $range);
         $sync->{ntodo} = $stk ? $stk->num_records : 0;
         $pr->("$sync->{ntodo}\n") if $pr; # continue previous line
-        process_stack($self, $sync, $stk);
+        process_stack($self, $sync, $stk) if !$sync->{quit};
 }
 
 sub DESTROY {
@@ -808,7 +919,7 @@ sub _begin_txn {
 
 sub begin_txn_lazy {
         my ($self) = @_;
-        $self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn};
+        $self->with_umask(\&_begin_txn, $self) if !$self->{txn};
 }
 
 # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
@@ -836,6 +947,10 @@ sub set_metadata_once {
 
 sub _commit_txn {
         my ($self) = @_;
+        if (my $eidx = $self->{eidx}) {
+                $eidx->git->async_wait_all;
+                $eidx->{transact_bytes} = 0;
+        }
         if (my $xdb = $self->{xdb}) {
                 set_metadata_once($self);
                 $xdb->commit_transaction;
@@ -846,7 +961,7 @@ sub _commit_txn {
 sub commit_txn_lazy {
         my ($self) = @_;
         delete($self->{txn}) and
-                $self->{ibx}->with_umask(\&_commit_txn, $self);
+                $self->with_umask(\&_commit_txn, $self);
 }
 
 sub worker_done {
@@ -857,4 +972,39 @@ sub worker_done {
         die "$$ $0 still in transaction\n" if $self->{txn};
 }
 
+sub eidx_shard_new {
+        my ($class, $eidx, $shard) = @_;
+        my $self = bless {
+                eidx => $eidx,
+                xpfx => $eidx->{xpfx},
+                indexlevel => $eidx->{indexlevel},
+                -skip_docdata => 1,
+                shard => $shard,
+                creat => 1,
+        }, $class;
+        $self->{-set_indexlevel_once} = 1 if $self->{indexlevel} eq 'medium';
+        $self;
+}
+
+# ensure there are no stale Xapian docs by treating $over as canonical
+sub over_check {
+        my ($self, $over) = @_;
+        begin_txn_lazy($self);
+        my $sth = $over->dbh->prepare(<<'');
+SELECT COUNT(*) FROM over WHERE num = ?
+
+        my $xdb = $self->{xdb};
+        my $cur = $xdb->postlist_begin('');
+        my $end = $xdb->postlist_end('');
+        my $xdir = $self->xdir;
+        for (; $cur != $end; $cur++) {
+                my $docid = $cur->get_docid;
+                $sth->execute($docid);
+                my $x = $sth->fetchrow_array;
+                next if $x > 0;
+                warn "I: removing $xdir #$docid, not in `over'\n";
+                $xdb->delete_document($docid);
+        }
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index f23d23d0..2e654769 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -7,13 +7,16 @@ package PublicInbox::SearchIdxShard;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::SearchIdx);
+use bytes qw(length);
 use IO::Handle (); # autoflush
 use PublicInbox::Eml;
+use PublicInbox::Sigfd;
 
 sub new {
-        my ($class, $v2w, $shard) = @_;
+        my ($class, $v2w, $shard) = @_; # v2w may be ExtSearchIdx
         my $ibx = $v2w->{ibx};
-        my $self = $class->SUPER::new($ibx, 1, $shard);
+        my $self = $ibx ? $class->SUPER::new($ibx, 1, $shard)
+                        : $class->eidx_shard_new($v2w, $shard);
         # create the DB before forking:
         $self->idx_acquire;
         $self->set_metadata_once;
@@ -27,9 +30,13 @@ sub spawn_worker {
         my ($r, $w);
         pipe($r, $w) or die "pipe failed: $!\n";
         $w->autoflush(1);
+        my $oldset = PublicInbox::Sigfd::block_signals();
         my $pid = fork;
         defined $pid or die "fork failed: $!\n";
         if ($pid == 0) {
+                # these signals are localized in parent
+                $SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT));
+                PublicInbox::Sigfd::sig_setmask($oldset);
                 my $bnote = $v2w->atfork_child;
                 close $w or die "failed to close: $!";
 
@@ -42,71 +49,122 @@ sub spawn_worker {
                 die "unexpected MM $self->{mm}" if $self->{mm};
                 exit;
         }
+        PublicInbox::Sigfd::sig_setmask($oldset);
         $self->{pid} = $pid;
         $self->{w} = $w;
         close $r or die "failed to close: $!";
 }
 
+sub eml ($$) { # ($r, $len): parse the next $len bytes of $r as an Eml
+        my ($r, $len) = @_;
+        return if $len == 0; # no payload follows
+        my $n = read($r, my $buf, $len) // die "read: $!\n"; # undef: error
+        $n == $len or die "short read: $n != $len\n"; # includes EOF ($n == 0)
+        PublicInbox::Eml->new(\$buf);
+}
+
 # this reads all the writes to $self->{w} from the parent process
 sub shard_worker_loop ($$$$$) {
         my ($self, $v2w, $r, $shard, $bnote) = @_;
-        $0 = "pi-v2-shard[$shard]";
+        $0 = "shard[$shard]";
         $self->begin_txn_lazy;
         while (my $line = readline($r)) {
+                chomp $line;
                 $v2w->{current_info} = "[$shard] $line";
-                if ($line eq "commit\n") {
+                if ($line eq 'commit') {
                         $self->commit_txn_lazy;
-                } elsif ($line eq "close\n") {
+                } elsif ($line eq 'close') {
                         $self->idx_release;
-                } elsif ($line eq "barrier\n") {
+                } elsif ($line eq 'barrier') {
                         $self->commit_txn_lazy;
                         # no need to lock < 512 bytes is atomic under POSIX
                         print $bnote "barrier $shard\n" or
                                         die "write failed for barrier $!\n";
-                } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) {
-                        $self->remove_by_oid($1, $2 + 0);
+                } elsif ($line =~ /\AD ([0-9]+)\z/s) {
+                        $self->remove_by_docid($1 + 0);
+                } elsif ($line =~ s/\A\+X //) {
+                        my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
+                        $self->add_eidx_info($docid, $eidx_key, eml($r, $len));
+                } elsif ($line =~ s/\A-X //) {
+                        my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
+                        $self->remove_eidx_info($docid, $eidx_key,
+                                                        eml($r, $len));
+                } elsif ($line =~ s/\AO ([^\n]+)//) {
+                        my $over_fn = $1;
+                        $over_fn =~ tr/\0/\n/;
+                        $self->over_check(PublicInbox::Over->new($over_fn));
                 } else {
-                        chomp $line;
+                        my $eidx_key;
+                        if ($line =~ s/\AX=(.+)\0//) {
+                                $eidx_key = $1;
+                                $v2w->{current_info} =~ s/\0/\\0 /;
+                        }
                         # n.b. $mid may contain spaces(!)
-                        my ($to_read, $bytes, $num, $blob, $ds, $ts, $tid, $mid)
+                        my ($len, $bytes, $num, $oid, $ds, $ts, $tid, $mid)
                                 = split(/ /, $line, 8);
                         $self->begin_txn_lazy;
-                        my $n = read($r, my $msg, $to_read) or die "read: $!\n";
-                        $n == $to_read or die "short read: $n != $to_read\n";
-                        my $mime = PublicInbox::Eml->new(\$msg);
                         my $smsg = bless {
                                 bytes => $bytes,
                                 num => $num + 0,
-                                blob => $blob,
+                                blob => $oid,
                                 mid => $mid,
                                 tid => $tid,
                                 ds => $ds,
                                 ts => $ts,
                         }, 'PublicInbox::Smsg';
-                        $self->add_message($mime, $smsg);
+                        $smsg->{eidx_key} = $eidx_key if defined($eidx_key);
+                        $self->add_message(eml($r, $len), $smsg);
                 }
         }
         $self->worker_done;
 }
 
 sub index_raw {
-        my ($self, $msgref, $eml, $smsg) = @_;
+        my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
         if (my $w = $self->{w}) {
+                my @ekey = defined($eidx_key) ? ("X=$eidx_key\0") : ();
+                $msgref //= \($eml->as_string);
+                $smsg->{raw_bytes} //= length($$msgref);
                 # mid must be last, it can contain spaces (but not LF)
-                print $w join(' ', @$smsg{qw(raw_bytes bytes
+                print $w @ekey, join(' ', @$smsg{qw(raw_bytes bytes
                                                 num blob ds ts tid mid)}),
                         "\n", $$msgref or die "failed to write shard $!\n";
         } else {
                 if ($eml) {
-                        undef $$msgref;
+                        undef($$msgref) if $msgref;
                 } else { # --xapian-only + --sequential-shard:
                         $eml = PublicInbox::Eml->new($msgref);
                 }
                 $self->begin_txn_lazy;
+                $smsg->{eidx_key} = $eidx_key if defined $eidx_key;
                 $self->add_message($eml, $smsg);
         }
 }
 
+sub shard_add_eidx_info { # forward add_eidx_info to the worker, if any
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        # no worker pipe: call add_eidx_info in this process
+        my $w = $self->{w} or
+                return $self->add_eidx_info($docid, $eidx_key, $eml);
+        # worker protocol: "+X LEN DOCID KEY\n" followed by LEN header bytes
+        my $hdr = $eml->header_obj->as_string;
+        my $len = length($hdr);
+        print $w "+X $len $docid $eidx_key\n", $hdr or
+                die "failed to write shard: $!";
+}
+
+sub shard_remove_eidx_info { # forward remove_eidx_info to the worker, if any
+        my ($self, $docid, $eidx_key, $eml) = @_;
+        # no worker pipe: call remove_eidx_info in this process
+        my $w = $self->{w} or
+                return $self->remove_eidx_info($docid, $eidx_key, $eml);
+        # $eml is optional; a LEN of 0 means no header bytes follow
+        my $hdr = $eml ? $eml->header_obj->as_string : '';
+        my $len = length($hdr);
+        print $w "-X $len $docid $eidx_key\n", $hdr or
+                die "failed to write shard: $!";
+}
+
 sub atfork_child {
         close $_[0]->{w} or die "failed to close write pipe: $!\n";
 }
@@ -144,11 +202,22 @@ sub shard_close {
 }
 
 sub shard_remove {
-        my ($self, $oid, $num) = @_;
-        if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
-                print $w "D $oid $num\n" or die "failed to write remove $!";
+        my ($self, $num) = @_;
+        if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
+                print $w "D $num\n" or die "failed to write remove $!";
         } else { # same process
-                $self->remove_by_oid($oid, $num);
+                $self->remove_by_docid($num);
+        }
+}
+
+sub shard_over_check {
+        my ($self, $over) = @_;
+        if (my $w = $self->{w}) { # triggers over_check in a shard child
+                my ($over_fn) = $over->{dbh}->sqlite_db_filename;
+                $over_fn =~ tr/\n/\0/; # commands are LF-terminated lines
+                print $w "O $over_fn\n" or die "failed to write over $!";
+        } else { # same process
+                $self->over_check($over);
         }
 }
 
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 60f692b2..8fb3a030 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -42,7 +42,7 @@ sub thread {
         # We'll trust the client Date: header here instead of the Received:
         # time since this is for display (and not retrieval)
         _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $rootset = [ grep {
                         !delete($_->{parent}) && $_->visible($ibx)
                 } values %id_table ];
@@ -166,7 +166,7 @@ sub order_children {
 
         my %seen = ($cur => 1); # self-referential loop prevention
         my @q = ($cur);
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         while (defined($cur = shift @q)) {
                 my $c = $cur->{children}; # The hashref here...
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index c482f1c9..f568f31c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -14,7 +14,7 @@ use PublicInbox::WwwAtomStream;
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::SearchThread;
 use PublicInbox::SearchQuery;
-use PublicInbox::Search qw(mdocid);
+use PublicInbox::Search;
 my %rmap_inc;
 
 sub mbox_results {
@@ -30,7 +30,7 @@ sub mbox_results {
 
 sub sres_top_html {
         my ($ctx) = @_;
-        my $srch = $ctx->{-inbox}->search or
+        my $srch = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
         my $q = PublicInbox::SearchQuery->new($ctx->{qp});
         my $x = $q->{x};
@@ -93,9 +93,9 @@ sub mset_summary {
         my $pad = length("$total");
         my $pfx = ' ' x $pad;
         my $res = \($ctx->{-html_tip});
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
-        my @nums = @{$ibx->search->mset_to_artnums($mset)};
+        my @nums = @{$ibx->isrch->mset_to_artnums($mset)};
         my %num2msg = map { $_->{num} => $_ } @{$ibx->over->get_all(@nums)};
         my ($min, $max);
 
@@ -156,7 +156,7 @@ sub path2inc ($) {
 
 sub err_txt {
         my ($ctx, $err) = @_;
-        my $u = $ctx->{-inbox}->base_url($ctx->{env}) . '_/text/help/';
+        my $u = $ctx->{ibx}->base_url($ctx->{env}) . '_/text/help/';
         $err =~ s/^\s*Exception:\s*//; # bad word to show users :P
         $err =~ s!(\S+)!path2inc($1)!sge;
         $err = ascii_html($err);
@@ -201,7 +201,7 @@ sub search_nav_top {
         }
         my $A = $q->qs_html(x => 'A', r => undef);
         $rv .= qq{|<a\nhref="?$A">Atom feed</a>]};
-        if ($ctx->{-inbox}->search->has_threadid) {
+        if ($ctx->{ibx}->isrch->has_threadid) {
                 $rv .= qq{\n\t\t\tdownload mbox.gz: } .
                         # we set name=z w/o using it since it seems required for
                         # lynx (but works fine for w3m).
@@ -286,14 +286,13 @@ sub get_pct ($) {
 
 sub mset_thread {
         my ($ctx, $mset, $q) = @_;
-        my $ibx = $ctx->{-inbox};
-        my $nshard = $ibx->search->{nshard} // 1;
-        my %pct = map { mdocid($nshard, $_) => get_pct($_) } $mset->items;
-        my $msgs = $ibx->over->get_all(keys %pct);
-        $_->{pct} = $pct{$_->{num}} for @$msgs;
+        my $ibx = $ctx->{ibx};
+        my @pct = map { get_pct($_) } $mset->items;
+        my $msgs = $ibx->isrch->mset_to_smsg($ibx, $mset);
+        my $i = 0;
+        $_->{pct} = $pct[$i++] for @$msgs;
         my $r = $q->{r};
         if ($r) { # for descriptions in search_nav_bot
-                my @pct = values %pct;
                 $q->{-min_pct} = min(@pct);
                 $q->{-max_pct} = max(@pct);
         }
@@ -354,7 +353,7 @@ sub ctx_prepare {
 
 sub adump {
         my ($cb, $mset, $q, $ctx) = @_;
-        $ctx->{ids} = $ctx->{-inbox}->search->mset_to_artnums($mset);
+        $ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
         $ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
         PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
 }
@@ -363,7 +362,7 @@ sub adump {
 sub adump_i {
         my ($ctx) = @_;
         while (my $num = shift @{$ctx->{ids}}) {
-                my $smsg = eval { $ctx->{-inbox}->over->get_art($num) } or next;
+                my $smsg = eval { $ctx->{ibx}->over->get_art($num) } or next;
                 return $smsg;
         }
 }
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 171e0a00..14086538 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -15,13 +15,6 @@ our @EXPORT_OK = qw(subject_normalized);
 use PublicInbox::MID qw(mids);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-use Time::Local qw(timegm);
-
-sub get_val ($$) {
-        my ($doc, $col) = @_;
-        # sortable_unserialise is defined by PublicInbox::Search::load_xapian()
-        sortable_unserialise($doc->get_value($col));
-}
 
 sub to_doc_data {
         my ($self) = @_;
@@ -61,17 +54,6 @@ sub load_from_data ($$) {
         ) = split(/\n/, $_[1]);
 }
 
-sub load_expand {
-        my ($self, $doc) = @_;
-        my $data = $doc->get_data or return;
-        $self->{ts} = get_val($doc, PublicInbox::Search::TS());
-        my $dt = get_val($doc, PublicInbox::Search::DT());
-        my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
-        $self->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
-        load_from_data($self, $data);
-        $self;
-}
-
 sub psgi_cull ($) {
         my ($self) = @_;
 
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 83f7a4ee..a53f28b1 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -216,7 +216,7 @@ sub filename_query ($) {
 
 sub find_smsgs ($$$) {
         my ($self, $ibx, $want) = @_;
-        my $srch = $ibx->search or return;
+        my $srch = $ibx->isrch or return;
 
         my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
         $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
diff --git a/lib/PublicInbox/Spamcheck.pm b/lib/PublicInbox/Spamcheck.pm
index ffebb3cf..218fcc01 100644
--- a/lib/PublicInbox/Spamcheck.pm
+++ b/lib/PublicInbox/Spamcheck.pm
@@ -7,8 +7,8 @@ use strict;
 use warnings;
 
 sub get {
-        my ($config, $key, $default) = @_;
-        my $spamcheck = $config->{$key};
+        my ($cfg, $key, $default) = @_;
+        my $spamcheck = $cfg->{$key};
         $spamcheck = $default unless $spamcheck;
 
         return if !$spamcheck || $spamcheck eq 'none';
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index e4f00a2a..c403f78a 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -227,38 +227,46 @@ sub epoll_ctl_mod8 {
 our $epoll_wait_events;
 our $epoll_wait_size = 0;
 sub epoll_wait_mod4 {
-    # resize our static buffer if requested size is bigger than we've ever done
-    if ($_[1] > $epoll_wait_size) {
-        $epoll_wait_size = $_[1];
-        $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
-    }
-    my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
-    for (0..$ct-1) {
-        @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
-    }
-    return $ct;
+        my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+        # resize our static buffer if maxevents bigger than we've ever done
+        if ($maxevents > $epoll_wait_size) {
+                $epoll_wait_size = $maxevents;
+                vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
+        }
+        @$events = ();
+        my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+                        $maxevents, $timeout_msec);
+        for (0..$ct - 1) {
+                # 12-byte struct epoll_event
+                # 4 bytes uint32_t events mask (skipped, useless to us)
+                # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
+                # So we skip the first 4 bytes and take the middle 4:
+                $events->[$_] = unpack('L', substr($epoll_wait_events,
+                                                        12 * $_ + 4, 4));
+        }
 }
 
 sub epoll_wait_mod8 {
-    # resize our static buffer if requested size is bigger than we've ever done
-    if ($_[1] > $epoll_wait_size) {
-        $epoll_wait_size = $_[1];
-        $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
-    }
-    my $ct;
-    if ($no_deprecated) {
-        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
-    } else {
-        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
-    }
-    for (0..$ct-1) {
-        # 16 byte epoll_event structs, with format:
-        #    4 byte mask [idx 1]
-        #    4 byte padding (we put it into idx 2, useless)
-        #    8 byte data (first 4 bytes are fd, into idx 0)
-        @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
-    }
-    return $ct;
+        my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+
+        # resize our static buffer if maxevents bigger than we've ever done
+        if ($maxevents > $epoll_wait_size) {
+                $epoll_wait_size = $maxevents;
+                vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
+        }
+        @$events = ();
+        my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+                        $maxevents, $timeout_msec,
+                        $no_deprecated ? undef : ());
+        for (0..$ct - 1) {
+                # 16-byte struct epoll_event
+                # 4 bytes uint32_t events mask (skipped, useless to us)
+                # 4 bytes padding (skipped, useless)
+                # 8 bytes epoll_data_t union (first 4 bytes are the fd)
+                # So skip the first 8 bytes, take 4, and ignore the last 4:
+                $events->[$_] = unpack('L', substr($epoll_wait_events,
+                                                        16 * $_ + 8, 4));
+        }
 }
 
 sub signalfd ($$$) {
diff --git a/lib/PublicInbox/Tmpfile.pm b/lib/PublicInbox/Tmpfile.pm
index 25bb3a52..eb0fce00 100644
--- a/lib/PublicInbox/Tmpfile.pm
+++ b/lib/PublicInbox/Tmpfile.pm
@@ -2,8 +2,8 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Tmpfile;
 use strict;
-use warnings;
-use base qw(Exporter);
+use v5.10.1;
+use parent qw(Exporter);
 our @EXPORT = qw(tmpfile);
 use Fcntl qw(:DEFAULT);
 use Errno qw(EEXIST);
@@ -13,6 +13,9 @@ use File::Spec;
 # unlinked filename which makes sense when viewed with lsof
 # (at least on Linux)
 # And if we ever stop caring to have debuggable filenames, O_TMPFILE :)
+#
+# This is also for Perl <5.32 which lacks: open(..., '+>>', undef)
+# <https://rt.perl.org/Ticket/Display.html?id=134221>
 sub tmpfile ($;$$) {
         my ($id, $sock, $append) = @_;
         if (defined $sock) {
diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm
index 945e7ae7..ae0b0679 100644
--- a/lib/PublicInbox/Unsubscribe.pm
+++ b/lib/PublicInbox/Unsubscribe.pm
@@ -12,7 +12,8 @@ use warnings;
 use Crypt::CBC;
 use Plack::Util;
 use MIME::Base64 qw(decode_base64url);
-my $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+my @CODE_URL = qw(http://ou63pmih66umazou.onion/public-inbox.git
+        https://public-inbox.org/public-inbox.git);
 my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');
 
 sub new {
@@ -38,13 +39,15 @@ sub new {
         my $unsubscribe = $opt{unsubscribe} or
                 die "`unsubscribe' callback not given\n";
 
+        my $code_url = $opt{code_url} || \@CODE_URL;
+        $code_url = [ $code_url ] if ref($code_url) ne 'ARRAY';
         bless {
-                pi_config => $opt{pi_config}, # PublicInbox::Config
+                pi_cfg => $opt{pi_config}, # PublicInbox::Config
                 owner_email => $opt{owner_email},
                 cipher => $cipher,
                 unsubscribe => $unsubscribe,
                 contact => qq(<a\nhref="mailto:$e">$e</a>),
-                code_url => $opt{code_url} || $CODE_URL,
+                code_url => $code_url,
                 confirm => $opt{confirm},
         }, $class;
 }
@@ -138,7 +141,7 @@ sub r {
                 "<html><head><title>$title</title></head><body><pre>".
                 join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
                 "<pre>This page is available under AGPL-3.0+\n" .
-                "git clone $self->{code_url}\n" .
+                join('', map { "git clone $_\n" } @{$self->{code_url}}) .
                 qq(Email $self->{contact} if you have any questions).
                 '</pre></body></html>'
         ] ];
@@ -149,9 +152,9 @@ sub archive_info {
         my $archive_url = $self->{archive_urls}->{$list_addr};
 
         unless ($archive_url) {
-                if (my $config = $self->{pi_config}) {
+                if (my $cfg = $self->{pi_cfg}) {
                         # PublicInbox::Config::lookup
-                        my $ibx = $config->lookup($list_addr);
+                        my $ibx = $cfg->lookup($list_addr);
                         # PublicInbox::Inbox::base_url
                         $archive_url = $ibx->base_url if $ibx;
                 }
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index b8abfa94..567582c5 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -17,7 +17,8 @@ use PublicInbox::InboxWritable;
 use PublicInbox::OverIdx;
 use PublicInbox::Msgmap;
 use PublicInbox::Spawn qw(spawn popen_rd);
-use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size);
+use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size
+        is_bad_blob);
 use IO::Handle; # ->autoflush
 use File::Temp ();
 
@@ -65,11 +66,21 @@ sub nproc_shards ($) {
 
 sub count_shards ($) {
         my ($self) = @_;
-        # always load existing shards in case core count changes:
-        # Also, shard count may change while -watch is running
-        my $srch = $self->{ibx}->search or return 0;
-        delete $self->{ibx}->{search};
-        $srch->{nshard} // 0
+        if (my $ibx = $self->{ibx}) {
+                # always load existing shards in case core count changes:
+                # Also, shard count may change while -watch is running
+                my $srch = $ibx->search or return 0;
+                delete $ibx->{search};
+                $srch->{nshard} // 0
+        } else { # ExtSearchIdx
+                $self->{nshard} // do {
+                        if ($self->xdb_sharded) {
+                                $self->{nshard} // die 'BUG: {nshard} unset';
+                        } else {
+                                0;
+                        }
+                }
+        }
 }
 
 sub new {
@@ -86,8 +97,6 @@ sub new {
                         die "$dir does not exist\n";
                 }
         }
-        $v2ibx->umask_prepare;
-
         my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
         my $self = {
                 ibx => $v2ibx,
@@ -117,12 +126,9 @@ sub init_inbox {
         }
         $self->idx_init;
         $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
-        my $epoch_max = -1;
-        git_dir_latest($self, \$epoch_max);
-        if (defined $skip_epoch && $epoch_max == -1) {
-                $epoch_max = $skip_epoch;
-        }
-        $self->git_init($epoch_max >= 0 ? $epoch_max : 0);
+        my $max = $self->{ibx}->max_git_epoch;
+        $max = $skip_epoch if (defined($skip_epoch) && !defined($max));
+        $self->git_init($max // 0);
         $self->done;
 }
 
@@ -133,12 +139,17 @@ sub add {
         $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
 }
 
+sub idx_shard ($$) { # ($self, $num): {idx_shards} entry at $num % count
+        my $shards = $_[0]->{idx_shards};
+        $shards->[$_[1] % scalar(@$shards)];
+}
+
 # indexes a message, returns true if checkpointing is needed
 sub do_idx ($$$$) {
         my ($self, $msgref, $mime, $smsg) = @_;
         $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
         $self->{oidx}->add_overview($mime, $smsg);
-        my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
+        my $idx = idx_shard($self, $smsg->{num});
         $idx->index_raw($msgref, $mime, $smsg);
         my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
         $n >= $self->{batch_bytes};
@@ -249,11 +260,6 @@ sub v2_num_for_harder {
         ($num, $mid0);
 }
 
-sub idx_shard {
-        my ($self, $shard_i) = @_;
-        $self->{idx_shards}->[$shard_i];
-}
-
 sub _idx_init { # with_umask callback
         my ($self, $opt) = @_;
         $self->lock_acquire unless $opt && $opt->{-skip_lock};
@@ -264,7 +270,6 @@ sub _idx_init { # with_umask callback
         $self->{shards} = $nshards if $nshards && $nshards != $self->{shards};
         $self->{batch_bytes} = $opt->{batch_size} //
                                 $PublicInbox::SearchIdx::BATCH_BYTES;
-        $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
 
         # need to create all shards before initializing msgmap FD
         # idx_shards must be visible to all forked processes
@@ -272,14 +277,34 @@ sub _idx_init { # with_umask callback
         my $idx = $self->{idx_shards} = [];
         push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max);
 
+        # SearchIdxShard may do their own flushing, so don't scale
+        # until after forking
+        $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
+
+        my $ibx = $self->{ibx} or return; # ExtSearchIdx
+
         # Now that all subprocesses are up, we can open the FDs
         # for SQLite:
         my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
-                                "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
-                                $self->{ibx}->{-no_fsync} ? 2 : 1);
+                                "$ibx->{inboxdir}/msgmap.sqlite3",
+                                $ibx->{-no_fsync} ? 2 : 1);
         $mm->{dbh}->begin_work;
 }
 
+sub parallel_init ($$) { # ($self, $indexlevel): may clear {parallel}
+        my ($self, $indexlevel) = @_;
+        $self->{parallel} = 0 if ($indexlevel // 'full') eq 'basic';
+        # the inline code this helper replaced only opened the barrier
+        # pipe while {parallel} was still true; keep that guard
+        $self->{parallel} or return;
+        pipe(my ($r, $w)) or die "pipe failed: $!";
+        # pipe for barrier notifications doesn't need to be big,
+        # 1031: F_SETPIPE_SZ
+        fcntl($w, 1031, 4096) if $^O eq 'linux';
+        $self->{bnote} = [ $r, $w ];
+        $w->autoflush(1);
+}
+
 # idempotent
 sub idx_init {
         my ($self, $opt) = @_;
@@ -292,17 +317,7 @@ sub idx_init {
         delete @$ibx{qw(mm search)};
         $ibx->git->cleanup;
 
-        $self->{parallel} = 0 if ($ibx->{indexlevel}//'') eq 'basic';
-        if ($self->{parallel}) {
-                pipe(my ($r, $w)) or die "pipe failed: $!";
-                # pipe for barrier notifications doesn't need to be big,
-                # 1031: F_SETPIPE_SZ
-                fcntl($w, 1031, 4096) if $^O eq 'linux';
-                $self->{bnote} = [ $r, $w ];
-                $w->autoflush(1);
-        }
-
-        $ibx->umask_prepare;
+        parallel_init($self, $ibx->{indexlevel});
         $ibx->with_umask(\&_idx_init, $self, $opt);
 }
 
@@ -312,14 +327,10 @@ sub idx_init {
 sub _replace_oids ($$$) {
         my ($self, $mime, $replace_map) = @_;
         $self->done;
-        my $pfx = "$self->{ibx}->{inboxdir}/git";
+        my $ibx = $self->{ibx};
+        my $pfx = "$ibx->{inboxdir}/git";
         my $rewrites = []; # epoch => commit
-        my $max = $self->{epoch_max};
-
-        unless (defined($max)) {
-                defined(my $latest = git_dir_latest($self, \$max)) or return;
-                $self->{epoch_max} = $max;
-        }
+        my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return;
 
         foreach my $i (0..$max) {
                 my $git_dir = "$pfx/$i.git";
@@ -414,7 +425,7 @@ sub rewrite_internal ($$;$$$) {
                         } else { # ->purge or ->remove
                                 $self->{mm}->num_delete($num);
                         }
-                        unindex_oid_remote($self, $oid, $mid);
+                        unindex_oid_aux($self, $oid, $mid);
                 }
         }
 
@@ -467,7 +478,7 @@ sub git_hash_raw ($$) {
         my ($self, $raw) = @_;
         # grab the expected OID we have to reindex:
         pipe(my($in, $w)) or die "pipe: $!";
-        my $git_dir = $self->{ibx}->git->{git_dir};
+        my $git_dir = $self->git->{git_dir};
         my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
         my $r = popen_rd($cmd, undef, { 0 => $in });
         print $w $$raw or die "print \$w: $!";
@@ -531,11 +542,11 @@ W: $list
         }
 
         # make sure we really got the OID:
-        my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid);
+        my ($blob, $type, $bytes) = $self->git->check($expect_oid);
         $blob eq $expect_oid or die "BUG: $expect_oid not found after replace";
 
         # don't leak FDs to Xapian:
-        $self->{ibx}->git->cleanup;
+        $self->git->cleanup;
 
         # reindex modified messages:
         for my $smsg (@$need_reindex) {
@@ -558,7 +569,7 @@ sub last_epoch_commit ($$;$) {
         $self->{mm}->last_commit_xap($v, $i, $cmt);
 }
 
-sub set_last_commits ($) {
+sub set_last_commits ($) { # this is NOT for ExtSearchIdx
         my ($self) = @_;
         defined(my $epoch_max = $self->{epoch_max}) or return;
         my $last_commit = $self->{last_commit};
@@ -600,34 +611,40 @@ sub checkpoint ($;$) {
         }
         my $shards = $self->{idx_shards};
         if ($shards) {
-                my $dbh = $self->{mm}->{dbh};
+                my $mm = $self->{mm};
+                my $dbh = $mm->{dbh} if $mm;
 
                 # SQLite msgmap data is second in importance
-                $dbh->commit;
+                $dbh->commit if $dbh;
 
                 # SQLite overview is third
                 $self->{oidx}->commit_lazy;
 
                 # Now deal with Xapian
                 if ($wait) {
-                        my $barrier = $self->barrier_init(scalar @$shards);
+                        my $barrier = barrier_init($self, scalar @$shards);
 
                         # each shard needs to issue a barrier command
                         $_->shard_barrier for @$shards;
 
                         # wait for each Xapian shard
-                        $self->barrier_wait($barrier);
+                        barrier_wait($self, $barrier);
                 } else {
                         $_->shard_commit for @$shards;
                 }
 
+                my $midx = $self->{midx}; # misc index
+                $midx->commit_txn if $midx;
+
                 # last_commit is special, don't commit these until
-                # remote shards are done:
-                $dbh->begin_work;
+                # Xapian shards are done:
+                $dbh->begin_work if $dbh;
                 set_last_commits($self);
-                $dbh->commit;
-
-                $dbh->begin_work;
+                if ($dbh) {
+                        $dbh->commit;
+                        $dbh->begin_work;
+                }
+                $midx->begin_txn if $midx;
         }
         $self->{total_bytes} += $self->{transact_bytes};
         $self->{transact_bytes} = 0;
@@ -667,14 +684,27 @@ sub done {
         }
         eval { $self->{oidx}->dbh_close };
         $err .= "over close: $@\n" if $@;
+        delete $self->{midx};
         delete $self->{bnote};
         my $nbytes = $self->{total_bytes};
         $self->{total_bytes} = 0;
         $self->lock_release(!!$nbytes) if $shards;
-        $self->{ibx}->git->cleanup;
+        $self->git->cleanup;
         die $err if $err;
 }
 
+sub write_alternates ($$$) { # write @$out to a tempfile, rename into place
+        my ($info_dir, $mode, $out) = @_;
+        my $dst = "$info_dir/alternates";
+        my $tfh = File::Temp->new(TEMPLATE => 'alt-XXXXXXXX', DIR => $info_dir);
+        my $tmp = $tfh->filename;
+        print $tfh @$out or die "print $tmp: $!\n";
+        chmod($mode, $tfh) or die "fchmod $tmp: $!\n";
+        close $tfh or die "close $tmp $!\n";
+        rename($tmp, $dst) or die "rename $tmp => $dst: $!\n";
+        $tfh->unlink_on_destroy(0); # $tmp was renamed, nothing left to unlink
+}
+
 sub fill_alternates ($$) {
         my ($self, $epoch) = @_;
 
@@ -713,15 +743,8 @@ sub fill_alternates ($$) {
                 }
         }
         return unless $new;
-
-        my $fh = File::Temp->new(TEMPLATE => 'alt-XXXXXXXX', DIR => $info_dir);
-        my $tmp = $fh->filename;
-        print $fh join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"
-                or die "print $tmp: $!\n";
-        chmod($mode, $fh) or die "fchmod $tmp: $!\n";
-        close $fh or die "close $tmp $!\n";
-        rename($tmp, $alt) or die "rename $tmp => $alt: $!\n";
-        $fh->unlink_on_destroy(0);
+        write_alternates($info_dir, $mode,
+                [join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"]);
 }
 
 sub git_init {
@@ -735,23 +758,6 @@ sub git_init {
         $git_dir
 }
 
-sub git_dir_latest {
-        my ($self, $max) = @_;
-        $$max = -1;
-        my $pfx = "$self->{ibx}->{inboxdir}/git";
-        return unless -d $pfx;
-        my $latest;
-        opendir my $dh, $pfx or die "opendir $pfx: $!\n";
-        while (defined(my $git_dir = readdir($dh))) {
-                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
-                if ($1 > $$max) {
-                        $$max = $1;
-                        $latest = "$pfx/$git_dir";
-                }
-        }
-        $latest;
-}
-
 sub importer {
         my ($self) = @_;
         my $im = $self->{im};
@@ -770,7 +776,7 @@ sub importer {
         }
         my $epoch = 0;
         my $max;
-        my $latest = git_dir_latest($self, \$max);
+        my $latest = $self->{ibx}->git_dir_latest(\$max);
         if (defined $latest) {
                 my $git = PublicInbox::Git->new($latest);
                 my $packed_bytes = $git->packed_bytes;
@@ -861,29 +867,50 @@ sub atfork_child {
 sub reindex_checkpoint ($$) {
         my ($self, $sync) = @_;
 
-        $self->{ibx}->git->cleanup; # *async_wait
+        $self->git->async_wait_all;
+        $self->update_last_commit($sync);
         ${$sync->{need_checkpoint}} = 0;
         my $mm_tmp = $sync->{mm_tmp};
         $mm_tmp->atfork_prepare if $mm_tmp;
-        $self->done; # release lock
+        die 'BUG: {im} during reindex' if $self->{im};
+        if ($self->{ibx_map} && !$sync->{checkpoint_unlocks}) {
+                checkpoint($self, 1); # no need to release lock on pure index
+        } else {
+                $self->done; # release lock
+        }
 
-        if (my $pr = $sync->{-opt}->{-progress}) {
+        if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) {
                 $pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}}));
         }
 
         # allow -watch or -mda to write...
         $self->idx_init($sync->{-opt}); # reacquire lock
+        if (my $intvl = $sync->{check_intvl}) { # eidx
+                $sync->{next_check} = PublicInbox::DS::now() + $intvl;
+        }
         $mm_tmp->atfork_parent if $mm_tmp;
 }
 
+sub index_finalize ($$) {
+        my ($arg, $index) = @_;
+        ++$arg->{self}->{nidx};
+        if (defined(my $cur = $arg->{cur_cmt})) {
+                ${$arg->{latest_cmt}} = $cur;
+        } elsif ($index) {
+                die 'BUG: {cur_cmt} missing';
+        } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
 sub index_oid { # cat_async callback
         my ($bref, $oid, $type, $size, $arg) = @_;
-        return if $size == 0; # purged
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 1); # size == 0 purged returns here
+        my $self = $arg->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
         my ($num, $mid0);
         my $eml = PublicInbox::Eml->new($$bref);
         my $mids = mids($eml);
         my $chash = content_hash($eml);
-        my $self = $arg->{v2w};
 
         if (scalar(@$mids) == 0) {
                 warn "E: $oid has no Message-ID, skipping\n";
@@ -950,36 +977,39 @@ sub index_oid { # cat_async callback
         if (do_idx($self, $bref, $eml, $smsg)) {
                 ${$arg->{need_checkpoint}} = 1;
         }
+        index_finalize($arg, 1);
 }
 
 # only update last_commit for $i on reindex iff newer than current
-sub update_last_commit ($$$$) {
-        my ($self, $git, $i, $cmt) = @_;
-        my $last = last_epoch_commit($self, $i);
-        if (defined $last && is_ancestor($git, $last, $cmt)) {
-                my @cmd = (qw(rev-list --count), "$last..$cmt");
-                chomp(my $n = $git->qx(@cmd));
+sub update_last_commit {
+        my ($self, $sync, $stk) = @_;
+        my $unit = $sync->{unit} // return;
+        my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+        defined($latest_cmt) or return;
+        my $last = last_epoch_commit($self, $unit->{epoch});
+        if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) {
+                my @cmd = (qw(rev-list --count), "$last..$latest_cmt");
+                chomp(my $n = $unit->{git}->qx(@cmd));
                 return if $n ne '' && $n == 0;
         }
-        last_epoch_commit($self, $i, $cmt);
+        last_epoch_commit($self, $unit->{epoch}, $latest_cmt);
 }
 
-sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" }
-
-sub last_commits ($$) {
-        my ($self, $epoch_max) = @_;
+sub last_commits {
+        my ($self, $sync) = @_;
         my $heads = [];
-        for (my $i = $epoch_max; $i >= 0; $i--) {
+        for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
                 $heads->[$i] = last_epoch_commit($self, $i);
         }
         $heads;
 }
 
 # returns a revision range for git-log(1)
-sub log_range ($$$$$) {
-        my ($self, $sync, $git, $i, $tip) = @_;
+sub log_range ($$$) {
+        my ($sync, $unit, $tip) = @_;
         my $opt = $sync->{-opt};
         my $pr = $opt->{-progress} if (($opt->{verbose} || 0) > 1);
+        my $i = $unit->{epoch};
         my $cur = $sync->{ranges}->[$i] or do {
                 $pr->("$i.git indexing all of $tip\n") if $pr;
                 return $tip; # all of it
@@ -993,7 +1023,8 @@ sub log_range ($$$$$) {
 
         my $range = "$cur..$tip";
         $pr->("$i.git checking contiguity... ") if $pr;
-        if (is_ancestor($git, $cur, $tip)) { # common case
+        my $git = $unit->{git};
+        if (is_ancestor($sync->{self}->git, $cur, $tip)) { # common case
                 $pr->("OK\n") if $pr;
                 my $n = $git->qx(qw(rev-list --count), $range);
                 chomp($n);
@@ -1018,63 +1049,103 @@ Rewritten history? (in $git->{git_dir})
                         warn "discarding history at $cur\n";
                 }
                 warn <<"";
-reindexing $git->{git_dir} starting at
-$range
-
-                $sync->{unindex_range}->{$i} = "$base..$cur";
+reindexing $git->{git_dir}
+starting at $range
+
+                # $cur^0 may no longer exist if pruned by git
+                if ($git->qx(qw(rev-parse -q --verify), "$cur^0")) {
+                        $unit->{unindex_range} = "$base..$cur";
+                } elsif ($base && $git->qx(qw(rev-parse -q --verify), $base)) {
+                        $unit->{unindex_range} = "$base..";
+                } else {
+                        warn "W: unable to unindex before $range\n";
+                }
         }
         $range;
 }
 
-sub sync_prepare ($$$) {
-        my ($self, $sync, $epoch_max) = @_;
+# overridden by ExtSearchIdx
+sub artnum_max { $_[0]->{mm}->num_highwater }
+
+sub sync_prepare ($$) {
+        my ($self, $sync) = @_;
+        $sync->{ranges} = sync_ranges($self, $sync);
         my $pr = $sync->{-opt}->{-progress};
         my $regen_max = 0;
-        my $head = $self->{ibx}->{ref_head} || 'refs/heads/master';
-
-        # reindex stops at the current heads and we later rerun index_sync
-        # without {reindex}
-        my $reindex_heads = last_commits($self, $epoch_max) if $sync->{reindex};
-
-        for (my $i = $epoch_max; $i >= 0; $i--) {
-                my $git_dir = git_dir_n($self, $i);
+        my $head = $sync->{ibx}->{ref_head} || 'HEAD';
+        my $pfx;
+        if ($pr) {
+                ($pfx) = ($sync->{ibx}->{inboxdir} =~ m!([^/]+)\z!g);
+                $pfx //= $sync->{ibx}->{inboxdir};
+        }
+
+        my $reindex_heads;
+        if ($self->{ibx_map}) {
+                # ExtSearchIdx won't index messages unless they're in
+                # over.sqlite3 for a given inbox, so don't read beyond
+                # what's in the per-inbox index.
+                $reindex_heads = [];
+                my $v = PublicInbox::Search::SCHEMA_VERSION;
+                my $mm = $sync->{ibx}->mm;
+                for my $i (0..$sync->{epoch_max}) {
+                        $reindex_heads->[$i] = $mm->last_commit_xap($v, $i);
+                }
+        } elsif ($sync->{reindex}) { # V2 inbox
+                # reindex stops at the current heads and we later
+                # rerun index_sync without {reindex}
+                $reindex_heads = $self->last_commits($sync);
+        }
+        if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
+                $sync->{index_oid} = $self->can('index_oid');
+        }
+        my $git_pfx = "$sync->{ibx}->{inboxdir}/git";
+        for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
+                my $git_dir = "$git_pfx/$i.git";
                 -d $git_dir or next; # missing epochs are fine
                 my $git = PublicInbox::Git->new($git_dir);
+                my $unit = { git => $git, epoch => $i };
+                my $tip;
                 if ($reindex_heads) {
-                        $head = $reindex_heads->[$i] or next;
+                        $tip = $head = $reindex_heads->[$i] or next;
+                } else {
+                        $tip = $git->qx(qw(rev-parse -q --verify), $head);
+                        next if $?; # new repo
+                        chomp $tip;
                 }
-                chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
-
-                next if $?; # new repo
-                my $range = log_range($self, $sync, $git, $i, $tip) or next;
+                my $range = log_range($sync, $unit, $tip) or next;
                 # can't use 'rev-list --count' if we use --diff-filter
-                $pr->("$i.git counting $range ... ") if $pr;
+                $pr->("$pfx $i.git counting $range ... ") if $pr;
                 # Don't bump num_highwater on --reindex by using {D}.
                 # We intentionally do NOT use {D} in the non-reindex case
                 # because we want NNTP article number gaps from unindexed
                 # messages to show up in mirrors, too.
                 $sync->{D} //= $sync->{reindex} ? {} : undef; # OID_BIN => NR
-                my $stk = log2stack($sync, $git, $range, $self->{ibx});
+                my $stk = log2stack($sync, $git, $range);
+                return 0 if $sync->{quit};
                 my $nr = $stk ? $stk->num_records : 0;
                 $pr->("$nr\n") if $pr;
-                $sync->{stacks}->[$i] = $stk if $stk;
+                $unit->{stack} = $stk; # may be undef
+                unshift @{$sync->{todo}}, $unit;
                 $regen_max += $nr;
         }
+        return 0 if $sync->{quit};
 
         # XXX this should not happen unless somebody bypasses checks in
         # our code and blindly injects "d" file history into git repos
         if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
                 warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
-                my $arg = { v2w => $self };
-                my $all = $self->{ibx}->git;
+                local $self->{current_info} = 'leftover ';
+                my $unindex_oid = $self->can('unindex_oid');
                 for my $oid (@leftovers) {
+                        last if $sync->{quit};
                         $oid = unpack('H*', $oid);
-                        $self->{current_info} = "leftover $oid";
-                        $all->cat_async($oid, \&unindex_oid, $arg);
+                        my $req = { %$sync, oid => $oid };
+                        $self->git->cat_async($oid, $unindex_oid, $req);
                 }
-                $all->cat_async_wait;
+                $self->git->cat_async_wait;
         }
-        if (!$regen_max && !keys(%{$self->{unindex_range}})) {
+        return 0 if $sync->{quit};
+        if (!$regen_max) {
                 $sync->{-regen_fmt} = "%u/?\n";
                 return 0;
         }
@@ -1085,22 +1156,25 @@ sub sync_prepare ($$$) {
         $sync->{-regen_fmt} = "% ${pad}u/$regen_max\n";
         $sync->{nr} = \(my $nr = 0);
         return -1 if $sync->{reindex};
-        $regen_max + $self->{mm}->num_highwater() || 0;
+        $regen_max + $self->artnum_max || 0;
 }
 
-sub unindex_oid_remote ($$$) {
+sub unindex_oid_aux ($$$) {
         my ($self, $oid, $mid) = @_;
         my @removed = $self->{oidx}->remove_oid($oid, $mid);
         for my $num (@removed) {
-                my $idx = idx_shard($self, $num % $self->{shards});
-                $idx->shard_remove($oid, $num);
+                my $idx = idx_shard($self, $num);
+                $idx->shard_remove($num);
         }
 }
 
 sub unindex_oid ($$;$) { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $sync) = @_;
-        my $self = $sync->{v2w};
-        my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+        my ($bref, $oid, $type, $size, $arg) = @_;
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 0);
+        my $self = $arg->{self};
+        local $self->{current_info} = "$self->{current_info} $oid";
+        my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
         my $mm = $self->{mm};
         my $mids = mids(PublicInbox::Eml->new($bref));
         undef $$bref;
@@ -1123,43 +1197,46 @@ sub unindex_oid ($$;$) { # git->cat_async callback
                         }
                         $mm->num_delete($num);
                 }
-                unindex_oid_remote($self, $oid, $mid);
+                unindex_oid_aux($self, $oid, $mid);
         }
+        index_finalize($arg, 0);
 }
 
+sub git { $_[0]->{ibx}->git }
+
 # this is rare, it only happens when we get discontiguous history in
 # a mirror because the source used -purge or -edit
-sub unindex ($$$$) {
-        my ($self, $sync, $git, $unindex_range) = @_;
+sub unindex_todo ($$$) {
+        my ($self, $sync, $unit) = @_;
+        my $unindex_range = delete($unit->{unindex_range}) // return;
         my $unindexed = $sync->{unindexed} //= {}; # $mid0 => $num
         my $before = scalar keys %$unindexed;
         # order does not matter, here:
-        my @cmd = qw(log --raw -r
-                        --no-notes --no-color --no-abbrev --no-renames);
-        my $fh = $git->popen(@cmd, $unindex_range);
-        my $all = $self->{ibx}->git;
+        my $fh = $unit->{git}->popen(qw(log --raw -r --no-notes --no-color
+                                --no-abbrev --no-renames), $unindex_range);
         local $sync->{in_unindex} = 1;
+        my $unindex_oid = $self->can('unindex_oid');
         while (<$fh>) {
                 /\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next;
-                $all->cat_async($1, \&unindex_oid, $sync);
+                $self->git->cat_async($1, $unindex_oid, { %$sync, oid => $1 });
         }
         close $fh or die "git log failed: \$?=$?";
-        $all->cat_async_wait;
+        $self->git->cat_async_wait;
 
         return unless $sync->{-opt}->{prune};
         my $after = scalar keys %$unindexed;
         return if $before == $after;
 
         # ensure any blob can not longer be accessed via dumb HTTP
-        PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}",
+        PublicInbox::Import::run_die(['git',
+                "--git-dir=$unit->{git}->{git_dir}",
                 qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);
 }
 
-sub sync_ranges ($$$) {
-        my ($self, $sync, $epoch_max) = @_;
+sub sync_ranges ($$) {
+        my ($self, $sync) = @_;
         my $reindex = $sync->{reindex};
-
-        return last_commits($self, $epoch_max) unless $reindex;
+        return $self->last_commits($sync) unless $reindex;
         return [] if ref($reindex) ne 'HASH';
 
         my $ranges = $reindex->{from}; # arrayref;
@@ -1171,8 +1248,8 @@ sub sync_ranges ($$$) {
 
 sub index_xap_only { # git->cat_async callback
         my ($bref, $oid, $type, $size, $smsg) = @_;
-        my $self = $smsg->{v2w};
-        my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
+        my $self = $smsg->{self};
+        my $idx = idx_shard($self, $smsg->{num});
         $smsg->{raw_bytes} = $size;
         $idx->index_raw($bref, undef, $smsg);
         $self->{transact_bytes} += $size;
@@ -1190,8 +1267,9 @@ sub index_xap_step ($$$;$) {
                         "$beg..$end (% $step)\n");
         }
         for (my $num = $beg; $num <= $end; $num += $step) {
+                last if $sync->{quit};
                 my $smsg = $ibx->over->get_art($num) or next;
-                $smsg->{v2w} = $self;
+                $smsg->{self} = $self;
                 $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg);
                 if ($self->{transact_bytes} >= $self->{batch_bytes}) {
                         ${$sync->{nr}} = $num;
@@ -1200,37 +1278,53 @@ sub index_xap_step ($$$;$) {
         }
 }
 
-sub index_epoch ($$$) {
-        my ($self, $sync, $i) = @_;
-
-        my $git_dir = git_dir_n($self, $i);
-        -d $git_dir or return; # missing epochs are fine
-        my $git = PublicInbox::Git->new($git_dir);
-        if (my $unindex_range = delete $sync->{unindex_range}->{$i}) { # rare
-                unindex($self, $sync, $git, $unindex_range);
-        }
-        defined(my $stk = $sync->{stacks}->[$i]) or return;
-        $sync->{stacks}->[$i] = undef;
-        my $all = $self->{ibx}->git;
-        while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
-                $self->{current_info} = "$i.git $oid";
+sub index_todo ($$$) {
+        my ($self, $sync, $unit) = @_;
+        return if $sync->{quit};
+        unindex_todo($self, $sync, $unit);
+        my $stk = delete($unit->{stack}) or return;
+        my $all = $self->git;
+        my $index_oid = $self->can('index_oid');
+        my $unindex_oid = $self->can('unindex_oid');
+        my $pfx;
+        if ($unit->{git}->{git_dir} =~ m!/([^/]+)/git/([0-9]+\.git)\z!) {
+                $pfx = "$1 $2"; # v2
+        } else { # v1
+                ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g);
+                $pfx //= $unit->{git}->{git_dir};
+        }
+        local $self->{current_info} = "$pfx ";
+        local $sync->{latest_cmt} = \(my $latest_cmt);
+        local $sync->{unit} = $unit;
+        while (my ($f, $at, $ct, $oid, $cmt) = $stk->pop_rec) {
+                if ($sync->{quit}) {
+                        warn "waiting to quit...\n";
+                        $all->async_wait_all;
+                        $self->update_last_commit($sync);
+                        return;
+                }
+                my $req = {
+                        %$sync,
+                        autime => $at,
+                        cotime => $ct,
+                        oid => $oid,
+                        cur_cmt => $cmt
+                };
                 if ($f eq 'm') {
-                        my $arg = { %$sync, autime => $at, cotime => $ct };
                         if ($sync->{max_size}) {
-                                $all->check_async($oid, \&check_size, $arg);
+                                $all->check_async($oid, \&check_size, $req);
                         } else {
-                                $all->cat_async($oid, \&index_oid, $arg);
+                                $all->cat_async($oid, $index_oid, $req);
                         }
                 } elsif ($f eq 'd') {
-                        $all->cat_async($oid, \&unindex_oid, $sync);
+                        $all->cat_async($oid, $unindex_oid, $req);
                 }
                 if (${$sync->{need_checkpoint}}) {
                         reindex_checkpoint($self, $sync);
                 }
         }
-        $all->check_async_wait;
-        $all->cat_async_wait;
-        update_last_commit($self, $git, $i, $stk->{latest_cmt});
+        $all->async_wait_all;
+        $self->update_last_commit($sync, $stk);
 }
 
 sub xapian_only {
@@ -1243,7 +1337,7 @@ sub xapian_only {
                 $sync //= {
                         need_checkpoint => \(my $bool = 0),
                         -opt => $opt,
-                        v2w => $self,
+                        self => $self,
                         nr => \(my $nr = 0),
                         -regen_fmt => "%u/?\n",
                 };
@@ -1251,6 +1345,7 @@ sub xapian_only {
                 if ($seq || !$self->{parallel}) {
                         my $shard_end = $self->{shards} - 1;
                         for my $i (0..$shard_end) {
+                                last if $sync->{quit};
                                 index_xap_step($self, $sync, $art_beg + $i);
                                 if ($i != $shard_end) {
                                         reindex_checkpoint($self, $sync);
@@ -1260,7 +1355,7 @@ sub xapian_only {
                         index_xap_step($self, $sync, $art_beg, 1);
                 }
         }
-        $self->{ibx}->git->cat_async_wait;
+        $self->git->cat_async_wait;
         $self->done;
 }
 
@@ -1270,11 +1365,19 @@ sub index_sync {
         $opt //= {};
         return xapian_only($self, $opt) if $opt->{xapian_only};
 
-        my $pr = $opt->{-progress};
         my $epoch_max;
-        my $latest = git_dir_latest($self, \$epoch_max);
-        return unless defined $latest;
+        my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+        if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
+                use Time::HiRes qw(stat);
+                if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {
+                        my $c = $mm[10]; # 10 = ctime (nsec NV)
+                        my @hd = stat("$latest/refs/heads");
+                        my @pr = stat("$latest/packed-refs");
+                        return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0);
+                }
+        }
 
+        my $pr = $opt->{-progress};
         my $seq = $opt->{sequential_shard};
         my $art_beg; # the NNTP article number we start xapian_only at
         my $idxlevel = $self->{ibx}->{indexlevel};
@@ -1285,13 +1388,18 @@ sub index_sync {
         $self->{oidx}->rethread_prepare($opt);
         my $sync = {
                 need_checkpoint => \(my $bool = 0),
-                unindex_range => {}, # EPOCH => oid_old..oid_new
                 reindex => $opt->{reindex},
                 -opt => $opt,
-                v2w => $self,
+                self => $self,
+                ibx => $self->{ibx},
+                epoch_max => $epoch_max,
         };
-        $sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
-        if (sync_prepare($self, $sync, $epoch_max)) {
+        my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+        local $SIG{QUIT} = $quit;
+        local $SIG{INT} = $quit;
+        local $SIG{TERM} = $quit;
+
+        if (sync_prepare($self, $sync)) {
                 # tmp_clone seems to fail if inside a transaction, so
                 # we rollback here (because we opened {mm} for reading)
                 # Note: we do NOT rely on DBI transactions for atomicity;
@@ -1303,16 +1411,13 @@ sub index_sync {
 
                 # xapian_only works incrementally w/o --reindex
                 if ($seq && !$opt->{reindex}) {
-                        $art_beg = $sync->{mm_tmp}->max;
-                        $art_beg++ if defined($art_beg);
+                        $art_beg = $sync->{mm_tmp}->max || -1;
+                        $art_beg++;
                 }
         }
-        if ($sync->{max_size} = $opt->{max_size}) {
-                $sync->{index_oid} = \&index_oid;
-        }
         # work forwards through history
-        index_epoch($self, $sync, $_) for (0..$epoch_max);
-        $self->{oidx}->rethread_done($opt);
+        index_todo($self, $sync, $_) for @{delete($sync->{todo}) // []};
+        $self->{oidx}->rethread_done($opt) unless $sync->{quit};
         $self->done;
 
         if (my $nr = $sync->{nr}) {
@@ -1320,14 +1425,21 @@ sub index_sync {
                 $pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr;
         }
 
+        my $quit_warn;
         # deal with Xapian shards sequentially
         if ($seq && delete($sync->{mm_tmp})) {
-                $self->{ibx}->{indexlevel} = $idxlevel;
-                xapian_only($self, $opt, $sync, $art_beg);
+                if ($sync->{quit}) {
+                        $quit_warn = 1;
+                } else {
+                        $self->{ibx}->{indexlevel} = $idxlevel;
+                        xapian_only($self, $opt, $sync, $art_beg);
+                        $quit_warn = 1 if $sync->{quit};
+                }
         }
 
         # --reindex on the command-line
-        if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') {
+        if (!$sync->{quit} && $opt->{reindex} &&
+                        !ref($opt->{reindex}) && $idxlevel ne 'basic') {
                 $self->lock_acquire;
                 my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0);
                 if (my $xdb = $s0->idx_acquire) {
@@ -1339,12 +1451,16 @@ sub index_sync {
         }
 
         # reindex does not pick up new changes, so we rerun w/o it:
-        if ($opt->{reindex}) {
+        if ($opt->{reindex} && !$sync->{quit}) {
                 my %again = %$opt;
                 $sync = undef;
                 delete @again{qw(rethread reindex -skip_lock)};
                 index_sync($self, \%again);
+                $opt->{quit} = $again{quit}; # propagate to caller
         }
+        warn <<EOF if $quit_warn;
+W: interrupted, --xapian-only --reindex required upon restart
+EOF
 }
 
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1d5119cd..a27e9369 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -48,7 +48,7 @@ sub msg_page_i {
 # /$INBOX/$MSGID/ for unindexed v1 inboxes
 sub no_over_html ($) {
         my ($ctx) = @_;
-        my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404
+        my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; # 404
         my $eml = PublicInbox::Eml->new($bref);
         $ctx->{mhref} = '';
         PublicInbox::WwwStream::init($ctx);
@@ -64,7 +64,7 @@ sub no_over_html ($) {
 
 sub msg_page {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
         my $over = $ctx->{over} = $ibx->over or return no_over_html($ctx);
         my ($id, $prev);
@@ -88,7 +88,7 @@ sub msg_reply ($$) {
          'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style';
 
         my $info = '';
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if (my $url = $ibx->{infourl}) {
                 $url = prurl($ctx->{env}, $url);
                 $info = qq(\n  List information: <a\nhref="$url">$url</a>\n);
@@ -421,7 +421,7 @@ sub stream_thread ($$) {
 sub thread_html {
         my ($ctx) = @_;
         my $mid = $ctx->{mid};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my ($nr, $msgs) = $ibx->over->get_thread($mid);
         return missing_thread($ctx) if $nr == 0;
 
@@ -554,7 +554,7 @@ EOF
 sub add_text_body { # callback for each_part
         my ($p, $ctx) = @_;
         my $upfx = $ctx->{mhref};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
         # $p - from each_part: [ Email::MIME-like, depth, $idx ]
         my ($part, $depth, $idx) = @$p;
@@ -639,7 +639,7 @@ sub add_text_body { # callback for each_part
 
 sub _msg_page_prepare_obuf {
         my ($eml, $ctx) = @_;
-        my $over = $ctx->{-inbox}->over;
+        my $over = $ctx->{ibx}->over;
         my $obfs_ibx = $ctx->{-obfs_ibx};
         my $rv = '';
         my $mids = mids_for_index($eml);
@@ -729,7 +729,7 @@ sub SKEL_EXPAND () {
 sub thread_skel ($$$) {
         my ($skel, $ctx, $hdr) = @_;
         my $mid = mids($hdr)->[0];
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my ($nr, $msgs) = $ibx->over->get_thread($mid);
         my $parent = in_reply_to($hdr);
         $$skel .= "\n<b>Thread overview: </b>";
@@ -800,7 +800,7 @@ sub _parent_headers {
 # returns a string buffer
 sub html_footer {
         my ($ctx, $hdr) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $upfx = '../';
         my $skel;
         my $rv = '<pre>';
@@ -1072,7 +1072,7 @@ sub acc_topic { # walk_thread callback
         my ($ctx, $level, $smsg) = @_;
         my $mid = $smsg->{mid};
         my $has_blob = $smsg->{blob} // do {
-                if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+                if (my $by_mid = $ctx->{ibx}->smsg_by_mid($mid)) {
                         %$smsg = (%$smsg, %$by_mid);
                         1;
                 }
@@ -1116,7 +1116,7 @@ sub dump_topics {
         }
 
         my @out;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
 
         # sort by recency, this allows new posts to "bump" old topics...
@@ -1194,7 +1194,7 @@ sub paginate_recent ($$) {
         $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1);
         $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1);
 
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $msgs = $ibx->recent($opts, $after, $before);
         my $nr = scalar @$msgs;
         if ($nr < $lim && defined($after)) {
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 87927d5e..3f34ea82 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -197,7 +197,7 @@ sub show ($$;$) {
 
         $ctx->{'log'} = tmpfile("solve.$oid_b");
         $ctx->{fn} = $fn;
-        my $solver = PublicInbox::SolverGit->new($ctx->{-inbox},
+        my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
                                                 \&solve_result, $ctx);
         # PSGI server will call this immediately and give us a callback (-wcb)
         sub {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 37f55347..52630ae3 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -32,9 +32,8 @@ our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,}!;
 
 sub new {
-        my ($class, $pi_config) = @_;
-        $pi_config ||= PublicInbox::Config->new;
-        bless { pi_config => $pi_config }, $class;
+        my ($class, $pi_cfg) = @_;
+        bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
 }
 
 # backwards compatibility, do not use
@@ -169,14 +168,14 @@ sub preload {
                 eval "require PublicInbox::$_;";
         }
         if (ref($self)) {
-                my $pi_config = $self->{pi_config};
-                if (defined($pi_config->{'publicinbox.cgitrc'})) {
-                        $pi_config->limiter('-cgit');
+                my $pi_cfg = $self->{pi_cfg};
+                if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
+                        $pi_cfg->limiter('-cgit');
                 }
                 $self->cgit;
                 $self->stylesheets_prepare($_) for ('', '../', '../../');
                 $self->news_www;
-                $pi_config->each_inbox(\&preload_inbox);
+                $pi_cfg->each_inbox(\&preload_inbox);
         }
 }
 
@@ -210,9 +209,10 @@ sub news_cgit_fallback ($) {
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox ($$) {
         my ($ctx, $inbox) = @_;
-        my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
+        my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
+                        $ctx->{www}->{pi_cfg}->lookup_ei($inbox);
         if (defined $ibx) {
-                $ctx->{-inbox} = $ibx;
+                $ctx->{ibx} = $ibx;
                 return;
         }
 
@@ -230,11 +230,11 @@ sub invalid_inbox_mid {
         return $ret if $ret;
 
         my $mid = $ctx->{mid} = uri_unescape($mid_ue);
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
                 my ($x2, $x38) = ($1, $2);
                 # this is horrifically wasteful for legacy URLs:
-                my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
+                my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
                 my $s = PublicInbox::Eml->new($str);
                 $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
                 return r301($ctx, $inbox, mid_escape($mid));
@@ -285,7 +285,7 @@ sub get_mid_html {
 # /$INBOX/$MESSAGE_ID/t/
 sub get_thread {
         my ($ctx, $flat) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         $ctx->{flat} = $flat;
         require PublicInbox::View;
         PublicInbox::View::thread_html($ctx);
@@ -338,7 +338,7 @@ EOF
 # especially on older systems.  Stick to zlib since that's what git uses.
 sub get_thread_mbox {
         my ($ctx, $sfx) = @_;
-        my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        my $over = $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Mbox;
         PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
 }
@@ -347,7 +347,7 @@ sub get_thread_mbox {
 # /$INBOX/$MESSAGE_ID/t.atom                  -> thread as Atom feed
 sub get_thread_atom {
         my ($ctx) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Feed;
         PublicInbox::Feed::generate_thread_atom($ctx);
 }
@@ -412,11 +412,11 @@ sub legacy_redirects {
 
 sub r301 {
         my ($ctx, $inbox, $mid_ue, $suffix) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         unless ($ibx) {
                 my $r404 = invalid_inbox($ctx, $inbox);
                 return $r404 if $r404;
-                $ibx = $ctx->{-inbox};
+                $ibx = $ctx->{ibx};
         }
         my $url = $ibx->base_url($ctx->{env});
         my $qs = $ctx->{env}->{QUERY_STRING};
@@ -453,7 +453,7 @@ sub msg_page {
 sub serve_git {
         my ($ctx, $epoch, $path) = @_;
         my $env = $ctx->{env};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git;
         $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
 }
@@ -461,7 +461,7 @@ sub serve_git {
 sub mbox_results {
         my ($ctx) = @_;
         if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
-                $ctx->{-inbox}->search or return need($ctx, 'search');
+                $ctx->{ibx}->isrch or return need($ctx, 'search');
                 require PublicInbox::SearchView;
                 return PublicInbox::SearchView::mbox_results($ctx);
         }
@@ -480,18 +480,18 @@ sub news_www {
         my ($self) = @_;
         $self->{news_www} ||= do {
                 require PublicInbox::NewsWWW;
-                PublicInbox::NewsWWW->new($self->{pi_config});
+                PublicInbox::NewsWWW->new($self->{pi_cfg});
         }
 }
 
 sub cgit {
         my ($self) = @_;
         $self->{cgit} ||= do {
-                my $pi_config = $self->{pi_config};
+                my $pi_cfg = $self->{pi_cfg};
 
-                if (defined($pi_config->{'publicinbox.cgitrc'})) {
+                if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
                         require PublicInbox::Cgit;
-                        PublicInbox::Cgit->new($pi_config);
+                        PublicInbox::Cgit->new($pi_cfg);
                 } else {
                         require Plack::Util;
                         Plack::Util::inline_object(call => sub { r404() });
@@ -537,7 +537,7 @@ sub stylesheets_prepare ($$) {
         } || sub { $_[0] };
 
         my $css_map = {};
-        my $stylesheets = $self->{pi_config}->{css} || [];
+        my $stylesheets = $self->{pi_cfg}->{css} || [];
         my $links = [];
         my $inline_ok = 1;
 
@@ -641,7 +641,7 @@ sub get_css ($$$) {
         my $css = $css_map->{$key};
         if (!defined($css) && $key eq 'userContent') {
                 my $env = $ctx->{env};
-                $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+                $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
         }
         defined $css or return r404();
         my $h = [ 'Content-Length', bytes::length($css),
@@ -653,7 +653,7 @@ sub get_css ($$$) {
 sub get_description {
         my ($ctx, $inbox) = @_;
         invalid_inbox($ctx, $inbox) || do {
-                my $d = $ctx->{-inbox}->description . "\n";
+                my $d = $ctx->{ibx}->description . "\n";
                 [ 200, [ 'Content-Length', bytes::length($d),
                         'Content-Type', 'text/plain' ], [ $d ] ];
         };
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 8bbce929..bc296e01 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -41,7 +41,7 @@ sub compile_watchheaders ($) {
 }
 
 sub new {
-        my ($class, $config) = @_;
+        my ($class, $cfg) = @_;
         my (%mdmap, $spamc);
         my (%imap, %nntp); # url => [inbox objects] or 'watchspam'
 
@@ -50,7 +50,7 @@ sub new {
         # indefinitely...
         foreach my $pfx (qw(publicinboxwatch publicinboxlearn)) {
                 my $k = "$pfx.watchspam";
-                defined(my $dirs = $config->{$k}) or next;
+                defined(my $dirs = $cfg->{$k}) or next;
                 $dirs = PublicInbox::Config::_array($dirs);
                 for my $dir (@$dirs) {
                         my $url;
@@ -69,10 +69,10 @@ sub new {
 
         my $k = 'publicinboxwatch.spamcheck';
         my $default = undef;
-        my $spamcheck = PublicInbox::Spamcheck::get($config, $k, $default);
+        my $spamcheck = PublicInbox::Spamcheck::get($cfg, $k, $default);
         $spamcheck = _spamcheck_cb($spamcheck) if $spamcheck;
 
-        $config->each_inbox(sub {
+        $cfg->each_inbox(sub {
                 # need to make all inboxes writable for spam removal:
                 my $ibx = $_[0] = PublicInbox::InboxWritable->new($_[0]);
 
@@ -113,7 +113,7 @@ sub new {
                 spamcheck => $spamcheck,
                 mdmap => \%mdmap,
                 mdre => $mdre,
-                config => $config,
+                pi_cfg => $cfg,
                 imap => scalar keys %imap ? \%imap : undef,
                 nntp => scalar keys %nntp? \%nntp : undef,
                 importers => {},
@@ -175,7 +175,7 @@ sub _remove_spam {
         $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
         my $eml = eml_from_path($path) or return;
         local $SIG{__WARN__} = warn_ignore_cb();
-        $self->{config}->each_inbox(\&remove_eml_i, $self, $eml, $path);
+        $self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
 }
 
 sub import_eml ($$$) {
@@ -217,7 +217,7 @@ sub _try_path {
                 warn "unmappable dir: $1\n";
                 return;
         }
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
         local $SIG{__WARN__} = sub {
                 my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
                 $warn_cb->($pfx, "path: $path\n", @_);
@@ -316,7 +316,7 @@ sub cfg_bool ($$$) {
 # flesh out common IMAP-specific data structures
 sub imap_common_init ($) {
         my ($self) = @_;
-        my $cfg = $self->{config};
+        my $cfg = $self->{pi_cfg};
         my $mic_args = {}; # scheme://authority => Mail:IMAPClient arg
         for my $url (sort keys %{$self->{imap}}) {
                 my $uri = PublicInbox::URIimap->new($url);
@@ -418,7 +418,7 @@ sub imap_import_msg ($$$$$) {
                 if ($flags =~ /\\Seen\b/) {
                         local $SIG{__WARN__} = warn_ignore_cb();
                         my $eml = PublicInbox::Eml->new($raw);
-                        $self->{config}->each_inbox(\&remove_eml_i,
+                        $self->{pi_cfg}->each_inbox(\&remove_eml_i,
                                                 $self, $eml, "$url UID:$uid");
                 }
         } else {
@@ -467,7 +467,7 @@ sub imap_fetch_all ($$$) {
         my $key = $req;
         $key =~ s/\.PEEK//;
         my ($uids, $batch);
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
         local $SIG{__WARN__} = sub {
                 my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
                 $batch //= '?';
@@ -775,7 +775,7 @@ sub watch_imap_init ($$) {
 # flesh out common NNTP-specific data structures
 sub nntp_common_init ($) {
         my ($self) = @_;
-        my $cfg = $self->{config};
+        my $cfg = $self->{pi_cfg};
         my $nn_args = {}; # scheme://authority => Net::NNTP->new arg
         for my $url (sort keys %{$self->{nntp}}) {
                 my $sec = uri_section(uri_new($url));
@@ -929,7 +929,7 @@ sub nntp_fetch_all ($$$) {
         $beg = $l_art + 1;
 
         warn "I: $url fetching ARTICLE $beg..$end\n";
-        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+        my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
         my ($err, $art);
         local $SIG{__WARN__} = sub {
                 my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
@@ -966,7 +966,7 @@ sub nntp_fetch_all ($$$) {
                         }
                 } elsif ($inboxes eq 'watchspam') {
                         my $eml = PublicInbox::Eml->new(\$raw);
-                        $self->{config}->each_inbox(\&remove_eml_i,
+                        $self->{pi_cfg}->each_inbox(\&remove_eml_i,
                                         $self, $eml, "$url ARTICLE $art");
                 } else {
                         die "BUG: destination unknown $inboxes";
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
index 2818400e..204e2f82 100644
--- a/lib/PublicInbox/WwwAltId.pm
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -30,7 +30,7 @@ sub check_output {
 sub sqldump ($$) {
         my ($ctx, $altid_pfx) = @_;
         my $env = $ctx->{env};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $altid_map = $ibx->altid_map;
         my $fn = $altid_map->{$altid_pfx};
         unless (defined $fn) {
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 388def12..912f860e 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -15,7 +15,7 @@ use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
         my ($class, $ctx, $cb) = @_;
-        $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+        $ctx->{feed_base_url} = $ctx->{ibx}->base_url($ctx->{env});
         $ctx->{cb} = $cb || \&PublicInbox::GzipFilter::close;
         $ctx->{emit_header} = 1;
         bless $ctx, $class;
@@ -53,7 +53,7 @@ sub getline {
         my ($self) = @_;
         my $cb = $self->{cb} or return;
         while (my $smsg = $cb->($self)) {
-                my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
+                my $eml = $self->{ibx}->smsg_eml($smsg) or next;
                 return $self->translate(feed_entry($self, $smsg, $eml));
         }
         delete $self->{cb};
@@ -82,7 +82,7 @@ sub to_uuid ($) {
 
 sub atom_header {
         my ($ctx, $title) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $base_url = $ctx->{feed_base_url};
         my $search_q = $ctx->{search_query};
         my $self_url = $base_url;
@@ -136,10 +136,10 @@ sub feed_entry {
         $title = title_tag($title);
 
         my $from = $eml->header('From') // $eml->header('Sender') //
-                $ctx->{-inbox}->{-primary_address};
+                $ctx->{ibx}->{-primary_address};
         my ($email) = PublicInbox::Address::emails($from);
         my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
-        $email = ascii_html($email // $ctx->{-inbox}->{-primary_address});
+        $email = ascii_html($email // $ctx->{ibx}->{-primary_address});
 
         my $s = delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '';
         $s .= "<entry><author><name>$name</name><email>$email</email>" .
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 09c66d02..0fe63e42 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -16,7 +16,7 @@ sub referer_match ($) {
         return 1 if $referer eq ''; # no referer is always OK for wget/curl
 
         # prevent deep-linking from other domains on some browsers (Firefox)
-        # n.b.: $ctx->{-inbox}->base_url($env) with INBOX_URL won't work
+        # n.b.: $ctx->{ibx}->base_url($env) with INBOX_URL won't work
         # with dillo, we can only match "$url_scheme://$HTTP_HOST/" without
         # path components
         my $base_url = $env->{'psgi.url_scheme'} . '://' .
@@ -88,15 +88,15 @@ sub get_attach ($$$) {
         $ctx->{idx} = $idx;
         bless $ctx, __PACKAGE__;
         my $eml;
-        if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) {
+        if ($ctx->{smsg} = $ctx->{ibx}->smsg_by_mid($ctx->{mid})) {
                 return sub { # public-inbox-httpd-only
                         $ctx->{wcb} = $_[0];
                         scan_attach($ctx);
                 } if $ctx->{env}->{'pi-httpd.async'};
                 # generic PSGI:
-                $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg});
-        } elsif (!$ctx->{-inbox}->over) {
-                if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) {
+                $eml = $ctx->{ibx}->smsg_eml($ctx->{smsg});
+        } elsif (!$ctx->{ibx}->over) {
+                if (my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid})) {
                         $eml = PublicInbox::Eml->new($bref);
                 }
         }
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index bda2761c..4b3f1674 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -44,7 +44,7 @@ sub url_regexp {
         my ($ctx, $key, $default) = @_;
         $key //= 'publicInbox.wwwListing';
         $default //= '404';
-        my $v = $ctx->{www}->{pi_config}->{lc $key} // $default;
+        my $v = $ctx->{www}->{pi_cfg}->{lc $key} // $default;
 again:
         if ($v eq 'match=domain') {
                 my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
@@ -69,8 +69,11 @@ sub hide_key { 'www' }
 sub response {
         my ($class, $ctx) = @_;
         bless $ctx, $class;
+        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
+                $ALL->misc->reopen;
+        }
         my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_config},
+        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
                                                 \&list_match_i, $re, $ctx);
         sub {
                 $ctx->{-wcb} = $_[0]; # HTTP server callback
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 638f4e27..958251a3 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -12,11 +12,12 @@ our @EXPORT_OK = qw(html_oneshot);
 use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl ts2str);
 our $TOR_URL = 'https://www.torproject.org/';
-our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+our $CODE_URL = [ qw(http://ou63pmih66umazou.onion/public-inbox.git
+        https://public-inbox.org/public-inbox.git) ];
 
 sub base_url ($) {
         my $ctx = shift;
-        my $base_url = $ctx->{-inbox}->base_url($ctx->{env});
+        my $base_url = $ctx->{ibx}->base_url($ctx->{env});
         chop $base_url; # no trailing slash for clone
         $base_url;
 }
@@ -35,7 +36,7 @@ sub async_eml { # for async_blob_cb
 
 sub html_top ($) {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $desc = ascii_html($ibx->description);
         my $title = delete($ctx->{-title_html}) // $desc;
         my $upfx = $ctx->{-upfx} || '';
@@ -54,7 +55,7 @@ sub html_top ($) {
                         qq(<a\nhref="$color">color</a> / ).
                         qq(<a\nhref=#mirror>mirror</a> / ).
                         qq(<a\nhref="$atom">Atom feed</a>);
-        if ($ibx->search) {
+        if ($ibx->isrch) {
                 my $q_val = delete($ctx->{-q_value_html}) // '';
                 $q_val = qq(\nvalue="$q_val") if $q_val ne '';
                 # XXX gross, for SearchView.pm
@@ -78,22 +79,24 @@ sub html_top ($) {
 
 sub coderepos ($) {
         my ($ctx) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $cr = $ctx->{ibx}->{coderepo} // return ();
+        my $cfg = $ctx->{www}->{pi_cfg};
+        my $upfx = ($ctx->{-upfx} // ''). '../';
         my @ret;
-        if (defined(my $cr = $ibx->{coderepo})) {
-                my $cfg = $ctx->{www}->{pi_config};
-                my $env = $ctx->{env};
-                for my $cr_name (@$cr) {
-                        my $urls = $cfg->{"coderepo.$cr_name.cgiturl"};
-                        if ($urls) {
-                                $ret[0] //= <<EOF;
+        for my $cr_name (@$cr) {
+                my $urls = $cfg->{"coderepo.$cr_name.cgiturl"} // next;
+                $ret[0] //= <<EOF;
 code repositories for the project(s) associated with this inbox:
 EOF
-                                $ret[0] .= "\n\t".prurl($env, $_) for @$urls;
-                        }
+                for (@$urls) {
+                        # relative or absolute URL?, prefix relative "foo.git"
+                        # with appropriate number of "../"
+                        my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $upfx.$_;
+                        $u = ascii_html(prurl($ctx->{env}, $u));
+                        $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>);
                 }
         }
-        @ret; # may be empty
+        @ret; # may be empty, this sub is called as an arg for join()
 }
 
 sub code_footer ($) {
@@ -109,7 +112,7 @@ sub _html_end {
 id=mirror>This inbox may be cloned and mirrored by anyone:</a>
 EOF
 
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $desc = ascii_html($ibx->description);
 
         my @urls;
@@ -143,10 +146,10 @@ EOF
         }
 
         $urls .= "\n" . join('', map { "\tgit clone --mirror $_\n" } @urls);
-        my $addrs = $ibx->{address};
-        $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
-        my $v = defined $max ? '-V2' : '-V1';
-        $urls .= <<EOF;
+        if (my $addrs = $ibx->{address}) {
+                $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
+                my $v = defined $max ? '-V2' : '-V1';
+                $urls .= <<EOF;
 
         # If you have public-inbox 1.1+ installed, you may
         # initialize and index your mirror using the following commands:
@@ -154,6 +157,7 @@ EOF
                 $addrs
         public-inbox-index $dir
 EOF
+        }
         my $cfg_link = ($ctx->{-upfx} // '').'_/text/config/raw';
         $urls .= <<EOF;
 
@@ -184,7 +188,7 @@ sub getline {
         my $cb = $ctx->{cb} or return;
         while (defined(my $x = $cb->($ctx))) { # x = smsg or scalar non-ref
                 if (ref($x)) { # smsg
-                        my $eml = $ctx->{-inbox}->smsg_eml($x) or next;
+                        my $eml = $ctx->{ibx}->smsg_eml($x) or next;
                         $ctx->{smsg} = $x;
                         return $ctx->translate($cb->($ctx, $eml));
                 } else { # scalar
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 04c9b1c4..a8560916 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -49,7 +49,7 @@ sub get_text {
 
         # enforce trailing slash for "wget -r" compatibility
         if (!$have_tslash && $code == 200) {
-                my $url = $ctx->{-inbox}->base_url($env);
+                my $url = $ctx->{ibx}->base_url($env);
                 $url .= "_/text/$key/";
 
                 return [ 302, [ 'Content-Type', 'text/plain',
@@ -100,7 +100,7 @@ sub _srch_prefix ($$) {
 
 sub _colors_help ($$) {
         my ($ctx, $txt) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $env = $ctx->{env};
         my $base_url = $ibx->base_url($env);
         $$txt .= "color customization for $base_url\n";
@@ -135,7 +135,7 @@ sub URI_PATH () { '^A-Za-z0-9\-\._~/' }
 # n.b. this is a perfect candidate for memoization
 sub inbox_config ($$$) {
         my ($ctx, $hdr, $txt) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
         my $name = dq_escape($ibx->{name});
         my $inboxdir = '/path/to/top-level-inbox';
@@ -189,9 +189,9 @@ EOF
 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
 EOF
-                my $pi_config = $ctx->{www}->{pi_config};
+                my $pi_cfg = $ctx->{www}->{pi_cfg};
                 for my $cr_name (@$cr) {
-                        my $urls = $pi_config->{"coderepo.$cr_name.cgiturl"};
+                        my $urls = $pi_cfg->{"coderepo.$cr_name.cgiturl"};
                         my $path = "/path/to/$cr_name";
                         $cr_name = dq_escape($cr_name);
 
@@ -221,7 +221,7 @@ sub _default_text ($$$$) {
         return inbox_config($ctx, $hdr, $txt) if $key eq 'config';
         return if $key ne 'help'; # TODO more keys?
 
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $base_url = $ibx->base_url($ctx->{env});
         $$txt .= "public-inbox help for $base_url\n";
         $$txt .= <<EOF;
@@ -250,7 +250,7 @@ EOF
 
         # n.b. we use the Xapian DB for any regeneratable,
         # order-of-arrival-independent data.
-        my $srch = $ibx->search;
+        my $srch = $ibx->isrch;
         if ($srch) {
                 $$txt .= <<EOF;
 search
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 6a74daf9..ca2345f7 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -109,8 +109,7 @@ sub prepare_reindex ($$$) {
                         $opt->{reindex}->{from} = $lc;
                 }
         } else { # v2
-                my $max;
-                $im->git_dir_latest(\$max) or return;
+                my $max = $ibx->max_git_epoch // return;
                 my $from = $opt->{reindex}->{from};
                 my $mm = $ibx->mm;
                 my $v = PublicInbox::Search::SCHEMA_VERSION();
@@ -271,7 +270,6 @@ sub run {
 
         local %SIG = %SIG;
         setup_signals();
-        $ibx->umask_prepare;
         $ibx->with_umask(\&_run, $ibx, $cb, $opt);
 }
 
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
new file mode 100644
index 00000000..800c6bad
--- /dev/null
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2020 all contributors <meta@public-inbox.org>
+ * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+ *
+ * libgit2 for Inline::C
+ * Avoiding Git::Raw since it doesn't guarantee a stable API,
+ * while libgit2 itself seems reasonably stable.
+ */
+#include <git2.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <poll.h>
+
+/*
+ * Throw a Perl exception via croak() when a libgit2 call did not
+ * return GIT_OK.  The message has the form "<rc> <msg> (<detail>)",
+ * where <detail> is the message of giterr_last(), or "unknown" if
+ * giterr_last() returns NULL.  Does nothing when rc is GIT_OK.
+ */
+static void croak_if_err(int rc, const char *msg)
+{
+        const git_error *last;
+
+        if (rc == GIT_OK)
+                return;
+
+        last = giterr_last();
+        croak("%d %s (%s)", rc, msg, last ? last->message : "unknown");
+}
+
+/*
+ * Create a git_odb via git_odb_new() and return a reference blessed
+ * into PublicInbox::Gcf2.  The referent is an integer SV holding the
+ * git_odb pointer; it is marked read-only after blessing.
+ * Croaks if git_odb_new() fails.
+ */
+SV *new()
+{
+        git_odb *odb;
+        SV *inner, *obj;
+
+        croak_if_err(git_odb_new(&odb), "git_odb_new");
+
+        inner = newSViv((IV)odb);
+        obj = newRV_noinc(inner);
+        sv_bless(obj, gv_stashpv("PublicInbox::Gcf2", GV_ADD));
+        SvREADONLY_on(inner);
+
+        return obj;
+}
+
+/*
+ * Recover the git_odb pointer stored as an integer in the SV that
+ * self references (the layout new() sets up).
+ */
+static git_odb *odb_ptr(SV *self)
+{
+        SV *inner = SvRV(self);
+
+        return (git_odb *)SvIV(inner);
+}
+
+/* release the git_odb held by self via git_odb_free() */
+void DESTROY(SV *self)
+{
+        git_odb *odb = odb_ptr(self);
+
+        git_odb_free(odb);
+}
+
+/*
+ * Register objects_path with the git_odb held by self using
+ * git_odb_add_disk_alternate(); croaks if that call fails.
+ * needs "$GIT_DIR/objects", not $GIT_DIR
+ */
+void add_alternate(SV *self, const char *objects_path)
+{
+        git_odb *odb = odb_ptr(self);
+
+        croak_if_err(git_odb_add_disk_alternate(odb, objects_path),
+                        "git_odb_add_disk_alternate");
+}
+
+/* number of elements in the array v */
+#define CAPA(v) (sizeof(v) / sizeof((v)[0]))
+
+/*
+ * Read the object named by oidsv from the git_odb held by self and
+ * write "$OID $TYPE $SIZE\n", the object data and a trailing "\n"
+ * to fd with writev().
+ *
+ * returns true on success, false on failure
+ * (nothing is written when the OID cannot be parsed or read)
+ * this requires an unabbreviated git OID
+ *
+ * Croaks with "writev error: ..." when writev() fails with an errno
+ * other than EAGAIN/EINTR, or when writev() returns 0.
+ */
+int cat_oid(SV *self, int fd, SV *oidsv)
+{
+        /*
+         * adjust when libgit2 gets SHA-256 support, we return the
+         * same header as git-cat-file --batch "$OID $TYPE $SIZE\n"
+         *
+         * The sizing literal includes the "\n" emitted by the snprintf
+         * format below; sizeof() additionally counts the NUL snprintf
+         * appends.  Without the "\n" the buffer was one byte short for
+         * a 20-digit size and the newline would have been truncated.
+         * (assumes "commit" is the longest type name and that
+         * 18446744073709551615 is the largest printable size)
+         */
+        char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615\n")];
+        struct iovec vec[3];
+        size_t nvec = CAPA(vec);
+        git_oid oid;
+        git_odb_object *object = NULL;
+        int rc, err = 0;
+        STRLEN oidlen;
+        char *oidptr = SvPV(oidsv, oidlen);
+
+        /* same trailer as git-cat-file --batch */
+        vec[2].iov_len = 1;
+        vec[2].iov_base = "\n";
+
+        rc = git_oid_fromstrn(&oid, oidptr, oidlen);
+        if (rc == GIT_OK)
+                rc = git_odb_read(&object, odb_ptr(self), &oid);
+        if (rc == GIT_OK) {
+                /* vec[0] = header, vec[1] = object data, vec[2] = "\n" */
+                vec[0].iov_base = hdr;
+                vec[1].iov_base = (void *)git_odb_object_data(object);
+                vec[1].iov_len = git_odb_object_size(object);
+
+                /* hex OID first, then " $TYPE $SIZE\n" right after it */
+                git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object));
+                vec[0].iov_len = GIT_OID_HEXSZ +
+                                snprintf(hdr + GIT_OID_HEXSZ,
+                                        sizeof(hdr) - GIT_OID_HEXSZ,
+                                        " %s %zu\n",
+                                        git_object_type2string(
+                                                git_odb_object_type(object)),
+                                        vec[1].iov_len);
+        } else { /* caller retries */
+                nvec = 0;
+        }
+        /* nvec counts the trailing entries of vec[] still to be written */
+        while (nvec && !err) {
+                ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec);
+
+                if (w > 0) {
+                        size_t done = 0;
+                        size_t i;
+
+                        /* account for the w bytes writev() consumed */
+                        for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) {
+                                if (w >= vec[i].iov_len) {
+                                        /* fully written vec */
+                                        w -= vec[i].iov_len;
+                                        done++;
+                                } else { /* partially written vec */
+                                        char *p = vec[i].iov_base;
+                                        vec[i].iov_base = p + w;
+                                        vec[i].iov_len -= w;
+                                        break;
+                                }
+                        }
+                        nvec -= done;
+                } else if (w < 0) {
+                        err = errno;
+                        switch (err) {
+                        case EAGAIN: {
+                                /* wait for fd to report POLLOUT, then retry */
+                                struct pollfd pfd;
+                                pfd.events = POLLOUT;
+                                pfd.fd = fd;
+                                poll(&pfd, 1, -1);
+                        }
+                                /* fall-through */
+                        case EINTR:
+                                /* not fatal: clear err so the loop retries */
+                                err = 0;
+                        }
+                } else { /* w == 0 */
+                        err = ENOSPC;
+                }
+        }
+        /* free the object before croak(), which does not return here */
+        if (object)
+                git_odb_object_free(object);
+        if (err)
+                croak("writev error: %s", strerror(err));
+
+        return rc == GIT_OK;
+}