about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Admin.pm75
-rw-r--r--lib/PublicInbox/AdminEdit.pm67
-rw-r--r--lib/PublicInbox/Git.pm45
-rw-r--r--lib/PublicInbox/Import.pm101
-rw-r--r--lib/PublicInbox/Inbox.pm2
-rw-r--r--lib/PublicInbox/NNTP.pm27
-rw-r--r--lib/PublicInbox/Search.pm15
-rw-r--r--lib/PublicInbox/SearchIdx.pm41
-rw-r--r--lib/PublicInbox/SearchMsg.pm6
-rw-r--r--lib/PublicInbox/V2Writable.pm221
-rw-r--r--lib/PublicInbox/WWW.pm17
-rw-r--r--lib/PublicInbox/WwwListing.pm174
-rw-r--r--lib/PublicInbox/Xapcmd.pm222
13 files changed, 757 insertions, 256 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 4a862c6d..8a2f2043 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -9,6 +9,8 @@ use warnings;
 use Cwd 'abs_path';
 use base qw(Exporter);
 our @EXPORT_OK = qw(resolve_repo_dir);
+my $CFG; # all the admin stuff is a singleton
+require PublicInbox::Config;
 
 sub resolve_repo_dir {
         my ($cd, $ver) = @_;
@@ -66,36 +68,65 @@ $ibx->{mainrepo} has unexpected indexlevel in Xapian: $m
         $l;
 }
 
-sub resolve_inboxes {
-        my ($argv, $warn_on_unconfigured) = @_;
-        require PublicInbox::Config;
+sub unconfigured_ibx ($$) {
+        my ($dir, $i) = @_;
+        my $name = "unconfigured-$i";
+        PublicInbox::Inbox->new({
+                name => $name,
+                address => [ "$name\@example.com" ],
+                mainrepo => $dir,
+                # TODO: consumers may want to warn on this:
+                #-unconfigured => 1,
+        });
+}
+
+sub config () { $CFG //= eval { PublicInbox::Config->new } }
+
+sub resolve_inboxes ($;$) {
+        my ($argv, $opt) = @_;
         require PublicInbox::Inbox;
+        $opt ||= {};
 
-        my @ibxs = map { resolve_repo_dir($_) } @$argv;
-        push(@ibxs, resolve_repo_dir()) unless @ibxs;
+        my $cfg = config();
+        if ($opt->{all}) {
+                my $cfgfile = PublicInbox::Config::default_file();
+                $cfg or die "--all specified, but $cfgfile not readable\n";
+                @$argv and die "--all specified, but directories specified\n";
+        }
 
+        my $min_ver = $opt->{-min_inbox_version} || 0;
+        my (@old, @ibxs);
         my %dir2ibx;
-        if (my $config = eval { PublicInbox::Config->new }) {
-                $config->each_inbox(sub {
+        if ($cfg) {
+                $cfg->each_inbox(sub {
                         my ($ibx) = @_;
+                        $ibx->{version} ||= 1;
                         $dir2ibx{abs_path($ibx->{mainrepo})} = $ibx;
                 });
-        } elsif ($warn_on_unconfigured) {
-                # do we really care about this?  It's annoying...
-                warn $warn_on_unconfigured, "\n";
         }
-        for my $i (0..$#ibxs) {
-                my $dir = $ibxs[$i];
-                $ibxs[$i] = $dir2ibx{$dir} ||= do {
-                        my $name = "unconfigured-$i";
-                        PublicInbox::Inbox->new({
-                                name => $name,
-                                address => [ "$name\@example.com" ],
-                                mainrepo => $dir,
-                                # TODO: consumers may want to warn on this:
-                                #-unconfigured => 1,
-                        });
-                };
+        if ($opt->{all}) {
+                my @all = values %dir2ibx;
+                @all = grep { $_->{version} >= $min_ver } @all;
+                push @ibxs, @all;
+        } else { # directories specified on the command-line
+                my $i = 0;
+                my @dirs = @$argv;
+                push @dirs, '.' unless @dirs;
+                foreach (@dirs) {
+                        my $v;
+                        my $dir = resolve_repo_dir($_, \$v);
+                        if ($v < $min_ver) {
+                                push @old, $dir;
+                                next;
+                        }
+                        my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
+                        $i++;
+                        push @ibxs, $ibx;
+                }
+        }
+        if (@old) {
+                die "inboxes $min_ver inboxes not supported by $0\n\t",
+                    join("\n\t", @old), "\n";
         }
         @ibxs;
 }
diff --git a/lib/PublicInbox/AdminEdit.pm b/lib/PublicInbox/AdminEdit.pm
new file mode 100644
index 00000000..169feba0
--- /dev/null
+++ b/lib/PublicInbox/AdminEdit.pm
@@ -0,0 +1,67 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common stuff between -edit, -purge (and maybe -learn in the future)
+package PublicInbox::AdminEdit;
+use strict;
+use warnings;
+use PublicInbox::Admin;
+our @OPT = qw(all force|f verbose|v!);
+
+sub check_editable ($) {
+        my ($ibxs) = @_;
+
+        foreach my $ibx (@$ibxs) {
+                my $lvl = $ibx->{indexlevel};
+                if (defined $lvl) {
+                        PublicInbox::Admin::indexlevel_ok_or_die($lvl);
+                        next;
+                }
+
+                # Undefined indexlevel, so `full'...
+                # Search::Xapian exists and the DB can be read, at least, fine
+                $ibx->search and next;
+
+                # it's possible for a Xapian directory to exist,
+                # but Search::Xapian to go missing/broken.
+                # Make sure it's purged in that case:
+                $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n";
+
+                # $ibx->{search} is populated by $ibx->over call
+                my $xdir_ro = $ibx->{search}->xdir(1);
+                my $npart = 0;
+                foreach my $part (<$xdir_ro/*>) {
+                        if (-d $part && $part =~ m!/[0-9]+\z!) {
+                                my $bytes = 0;
+                                $bytes += -s $_ foreach glob("$part/*");
+                                $npart++ if $bytes;
+                        }
+                }
+                if ($npart) {
+                        PublicInbox::Admin::require_or_die('-search');
+                } else {
+                        # somebody could "rm -r" all the Xapian directories;
+                        # let them purge the overview, at least
+                        $ibx->{indexlevel} ||= 'basic';
+                }
+        }
+}
+
+# takes the output of V2Writable::purge and V2Writable::replace
+# $rewrites = [ array commits keyed by epoch ]
+sub show_rewrites ($$$) {
+        my ($fh, $ibx, $rewrites) = @_;
+        print $fh "$ibx->{mainrepo}:";
+        if (scalar @$rewrites) {
+                my $epoch = -1;
+                my @out = map {;
+                        ++$epoch;
+                        "$epoch.git: ".(defined($_) ? $_ : '(unchanged)')
+                } @$rewrites;
+                print $fh join("\n\t", '', @out), "\n";
+        } else {
+                print $fh " NONE\n";
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 68445b3c..f5c7a95c 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -145,41 +145,24 @@ again:
                 fail($self, "Unexpected result from git cat-file: $head");
 
         my $size = $1;
-        my $ref_type = $ref ? ref($ref) : '';
-
         my $rv;
         my $left = $size;
-        $$ref = $size if ($ref_type eq 'SCALAR');
-        my $cb_err;
-
-        if ($ref_type eq 'CODE') {
-                $rv = eval { $ref->($in, \$left) };
-                $cb_err = $@;
-                # drain the rest
-                my $max = 8192;
-                while ($left > 0) {
-                        my $r = read($in, my $x, $left > $max ? $max : $left);
-                        defined($r) or fail($self, "read failed: $!");
-                        $r == 0 and fail($self, 'exited unexpectedly');
-                        $left -= $r;
-                }
-        } else {
-                my $offset = 0;
-                my $buf = '';
-                while ($left > 0) {
-                        my $r = read($in, $buf, $left, $offset);
-                        defined($r) or fail($self, "read failed: $!");
-                        $r == 0 and fail($self, 'exited unexpectedly');
-                        $left -= $r;
-                        $offset += $r;
-                }
-                $rv = \$buf;
+        $$ref = $size if $ref;
+
+        my $offset = 0;
+        my $buf = '';
+        while ($left > 0) {
+                my $r = read($in, $buf, $left, $offset);
+                defined($r) or fail($self, "read failed: $!");
+                $r == 0 and fail($self, 'exited unexpectedly');
+                $left -= $r;
+                $offset += $r;
         }
+        $rv = \$buf;
 
-        my $r = read($in, my $buf, 1);
+        my $r = read($in, my $lf, 1);
         defined($r) or fail($self, "read failed: $!");
-        fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n");
-        die $cb_err if $cb_err;
+        fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n");
 
         $rv;
 }
@@ -320,7 +303,7 @@ sub modified ($) {
                 chomp $oid;
                 my $buf = cat_file($self, $oid) or next;
                 $$buf =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm or next;
-                my $cmt_time = $1;
+                my $cmt_time = $1 + 0;
                 $modified = $cmt_time if $cmt_time > $modified;
         }
         $modified || time;
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 2c4bad92..137b2b78 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -277,7 +277,7 @@ sub git_timestamp {
         "$ts $zone";
 }
 
-sub extract_author_info ($) {
+sub extract_cmt_info ($) {
         my ($mime) = @_;
 
         my $sender = '';
@@ -314,7 +314,17 @@ sub extract_author_info ($) {
                 $name = '';
                 warn "no name in From: $from or Sender: $sender\n";
         }
-        ($name, $email);
+
+        my $hdr = $mime->header_obj;
+
+        my $subject = $hdr->header('Subject');
+        $subject = '(no subject)' unless defined $subject;
+        # Mime decoding can create nulls replace them with spaces to protect git
+        $subject =~ tr/\0/ /;
+        utf8::encode($subject);
+        my $at = git_timestamp(my @at = msg_datestamp($hdr));
+        my $ct = git_timestamp(my @ct = msg_timestamp($hdr));
+        ($name, $email, $at, $ct, $subject);
 }
 
 # kill potentially confusing/misleading headers
@@ -361,19 +371,7 @@ sub clean_tree_v2 ($$$) {
 sub add {
         my ($self, $mime, $check_cb) = @_; # mime = Email::MIME
 
-        my ($name, $email) = extract_author_info($mime);
-        my $hdr = $mime->header_obj;
-        my @at = msg_datestamp($hdr);
-        my @ct = msg_timestamp($hdr);
-        my $author_time_raw = git_timestamp(@at);
-        my $commit_time_raw = git_timestamp(@ct);
-
-        my $subject = $mime->header('Subject');
-        $subject = '(no subject)' unless defined $subject;
-        # Mime decoding can create nulls replace them with spaces to protect git
-        $subject =~ tr/\0/ /;
-        utf8::encode($subject);
-
+        my ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime);
         my $path_type = $self->{path_type};
         my $path;
         if ($path_type eq '2/38') {
@@ -416,8 +414,8 @@ sub add {
         }
 
         print $w "commit $ref\nmark :$commit\n",
-                "author $name <$email> $author_time_raw\n",
-                "committer $self->{ident} $commit_time_raw\n" or wfail;
+                "author $name <$email> $at\n",
+                "committer $self->{ident} $ct\n" or wfail;
         print $w "data ", (length($subject) + 1), "\n",
                 $subject, "\n\n" or wfail;
         if ($tip ne '') {
@@ -486,33 +484,45 @@ sub digest2mid ($$) {
         "$dt.$b64" . '@z';
 }
 
-sub clean_purge_buffer {
-        my ($oids, $buf) = @_;
-        my $cmt_msg = 'purged '.join(' ',@$oids)."\n";
+sub rewrite_commit ($$$$) {
+        my ($self, $oids, $buf, $mime) = @_;
+        my ($name, $email, $at, $ct, $subject);
+        if ($mime) {
+                ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime);
+        } else {
+                $name = $email = '';
+                $subject = 'purged '.join(' ', @$oids);
+        }
         @$oids = ();
-
+        $subject .= "\n";
         foreach my $i (0..$#$buf) {
                 my $l = $buf->[$i];
                 if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) {
-                        $buf->[$i] = "author <> $1\n";
+                        $at //= $1;
+                        $buf->[$i] = "author $name <$email> $at\n";
+                } elsif ($l =~ /^committer .* ([0-9]+ [\+-]?[0-9]+)$/) {
+                        $ct //= $1;
+                        $buf->[$i] = "committer $self->{ident} $ct\n";
                 } elsif ($l =~ /^data ([0-9]+)/) {
-                        $buf->[$i++] = "data " . length($cmt_msg) . "\n";
-                        $buf->[$i] = $cmt_msg;
+                        $buf->[$i++] = "data " . length($subject) . "\n";
+                        $buf->[$i] = $subject;
                         last;
                 }
         }
 }
 
-sub purge_oids {
-        my ($self, $purge) = @_;
-        my $tmp = "refs/heads/purge-".((keys %$purge)[0]);
+# returns the new commit OID if a replacement was done
+# returns undef if nothing was done
+sub replace_oids {
+        my ($self, $mime, $replace_map) = @_; # oid => raw string
+        my $tmp = "refs/heads/replace-".((keys %$replace_map)[0]);
         my $old = $self->{'ref'};
         my $git = $self->{git};
         my @export = (qw(fast-export --no-data --use-done-feature), $old);
         my $rd = $git->popen(@export);
         my ($r, $w) = $self->gfi_start;
         my @buf;
-        my $npurge = 0;
+        my $nreplace = 0;
         my @oids;
         my ($done, $mark);
         my $tree = $self->{-tree};
@@ -535,10 +545,13 @@ sub purge_oids {
                 } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
                         my ($oid, $path) = ($1, $2);
                         $tree->{$path} = 1;
-                        if ($purge->{$oid}) {
+                        my $sref = $replace_map->{$oid};
+                        if (defined $sref) {
                                 push @oids, $oid;
-                                my $cmd = "M 100644 inline $path\ndata 0\n\n";
-                                push @buf, $cmd;
+                                my $n = length($$sref);
+                                push @buf, "M 100644 inline $path\ndata $n\n";
+                                push @buf, $$sref; # hope CoW works...
+                                push @buf, "\n";
                         } else {
                                 push @buf, $_;
                         }
@@ -547,11 +560,13 @@ sub purge_oids {
                         push @buf, $_ if $tree->{$path};
                 } elsif ($_ eq "\n") {
                         if (@oids) {
-                                my $out = join('', @buf);
-                                $out =~ s/^/# /sgm;
-                                warn "purge rewriting\n", $out, "\n";
-                                clean_purge_buffer(\@oids, \@buf);
-                                $npurge++;
+                                if (!$mime) {
+                                        my $out = join('', @buf);
+                                        $out =~ s/^/# /sgm;
+                                        warn "purge rewriting\n", $out, "\n";
+                                }
+                                rewrite_commit($self, \@oids, \@buf, $mime);
+                                $nreplace++;
                         }
                         $w->print(@buf, "\n") or wfail;
                         @buf = ();
@@ -569,28 +584,30 @@ sub purge_oids {
                 $w->print(@buf) or wfail;
         }
         die 'done\n not seen from fast-export' unless $done;
-        chomp(my $cmt = $self->get_mark(":$mark")) if $npurge;
+        chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
         $self->{nchg} = 0; # prevent _update_git_info until update-ref:
         $self->done;
         my @git = ('git', "--git-dir=$git->{git_dir}");
 
-        run_die([@git, qw(update-ref), $old, $tmp]) if $npurge;
+        run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;
 
         run_die([@git, qw(update-ref -d), $tmp]);
 
-        return if $npurge == 0;
+        return if $nreplace == 0;
 
         run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all)]);
+
+        # check that old OIDs are gone
         my $err = 0;
-        foreach my $oid (keys %$purge) {
+        foreach my $oid (keys %$replace_map) {
                 my @info = $git->check($oid);
                 if (@info) {
-                        warn "$oid not purged\n";
+                        warn "$oid not replaced\n";
                         $err++;
                 }
         }
         _update_git_info($self, 0);
-        die "Failed to purge $err object(s)\n" if $err;
+        die "Failed to replace $err object(s)\n" if $err;
         $cmt;
 }
 
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index c9330332..10f716ca 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -191,7 +191,7 @@ sub search ($;$) {
         my $srch = $self->{search} ||= eval {
                 _cleanup_later($self);
                 require PublicInbox::Search;
-                PublicInbox::Search->new($self, $self->{altid});
+                PublicInbox::Search->new($self);
         };
         ($over_only || eval { $srch->xdb }) ? $srch : undef;
 }
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 85778c44..fa412f8c 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -435,6 +435,26 @@ sub xref ($$$$) {
 sub set_nntp_headers ($$$$$) {
         my ($self, $hdr, $ng, $n, $mid) = @_;
 
+        # why? leafnode requires a Path: header for some inexplicable
+        # reason.  We'll fake the shortest one possible.
+        $hdr->header_set('Path', 'y');
+
+        # leafnode (and maybe other NNTP clients) have trouble dealing
+        # with v2 messages which have multiple Message-IDs (either due
+        # to our own content-based dedupe or buggy git-send-email versions).
+        my @mids = $hdr->header('Message-ID');
+        if (scalar(@mids) > 1) {
+                my $mid0 = "<$mid>";
+                $hdr->header_set('Message-ID', $mid0);
+                my @alt = $hdr->header('X-Alt-Message-ID');
+                my %seen = map { $_ => 1 } (@alt, $mid0);
+                foreach my $m (@mids) {
+                        next if $seen{$m}++;
+                        push @alt, $m;
+                }
+                $hdr->header_set('X-Alt-Message-ID', @alt);
+        }
+
         # clobber some
         my $xref = xref($self, $ng, $n, $mid);
         $hdr->header_set('Xref', $xref);
@@ -516,6 +536,13 @@ sub _header ($) {
         my $hdr = $_[0]->header_obj->as_string;
         utf8::encode($hdr);
         $hdr =~ s/(?<!\r)\n/\r\n/sg;
+
+        # for leafnode compatibility, we need to ensure Message-ID headers
+        # are only a single line.  We can't subclass Email::Simple::Header
+        # and override _default_fold_at in here, either; since that won't
+        # affect messages already in the archive.
+        $hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
+
         $hdr
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 9903f427..098c97cd 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -170,17 +170,12 @@ sub xdb ($) {
 }
 
 sub new {
-        my ($class, $mainrepo, $altid) = @_;
-        my $version = 1;
-        my $ibx = $mainrepo;
-        if (ref $ibx) {
-                $version = $ibx->{version} || 1;
-                $mainrepo = $ibx->{mainrepo};
-        }
+        my ($class, $ibx) = @_;
+        ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
         my $self = bless {
-                mainrepo => $mainrepo,
-                altid => $altid,
-                version => $version,
+                mainrepo => $ibx->{mainrepo},
+                altid => $ibx->{altid},
+                version => $ibx->{version} // 1,
         }, $class;
         my $dir = xdir($self, 1);
         $self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 99856286..a088ce75 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -30,31 +30,22 @@ my $xapianlevels = qr/\A(?:full|medium)\z/;
 
 sub new {
         my ($class, $ibx, $creat, $part) = @_;
+        ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
         my $levels = qr/\A(?:full|medium|basic)\z/;
-        my $mainrepo = $ibx; # for "public-inbox-index" w/o entry in config
-        my $git_dir = $mainrepo;
-        my ($altid, $git);
-        my $version = 1;
+        my $mainrepo = $ibx->{mainrepo};
+        my $version = $ibx->{version} || 1;
         my $indexlevel = 'full';
-        if (ref $ibx) {
-                $mainrepo = $ibx->{mainrepo};
-                $altid = $ibx->{altid};
-                $version = $ibx->{version} || 1;
-                if ($altid) {
-                        require PublicInbox::AltId;
-                        $altid = [ map {
-                                PublicInbox::AltId->new($ibx, $_);
-                        } @$altid ];
-                }
-                if ($ibx->{indexlevel}) {
-                        if ($ibx->{indexlevel} =~ $levels) {
-                                $indexlevel = $ibx->{indexlevel};
-                        } else {
-                                die("Invalid indexlevel $ibx->{indexlevel}\n");
-                        }
+        my $altid = $ibx->{altid};
+        if ($altid) {
+                require PublicInbox::AltId;
+                $altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ];
+        }
+        if ($ibx->{indexlevel}) {
+                if ($ibx->{indexlevel} =~ $levels) {
+                        $indexlevel = $ibx->{indexlevel};
+                } else {
+                        die("Invalid indexlevel $ibx->{indexlevel}\n");
                 }
-        } else { # FIXME: old tests: old tests
-                $ibx = { mainrepo => $git_dir, version => 1 };
         }
         $ibx = PublicInbox::InboxWritable->new($ibx);
         my $self = bless {
@@ -117,7 +108,11 @@ sub _xdb_acquire {
                 }
         }
         return unless defined $flag;
-        $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
+        my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) };
+        if ($@) {
+                die "Failed opening $dir: ", $@;
+        }
+        $self->{xdb} = $xdb;
 }
 
 sub add_val ($$$) {
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 5f3c8af8..96a26b15 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -25,12 +25,6 @@ sub wrap {
         bless { mid => $mid }, $class;
 }
 
-sub get {
-        my ($class, $head, $db, $mid) = @_;
-        my $doc_id = $head->get_docid;
-        load_expand(wrap($class, $mid), $db->get_document($doc_id));
-}
-
 sub get_val ($$) {
         my ($doc, $col) = @_;
         Search::Xapian::sortable_unserialise($doc->get_value($col));
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index a8c33ef4..76e61e86 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -11,7 +11,7 @@ use PublicInbox::SearchIdxPart;
 use PublicInbox::MIME;
 use PublicInbox::Git;
 use PublicInbox::Import;
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
 use PublicInbox::ContentId qw(content_id content_digest);
 use PublicInbox::Inbox;
 use PublicInbox::OverIdx;
@@ -23,7 +23,14 @@ use IO::Handle;
 # an estimate of the post-packed size to the raw uncompressed size
 my $PACKING_FACTOR = 0.4;
 
-# assume 2 cores if GNU nproc(1) is not available
+# SATA storage lags behind what CPUs are capable of, so relying on
+# nproc(1) can be misleading and having extra Xapian partions is a
+# waste of FDs and space.  It can also lead to excessive IO latency
+# and slow things down.  Users on NVME or other fast storage can
+# use the NPROC env or switches in our script/public-inbox-* programs
+# to increase Xapian partitions.
+our $NPROC_MAX_DEFAULT = 4;
+
 sub nproc_parts ($) {
         my ($creat_opt) = @_;
         if (ref($creat_opt) eq 'HASH') {
@@ -32,7 +39,14 @@ sub nproc_parts ($) {
                 }
         }
 
-        my $n = int($ENV{NPROC} || `nproc 2>/dev/null` || 2);
+        my $n = $ENV{NPROC};
+        if (!$n) {
+                chomp($n = `nproc 2>/dev/null`);
+                # assume 2 cores if GNU nproc(1) is not available
+                $n = 2 if !$n;
+                $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
+        }
+
         # subtract for the main process and git-fast-import
         $n -= 1;
         $n < 1 ? 1 : $n;
@@ -116,6 +130,18 @@ sub add {
         });
 }
 
+# indexes a message, returns true if checkpointing is needed
+sub do_idx ($$$$$$$) {
+        my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
+        $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
+        my $npart = $self->{partitions};
+        my $part = $num % $npart;
+        my $idx = idx_part($self, $part);
+        $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
+        my $n = $self->{transact_bytes} += $len;
+        $n >= (PublicInbox::SearchIdx::BATCH_BYTES * $npart);
+}
+
 sub _add {
         my ($self, $mime, $check_cb) = @_;
 
@@ -141,13 +167,7 @@ sub _add {
         $self->{last_commit}->[$self->{epoch_max}] = $cmt;
 
         my ($oid, $len, $msgref) = @{$im->{last_object}};
-        $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
-        my $nparts = $self->{partitions};
-        my $part = $num % $nparts;
-        my $idx = $self->idx_part($part);
-        $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
-        my $n = $self->{transact_bytes} += $len;
-        if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
+        if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
                 $self->checkpoint;
         }
 
@@ -291,26 +311,30 @@ sub idx_init {
         });
 }
 
-sub purge_oids ($$) {
-        my ($self, $purge) = @_; # $purge = { $object_id => 1, ... }
+# returns an array mapping [ epoch => latest_commit ]
+# latest_commit may be undef if nothing was done to that epoch
+# $replace_map = { $object_id => $strref, ... }
+sub _replace_oids ($$$) {
+        my ($self, $mime, $replace_map) = @_;
         $self->done;
         my $pfx = "$self->{-inbox}->{mainrepo}/git";
-        my $purges = [];
+        my $rewrites = []; # epoch => commit
         my $max = $self->{epoch_max};
 
         unless (defined($max)) {
                 defined(my $latest = git_dir_latest($self, \$max)) or return;
                 $self->{epoch_max} = $max;
         }
+
         foreach my $i (0..$max) {
                 my $git_dir = "$pfx/$i.git";
                 -d $git_dir or next;
                 my $git = PublicInbox::Git->new($git_dir);
                 my $im = $self->import_init($git, 0, 1);
-                $purges->[$i] = $im->purge_oids($purge);
+                $rewrites->[$i] = $im->replace_oids($mime, $replace_map);
                 $im->done;
         }
-        $purges;
+        $rewrites;
 }
 
 sub content_ids ($) {
@@ -333,25 +357,31 @@ sub content_matches ($$) {
         0
 }
 
-sub remove_internal ($$$$) {
-        my ($self, $mime, $cmt_msg, $purge) = @_;
+# used for removing or replacing (purging)
+sub rewrite_internal ($$;$$$) {
+        my ($self, $old_mime, $cmt_msg, $new_mime, $sref) = @_;
         $self->idx_init;
-        my $im = $self->importer unless $purge;
+        my ($im, $need_reindex, $replace_map);
+        if ($sref) {
+                $replace_map = {}; # oid => sref
+                $need_reindex = [] if $new_mime;
+        } else {
+                $im = $self->importer;
+        }
         my $over = $self->{over};
-        my $cids = content_ids($mime);
+        my $cids = content_ids($old_mime);
         my $parts = $self->{idx_parts};
-        my $mm = $self->{mm};
         my $removed;
-        my $mids = mids($mime->header_obj);
+        my $mids = mids($old_mime->header_obj);
 
         # We avoid introducing new blobs into git since the raw content
         # can be slightly different, so we do not need the user-supplied
         # message now that we have the mids and content_id
-        $mime = undef;
+        $old_mime = undef;
         my $mark;
 
         foreach my $mid (@$mids) {
-                my %gone;
+                my %gone; # num => [ smsg, raw ]
                 my ($id, $prev);
                 while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
                         my $msg = get_blob($self, $smsg);
@@ -374,17 +404,21 @@ sub remove_internal ($$$$) {
                 }
                 foreach my $num (keys %gone) {
                         my ($smsg, $orig) = @{$gone{$num}};
-                        $mm->num_delete($num);
                         # $removed should only be set once assuming
                         # no bugs in our deduplication code:
                         $removed = $smsg;
                         my $oid = $smsg->{blob};
-                        if ($purge) {
-                                $purge->{$oid} = 1;
+                        if ($replace_map) {
+                                $replace_map->{$oid} = $sref;
                         } else {
                                 ($mark, undef) = $im->remove($orig, $cmt_msg);
                         }
                         $orig = undef;
+                        if ($need_reindex) { # ->replace
+                                push @$need_reindex, $smsg;
+                        } else { # ->purge or ->remove
+                                $self->{mm}->num_delete($num);
+                        }
                         unindex_oid_remote($self, $oid, $mid);
                 }
         }
@@ -393,8 +427,9 @@ sub remove_internal ($$$$) {
                 my $cmt = $im->get_mark($mark);
                 $self->{last_commit}->[$self->{epoch_max}] = $cmt;
         }
-        if ($purge && scalar keys %$purge) {
-                return purge_oids($self, $purge);
+        if ($replace_map && scalar keys %$replace_map) {
+                my $rewrites = _replace_oids($self, $new_mime, $replace_map);
+                return { rewrites => $rewrites, need_reindex => $need_reindex };
         }
         $removed;
 }
@@ -403,22 +438,125 @@ sub remove_internal ($$$$) {
 sub remove {
         my ($self, $mime, $cmt_msg) = @_;
         $self->{-inbox}->with_umask(sub {
-                remove_internal($self, $mime, $cmt_msg, undef);
+                rewrite_internal($self, $mime, $cmt_msg);
         });
 }
 
+sub _replace ($$;$$) {
+        my ($self, $old_mime, $new_mime, $sref) = @_;
+        my $rewritten = $self->{-inbox}->with_umask(sub {
+                rewrite_internal($self, $old_mime, undef, $new_mime, $sref);
+        }) or return;
+
+        my $rewrites = $rewritten->{rewrites};
+        # ->done is called if there are rewrites since we gc+prune from git
+        $self->idx_init if @$rewrites;
+
+        for my $i (0..$#$rewrites) {
+                defined(my $cmt = $rewrites->[$i]) or next;
+                $self->{last_commit}->[$i] = $cmt;
+        }
+        $rewritten;
+}
+
 # public
 sub purge {
         my ($self, $mime) = @_;
-        my $purges = $self->{-inbox}->with_umask(sub {
-                remove_internal($self, $mime, undef, {});
-        }) or return;
-        $self->idx_init if @$purges; # ->done is called on purges
-        for my $i (0..$#$purges) {
-                defined(my $cmt = $purges->[$i]) or next;
-                $self->{last_commit}->[$i] = $cmt;
+        my $rewritten = _replace($self, $mime, undef, \'') or return;
+        $rewritten->{rewrites}
+}
+
+# returns the git object_id of $fh, does not write the object to FS
+sub git_hash_raw ($$) {
+        my ($self, $raw) = @_;
+        # grab the expected OID we have to reindex:
+        open my $tmp_fh, '+>', undef or die "failed to open tmp: $!";
+        $tmp_fh->autoflush(1);
+        print $tmp_fh $$raw or die "print \$tmp_fh: $!";
+        sysseek($tmp_fh, 0, 0) or die "seek failed: $!";
+
+        my ($r, $w);
+        pipe($r, $w) or die "failed to create pipe: $!";
+        my $rdr = { 0 => fileno($tmp_fh), 1 => fileno($w) };
+        my $git_dir = $self->{-inbox}->git->{git_dir};
+        my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
+        my $pid = spawn($cmd, undef, $rdr);
+        close $w;
+        local $/ = "\n";
+        chomp(my $oid = <$r>);
+        waitpid($pid, 0) == $pid or die "git hash-object did not finish";
+        die "git hash-object failed: $?" if $?;
+        $oid =~ /\A[a-f0-9]{40}\z/ or die "OID not expected: $oid";
+        $oid;
+}
+
+sub _check_mids_match ($$$) {
+        my ($old_list, $new_list, $hdrs) = @_;
+        my %old_mids = map { $_ => 1 } @$old_list;
+        my %new_mids = map { $_ => 1 } @$new_list;
+        my @old = keys %old_mids;
+        my @new = keys %new_mids;
+        my $err = "$hdrs may not be changed when replacing\n";
+        die $err if scalar(@old) != scalar(@new);
+        delete @new_mids{@old};
+        delete @old_mids{@new};
+        die $err if (scalar(keys %old_mids) || scalar(keys %new_mids));
+}
+
+# Changing Message-IDs or References with ->replace isn't supported.
+# The rules for dealing with messages with multiple or conflicting
+# Message-IDs are pretty complex and rethreading hasn't been fully
+# implemented, yet.
+sub check_mids_match ($$) {
+        my ($old_mime, $new_mime) = @_;
+        my $old = $old_mime->header_obj;
+        my $new = $new_mime->header_obj;
+        _check_mids_match(mids($old), mids($new), 'Message-ID(s)');
+        _check_mids_match(references($old), references($new),
+                        'References/In-Reply-To');
+}
+
+# public
+sub replace ($$$) {
+        my ($self, $old_mime, $new_mime) = @_;
+
+        check_mids_match($old_mime, $new_mime);
+
+        # mutt will always add Content-Length:, Status:, Lines: when editing
+        PublicInbox::Import::drop_unwanted_headers($new_mime);
+
+        my $raw = $new_mime->as_string;
+        my $expect_oid = git_hash_raw($self, \$raw);
+        my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return;
+        my $need_reindex = $rewritten->{need_reindex};
+
+        # just in case we have bugs in deduplication code:
+        my $n = scalar(@$need_reindex);
+        if ($n > 1) {
+                my $list = join(', ', map {
+                                        "$_->{num}: <$_->{mid}>"
+                                } @$need_reindex);
+                warn <<"";
+W: rewritten $n messages matching content of original message (expected: 1).
+W: possible bug in public-inbox, NNTP article IDs and Message-IDs follow:
+W: $list
+
+        }
+
+        # make sure we really got the OID:
+        my ($oid, $type, $len) = $self->{-inbox}->git->check($expect_oid);
+        $oid eq $expect_oid or die "BUG: $expect_oid not found after replace";
+
+        # don't leak FDs to Xapian:
+        $self->{-inbox}->git->cleanup;
+
+        # reindex modified messages:
+        for my $smsg (@$need_reindex) {
+                my $num = $smsg->{num};
+                my $mid0 = $smsg->{mid};
+                do_idx($self, \$raw, $new_mime, $len, $num, $oid, $mid0);
         }
-        $purges;
+        $rewritten->{rewrites};
 }
 
 sub last_commit_part ($$;$) {
@@ -772,15 +910,8 @@ sub reindex_oid ($$$$) {
         }
         $sync->{mm_tmp}->mid_delete($mid0) or
                 die "failed to delete <$mid0> for article #$num\n";
-
-        $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
-        my $nparts = $self->{partitions};
-        my $part = $num % $nparts;
-        my $idx = $self->idx_part($part);
-        $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
-        my $n = $self->{transact_bytes} += $len;
         $sync->{nr}++;
-        if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
+        if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
                 $git->cleanup;
                 $sync->{mm_tmp}->atfork_prepare;
                 $self->done; # release lock
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 7ea98204..e4682636 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -74,7 +74,7 @@ sub call {
         my $method = $env->{REQUEST_METHOD};
 
         if ($method eq 'POST') {
-                if ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
+                if ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
                                         (git-upload-pack)\z!x) {
                         my ($part, $path) = ($2, $3);
                         return invalid_inbox($ctx, $1) ||
@@ -88,7 +88,7 @@ sub call {
         }
 
         # top-level indices and feeds
-        if ($path_info eq '/') {
+        if ($path_info eq '/' || $path_info eq '/manifest.js.gz') {
                 www_listing($self)->call($env);
         } elsif ($path_info =~ m!$INBOX_RE\z!o) {
                 invalid_inbox($ctx, $1) || r301($ctx, $1);
@@ -98,7 +98,7 @@ sub call {
                 invalid_inbox($ctx, $1) || get_atom($ctx);
         } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
                 invalid_inbox($ctx, $1) || get_new($ctx);
-        } elsif ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
+        } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
                                 ($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
                 my ($part, $path) = ($2, $3);
                 invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path);
@@ -126,6 +126,8 @@ sub call {
                 get_text($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) {
                 get_css($ctx, $1, $2);
+        } elsif ($path_info =~ m!$INBOX_RE/manifest\.js\.gz\z!o) {
+                get_inbox_manifest($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
                 get_vcs_object($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/
@@ -490,6 +492,15 @@ sub www_listing {
         }
 }
 
+# GET $INBOX/manifest.js.gz
+sub get_inbox_manifest ($$$) {
+        my ($ctx, $inbox, $key) = @_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        require PublicInbox::WwwListing;
+        PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]);
+}
+
 sub get_attach {
         my ($ctx, $idx, $fn) = @_;
         require PublicInbox::WwwAttach;
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index e1473b3d..e2724cc4 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -9,26 +9,33 @@ use warnings;
 use PublicInbox::Hval qw(ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::View;
+use bytes ();
+use HTTP::Date qw(time2str);
+require Digest::SHA;
+require File::Spec;
+{ no warnings 'once'; *try_cat = *PublicInbox::Inbox::try_cat };
 
-sub list_all ($$) {
-        my ($self, undef) = @_;
+sub list_all ($$$) {
+        my ($self, $env, $hide_key) = @_;
         my @list;
         $self->{pi_config}->each_inbox(sub {
                 my ($ibx) = @_;
-                push @list, $ibx unless $ibx->{-hide}->{www};
+                push @list, $ibx unless $ibx->{-hide}->{$hide_key};
         });
         \@list;
 }
 
-sub list_match_domain ($$) {
-        my ($self, $env) = @_;
+sub list_match_domain ($$$) {
+        my ($self, $env, $hide_key) = @_;
         my @list;
         my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME};
         $host =~ s/:[0-9]+\z//;
         my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i;
         $self->{pi_config}->each_inbox(sub {
                 my ($ibx) = @_;
-                push @list, $ibx if !$ibx->{-hide}->{www} && $ibx->{url} =~ $re;
+                if (!$ibx->{-hide}->{$hide_key} && $ibx->{url} =~ $re) {
+                        push @list, $ibx;
+                }
         });
         \@list;
 }
@@ -42,21 +49,27 @@ my %VALID = (
         404 => *list_404,
 );
 
+sub set_cb ($$$) {
+        my ($pi_config, $k, $default) = @_;
+        my $v = $pi_config->{lc $k} // $default;
+        $VALID{$v} || do {
+                warn <<"";
+`$v' is not a valid value for `$k'
+$k be one of `all', `match=domain', or `404'
+
+                $VALID{$default};
+        };
+}
+
 sub new {
         my ($class, $www) = @_;
-        my $k = 'publicinbox.wwwListing';
         my $pi_config = $www->{pi_config};
-        my $v = $pi_config->{lc($k)} // 404;
         bless {
                 pi_config => $pi_config,
                 style => $www->style("\0"),
-                list_cb => $VALID{$v} || do {
-                        warn <<"";
-`$v' is not a valid value for `$k'
-$k be one of `all', `match=domain', or `404'
-
-                        *list_404;
-                },
+                www_cb => set_cb($pi_config, 'publicInbox.wwwListing', 404),
+                manifest_cb => set_cb($pi_config, 'publicInbox.grokManifest',
+                                        'match=domain'),
         }, $class;
 }
 
@@ -74,22 +87,20 @@ sub ibx_entry {
         $tmp;
 }
 
-# not really a stand-alone PSGI app, but maybe it could be...
-sub call {
-        my ($self, $env) = @_;
-        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
-        my $list = $self->{list_cb}->($self, $env);
-        my $code = 404;
+sub html ($$) {
+        my ($env, $list) = @_;
         my $title = 'public-inbox';
         my $out = '';
+        my $code = 404;
         if (@$list) {
+                $title .= ' - listing';
+                $code = 200;
+
                 # Swartzian transform since ->modified is expensive
                 @$list = sort {
                         $b->[0] <=> $a->[0]
                 } map { [ $_->modified, $_ ] } @$list;
 
-                $code = 200;
-                $title .= ' - listing';
                 my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list);
                 my $l = PublicInbox::Linkify->new;
                 $l->linkify_1($tmp);
@@ -98,7 +109,122 @@ sub call {
         $out = "<html><head><title>$title</title></head><body>" . $out;
         $out .= '<pre>'. PublicInbox::WwwStream::code_footer($env) .
                 '</pre></body></html>';
-        [ $code, $h, [ $out ] ]
+
+        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+        [ $code, $h, [ $out ] ];
+}
+
+my $json;
+sub _json () {
+        for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
+                eval "require $mod" or next;
+                # ->ascii encodes non-ASCII to "\uXXXX"
+                return $mod->new->ascii(1);
+        }
+        die;
+}
+
+sub fingerprint ($) {
+        my ($git) = @_;
+        my $fh = $git->popen('show-ref') or
+                die "popen($git->{git_dir} show-ref) failed: $!";
+
+        my $dig = Digest::SHA->new(1);
+        while (read($fh, my $buf, 65536)) {
+                $dig->add($buf);
+        }
+        close $fh;
+        return if $?; # empty, uninitialized git repo
+        $dig->hexdigest;
+}
+
+sub manifest_add ($$;$) {
+        my ($manifest, $ibx, $epoch) = @_;
+        my $url_path = "/$ibx->{name}";
+        my $git_dir = $ibx->{mainrepo};
+        if (defined $epoch) {
+                $git_dir .= "/git/$epoch.git";
+                $url_path .= "/git/$epoch.git";
+        }
+        return unless -d $git_dir;
+        my $git = PublicInbox::Git->new($git_dir);
+        my $fingerprint = fingerprint($git) or return; # no empty repos
+
+        chomp(my $owner = $git->qx('config', 'gitweb.owner'));
+        chomp(my $desc = try_cat("$git_dir/description"));
+        $owner = undef if $owner eq '';
+        $desc = 'Unnamed repository' if $desc eq '';
+
+        my $reference;
+        chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
+        if ($alt) {
+                # n.b.: GitPython doesn't seem to handle comments or C-quoted
+                # strings like native git does; and we don't for now, either.
+                my @alt = split(/\n+/, $alt);
+
+                # grokmirror only supports 1 alternate for "reference",
+                if (scalar(@alt) == 1) {
+                        my $objdir = "$git_dir/objects";
+                        $reference = File::Spec->rel2abs($alt[0], $objdir);
+                        $reference =~ s!/[^/]+/?\z!!; # basename
+                }
+        }
+        $manifest->{-abs2urlpath}->{$git_dir} = $url_path;
+        my $modified = $git->modified;
+        if ($modified > $manifest->{-mtime}) {
+                $manifest->{-mtime} = $modified;
+        }
+        $manifest->{$url_path} = {
+                owner => $owner,
+                reference => $reference,
+                description => $desc,
+                modified => $modified,
+                fingerprint => $fingerprint,
+        };
+}
+
+# manifest.js.gz
+sub js ($$) {
+        my ($env, $list) = @_;
+        eval { require IO::Compress::Gzip } or return [ 404, [], [] ];
+
+        my $manifest = { -abs2urlpath => {}, -mtime => 0 };
+        for my $ibx (@$list) {
+                if (defined(my $max = $ibx->max_git_part)) {
+                        for my $epoch (0..$max) {
+                                manifest_add($manifest, $ibx, $epoch);
+                        }
+                } else {
+                        manifest_add($manifest, $ibx);
+                }
+        }
+        my $abs2urlpath = delete $manifest->{-abs2urlpath};
+        my $mtime = delete $manifest->{-mtime};
+        while (my ($url_path, $repo) = each %$manifest) {
+                defined(my $abs = $repo->{reference}) or next;
+                $repo->{reference} = $abs2urlpath->{$abs};
+        }
+        my $out;
+        IO::Compress::Gzip::gzip(\(($json ||= _json())->encode($manifest)) =>
+                                 \$out);
+        $manifest = undef;
+        [ 200, [ qw(Content-Type application/gzip),
+                 'Last-Modified', time2str($mtime),
+                 'Content-Length', bytes::length($out) ], [ $out ] ];
+}
+
+# not really a stand-alone PSGI app, but maybe it could be...
+sub call {
+        my ($self, $env) = @_;
+
+        if ($env->{PATH_INFO} eq '/manifest.js.gz') {
+                # grokmirror uses relative paths, so it's domain-dependent
+                my $list = $self->{manifest_cb}->($self, $env, 'manifest');
+                js($env, $list);
+        } else { # /
+                my $list = $self->{www_cb}->($self, $env, 'www');
+                html($env, $list);
+        }
 }
 
 1;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index dad080c8..e1c6fe3a 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -17,34 +17,66 @@ our @COMPACT_OPT = qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F);
 
 sub commit_changes ($$$) {
         my ($ibx, $tmp, $opt) = @_;
-
+        my $new_parts = $opt->{reshard};
         my $reindex = $opt->{reindex};
         my $im = $ibx->importer(0);
         $im->lock_acquire if !$opt->{-coarse_lock};
 
         $SIG{INT} or die 'BUG: $SIG{INT} not handled';
+        my @old_part;
 
         while (my ($old, $new) = each %$tmp) {
-                my @st = stat($old) or die "failed to stat($old): $!\n";
+                my @st = stat($old);
+                if (!@st && !defined($opt->{reshard})) {
+                        die "failed to stat($old): $!";
+                }
 
                 my $over = "$old/over.sqlite3";
                 if (-f $over) { # only for v1, v2 over is untouched
+                        defined $new or die "BUG: $over exists when culling v2";
                         $over = PublicInbox::Over->new($over);
                         my $tmp_over = "$new/over.sqlite3";
                         $over->connect->sqlite_backup_to_file($tmp_over);
                         $over = undef;
                 }
-                chmod($st[2] & 07777, $new) or die "chmod $old: $!\n";
 
+                if (!defined($new)) { # culled partition
+                        push @old_part, $old;
+                        next;
+                }
+
+                if (@st) {
+                        chmod($st[2] & 07777, $new) or die "chmod $old: $!\n";
+                        rename($old, "$new/old") or
+                                        die "rename $old => $new/old: $!\n";
+                }
                 # Xtmpdir->DESTROY won't remove $new after this:
-                rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
                 rename($new, $old) or die "rename $new => $old: $!\n";
-                my $prev = "$old/old";
-                remove_tree($prev) or die "failed to remove $prev: $!\n";
+                if (@st) {
+                        my $prev = "$old/old";
+                        remove_tree($prev) or
+                                die "failed to remove $prev: $!\n";
+                }
         }
+        remove_tree(@old_part);
         $tmp->done;
         if (!$opt->{-coarse_lock}) {
                 $opt->{-skip_lock} = 1;
+
+                if ($im->can('count_partitions')) {
+                        my $pr = $opt->{-progress};
+                        my $n = $im->count_partitions;
+                        if (defined $new_parts && $n != $new_parts) {
+                                die
+"BUG: counted $n partitions after repartioning to $new_parts";
+                        }
+                        my $prev = $im->{partitions};
+                        if ($pr && $prev != $n) {
+                                $pr->("partition count changed: $prev => $n\n");
+                                $im->{partitions} = $n;
+                        }
+                }
+
                 PublicInbox::Admin::index_inbox($ibx, $opt);
                 # implicit lock_release
         } else {
@@ -139,31 +171,59 @@ sub run {
         my $tmp = PublicInbox::Xtmpdirs->new;
         my $v = $ibx->{version} ||= 1;
         my @q;
+        my $new_parts = $opt->{reshard};
+        if (defined $new_parts && $new_parts <= 0) {
+                die "--reshard must be a positive number\n";
+        }
 
         # we want temporary directories to be as deep as possible,
         # so v2 partitions can keep "xap$SCHEMA_VERSION" on a separate FS.
         if ($v == 1) {
+                if (defined $new_parts) {
+                        warn
+"--reshard=$new_parts ignored for v1 $ibx->{mainrepo}\n";
+                }
                 my $old_parent = dirname($old);
                 same_fs_or_die($old_parent, $old);
-                $tmp->{$old} = tempdir('xapcmd-XXXXXXXX', DIR => $old_parent);
-                push @q, [ $old, $tmp->{$old} ];
+                my $v = PublicInbox::Search::SCHEMA_VERSION();
+                my $wip = tempdir("xapian$v-XXXXXXXX", DIR => $old_parent);
+                $tmp->{$old} = $wip;
+                push @q, [ $old, $wip ];
         } else {
                 opendir my $dh, $old or die "Failed to opendir $old: $!\n";
+                my @old_parts;
                 while (defined(my $dn = readdir($dh))) {
                         if ($dn =~ /\A[0-9]+\z/) {
-                                my $tmpl = "$dn-XXXXXXXX";
-                                my $dst = tempdir($tmpl, DIR => $old);
-                                same_fs_or_die($old, $dst);
-                                my $cur = "$old/$dn";
-                                push @q, [ $cur, $dst ];
-                                $tmp->{$cur} = $dst;
+                                push @old_parts, $dn;
                         } elsif ($dn eq '.' || $dn eq '..') {
                         } elsif ($dn =~ /\Aover\.sqlite3/) {
                         } else {
                                 warn "W: skipping unknown dir: $old/$dn\n"
                         }
                 }
-                die "No Xapian parts found in $old\n" unless @q;
+                die "No Xapian parts found in $old\n" unless @old_parts;
+
+                my ($src, $max_part);
+                if (!defined($new_parts) || $new_parts == scalar(@old_parts)) {
+                        # 1:1 copy
+                        $max_part = scalar(@old_parts) - 1;
+                } else {
+                        # M:N copy
+                        $max_part = $new_parts - 1;
+                        $src = [ map { "$old/$_" } @old_parts ];
+                }
+                foreach my $dn (0..$max_part) {
+                        my $tmpl = "$dn-XXXXXXXX";
+                        my $wip = tempdir($tmpl, DIR => $old);
+                        same_fs_or_die($old, $wip);
+                        my $cur = "$old/$dn";
+                        push @q, [ $src // $cur , $wip ];
+                        $tmp->{$cur} = $wip;
+                }
+                # mark old parts to be unlinked
+                if ($src) {
+                        $tmp->{$_} ||= undef for @$src;
+                }
         }
         my $im = $ibx->importer(0);
         my $max = $opt->{jobs} || scalar(@q);
@@ -197,10 +257,11 @@ sub cpdb_retryable ($$) {
 }
 
 sub progress_pfx ($) {
-        my @p = split('/', $_[0]);
+        my ($wip) = @_; # tempdir v2: ([0-9])+-XXXXXXXX
+        my @p = split('/', $wip);
 
         # return "xap15/0" for v2, or "xapian15" for v1:
-        ($p[-1] =~ /\A[0-9]+\z/) ? "$p[-2]/$p[-1]" : $p[-1];
+        ($p[-1] =~ /\A([0-9]+)/) ? "$p[-2]/$1" : $p[-1];
 }
 
 # xapian-compact wrapper
@@ -243,12 +304,74 @@ sub compact ($$) {
         }
 }
 
+sub cpdb_loop ($$$;$$) {
+        my ($src, $dst, $pr_data, $cur_part, $new_parts) = @_;
+        my ($pr, $fmt, $nr, $pfx);
+        if ($pr_data) {
+                $pr = $pr_data->{pr};
+                $fmt = $pr_data->{fmt};
+                $nr = \($pr_data->{nr});
+                $pfx = $pr_data->{pfx};
+        }
+
+        my ($it, $end);
+        do {
+                eval {
+                        $it = $src->postlist_begin('');
+                        $end = $src->postlist_end('');
+                };
+        } while (cpdb_retryable($src, $pfx));
+
+        do {
+                eval {
+                        for (; $it != $end; $it++) {
+                                my $docid = $it->get_docid;
+                                if (defined $new_parts) {
+                                        my $dst_part = $docid % $new_parts;
+                                        next if $dst_part != $cur_part;
+                                }
+                                my $doc = $src->get_document($docid);
+                                $dst->replace_document($docid, $doc);
+                                if ($pr_data && !(++$$nr  & 1023)) {
+                                        $pr->(sprintf($fmt, $$nr));
+                                }
+                        }
+
+                        # unlike copydatabase(1), we don't copy spelling
+                        # and synonym data (or other user metadata) since
+                        # the Perl APIs don't expose iterators for them
+                        # (and public-inbox does not use those features)
+                };
+        } while (cpdb_retryable($src, $pfx));
+}
+
 # Like copydatabase(1), this is horribly slow; and it doesn't seem due
 # to the overhead of Perl.
 sub cpdb ($$) {
         my ($args, $opt) = @_;
         my ($old, $new) = @$args;
-        my $src = Search::Xapian::Database->new($old);
+        my ($src, $cur_part);
+        my $new_parts;
+        if (ref($old) eq 'ARRAY') {
+                ($cur_part) = ($new =~ m!xap[0-9]+/([0-9]+)\b!);
+                defined $cur_part or
+                        die "BUG: could not extract partition # from $new";
+                $new_parts = $opt->{reshard};
+                defined $new_parts or die 'BUG: got array src w/o --partition';
+
+                # repartitioning, M:N copy means have full read access
+                foreach (@$old) {
+                        if ($src) {
+                                my $sub = Search::Xapian::Database->new($_);
+                                $src->add_database($sub);
+                        } else {
+                                $src = Search::Xapian::Database->new($_);
+                        }
+                }
+        } else {
+                $src = Search::Xapian::Database->new($old);
+        }
+
         my ($xtmp, $tmp);
         if ($opt->{compact}) {
                 my $newdir = dirname($new);
@@ -264,10 +387,9 @@ sub cpdb ($$) {
         # of other bugs:
         my $creat = Search::Xapian::DB_CREATE();
         my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat);
-        my ($it, $end);
-        my ($nr, $tot, $fmt); # progress output
         my $pr = $opt->{-progress};
-        my $pfx = $opt->{-progress_pfx} = progress_pfx($old);
+        my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
+        my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
 
         do {
                 eval {
@@ -276,44 +398,45 @@ sub cpdb ($$) {
                         $dst->set_metadata('last_commit', $lc) if $lc;
 
                         # only the first xapian partition (0) gets 'indexlevel'
-                        if ($old =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\z!) {
+                        if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) {
                                 my $l = $src->get_metadata('indexlevel');
                                 if ($l eq 'medium') {
                                         $dst->set_metadata('indexlevel', $l);
                                 }
                         }
-
-                        $it = $src->postlist_begin('');
-                        $end = $src->postlist_end('');
-                        if ($pr) {
-                                $nr = 0;
-                                $tot = $src->get_doccount;
-                                $fmt = "$pfx % ".length($tot)."u/$tot\n";
-                                $pr->("$pfx copying $tot documents\n");
-                        }
-                };
-        } while (cpdb_retryable($src, $pfx));
-
-        do {
-                eval {
-                        while ($it != $end) {
-                                my $docid = $it->get_docid;
-                                my $doc = $src->get_document($docid);
-                                $dst->replace_document($docid, $doc);
-                                $it->inc;
-                                if ($pr && !(++$nr & 1023)) {
-                                        $pr->(sprintf($fmt, $nr));
+                        if ($pr_data) {
+                                my $tot = $src->get_doccount;
+
+                                # we can only estimate when repartitioning,
+                                # because removed spam causes slight imbalance
+                                my $est = '';
+                                if (defined $cur_part && $new_parts > 1) {
+                                        $tot = int($tot/$new_parts);
+                                        $est = 'around ';
                                 }
+                                my $fmt = "$pfx % ".length($tot)."u/$tot\n";
+                                $pr->("$pfx copying $est$tot documents\n");
+                                $pr_data->{fmt} = $fmt;
+                                $pr_data->{total} = $tot;
                         }
-
-                        # unlike copydatabase(1), we don't copy spelling
-                        # and synonym data (or other user metadata) since
-                        # the Perl APIs don't expose iterators for them
-                        # (and public-inbox does not use those features)
                 };
         } while (cpdb_retryable($src, $pfx));
 
-        $pr->(sprintf($fmt, $nr)) if $pr;
+        if (defined $new_parts) {
+                # we rely on document IDs matching NNTP article number,
+                # so we can't have the combined DB support rewriting
+                # document IDs.  Thus we iterate through each shard
+                # individually.
+                $src = undef;
+                foreach (@$old) {
+                        my $old = Search::Xapian::Database->new($_);
+                        cpdb_loop($old, $dst, $pr_data, $cur_part, $new_parts);
+                }
+        } else {
+                cpdb_loop($src, $dst, $pr_data);
+        }
+
+        $pr->(sprintf($pr_data->{fmt}, $pr_data->{nr})) if $pr;
         return unless $xtmp;
 
         $src = $dst = undef; # flushes and closes
@@ -358,6 +481,7 @@ sub DESTROY {
         my $owner_pid = delete $owner{"$self"} or return;
         return if $owner_pid != $$;
         foreach my $new (values %$self) {
+                defined $new or next; # may be undef if repartitioning
                 remove_tree($new) unless -d "$new/old";
         }
         done($self);