about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Admin.pm | 13
-rw-r--r-- lib/PublicInbox/Config.pm | 4
-rw-r--r-- lib/PublicInbox/Import.pm | 31
-rw-r--r-- lib/PublicInbox/Inbox.pm | 27
-rw-r--r-- lib/PublicInbox/InboxWritable.pm | 4
-rw-r--r-- lib/PublicInbox/Mbox.pm | 7
-rw-r--r-- lib/PublicInbox/NNTP.pm | 2
-rw-r--r-- lib/PublicInbox/Qspawn.pm | 4
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 28
-rw-r--r-- lib/PublicInbox/SearchThread.pm | 67
-rw-r--r-- lib/PublicInbox/SearchView.pm | 4
-rw-r--r-- lib/PublicInbox/TestCommon.pm | 26
-rw-r--r-- lib/PublicInbox/V2Writable.pm | 20
-rw-r--r-- lib/PublicInbox/View.pm | 55
-rw-r--r-- lib/PublicInbox/WatchMaildir.pm | 14
-rw-r--r-- lib/PublicInbox/WwwAtomStream.pm | 5
-rw-r--r-- lib/PublicInbox/WwwText.pm | 4
17 files changed, 188 insertions, 127 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 336b7d4c..62ddbe82 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -71,7 +71,7 @@ sub resolve_inboxes ($;$$) {
         my ($argv, $opt, $cfg) = @_;
         $opt ||= {};
 
-        $cfg //= eval { PublicInbox::Config->new };
+        $cfg //= PublicInbox::Config->new;
         if ($opt->{all}) {
                 my $cfgfile = PublicInbox::Config::default_file();
                 $cfg or die "--all specified, but $cfgfile not readable\n";
@@ -234,4 +234,15 @@ sub progress_prepare ($) {
         }
 }
 
+# same unit factors as git:
+sub parse_unsigned ($) {
+        my ($max_size) = @_;
+
+        $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return;
+        my ($n, $unit_factor) = ($1, $2 // '');
+        my %u = ( k => 1024, m => 1024**2, g => 1024**3 );
+        $$max_size = $n * ($u{lc($unit_factor)} // 1);
+        1;
+}
+
 1;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 917939ca..458f29b2 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -367,7 +367,7 @@ sub _fill {
         my $ibx = {};
 
         foreach my $k (qw(inboxdir filter newsgroup
-                        watch watchheader httpbackendmax
+                        watch httpbackendmax
                         replyto feedmax nntpserver indexlevel)) {
                 my $v = $self->{"$pfx.$k"};
                 $ibx->{$k} = $v if defined $v;
@@ -388,7 +388,7 @@ sub _fill {
         # TODO: more arrays, we should support multi-value for
         # more things to encourage decentralization
         foreach my $k (qw(address altid nntpmirror coderepo hide listid url
-                        infourl)) {
+                        infourl watchheader)) {
                 if (defined(my $v = $self->{"$pfx.$k"})) {
                         $ibx->{$k} = _array($v);
                 }
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index c72c1e92..95d654f6 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -440,14 +440,31 @@ sub run_die ($;$$) {
         $? == 0 or die join(' ', @$cmd) . " failed: $?\n";
 }
 
+my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n",
+                'description' => <<EOD,
+Unnamed repository; edit this file 'description' to name the repository.
+EOD
+                'config' => <<EOC);
+[core]
+        repositoryFormatVersion = 0
+        filemode = true
+        bare = true
+[repack]
+        writeBitmaps = true
+EOC
+
 sub init_bare {
-        my ($dir) = @_;
-        my @cmd = (qw(git init --bare -q), $dir);
-        run_die(\@cmd);
-        # set a reasonable default:
-        @cmd = (qw/git config/, "--file=$dir/config",
-                'repack.writeBitmaps', 'true');
-        run_die(\@cmd);
+        my ($dir) = @_; # or self
+        $dir = $dir->{git}->{git_dir} if ref($dir);
+        require File::Path;
+        File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
+        for (my $i = 0; $i < @INIT_FILES; $i++) {
+                my $f = $dir.'/'.$INIT_FILES[$i++];
+                next if -f $f;
+                open my $fh, '>', $f or die "open $f: $!";
+                print $fh $INIT_FILES[$i] or die "print $f: $!";
+                close $fh or die "close $f: $!";
+        }
 }
 
 sub done {
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 95ffd039..186eb420 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -82,7 +82,7 @@ sub _set_uint ($$$) {
 sub _set_limiter ($$$) {
         my ($self, $pi_config, $pfx) = @_;
         my $lkey = "-${pfx}_limiter";
-        $self->{$lkey} ||= eval {
+        $self->{$lkey} ||= do {
                 # full key is: publicinbox.$NAME.httpbackendmax
                 my $mkey = $pfx.'max';
                 my $val = $self->{$mkey} or return;
@@ -130,7 +130,7 @@ sub version { $_[0]->{version} // 1 }
 sub git_epoch {
         my ($self, $epoch) = @_;
         $self->version == 2 or return;
-        $self->{"$epoch.git"} ||= eval {
+        $self->{"$epoch.git"} ||= do {
                 my $git_dir = "$self->{inboxdir}/git/$epoch.git";
                 my $g = PublicInbox::Git->new($git_dir);
                 $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
@@ -141,7 +141,7 @@ sub git_epoch {
 
 sub git {
         my ($self) = @_;
-        $self->{git} ||= eval {
+        $self->{git} ||= do {
                 my $git_dir = $self->{inboxdir};
                 $git_dir .= '/all.git' if $self->version == 2;
                 my $g = PublicInbox::Git->new($git_dir);
@@ -219,19 +219,22 @@ sub try_cat {
 
 sub description {
         my ($self) = @_;
-        $self->{description} //= do {
+        ($self->{description} //= do {
                 my $desc = try_cat("$self->{inboxdir}/description");
                 local $/ = "\n";
                 chomp $desc;
                 $desc =~ s/\s+/ /smg;
-                $desc eq '' ? '($INBOX_DIR/description missing)' : $desc;
-        };
+                $desc eq '' ? undef : $desc;
+        }) // '($INBOX_DIR/description missing)';
 }
 
 sub cloneurl {
         my ($self) = @_;
-        $self->{cloneurl} //=
-                [ split(/\s+/s, try_cat("$self->{inboxdir}/cloneurl")) ];
+        ($self->{cloneurl} //= do {
+                my $s = try_cat("$self->{inboxdir}/cloneurl");
+                my @urls = split(/\s+/s, $s);
+                scalar(@urls) ? \@urls : undef
+        }) // [];
 }
 
 sub base_url {
@@ -308,9 +311,7 @@ sub nntp_usable {
 # for v1 users w/o SQLite only
 sub msg_by_path ($$;$) {
         my ($self, $path, $ref) = @_;
-        my $str = git($self)->cat_file('HEAD:'.$path, $ref);
-        $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
-        $str;
+        git($self)->cat_file('HEAD:'.$path, $ref);
 }
 
 sub msg_by_smsg ($$;$) {
@@ -321,9 +322,7 @@ sub msg_by_smsg ($$;$) {
         return unless defined $smsg;
         defined(my $blob = $smsg->{blob}) or return;
 
-        my $str = git($self)->cat_file($blob, $ref);
-        $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
-        $str;
+        git($self)->cat_file($blob, $ref);
 }
 
 sub smsg_mime {
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index f2ba21fc..31aa76c6 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -111,7 +111,7 @@ sub is_maildir_path ($) {
         (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
 }
 
-sub maildir_path_load ($) {
+sub mime_from_path ($) {
         my ($path) = @_;
         if (open my $fh, '<', $path) {
                 local $/;
@@ -138,7 +138,7 @@ sub import_maildir {
                 opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
                 while (defined(my $fn = readdir($dh))) {
                         next unless is_maildir_basename($fn);
-                        my $mime = maildir_path_load("$dir/$fn") or next;
+                        my $mime = mime_from_path("$dir/$fn") or next;
 
                         if (my $filter = $self->filter($im)) {
                                 my $ret = $filter->scrub($mime) or return;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index d5beceaf..9995140c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -106,8 +106,11 @@ sub msg_hdr ($$;$) {
                 'List-Post', "<mailto:$ibx->{-primary_address}>",
         );
         my $crlf = $header_obj->crlf;
-        my $buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970\n" .
-                        $header_obj->as_string;
+        my $buf = $header_obj->as_string;
+        # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+        $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+        $buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf;
+
         for (my $i = 0; $i < @append; $i += 2) {
                 my $k = $append[$i];
                 my $v = $append[$i + 1];
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index d1f75f6f..c79f198b 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -506,6 +506,8 @@ sub set_art {
 sub msg_hdr_write ($$$) {
         my ($self, $hdr, $body_follows) = @_;
         $hdr = $hdr->as_string;
+        # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+        $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
         utf8::encode($hdr);
         $hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index aebcb1f7..c09e8d2c 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -281,10 +281,6 @@ sub psgi_return_init_cb {
                                         ${$self->{hdr_buf}}, $filter);
                 $wcb->($r);
         }
-
-        # Workaround a leak under Perl 5.16.3 when combined with
-        # Plack::Middleware::Deflater:
-        $wcb = undef;
 }
 
 sub psgi_return_start { # may run later, much later...
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 05689941..25118f43 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -64,6 +64,7 @@ sub new {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
                 $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+                $self->{index_max_size} = $ibx->{index_max_size};
         } elsif ($version == 2) {
                 defined $shard or die "shard is required for v2\n";
                 # shard is a number
@@ -551,13 +552,9 @@ sub unindex_both {
 
 sub do_cat_mail {
         my ($git, $blob, $sizeref) = @_;
-        my $mime = eval {
-                my $str = $git->cat_file($blob, $sizeref);
-                # fixup bugs from import:
-                $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-                PublicInbox::MIME->new($str);
-        };
-        $@ ? undef : $mime;
+        my $str = $git->cat_file($blob, $sizeref) or
+                die "BUG: $blob not found in $git->{git_dir}";
+        PublicInbox::MIME->new($str);
 }
 
 # called by public-inbox-index
@@ -576,6 +573,16 @@ sub batch_adjust ($$$$$) {
         }
 }
 
+sub too_big ($$$) {
+        my ($self, $git, $oid) = @_;
+        my $max_size = $self->{index_max_size} or return;
+        my (undef, undef, $size) = $git->check($oid);
+        die "E: bad $oid in $git->{git_dir}\n" if !defined($size);
+        return if $size <= $max_size;
+        warn "W: skipping $oid ($size > $max_size)\n";
+        1;
+}
+
 # only for v1
 sub read_log {
         my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
@@ -602,7 +609,8 @@ sub read_log {
                                 }
                                 next;
                         }
-                        my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        next if too_big($self, $git, $blob);
+                        my $mime = do_cat_mail($git, $blob, \$bytes);
                         my $smsg = bless {}, 'PublicInbox::Smsg';
                         batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
                         $smsg->{blob} = $blob;
@@ -610,7 +618,7 @@ sub read_log {
                         $add_cb->($self, $mime, $smsg);
                 } elsif ($line =~ /$delmsg/o) {
                         my $blob = $1;
-                        $D{$blob} = 1;
+                        $D{$blob} = 1 unless too_big($self, $git, $blob);
                 } elsif ($line =~ /^commit ($h40)/o) {
                         $latest = $1;
                         $newest ||= $latest;
@@ -623,7 +631,7 @@ sub read_log {
         close($log) or die "git log failed: \$?=$?";
         # get the leftovers
         foreach my $blob (keys %D) {
-                my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                my $mime = do_cat_mail($git, $blob, \$bytes);
                 $del_cb->($self, $mime);
         }
         $batch_cb->($nr, $latest, $newest);
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 38d1aa6e..60f692b2 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -24,7 +24,16 @@ use PublicInbox::MID qw($MID_EXTRACT);
 
 sub thread {
         my ($msgs, $ordersub, $ctx) = @_;
-        my $id_table = {};
+
+        # A. put all current $msgs (non-ghosts) into %id_table
+        my %id_table = map {;
+                # this delete saves around 4K across 1K messages
+                # TODO: move this to a more appropriate place, breaks tests
+                # if we do it during psgi_cull
+                delete $_->{num};
+
+                $_->{mid} => PublicInbox::SearchThread::Msg::cast($_);
+        } @$msgs;
 
         # Sadly, we sort here anyways since the fill-in-the-blanks References:
         # can be shakier if somebody used In-Reply-To with multiple, disparate
@@ -32,36 +41,21 @@ sub thread {
         # always determine ordering when somebody uses multiple In-Reply-To.
         # We'll trust the client Date: header here instead of the Received:
         # time since this is for display (and not retrieval)
-        _add_message($id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
+        _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
         my $ibx = $ctx->{-inbox};
         my $rootset = [ grep {
                         !delete($_->{parent}) && $_->visible($ibx)
-                } values %$id_table ];
-        $id_table = undef;
+                } values %id_table ];
         $rootset = $ordersub->($rootset);
         $_->order_children($ordersub, $ctx) for @$rootset;
         $rootset;
 }
 
-sub _get_cont_for_id ($$) {
-        my ($id_table, $mid) = @_;
-        $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
-}
-
-sub _add_message ($$) {
-        my ($id_table, $smsg) = @_;
-
-        # A. if id_table...
-        my $this = _get_cont_for_id($id_table, $smsg->{mid});
-        $this->{smsg} = $smsg;
-
-        # saves around 4K across 1K messages
-        # TODO: move this to a more appropriate place, breaks tests
-        # if we do it during psgi_cull
-        delete $smsg->{num};
+sub _set_parent ($$) {
+        my ($id_table, $this) = @_;
 
         # B. For each element in the message's References field:
-        defined(my $refs = $smsg->{references}) or return;
+        defined(my $refs = $this->{references}) or return;
 
         # This loop exists to help fill in gaps left from missing
         # messages.  It is not needed in a perfect world where
@@ -70,7 +64,8 @@ sub _add_message ($$) {
         my $prev;
         foreach my $ref ($refs =~ m/$MID_EXTRACT/go) {
                 # Find a Container object for the given Message-ID
-                my $cont = _get_cont_for_id($id_table, $ref);
+                my $cont = $id_table->{$ref} //=
+                        PublicInbox::SearchThread::Msg::ghost($ref);
 
                 # Link the References field's Containers together in
                 # the order implied by the References header
@@ -96,22 +91,31 @@ sub _add_message ($$) {
 }
 
 package PublicInbox::SearchThread::Msg;
+use base qw(PublicInbox::Smsg);
 use strict;
 use warnings;
 use Carp qw(croak);
 
-sub new {
+# declare a ghost smsg (determined by absence of {blob})
+sub ghost {
         bless {
-                id => $_[1],
+                mid => $_[0],
                 children => {}, # becomes an array when sorted by ->order(...)
-        }, $_[0];
+        }, __PACKAGE__;
+}
+
+# give an existing smsg the methods of this class
+sub cast {
+        my ($smsg) = @_;
+        $smsg->{children} = {};
+        bless $smsg, __PACKAGE__;
 }
 
 sub topmost {
         my ($self) = @_;
         my @q = ($self);
         while (my $cont = shift @q) {
-                return $cont if $cont->{smsg};
+                return $cont if $cont->{blob};
                 push @q, values %{$cont->{children}};
         }
         undef;
@@ -122,7 +126,7 @@ sub add_child {
         croak "Cowardly refusing to become my own parent: $self"
           if $self == $child;
 
-        my $cid = $child->{id};
+        my $cid = $child->{mid};
 
         # reparenting:
         if (defined(my $parent = $child->{parent})) {
@@ -148,8 +152,13 @@ sub has_descendent {
 # being folded/mangled by a MUA, and not a missing message.
 sub visible ($$) {
         my ($self, $ibx) = @_;
-        ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) ||
-         (scalar values %{$self->{children}});
+        return 1 if $self->{blob};
+        if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) {
+                %$self = (%$self, %$by_mid);
+                1;
+        } else {
+                (scalar values %{$self->{children}});
+        }
 }
 
 sub order_children {
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 4fbf59ef..4336e4d9 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -243,8 +243,8 @@ sub search_nav_bot {
 
 sub sort_relevance {
         [ sort {
-                (eval { $b->topmost->{smsg}->{pct} } // 0) <=>
-                (eval { $a->topmost->{smsg}->{pct} } // 0)
+                (eval { $b->topmost->{pct} } // 0) <=>
+                (eval { $a->topmost->{pct} } // 0)
         } @{$_[0]} ]
 }
 
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index e9efbac7..b50871e8 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -9,7 +9,7 @@ use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek);
 use POSIX qw(dup2);
 use IO::Socket::INET;
 our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
-        run_script start_script key2sub);
+        run_script start_script key2sub xsys xqx);
 
 sub tmpdir (;$) {
         my ($base) = @_;
@@ -87,7 +87,7 @@ sub require_mods {
 
 sub key2script ($) {
         my ($key) = @_;
-        return $key if (index($key, '/') >= 0);
+        return $key if ($key eq 'git' || index($key, '/') >= 0);
         # n.b. we may have scripts which don't start with "public-inbox" in
         # the future:
         $key =~ s/\A([-\.])/public-inbox$1/;
@@ -244,6 +244,28 @@ sub run_script ($;$$) {
 
 sub wait_for_tail () { sleep(2) }
 
+# like system() built-in, but uses spawn() for env/rdr + vfork
+sub xsys {
+        my ($cmd, $env, $rdr) = @_;
+        if (ref($cmd)) {
+                $rdr ||= {};
+        } else {
+                $cmd = [ @_ ];
+                $env = undef;
+                $rdr = {};
+        }
+        run_script($cmd, $env, { %$rdr, run_mode => 0 });
+        $? >> 8
+}
+
+# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
+sub xqx {
+        my ($cmd, $env, $rdr) = @_;
+        $rdr //= {};
+        run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) });
+        wantarray ? split(/^/m, $out) : $out;
+}
+
 sub start_script {
         my ($cmd, $env, $opt) = @_;
         my ($key, @argv) = @$cmd;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 1c78ef24..01b8bed6 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -120,6 +120,7 @@ sub new {
                 last_commit => [], # git repo -> commit
         };
         $self->{shards} = count_shards($self) || nproc_shards($creat);
+        $self->{index_max_size} = $v2ibx->{index_max_size};
         bless $self, $class;
 }
 
@@ -730,9 +731,8 @@ sub fill_alternates ($$) {
 sub git_init {
         my ($self, $epoch) = @_;
         my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
-        my @cmd = (qw(git init --bare -q), $git_dir);
-        PublicInbox::Import::run_die(\@cmd);
-        @cmd = (qw/git config/, "--file=$git_dir/config",
+        PublicInbox::Import::init_bare($git_dir);
+        my @cmd = (qw/git config/, "--file=$git_dir/config",
                         'include.path', '../../all.git/config');
         PublicInbox::Import::run_die(\@cmd);
         fill_alternates($self, $epoch);
@@ -868,6 +868,7 @@ sub atfork_child {
 
 sub mark_deleted ($$$$) {
         my ($self, $sync, $git, $oid) = @_;
+        return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
         my $msgref = $git->cat_file($oid);
         my $mime = PublicInbox::MIME->new($$msgref);
         my $mids = mids($mime->header_obj);
@@ -980,18 +981,6 @@ sub check_unindexed ($$$) {
         }
 }
 
-# reuse Msgmap to store num => oid mapping (rather than num => mid)
-sub multi_mid_q_new () {
-        my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
-        my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
-        $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
-        # for Msgmap->DESTROY:
-        $multi_mid->{tmp_name} = $fn;
-        $multi_mid->{pid} = $$;
-        close $fh or die "failed to close $fn: $!";
-        $multi_mid
-}
-
 sub multi_mid_q_push ($$$) {
         my ($self, $sync, $oid) = @_;
         my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new;
@@ -1006,6 +995,7 @@ sub multi_mid_q_push ($$$) {
 
 sub reindex_oid ($$$$) {
         my ($self, $sync, $git, $oid) = @_;
+        return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
         my ($num, $mid0, $len);
         my $msgref = $git->cat_file($oid, \$len);
         return if $len == 0; # purged
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b6d7acaf..9b62ed3c 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -297,11 +297,9 @@ sub _th_index_lite {
         my $nr_c = scalar @$children;
         my $nr_s = 0;
         my $siblings;
-        if (my $smsg = $node->{smsg}) {
-                # delete saves about 200KB on a 1K message thread
-                if (my $refs = delete $smsg->{references}) {
-                        ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
-                }
+        # delete saves about 200KB on a 1K message thread
+        if (my $refs = delete $node->{references}) {
+                ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
         }
         my $irt_map = $mapping->{$$irt} if defined $$irt;
         if (defined $irt_map) {
@@ -310,12 +308,12 @@ sub _th_index_lite {
                 $rv .= $pad . $irt_map->[0];
                 if ($idx > 0) {
                         my $prev = $siblings->[$idx - 1];
-                        my $pmid = $prev->{id};
+                        my $pmid = $prev->{mid};
                         if ($idx > 2) {
                                 my $s = ($idx - 1). ' preceding siblings ...';
                                 $rv .= pad_link($pmid, $level, $s);
                         } elsif ($idx == 2) {
-                                my $ppmid = $siblings->[0]->{id};
+                                my $ppmid = $siblings->[0]->{mid};
                                 $rv .= $pad . $mapping->{$ppmid}->[0];
                         }
                         $rv .= $pad . $mapping->{$pmid}->[0];
@@ -328,26 +326,26 @@ sub _th_index_lite {
         $attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
         $rv .= "<b>@ $attr";
         if ($nr_c) {
-                my $cmid = $children->[0]->{id};
+                my $cmid = $children->[0]->{mid};
                 $rv .= $pad . $mapping->{$cmid}->[0];
                 if ($nr_c > 2) {
                         my $s = ($nr_c - 1). ' more replies';
                         $rv .= pad_link($cmid, $level + 1, $s);
                 } elsif (my $cn = $children->[1]) {
-                        $rv .= $pad . $mapping->{$cn->{id}}->[0];
+                        $rv .= $pad . $mapping->{$cn->{mid}}->[0];
                 }
         }
 
         my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
         if ($next) {
-                my $nmid = $next->{id};
+                my $nmid = $next->{mid};
                 $rv .= $pad . $mapping->{$nmid}->[0];
                 my $nnext = $nr_s - $idx;
                 if ($nnext > 2) {
                         my $s = ($nnext - 1).' subsequent siblings';
                         $rv .= pad_link($nmid, $level, $s);
                 } elsif (my $nn = $siblings->[$idx + 2]) {
-                        $rv .= $pad . $mapping->{$nn->{id}}->[0];
+                        $rv .= $pad . $mapping->{$nn->{mid}}->[0];
                 }
         }
         $rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -369,7 +367,7 @@ sub walk_thread ($$$) {
 
 sub pre_thread  { # walk_thread callback
         my ($ctx, $level, $node, $idx) = @_;
-        $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ];
+        $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ];
         skel_dump($ctx, $level, $node);
 }
 
@@ -388,8 +386,8 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
                 my $node = shift @$q or next;
                 my $cl = $level + 1;
                 unshift @$q, map { ($cl, $_) } @{$node->{children}};
-                if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) {
-                        return thread_index_entry($ctx, $level, $smsg);
+                if ($ctx->{-inbox}->smsg_mime($node)) {
+                        return thread_index_entry($ctx, $level, $node);
                 } else {
                         return ghost_index_entry($ctx, $level, $node);
                 }
@@ -407,7 +405,7 @@ sub stream_thread ($$) {
                 my $node = shift @q or next;
                 my $cl = $level + 1;
                 unshift @q, map { ($cl, $_) } @{$node->{children}};
-                $smsg = $ibx->smsg_mime($node->{smsg}) and last;
+                $smsg = $ibx->smsg_mime($node) and last;
         }
         return missing_thread($ctx) unless $smsg;
 
@@ -825,7 +823,7 @@ sub indent_for {
 sub find_mid_root {
         my ($ctx, $level, $node, $idx) = @_;
         ++$ctx->{root_idx} if $level == 0;
-        if ($node->{id} eq $ctx->{mid}) {
+        if ($node->{mid} eq $ctx->{mid}) {
                 $ctx->{found_mid_at} = $ctx->{root_idx};
                 return 0;
         }
@@ -899,8 +897,8 @@ sub dedupe_subject {
 }
 
 sub skel_dump { # walk_thread callback
-        my ($ctx, $level, $node) = @_;
-        my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node);
+        my ($ctx, $level, $smsg) = @_;
+        $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg);
 
         my $skel = $ctx->{skel};
         my $cur = $ctx->{cur};
@@ -983,7 +981,7 @@ sub skel_dump { # walk_thread callback
 sub _skel_ghost {
         my ($ctx, $level, $node) = @_;
 
-        my $mid = $node->{id};
+        my $mid = $node->{mid};
         my $d = '     [not found] ';
         $d .= '    '  if exists $ctx->{searchview};
         $d .= indent_for($level) . th_pfx($level);
@@ -1006,18 +1004,23 @@ sub _skel_ghost {
 
 sub sort_ds {
         [ sort {
-                (eval { $a->topmost->{smsg}->{ds} } || 0) <=>
-                (eval { $b->topmost->{smsg}->{ds} } || 0)
+                (eval { $a->topmost->{ds} } || 0) <=>
+                (eval { $b->topmost->{ds} } || 0)
         } @{$_[0]} ];
 }
 
 # accumulate recent topics if search is supported
 # returns 200 if done, 404 if not
 sub acc_topic { # walk_thread callback
-        my ($ctx, $level, $node) = @_;
-        my $mid = $node->{id};
-        my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid);
-        if ($smsg) {
+        my ($ctx, $level, $smsg) = @_;
+        my $mid = $smsg->{mid};
+        my $has_blob = $smsg->{blob} // do {
+                if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+                        %$smsg = (%$smsg, %$by_mid);
+                        1;
+                }
+        };
+        if ($has_blob) {
                 my $subj = subject_normalized($smsg->{subject});
                 $subj = '(no subject)' if $subj eq '';
                 my $ds = $smsg->{ds};
@@ -1208,7 +1211,7 @@ sub thread_adj_level {
 sub ghost_index_entry {
         my ($ctx, $level, $node) = @_;
         my ($beg, $end) = thread_adj_level($ctx,  $level);
-        $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+        $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{mid})
                 . '</pre>' . $end;
 }
 
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index e2024640..7b9e8915 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -11,7 +11,7 @@ use PublicInbox::InboxWritable;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
-*maildir_path_load = *PublicInbox::InboxWritable::maildir_path_load;
+*mime_from_path = \&PublicInbox::InboxWritable::mime_from_path;
 
 sub new {
         my ($class, $config) = @_;
@@ -59,9 +59,11 @@ sub new {
                 my $watch = $ibx->{watch} or return;
                 if (is_maildir($watch)) {
                         my $watch_hdrs = [];
-                        if (my $wh = $ibx->{watchheader}) {
-                                my ($k, $v) = split(/:/, $wh, 2);
-                                push @$watch_hdrs, [ $k, qr/\Q$v\E/ ];
+                        if (my $whs = $ibx->{watchheader}) {
+                                for (@$whs) {
+                                        my ($k, $v) = split(/:/, $_, 2);
+                                        push @$watch_hdrs, [ $k, qr/\Q$v\E/ ];
+                                }
                         }
                         if (my $list_ids = $ibx->{listid}) {
                                 for (@$list_ids) {
@@ -123,7 +125,7 @@ sub _remove_spam {
         my ($self, $path) = @_;
         # path must be marked as (S)een
         $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
-        my $mime = maildir_path_load($path) or return;
+        my $mime = mime_from_path($path) or return;
         $self->{config}->each_inbox(sub {
                 my ($ibx) = @_;
                 eval {
@@ -165,7 +167,7 @@ sub _try_path {
                 $warn_cb->(@_);
         };
         foreach my $ibx (@$inboxes) {
-                my $mime = maildir_path_load($path) or next;
+                my $mime = mime_from_path($path) or next;
                 my $im = _importer_for($self, $ibx);
 
                 # any header match means it's eligible for the inbox:
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index aa917ed8..c3fbb1a7 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -20,9 +20,8 @@ sub close {}
 
 sub new {
         my ($class, $ctx, $cb) = @_;
-        $ctx->{emit_header} = 1;
         $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-        bless { cb => $cb || \&close, ctx => $ctx }, $class;
+        bless { cb => $cb || \&close, ctx => $ctx, emit_header => 1 }, $class;
 }
 
 sub response {
@@ -130,7 +129,7 @@ sub feed_entry {
         $email = ascii_html($email);
 
         my $s = '';
-        if (delete $ctx->{emit_header}) {
+        if (delete $self->{emit_header}) {
                 $s .= atom_header($ctx, $title);
         }
         $s .= "<entry><author><name>$name</name><email>$email</email>" .
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 2008ba09..b23a415e 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -151,7 +151,7 @@ sub inbox_config ($$$) {
         url = https://example.com/$name/
         url = http://example.onion/$name/
 EOS
-        for my $k (qw(address listid infourl)) {
+        for my $k (qw(address listid infourl watchheader)) {
                 defined(my $v = $ibx->{$k}) or next;
                 $$txt .= "\t$k = $_\n" for @$v;
         }
@@ -171,7 +171,7 @@ EOF
                 }
         }
 
-        for my $k (qw(filter newsgroup obfuscate replyto watchheader)) {
+        for my $k (qw(filter newsgroup obfuscate replyto)) {
                 defined(my $v = $ibx->{$k}) or next;
                 $$txt .= "\t$k = $v\n";
         }