about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r--lib/PublicInbox/SearchIdx.pm325
1 files changed, 194 insertions, 131 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 5b0e4458..1cbf6d23 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # based on notmuch, but with no concept of folders, files
 #
@@ -9,7 +9,8 @@
 package PublicInbox::SearchIdx;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Search PublicInbox::Lock Exporter);
+use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask
+        Exporter);
 use PublicInbox::Eml;
 use PublicInbox::Search qw(xap_terms);
 use PublicInbox::InboxWritable;
@@ -18,9 +19,10 @@ use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
 use Carp qw(croak carp);
 use POSIX qw(strftime);
+use Fcntl qw(SEEK_SET);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
-use PublicInbox::Spawn qw(spawn nodatacow_dir);
+use PublicInbox::Spawn qw(run_wait popen_rd);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 use PublicInbox::Address;
@@ -30,18 +32,19 @@ our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack
 my $X = \%PublicInbox::Search::X;
 our ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
+our $DB_DANGEROUS = 0;
 our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff :
         # assume a typical 64-bit system has 8x more RAM than a
         # typical 32-bit system:
         (($Config{ptrsize} >= 8 ? 8192 : 1024) * 1024);
-
 use constant DEBUG => !!$ENV{DEBUG};
-
+my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/;
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
-my @VMD_MAP = (kw => 'K', L => 'L');
+my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters
 our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
+our $PATCHID_BROKEN;
 
 sub new {
         my ($class, $ibx, $creat, $shard) = @_;
@@ -61,6 +64,7 @@ sub new {
                         die("Invalid indexlevel $ibx->{indexlevel}\n");
                 }
         }
+        undef $PATCHID_BROKEN; # retry on new instances in case of upgrades
         $ibx = PublicInbox::InboxWritable->new($ibx);
         my $self = PublicInbox::Search->new($ibx);
         bless $self, $class;
@@ -89,7 +93,7 @@ sub new {
         $self;
 }
 
-sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+sub need_xapian ($) { ($_[0]->{indexlevel} // 'full') =~ $xapianlevels }
 
 sub idx_release {
         my ($self, $wake) = @_;
@@ -112,12 +116,15 @@ sub load_xapian_writable () {
         *sortable_serialise = $xap.'::sortable_serialise';
         $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
         $DB_OPEN = eval($xap.'::DB_OPEN()');
-        my $ver = (eval($xap.'::major_version()') << 16) |
-                (eval($xap.'::minor_version()') << 8) |
-                eval($xap.'::revision()');
-        $DB_NO_SYNC = 0x4 if $ver >= 0x10400;
+        my $ver = eval 'v'.join('.', eval($xap.'::major_version()'),
+                                eval($xap.'::minor_version()'),
+                                eval($xap.'::revision()'));
+        if ($ver ge 1.4) { # new flags in Xapian 1.4
+                $DB_NO_SYNC = 0x4;
+                $DB_DANGEROUS = 0x10;
+        }
         # Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
-        $X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218;
+        $X->{CLOEXEC_UNSET} = 1 if $ver ge v1.2.21 && $ver le v1.2.24;
         1;
 }
 
@@ -130,6 +137,7 @@ sub idx_acquire {
                 load_xapian_writable();
                 $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN;
         }
+        my $owner = $self->{ibx} // $self->{eidx} // $self;
         if ($self->{creat}) {
                 require File::Path;
                 $self->lock_acquire;
@@ -139,12 +147,15 @@ sub idx_acquire {
                 if (!-d $dir && (!$is_shard ||
                                 ($is_shard && need_xapian($self)))) {
                         File::Path::mkpath($dir);
-                        nodatacow_dir($dir);
-                        $self->{-set_has_threadid_once} = 1;
+                        require PublicInbox::Syscall;
+                        PublicInbox::Syscall::nodatacow_dir($dir);
+                        # owner == self for CodeSearchIdx
+                        $self->{-set_has_threadid_once} = 1 if $owner != $self;
+                        $flag |= $DB_DANGEROUS if $owner->{-dangerous};
                 }
         }
         return unless defined $flag;
-        $flag |= $DB_NO_SYNC if ($self->{ibx} // $self->{eidx})->{-no_fsync};
+        $flag |= $DB_NO_SYNC if $owner->{-no_fsync};
         my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         croak "Failed opening $dir: $@" if $@;
         $self->{xdb} = $xdb;
@@ -169,9 +180,8 @@ sub term_generator ($) { # write-only
 sub index_phrase ($$$$) {
         my ($self, $text, $wdf_inc, $prefix) = @_;
 
-        my $tg = term_generator($self);
-        $tg->index_text($text, $wdf_inc, $prefix);
-        $tg->increase_termpos;
+        term_generator($self)->index_text($text, $wdf_inc, $prefix);
+        $self->{term_generator}->increase_termpos;
 }
 
 sub index_text ($$$$) {
@@ -180,8 +190,8 @@ sub index_text ($$$$) {
         if ($self->{indexlevel} eq 'full') {
                 index_phrase($self, $text, $wdf_inc, $prefix);
         } else {
-                my $tg = term_generator($self);
-                $tg->index_text_without_positions($text, $wdf_inc, $prefix);
+                term_generator($self)->index_text_without_positions(
+                                        $text, $wdf_inc, $prefix);
         }
 }
 
@@ -228,8 +238,8 @@ sub index_old_diff_fn {
 
         # no renames or space support for traditional diffs,
         # find the number of leading common paths to strip:
-        my @fa = split('/', $fa);
-        my @fb = split('/', $fb);
+        my @fa = split(m'/', $fa);
+        my @fb = split(m'/', $fb);
         while (scalar(@fa) && scalar(@fb)) {
                 $fa = join('/', @fa);
                 $fb = join('/', @fb);
@@ -249,37 +259,59 @@ sub index_diff ($$$) {
         my ($self, $txt, $doc) = @_;
         my %seen;
         my $in_diff;
-        my @xnq;
-        my $xnq = \@xnq;
-        foreach (split(/\n/, $txt)) {
-                if ($in_diff && s/^ //) { # diff context
+        my $xnq = [];
+        my @l = split(/\n/, $$txt);
+        undef $$txt;
+        while (defined($_ = shift @l)) {
+                if ($in_diff && /^GIT binary patch/) {
+                        push @$xnq, $_;
+                        while (@l && $l[0] =~ /^(?:literal|delta) /) {
+                                # TODO allow searching by size range?
+                                # allows searching by exact size via:
+                                # "literal $SIZE" or "delta $SIZE"
+                                push @$xnq, shift(@l);
+
+                                # skip base85 and empty lines
+                                while (@l && ($l[0] =~ /\A$BASE85\h*\z/o ||
+                                                $l[0] !~ /\S/)) {
+                                        shift @l;
+                                }
+                                # loop hits trailing "literal 0\nHcmV?d00001\n"
+                        }
+                } elsif ($in_diff && s/^ //) { # diff context
                         index_diff_inc($self, $_, 'XDFCTX', $xnq);
                 } elsif (/^-- $/) { # email signature begins
                         $in_diff = undef;
-                } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
-                        # wait until "---" and "+++" to capture filenames
+                } elsif (m!^diff --git ("?[^/]+/.+) ("?[^/]+/.+)\z!) {
+                        # capture filenames here for binary diffs:
+                        my ($fa, $fb) = ($1, $2);
+                        push @$xnq, $_;
                         $in_diff = 1;
+                        $fa = (split(m'/', git_unquote($fa), 2))[1];
+                        $fb = (split(m'/', git_unquote($fb), 2))[1];
+                        $seen{$fa}++ or index_diff_inc($self, $fa, 'XDFN', $xnq);
+                        $seen{$fb}++ or index_diff_inc($self, $fb, 'XDFN', $xnq);
                 # traditional diff:
                 } elsif (m/^diff -(.+) (\S+) (\S+)$/) {
                         my ($opt, $fa, $fb) = ($1, $2, $3);
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         # only support unified:
                         next unless $opt =~ /[uU]/;
                         $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb,
                                                         $xnq);
                 } elsif (m!^--- ("?[^/]+/.+)!) {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (m!^\+\+\+ ("?[^/]+/.+)!)  {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (/^--- (\S+)/) {
-                        $in_diff = $1;
-                        push @xnq, $_;
+                        $in_diff = $1; # old diff filename
+                        push @$xnq, $_;
                 } elsif (defined $in_diff && /^\+\+\+ (\S+)/) {
                         $in_diff = index_old_diff_fn($self, \%seen, $in_diff,
                                                         $1, $xnq);
@@ -305,19 +337,65 @@ sub index_diff ($$$) {
                                 /^(?:dis)?similarity index / ||
                                 /^\\ No newline at end of file/ ||
                                 /^Binary files .* differ/) {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                 } elsif ($_ eq '') {
                         # possible to be in diff context, some mail may be
                         # stripped by MUA or even GNU diff(1).  "git apply"
                         # treats a bare "\n" as diff context, too
                 } else {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         warn "non-diff line: $_\n" if DEBUG && $_ ne '';
                         $in_diff = undef;
                 }
         }
 
-        index_text($self, join("\n", @xnq), 1, 'XNQ');
+        index_text($self, join("\n", @$xnq), 1, 'XNQ');
+}
+
+sub index_body_text {
+        my ($self, $doc, $sref) = @_;
+        my $rd;
+        # start patch-id in parallel
+        if ($$sref =~ /^(?:diff|---|\+\+\+) /ms && !$PATCHID_BROKEN) {
+                my $git = ($self->{ibx} // $self->{eidx} // $self)->git;
+                my $fh = PublicInbox::IO::write_file '+>:utf8', undef, $$sref;
+                $fh->flush or die "flush: $!";
+                sysseek($fh, 0, SEEK_SET);
+                $rd = popen_rd($git->cmd(qw(patch-id --stable)), undef,
+                                { 0 => $fh });
+        }
+
+        # split off quoted and unquoted blocks:
+        my @sections = PublicInbox::MsgIter::split_quotes($$sref);
+        undef $$sref; # free memory
+        for my $txt (@sections) {
+                if ($txt =~ /\A>/) {
+                        if ($txt =~ /^[>\t ]+GIT binary patch\r?/sm) {
+                                # get rid of Base-85 noise
+                                $txt =~ s/^([>\h]+(?:literal|delta)
+                                                \x20[0-9]+\r?\n)
+                                        (?:[>\h]+$BASE85\h*\r?\n)+/$1/gsmx;
+                        }
+                        index_text($self, $txt, 0, 'XQUOT');
+                } else { # does it look like a diff?
+                        if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
+                                index_diff($self, \$txt, $doc);
+                        } else {
+                                index_text($self, $txt, 1, 'XNQ');
+                        }
+                }
+                undef $txt; # free memory
+        }
+        if (defined $rd) { # reap `git patch-id'
+                (readline($rd) // '') =~ /\A([a-f0-9]{40,})/ and
+                        $doc->add_term('XDFID'.$1);
+                if (!$rd->close) {
+                        my $c = 'git patch-id --stable';
+                        $PATCHID_BROKEN = ($? >> 8) == 129;
+                        $PATCHID_BROKEN ? warn("W: $c requires git v2.1.0+\n")
+                                : warn("W: $c failed: \$?=$? (non-fatal)");
+                }
+        }
 }
 
 sub index_xapian { # msg_iter callback
@@ -339,23 +417,7 @@ sub index_xapian { # msg_iter callback
         my ($s, undef) = msg_part_text($part, $ct);
         defined $s or return;
         $_[0]->[0] = $part = undef; # free memory
-
-        # split off quoted and unquoted blocks:
-        my @sections = PublicInbox::MsgIter::split_quotes($s);
-        undef $s; # free memory
-        for my $txt (@sections) {
-                if ($txt =~ /\A>/) {
-                        index_text($self, $txt, 0, 'XQUOT');
-                } else {
-                        # does it look like a diff?
-                        if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
-                                index_diff($self, $txt, $doc);
-                        } else {
-                                index_text($self, $txt, 1, 'XNQ');
-                        }
-                }
-                undef $txt; # free memory
-        }
+        index_body_text($self, $doc, \$s);
 }
 
 sub index_list_id ($$$) {
@@ -363,6 +425,7 @@ sub index_list_id ($$$) {
         for my $l ($hdr->header_raw('List-Id')) {
                 $l =~ /<([^>]+)>/ or next;
                 my $lid = lc $1;
+                $lid =~ tr/\n\t\r\0//d; # same rules as Message-ID
                 $doc->add_boolean_term('G' . $lid);
                 index_phrase($self, $lid, 1, 'XL'); # probabilistic
         }
@@ -398,8 +461,7 @@ sub eml2doc ($$$;$) {
         add_val($doc, PublicInbox::Search::UID(), $smsg->{num});
         add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid});
 
-        my $tg = term_generator($self);
-        $tg->set_document($doc);
+        term_generator($self)->set_document($doc);
         index_headers($self, $smsg);
 
         if (defined(my $eidx_key = $smsg->{eidx_key})) {
@@ -409,7 +471,7 @@ sub eml2doc ($$$;$) {
         index_ids($self, $doc, $eml, $mids);
 
         # by default, we maintain compatibility with v1.5.0 and earlier
-        # by writing to docdata.glass, users who never exect to downgrade can
+        # by writing to docdata.glass, users who never expect to downgrade can
         # use --skip-docdata
         if (!$self->{-skip_docdata}) {
                 # WWW doesn't need {to} or {cc}, only NNTP
@@ -451,10 +513,9 @@ sub add_xapian ($$$$) {
 sub _msgmap_init ($) {
         my ($self) = @_;
         die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1;
-        $self->{mm} //= eval {
+        $self->{mm} //= do {
                 require PublicInbox::Msgmap;
-                my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1;
-                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw);
+                PublicInbox::Msgmap->new_file($self->{ibx}, 1);
         };
 }
 
@@ -497,9 +558,7 @@ sub add_message {
 
 sub _get_doc ($$) {
         my ($self, $docid) = @_;
-        my $doc = eval { $self->{xdb}->get_document($docid) };
-        $doc // do {
-                warn "E: $@\n" if $@;
+        $self->get_doc($docid) // do {
                 warn "E: #$docid missing in Xapian\n";
                 undef;
         }
@@ -518,6 +577,12 @@ sub add_eidx_info {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
+sub get_terms {
+        my ($self, $pfx, $docid) = @_;
+        begin_txn_lazy($self);
+        xap_terms($pfx, $self->{xdb}, $docid);
+}
+
 sub remove_eidx_info {
         my ($self, $docid, $eidx_key, $eml) = @_;
         begin_txn_lazy($self);
@@ -551,17 +616,16 @@ sub set_vmd {
         my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my ($end, @rm, @add);
+        my ($v, @rm, @add);
         my @x = @VMD_MAP;
+        my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end);
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
                 my $set = $vmd->{$field} // next;
                 my %keep = map { $_ => 1 } @$set;
                 my %add = %keep;
-                $end //= $doc->termlist_end;
-                for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-                        $cur->skip_to($pfx);
-                        last if $cur == $end;
-                        my $v = $cur->get_termname;
+                $cur->skip_to($pfx); # works due to @VMD_MAP order
+                for (; $cur != $end; $cur++) {
+                        $v = $cur->get_termname;
                         $v =~ s/\A$pfx//s or next;
                         $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
                 }
@@ -637,18 +701,27 @@ sub update_vmd {
 
 sub xdb_remove {
         my ($self, @docids) = @_;
-        $self->begin_txn_lazy;
-        my $xdb = $self->{xdb} or return;
+        begin_txn_lazy($self);
+        my $xdb = $self->{xdb} // die 'BUG: missing {xdb}';
         for my $docid (@docids) {
                 eval { $xdb->delete_document($docid) };
-                warn "E: #$docid not in in Xapian? $@\n" if $@;
+                warn "E: #$docid not in Xapian? $@\n" if $@;
         }
 }
 
+sub xdb_remove_quiet {
+        my ($self, $docid) = @_;
+        begin_txn_lazy($self);
+        my $xdb = $self->{xdb} // die 'BUG: missing {xdb}';
+        eval { $xdb->delete_document($docid) };
+        ++$self->{-quiet_rm} unless $@;
+}
+
+sub nr_quiet_rm { delete($_[0]->{-quiet_rm}) // 0 }
+
 sub index_git_blob_id {
         my ($doc, $pfx, $objid) = @_;
 
-        my $len = length($objid);
         for (my $len = length($objid); $len >= 7; ) {
                 $doc->add_term($pfx.$objid);
                 $objid = substr($objid, 0, --$len);
@@ -742,7 +815,8 @@ sub unindex_both { # git->cat_async callback
 
 sub with_umask {
         my $self = shift;
-        ($self->{ibx} // $self->{eidx})->with_umask(@_);
+        my $owner = $self->{ibx} // $self->{eidx};
+        $owner ? $owner->with_umask(@_) : $self->SUPER::with_umask(@_)
 }
 
 # called by public-inbox-index
@@ -750,7 +824,8 @@ sub index_sync {
         my ($self, $opt) = @_;
         delete $self->{lock_path} if $opt->{-skip_lock};
         $self->with_umask(\&_index_sync, $self, $opt);
-        if ($opt->{reindex} && !$opt->{quit}) {
+        if ($opt->{reindex} && !$opt->{quit} &&
+                        !grep(defined, @$opt{qw(since until)})) {
                 my %again = %$opt;
                 delete @again{qw(rethread reindex)};
                 index_sync($self, \%again);
@@ -759,10 +834,10 @@ sub index_sync {
 }
 
 sub check_size { # check_async cb for -index --max-size=...
-        my ($oid, $type, $size, $arg, $git) = @_;
-        (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}";
+        my (undef, $oid, $type, $size, $arg) = @_;
+        ($type // '') eq 'blob' or die "E: bad $oid in $arg->{git}->{git_dir}";
         if ($size <= $arg->{max_size}) {
-                $git->cat_async($oid, $arg->{index_oid}, $arg);
+                $arg->{git}->cat_async($oid, $arg->{index_oid}, $arg);
         } else {
                 warn "W: skipping $oid ($size > $arg->{max_size})\n";
         }
@@ -775,18 +850,19 @@ sub v1_checkpoint ($$;$) {
         # $newest may be undef
         my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
         if (defined($newest)) {
-                my $cur = $self->{mm}->last_commit || '';
-                if (need_update($self, $cur, $newest)) {
+                my $cur = $self->{mm}->last_commit;
+                if (need_update($self, $sync, $cur, $newest)) {
                         $self->{mm}->last_commit($newest);
                 }
         }
         ${$sync->{max}} = $self->{batch_bytes};
 
         $self->{mm}->{dbh}->commit;
+        eval { $self->{mm}->{dbh}->do('PRAGMA optimize') };
         my $xdb = $self->{xdb};
         if ($newest && $xdb) {
                 my $cur = $xdb->get_metadata('last_commit');
-                if (need_update($self, $cur, $newest)) {
+                if (need_update($self, $sync, $cur, $newest)) {
                         $xdb->set_metadata('last_commit', $newest);
                 }
         }
@@ -843,6 +919,7 @@ sub process_stack {
                         $arg->{autime} = $at;
                         $arg->{cotime} = $ct;
                         if ($sync->{max_size}) {
+                                $arg->{git} = $git;
                                 $git->check_async($oid, \&check_size, $arg);
                         } else {
                                 $git->cat_async($oid, \&index_both, $arg);
@@ -870,23 +947,28 @@ sub log2stack ($$$) {
 
         # Count the new files so they can be added newest to oldest
         # and still have numbers increasing from oldest to newest
-        my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H
-                                --no-notes --no-color --no-renames --no-abbrev),
-                                $range);
-        my ($at, $ct, $stk, $cmt);
-        while (<$fh>) {
+        my @cmd = qw(log --raw -r --pretty=tformat:%at-%ct-%H
+                        --no-notes --no-color --no-renames --no-abbrev);
+        for my $k (qw(since until)) {
+                my $v = $sync->{-opt}->{$k} // next;
+                next if !$sync->{-opt}->{reindex};
+                push @cmd, "--$k=$v";
+        }
+        my $fh = $git->popen(@cmd, $range);
+        my ($at, $ct, $stk, $cmt, $l);
+        while (defined($l = <$fh>)) {
                 return if $sync->{quit};
-                if (/\A([0-9]+)-([0-9]+)-($OID)$/o) {
+                if ($l =~ /\A([0-9]+)-([0-9]+)-($OID)$/o) {
                         ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3);
                         $stk //= PublicInbox::IdxStack->new($cmt);
-                } elsif (/$del/) {
+                } elsif ($l =~ /$del/) {
                         my $oid = $1;
                         if ($D) { # reindex case
                                 $D->{pack('H*', $oid)}++;
                         } else { # non-reindex case:
                                 $stk->push_rec('d', $at, $ct, $oid, $cmt);
                         }
-                } elsif (/$add/) {
+                } elsif ($l =~ /$add/) {
                         my $oid = $1;
                         if ($D) {
                                 my $oid_bin = pack('H*', $oid);
@@ -898,7 +980,7 @@ sub log2stack ($$$) {
                         $stk->push_rec('m', $at, $ct, $oid, $cmt);
                 }
         }
-        close $fh or die "git log failed: \$?=$?";
+        $fh->close or die "git log failed: \$?=$?";
         $stk //= PublicInbox::IdxStack->new;
         $stk->read_prepare;
 }
@@ -923,15 +1005,20 @@ sub is_ancestor ($$$) {
         return 0 unless $git->check($cur);
         my $cmd = [ 'git', "--git-dir=$git->{git_dir}",
                 qw(merge-base --is-ancestor), $cur, $tip ];
-        my $pid = spawn($cmd);
-        waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish';
-        $? == 0;
+        run_wait($cmd) == 0;
 }
 
-sub need_update ($$$) {
-        my ($self, $cur, $new) = @_;
+sub need_update ($$$$) {
+        my ($self, $sync, $cur, $new) = @_;
         my $git = $self->{ibx}->git;
-        return 1 if $cur && !is_ancestor($git, $cur, $new);
+        $cur //= ''; # XS Search::Xapian ->get_metadata doesn't give undef
+
+        # don't rewind if --{since,until,before,after} are in use
+        return if $cur ne '' &&
+                grep(defined, @{$sync->{-opt}}{qw(since until)}) &&
+                is_ancestor($git, $new, $cur);
+
+        return 1 if $cur ne '' && !is_ancestor($git, $cur, $new);
         my $range = $cur eq '' ? $new : "$cur..$new";
         chomp(my $n = $git->qx(qw(rev-list --count), $range));
         ($n eq '' || $n > 0);
@@ -979,7 +1066,11 @@ sub _index_sync {
         my $ibx = $self->{ibx};
         local $self->{current_info} = "$ibx->{inboxdir}";
         $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES;
-        $ibx->git->batch_prepare;
+
+        if ($X->{CLOEXEC_UNSET}) {
+                $ibx->git->cat_file($tip);
+                $ibx->git->check($tip);
+        }
         my $pr = $opt->{-progress};
         my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx };
         my $quit = quit_cb($sync);
@@ -1015,8 +1106,10 @@ sub DESTROY {
         $_[0]->{lockfh} = undef;
 }
 
-sub _begin_txn {
+sub begin_txn_lazy {
         my ($self) = @_;
+        return if $self->{txn};
+        my $restore = $self->with_umask;
         my $xdb = $self->{xdb} || idx_acquire($self);
         $self->{oidx}->begin_lazy if $self->{oidx};
         $xdb->begin_transaction if $xdb;
@@ -1024,13 +1117,8 @@ sub _begin_txn {
         $xdb;
 }
 
-sub begin_txn_lazy {
-        my ($self) = @_;
-        $self->with_umask(\&_begin_txn, $self) if !$self->{txn};
-}
-
 # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
-# This metadata is read by Admin::detect_indexlevel:
+# This metadata is read by InboxWritable->detect_indexlevel:
 sub set_metadata_once {
         my ($self) = @_;
 
@@ -1052,8 +1140,10 @@ sub set_metadata_once {
         }
 }
 
-sub _commit_txn {
+sub commit_txn_lazy {
         my ($self) = @_;
+        return unless delete($self->{txn});
+        my $restore = $self->with_umask;
         if (my $eidx = $self->{eidx}) {
                 $eidx->git->async_wait_all;
                 $eidx->{transact_bytes} = 0;
@@ -1065,12 +1155,6 @@ sub _commit_txn {
         $self->{oidx}->commit_lazy if $self->{oidx};
 }
 
-sub commit_txn_lazy {
-        my ($self) = @_;
-        delete($self->{txn}) and
-                $self->with_umask(\&_commit_txn, $self);
-}
-
 sub eidx_shard_new {
         my ($class, $eidx, $shard) = @_;
         my $self = bless {
@@ -1085,25 +1169,4 @@ sub eidx_shard_new {
         $self;
 }
 
-# ensure there's no stale Xapian docs by treating $over as canonical
-sub over_check {
-        my ($self, $over) = @_;
-        begin_txn_lazy($self);
-        my $sth = $over->dbh->prepare(<<'');
-SELECT COUNT(*) FROM over WHERE num = ?
-
-        my $xdb = $self->{xdb};
-        my $cur = $xdb->postlist_begin('');
-        my $end = $xdb->postlist_end('');
-        my $xdir = $self->xdir;
-        for (; $cur != $end; $cur++) {
-                my $docid = $cur->get_docid;
-                $sth->execute($docid);
-                my $x = $sth->fetchrow_array;
-                next if $x > 0;
-                warn "I: removing $xdir #$docid, not in `over'\n";
-                $xdb->delete_document($docid);
-        }
-}
-
 1;