diff options
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 325 |
1 files changed, 194 insertions, 131 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 5b0e4458..1cbf6d23 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # based on notmuch, but with no concept of folders, files # @@ -9,7 +9,8 @@ package PublicInbox::SearchIdx; use strict; use v5.10.1; -use parent qw(PublicInbox::Search PublicInbox::Lock Exporter); +use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask + Exporter); use PublicInbox::Eml; use PublicInbox::Search qw(xap_terms); use PublicInbox::InboxWritable; @@ -18,9 +19,10 @@ use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; -use PublicInbox::Spawn qw(spawn nodatacow_dir); +use PublicInbox::Spawn qw(run_wait popen_rd); use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); use PublicInbox::Address; @@ -30,18 +32,19 @@ our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack my $X = \%PublicInbox::Search::X; our ($DB_CREATE_OR_OPEN, $DB_OPEN); our $DB_NO_SYNC = 0; +our $DB_DANGEROUS = 0; our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : # assume a typical 64-bit system has 8x more RAM than a # typical 32-bit system: (($Config{ptrsize} >= 8 ? 8192 : 1024) * 1024); - use constant DEBUG => !!$ENV{DEBUG}; - +my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/; my $xapianlevels = qr/\A(?:full|medium)\z/; my $hex = '[a-f0-9]'; my $OID = $hex .'{40,}'; -my @VMD_MAP = (kw => 'K', L => 'L'); +my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/; +our $PATCHID_BROKEN; sub new { my ($class, $ibx, $creat, $shard) = @_; @@ -61,6 +64,7 @@ sub new { die("Invalid indexlevel $ibx->{indexlevel}\n"); } } + undef $PATCHID_BROKEN; # retry on new instances in case of upgrades $ibx = PublicInbox::InboxWritable->new($ibx); my $self = PublicInbox::Search->new($ibx); bless $self, $class; @@ -89,7 +93,7 @@ sub new { $self; } -sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels } +sub need_xapian ($) { ($_[0]->{indexlevel} // 'full') =~ $xapianlevels } sub idx_release { my ($self, $wake) = @_; @@ -112,12 +116,15 @@ sub load_xapian_writable () { *sortable_serialise = $xap.'::sortable_serialise'; $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()'); $DB_OPEN = eval($xap.'::DB_OPEN()'); - my $ver = (eval($xap.'::major_version()') << 16) | - (eval($xap.'::minor_version()') << 8) | - eval($xap.'::revision()'); - $DB_NO_SYNC = 0x4 if $ver >= 0x10400; + my $ver = eval 'v'.join('.', eval($xap.'::major_version()'), + eval($xap.'::minor_version()'), + eval($xap.'::revision()')); + if ($ver ge 1.4) { # new flags in Xapian 1.4 + $DB_NO_SYNC = 0x4; + $DB_DANGEROUS = 0x10; + } # Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks - $X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218; + $X->{CLOEXEC_UNSET} = 1 if $ver ge v1.2.21 && $ver le v1.2.24; 1; } @@ -130,6 +137,7 @@ sub idx_acquire { load_xapian_writable(); $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN; } + my $owner = $self->{ibx} // $self->{eidx} // $self; if ($self->{creat}) { require File::Path; $self->lock_acquire; @@ -139,12 +147,15 @@ sub idx_acquire { if (!-d $dir && (!$is_shard || ($is_shard && need_xapian($self)))) { File::Path::mkpath($dir); - nodatacow_dir($dir); - $self->{-set_has_threadid_once} = 1; + require PublicInbox::Syscall; + PublicInbox::Syscall::nodatacow_dir($dir); + # owner == self for CodeSearchIdx + $self->{-set_has_threadid_once} = 1 if $owner != $self; + $flag |= $DB_DANGEROUS if $owner->{-dangerous}; } } return unless defined $flag; - $flag |= $DB_NO_SYNC if ($self->{ibx} // $self->{eidx})->{-no_fsync}; + $flag |= $DB_NO_SYNC if $owner->{-no_fsync}; my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) }; croak "Failed opening $dir: $@" if $@; $self->{xdb} = $xdb; @@ -169,9 +180,8 @@ sub term_generator ($) { # write-only sub index_phrase ($$$$) { my ($self, $text, $wdf_inc, $prefix) = @_; - my $tg = term_generator($self); - $tg->index_text($text, $wdf_inc, $prefix); - $tg->increase_termpos; + term_generator($self)->index_text($text, $wdf_inc, $prefix); + $self->{term_generator}->increase_termpos; } sub index_text ($$$$) { @@ -180,8 +190,8 @@ sub index_text ($$$$) { if ($self->{indexlevel} eq 'full') { index_phrase($self, $text, $wdf_inc, $prefix); } else { - my $tg = term_generator($self); - $tg->index_text_without_positions($text, $wdf_inc, $prefix); + term_generator($self)->index_text_without_positions( + $text, $wdf_inc, $prefix); } } @@ -228,8 +238,8 @@ sub index_old_diff_fn { # no renames or space support for traditional diffs, # find the number of leading common paths to strip: - my @fa = split('/', $fa); - my @fb = split('/', $fb); + my @fa = split(m'/', $fa); + my @fb = split(m'/', $fb); while (scalar(@fa) && scalar(@fb)) { $fa = join('/', @fa); $fb = join('/', @fb); @@ -249,37 +259,59 @@ sub index_diff ($$$) { my ($self, $txt, $doc) = @_; my %seen; my $in_diff; - my @xnq; - my $xnq = \@xnq; - foreach (split(/\n/, $txt)) { - if ($in_diff && s/^ //) { # diff context + my $xnq = []; + my @l = split(/\n/, $$txt); + undef $$txt; + while (defined($_ = shift @l)) { + if ($in_diff && /^GIT binary patch/) { + push @$xnq, $_; + while (@l && $l[0] =~ /^(?:literal|delta) /) { + # TODO allow searching by size range? + # allows searching by exact size via: + # "literal $SIZE" or "delta $SIZE" + push @$xnq, shift(@l); + + # skip base85 and empty lines + while (@l && ($l[0] =~ /\A$BASE85\h*\z/o || + $l[0] !~ /\S/)) { + shift @l; + } + # loop hits trailing "literal 0\nHcmV?d00001\n" + } + } elsif ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins $in_diff = undef; - } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) { - # wait until "---" and "+++" to capture filenames + } elsif (m!^diff --git ("?[^/]+/.+) ("?[^/]+/.+)\z!) { + # capture filenames here for binary diffs: + my ($fa, $fb) = ($1, $2); + push @$xnq, $_; $in_diff = 1; + $fa = (split(m'/', git_unquote($fa), 2))[1]; + $fb = (split(m'/', git_unquote($fb), 2))[1]; + $seen{$fa}++ or index_diff_inc($self, $fa, 'XDFN', $xnq); + $seen{$fb}++ or index_diff_inc($self, $fb, 'XDFN', $xnq); # traditional diff: } elsif (m/^diff -(.+) (\S+) (\S+)$/) { my ($opt, $fa, $fb) = ($1, $2, $3); - push @xnq, $_; + push @$xnq, $_; # only support unified: next unless $opt =~ /[uU]/; $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb, $xnq); } elsif (m!^--- ("?[^/]+/.+)!) { my $fn = $1; - $fn = (split('/', git_unquote($fn), 2))[1]; + $fn = (split(m'/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (m!^\+\+\+ ("?[^/]+/.+)!) { my $fn = $1; - $fn = (split('/', git_unquote($fn), 2))[1]; + $fn = (split(m'/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (/^--- (\S+)/) { - $in_diff = $1; - push @xnq, $_; + $in_diff = $1; # old diff filename + push @$xnq, $_; } elsif (defined $in_diff && /^\+\+\+ (\S+)/) { $in_diff = index_old_diff_fn($self, \%seen, $in_diff, $1, $xnq); @@ -305,19 +337,65 @@ sub index_diff ($$$) { /^(?:dis)?similarity index / || /^\\ No newline at end of file/ || /^Binary files .* differ/) { - push @xnq, $_; + push @$xnq, $_; } elsif ($_ eq '') { # possible to be in diff context, some mail may be # stripped by MUA or even GNU diff(1). "git apply" # treats a bare "\n" as diff context, too } else { - push @xnq, $_; + push @$xnq, $_; warn "non-diff line: $_\n" if DEBUG && $_ ne ''; $in_diff = undef; } } - index_text($self, join("\n", @xnq), 1, 'XNQ'); + index_text($self, join("\n", @$xnq), 1, 'XNQ'); +} + +sub index_body_text { + my ($self, $doc, $sref) = @_; + my $rd; + # start patch-id in parallel + if ($$sref =~ /^(?:diff|---|\+\+\+) /ms && !$PATCHID_BROKEN) { + my $git = ($self->{ibx} // $self->{eidx} // $self)->git; + my $fh = PublicInbox::IO::write_file '+>:utf8', undef, $$sref; + $fh->flush or die "flush: $!"; + sysseek($fh, 0, SEEK_SET); + $rd = popen_rd($git->cmd(qw(patch-id --stable)), undef, + { 0 => $fh }); + } + + # split off quoted and unquoted blocks: + my @sections = PublicInbox::MsgIter::split_quotes($$sref); + undef $$sref; # free memory + for my $txt (@sections) { + if ($txt =~ /\A>/) { + if ($txt =~ /^[>\t ]+GIT binary patch\r?/sm) { + # get rid of Base-85 noise + $txt =~ s/^([>\h]+(?:literal|delta) + \x20[0-9]+\r?\n) + (?:[>\h]+$BASE85\h*\r?\n)+/$1/gsmx; + } + index_text($self, $txt, 0, 'XQUOT'); + } else { # does it look like a diff? + if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { + index_diff($self, \$txt, $doc); + } else { + index_text($self, $txt, 1, 'XNQ'); + } + } + undef $txt; # free memory + } + if (defined $rd) { # reap `git patch-id' + (readline($rd) // '') =~ /\A([a-f0-9]{40,})/ and + $doc->add_term('XDFID'.$1); + if (!$rd->close) { + my $c = 'git patch-id --stable'; + $PATCHID_BROKEN = ($? >> 8) == 129; + $PATCHID_BROKEN ? warn("W: $c requires git v2.1.0+\n") + : warn("W: $c failed: \$?=$? (non-fatal)"); + } + } } sub index_xapian { # msg_iter callback @@ -339,23 +417,7 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; $_[0]->[0] = $part = undef; # free memory - - # split off quoted and unquoted blocks: - my @sections = PublicInbox::MsgIter::split_quotes($s); - undef $s; # free memory - for my $txt (@sections) { - if ($txt =~ /\A>/) { - index_text($self, $txt, 0, 'XQUOT'); - } else { - # does it look like a diff? - if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - index_diff($self, $txt, $doc); - } else { - index_text($self, $txt, 1, 'XNQ'); - } - } - undef $txt; # free memory - } + index_body_text($self, $doc, \$s); } sub index_list_id ($$$) { @@ -363,6 +425,7 @@ sub index_list_id ($$$) { for my $l ($hdr->header_raw('List-Id')) { $l =~ /<([^>]+)>/ or next; my $lid = lc $1; + $lid =~ tr/\n\t\r\0//d; # same rules as Message-ID $doc->add_boolean_term('G' . $lid); index_phrase($self, $lid, 1, 'XL'); # probabilistic } @@ -398,8 +461,7 @@ sub eml2doc ($$$;$) { add_val($doc, PublicInbox::Search::UID(), $smsg->{num}); add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid}); - my $tg = term_generator($self); - $tg->set_document($doc); + term_generator($self)->set_document($doc); index_headers($self, $smsg); if (defined(my $eidx_key = $smsg->{eidx_key})) { @@ -409,7 +471,7 @@ sub eml2doc ($$$;$) { index_ids($self, $doc, $eml, $mids); # by default, we maintain compatibility with v1.5.0 and earlier - # by writing to docdata.glass, users who never exect to downgrade can + # by writing to docdata.glass, users who never expect to downgrade can # use --skip-docdata if (!$self->{-skip_docdata}) { # WWW doesn't need {to} or {cc}, only NNTP @@ -451,10 +513,9 @@ sub add_xapian ($$$$) { sub _msgmap_init ($) { my ($self) = @_; die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1; - $self->{mm} //= eval { + $self->{mm} //= do { require PublicInbox::Msgmap; - my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1; - PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw); + PublicInbox::Msgmap->new_file($self->{ibx}, 1); }; } @@ -497,9 +558,7 @@ sub add_message { sub _get_doc ($$) { my ($self, $docid) = @_; - my $doc = eval { $self->{xdb}->get_document($docid) }; - $doc // do { - warn "E: $@\n" if $@; + $self->get_doc($docid) // do { warn "E: #$docid missing in Xapian\n"; undef; } @@ -518,6 +577,12 @@ sub add_eidx_info { $self->{xdb}->replace_document($docid, $doc); } +sub get_terms { + my ($self, $pfx, $docid) = @_; + begin_txn_lazy($self); + xap_terms($pfx, $self->{xdb}, $docid); +} + sub remove_eidx_info { my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); @@ -551,17 +616,16 @@ sub set_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; - my ($end, @rm, @add); + my ($v, @rm, @add); my @x = @VMD_MAP; + my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end); while (my ($field, $pfx) = splice(@x, 0, 2)) { my $set = $vmd->{$field} // next; my %keep = map { $_ => 1 } @$set; my %add = %keep; - $end //= $doc->termlist_end; - for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { - $cur->skip_to($pfx); - last if $cur == $end; - my $v = $cur->get_termname; + $cur->skip_to($pfx); # works due to @VMD_MAP order + for (; $cur != $end; $cur++) { + $v = $cur->get_termname; $v =~ s/\A$pfx//s or next; $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v); } @@ -637,18 +701,27 @@ sub update_vmd { sub xdb_remove { my ($self, @docids) = @_; - $self->begin_txn_lazy; - my $xdb = $self->{xdb} or return; + begin_txn_lazy($self); + my $xdb = $self->{xdb} // die 'BUG: missing {xdb}'; for my $docid (@docids) { eval { $xdb->delete_document($docid) }; - warn "E: #$docid not in in Xapian? $@\n" if $@; + warn "E: #$docid not in Xapian? $@\n" if $@; } } +sub xdb_remove_quiet { + my ($self, $docid) = @_; + begin_txn_lazy($self); + my $xdb = $self->{xdb} // die 'BUG: missing {xdb}'; + eval { $xdb->delete_document($docid) }; + ++$self->{-quiet_rm} unless $@; +} + +sub nr_quiet_rm { delete($_[0]->{-quiet_rm}) // 0 } + sub index_git_blob_id { my ($doc, $pfx, $objid) = @_; - my $len = length($objid); for (my $len = length($objid); $len >= 7; ) { $doc->add_term($pfx.$objid); $objid = substr($objid, 0, --$len); @@ -742,7 +815,8 @@ sub unindex_both { # git->cat_async callback sub with_umask { my $self = shift; - ($self->{ibx} // $self->{eidx})->with_umask(@_); + my $owner = $self->{ibx} // $self->{eidx}; + $owner ? $owner->with_umask(@_) : $self->SUPER::with_umask(@_) } # called by public-inbox-index @@ -750,7 +824,8 @@ sub index_sync { my ($self, $opt) = @_; delete $self->{lock_path} if $opt->{-skip_lock}; $self->with_umask(\&_index_sync, $self, $opt); - if ($opt->{reindex} && !$opt->{quit}) { + if ($opt->{reindex} && !$opt->{quit} && + !grep(defined, @$opt{qw(since until)})) { my %again = %$opt; delete @again{qw(rethread reindex)}; index_sync($self, \%again); @@ -759,10 +834,10 @@ sub index_sync { } sub check_size { # check_async cb for -index --max-size=... - my ($oid, $type, $size, $arg, $git) = @_; - (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}"; + my (undef, $oid, $type, $size, $arg) = @_; + ($type // '') eq 'blob' or die "E: bad $oid in $arg->{git}->{git_dir}"; if ($size <= $arg->{max_size}) { - $git->cat_async($oid, $arg->{index_oid}, $arg); + $arg->{git}->cat_async($oid, $arg->{index_oid}, $arg); } else { warn "W: skipping $oid ($size > $arg->{max_size})\n"; } @@ -775,18 +850,19 @@ sub v1_checkpoint ($$;$) { # $newest may be undef my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}}; if (defined($newest)) { - my $cur = $self->{mm}->last_commit || ''; - if (need_update($self, $cur, $newest)) { + my $cur = $self->{mm}->last_commit; + if (need_update($self, $sync, $cur, $newest)) { $self->{mm}->last_commit($newest); } } ${$sync->{max}} = $self->{batch_bytes}; $self->{mm}->{dbh}->commit; + eval { $self->{mm}->{dbh}->do('PRAGMA optimize') }; my $xdb = $self->{xdb}; if ($newest && $xdb) { my $cur = $xdb->get_metadata('last_commit'); - if (need_update($self, $cur, $newest)) { + if (need_update($self, $sync, $cur, $newest)) { $xdb->set_metadata('last_commit', $newest); } } @@ -843,6 +919,7 @@ sub process_stack { $arg->{autime} = $at; $arg->{cotime} = $ct; if ($sync->{max_size}) { + $arg->{git} = $git; $git->check_async($oid, \&check_size, $arg); } else { $git->cat_async($oid, \&index_both, $arg); @@ -870,23 +947,28 @@ sub log2stack ($$$) { # Count the new files so they can be added newest to oldest # and still have numbers increasing from oldest to newest - my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H - --no-notes --no-color --no-renames --no-abbrev), - $range); - my ($at, $ct, $stk, $cmt); - while (<$fh>) { + my @cmd = qw(log --raw -r --pretty=tformat:%at-%ct-%H + --no-notes --no-color --no-renames --no-abbrev); + for my $k (qw(since until)) { + my $v = $sync->{-opt}->{$k} // next; + next if !$sync->{-opt}->{reindex}; + push @cmd, "--$k=$v"; + } + my $fh = $git->popen(@cmd, $range); + my ($at, $ct, $stk, $cmt, $l); + while (defined($l = <$fh>)) { return if $sync->{quit}; - if (/\A([0-9]+)-([0-9]+)-($OID)$/o) { + if ($l =~ /\A([0-9]+)-([0-9]+)-($OID)$/o) { ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3); $stk //= PublicInbox::IdxStack->new($cmt); - } elsif (/$del/) { + } elsif ($l =~ /$del/) { my $oid = $1; if ($D) { # reindex case $D->{pack('H*', $oid)}++; } else { # non-reindex case: $stk->push_rec('d', $at, $ct, $oid, $cmt); } - } elsif (/$add/) { + } elsif ($l =~ /$add/) { my $oid = $1; if ($D) { my $oid_bin = pack('H*', $oid); @@ -898,7 +980,7 @@ sub log2stack ($$$) { $stk->push_rec('m', $at, $ct, $oid, $cmt); } } - close $fh or die "git log failed: \$?=$?"; + $fh->close or die "git log failed: \$?=$?"; $stk //= PublicInbox::IdxStack->new; $stk->read_prepare; } @@ -923,15 +1005,20 @@ sub is_ancestor ($$$) { return 0 unless $git->check($cur); my $cmd = [ 'git', "--git-dir=$git->{git_dir}", qw(merge-base --is-ancestor), $cur, $tip ]; - my $pid = spawn($cmd); - waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; - $? == 0; + run_wait($cmd) == 0; } -sub need_update ($$$) { - my ($self, $cur, $new) = @_; +sub need_update ($$$$) { + my ($self, $sync, $cur, $new) = @_; my $git = $self->{ibx}->git; - return 1 if $cur && !is_ancestor($git, $cur, $new); + $cur //= ''; # XS Search::Xapian ->get_metadata doesn't give undef + + # don't rewind if --{since,until,before,after} are in use + return if $cur ne '' && + grep(defined, @{$sync->{-opt}}{qw(since until)}) && + is_ancestor($git, $new, $cur); + + return 1 if $cur ne '' && !is_ancestor($git, $cur, $new); my $range = $cur eq '' ? $new : "$cur..$new"; chomp(my $n = $git->qx(qw(rev-list --count), $range)); ($n eq '' || $n > 0); @@ -979,7 +1066,11 @@ sub _index_sync { my $ibx = $self->{ibx}; local $self->{current_info} = "$ibx->{inboxdir}"; $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES; - $ibx->git->batch_prepare; + + if ($X->{CLOEXEC_UNSET}) { + $ibx->git->cat_file($tip); + $ibx->git->check($tip); + } my $pr = $opt->{-progress}; my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx }; my $quit = quit_cb($sync); @@ -1015,8 +1106,10 @@ sub DESTROY { $_[0]->{lockfh} = undef; } -sub _begin_txn { +sub begin_txn_lazy { my ($self) = @_; + return if $self->{txn}; + my $restore = $self->with_umask; my $xdb = $self->{xdb} || idx_acquire($self); $self->{oidx}->begin_lazy if $self->{oidx}; $xdb->begin_transaction if $xdb; @@ -1024,13 +1117,8 @@ sub _begin_txn { $xdb; } -sub begin_txn_lazy { - my ($self) = @_; - $self->with_umask(\&_begin_txn, $self) if !$self->{txn}; -} - # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard) -# This metadata is read by Admin::detect_indexlevel: +# This metadata is read by InboxWritable->detect_indexlevel: sub set_metadata_once { my ($self) = @_; @@ -1052,8 +1140,10 @@ sub set_metadata_once { } } -sub _commit_txn { +sub commit_txn_lazy { my ($self) = @_; + return unless delete($self->{txn}); + my $restore = $self->with_umask; if (my $eidx = $self->{eidx}) { $eidx->git->async_wait_all; $eidx->{transact_bytes} = 0; @@ -1065,12 +1155,6 @@ sub _commit_txn { $self->{oidx}->commit_lazy if $self->{oidx}; } -sub commit_txn_lazy { - my ($self) = @_; - delete($self->{txn}) and - $self->with_umask(\&_commit_txn, $self); -} - sub eidx_shard_new { my ($class, $eidx, $shard) = @_; my $self = bless { @@ -1085,25 +1169,4 @@ sub eidx_shard_new { $self; } -# ensure there's no stale Xapian docs by treating $over as canonical -sub over_check { - my ($self, $over) = @_; - begin_txn_lazy($self); - my $sth = $over->dbh->prepare(<<''); -SELECT COUNT(*) FROM over WHERE num = ? - - my $xdb = $self->{xdb}; - my $cur = $xdb->postlist_begin(''); - my $end = $xdb->postlist_end(''); - my $xdir = $self->xdir; - for (; $cur != $end; $cur++) { - my $docid = $cur->get_docid; - $sth->execute($docid); - my $x = $sth->fetchrow_array; - next if $x > 0; - warn "I: removing $xdir #$docid, not in `over'\n"; - $xdb->delete_document($docid); - } -} - 1; |