diff options
-rwxr-xr-x | ci/deps.perl | 5 | ||||
-rw-r--r-- | lib/PublicInbox/Admin.pm | 3 | ||||
-rw-r--r-- | lib/PublicInbox/ExtSearch.pm | 7 | ||||
-rw-r--r-- | lib/PublicInbox/Git.pm | 11 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 67 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 16 | ||||
-rw-r--r-- | t/extsearch.t | 15 |
7 files changed, 68 insertions, 56 deletions
diff --git a/ci/deps.perl b/ci/deps.perl index a797911a..ae85986d 100755 --- a/ci/deps.perl +++ b/ci/deps.perl @@ -17,7 +17,6 @@ my $profiles = { essential => [ qw( git perl - Devel::Peek Digest::SHA Encode ExtUtils::MakeMaker @@ -79,10 +78,6 @@ my $non_auto = { pkg => 'p5-TimeDate', rpm => 'perl-TimeDate', }, - 'Devel::Peek' => { - deb => 'perl', # libperl5.XX, but the XX varies - pkg => 'perl5', - }, 'Digest::SHA' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 20964f9c..dcf17cf5 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -198,8 +198,7 @@ sub resolve_inboxes ($;$$) { $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs; } -# TODO: make Devel::Peek optional, only used for daemon -my @base_mod = qw(Devel::Peek); +my @base_mod = (); my @over_mod = qw(DBD::SQLite DBI); my %mod_groups = ( -index => [ @base_mod, @over_mod ], diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index cd7cba2e..7520e71c 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -112,6 +112,11 @@ sub description { '$EXTINDEX_DIR/description missing'; } +sub search { + PublicInbox::Inbox::_cleanup_later($_[0]); + $_[0]; +} + no warnings 'once'; *base_url = \&PublicInbox::Inbox::base_url; *smsg_eml = \&PublicInbox::Inbox::smsg_eml; @@ -121,6 +126,6 @@ no warnings 'once'; *recent = \&PublicInbox::Inbox::recent; *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef -*isrch = *search = \&PublicInbox::Search::reopen; +*isrch = \&search; 1; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index cf51239f..3c577ab3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -400,7 +400,7 @@ sub cleanup { delete $self->{inflight_c}; _destroy($self, qw(cat_rbuf in out pid)); _destroy($self, qw(chk_rbuf in_c out_c pid_c err_c)); - !!($self->{pid} || $self->{pid_c}); + defined($self->{pid}) || defined($self->{pid_c}); } @@ -523,18 +523,25 @@ sub manifest_entry { $ent; } +# returns true if there are pending cat-file processes sub cleanup_if_unlinked { my ($self) = @_; return cleanup($self) if $^O ne 'linux'; # Linux-specific /proc/$PID/maps access # TODO: support this inside git.git + my $ret = 0; for my $fld (qw(pid pid_c)) { my $pid = $self->{$fld} // next; - open my $fh, '<', "/proc/$pid/maps" or next; + open my $fh, '<', "/proc/$pid/maps" or return cleanup($self); while (<$fh>) { + # n.b. we do not restart for unlinked multi-pack-index + # since it's not too huge, and the startup cost may + # be higher. return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/; } + ++$ret; } + $ret; } 1; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 02ffc7be..c0962af9 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -16,70 +16,47 @@ use Carp qw(croak); # admin will attempt to replace them atomically after compact/vacuum # and we need to be prepared for that. my $cleanup_timer; -my $cleanup_avail = -1; # 0, or 1 -my $have_devel_peek; my $CLEANUP = {}; # string(inbox) -> inbox sub git_cleanup ($) { my ($self) = @_; - if ($self->isa(__PACKAGE__)) { - # normal Inbox; low startup cost, and likely to have many - # many so this keeps process/pipe counts in check - $self->{git}->cleanup if $self->{git}; - } else { - # ExtSearch, high startup cost if ->ALL, and probably - # only one per-daemon, so teardown only if required: - $self->git->cleanup_if_unlinked; - } + my $git = $self->{git} // return undef; + # normal inboxes have low startup cost and there may be many, so + # keep process+pipe counts in check. ExtSearch may have high startup + # cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only + # if there's unlinked files + my $live = $self->isa(__PACKAGE__) ? $git->cleanup + : $git->cleanup_if_unlinked; + delete($self->{git}) unless $live; + $live; } +# returns true if further checking is required +sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef } + sub cleanup_task () { $cleanup_timer = undef; my $next = {}; for my $ibx (values %$CLEANUP) { - my $again; - if ($have_devel_peek) { - foreach my $f (qw(search)) { - # we bump refcnt by assigning tmp, here: - my $tmp = $ibx->{$f} or next; - next if Devel::Peek::SvREFCNT($tmp) > 2; - delete $ibx->{$f}; - # refcnt is zero when tmp is out-of-scope - } - } - git_cleanup($ibx); - if (my $gits = $ibx->{-repo_objs}) { - foreach my $git (@$gits) { - $again = 1 if $git->cleanup; - } + my $again = git_cleanup($ibx); + $ibx->cleanup_shards and $again = 1; + for my $git (@{$ibx->{-repo_objs}}) { + $again = 1 if $git->cleanup; } check_inodes($ibx); - if ($have_devel_peek) { - $again ||= !!$ibx->{search}; - } $next->{"$ibx"} = $ibx if $again; } $CLEANUP = $next; + $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); } -sub cleanup_possible () { +sub _cleanup_later ($) { # no need to require DS, here, if it were enabled another # module would've require'd it, already - eval { PublicInbox::DS::in_loop() } or return 0; - - eval { - require Devel::Peek; # needs separate package in Fedora - $have_devel_peek = 1; - }; - 1; -} - -sub _cleanup_later ($) { - my ($self) = @_; - $cleanup_avail = cleanup_possible() if $cleanup_avail < 0; - return if $cleanup_avail != 1; - $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); - $CLEANUP->{"$self"} = $self; + if (eval { PublicInbox::DS::in_loop() }) { + $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); + $CLEANUP->{"$_[0]"} = $_[0]; # $self + } } sub _set_limiter ($$$) { diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 1d1cd2f5..d285c11c 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -199,7 +199,7 @@ sub xdb_shards_flat ($) { my (@xdb, $slow_phrase); load_xapian(); $self->{qp_flags} //= $QP_FLAGS; - if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + if ($xpfx =~ m!/xapian[0-9]+\z!) { @xdb = ($X{Database}->new($xpfx)); $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; } else { @@ -243,6 +243,20 @@ sub xdb ($) { }; } +# returns true if a future rescan is desired +sub cleanup_shards { + my ($self) = @_; + return unless exists($self->{xdb}); + my $xpfx = $self->{xpfx}; + return reopen($self) if $xpfx =~ m!/xapian[0-9]+\z!; # true + opendir(my $dh, $xpfx) or return warn("$xpfx gone: $!\n"); # true + my $nr = grep(/\A[0-9]+\z/, readdir($dh)) or + return warn("$xpfx has no shards\n"); # true + return reopen($self) if $nr == ($self->{nshard} // -1); + delete($self->{xdb}); + undef; +} + sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; diff --git a/t/extsearch.t b/t/extsearch.t index ad4f2c6d..6c074022 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -436,12 +436,27 @@ for my $j (1, 3, 6) { SKIP: { my $d = "$home/extindex-j1"; + my $es = PublicInbox::ExtSearch->new($d); + ok(my $nresult0 = $es->mset('z:0..')->size, 'got results'); + ok(ref($es->{xdb}), '{xdb} created'); + my $nshards1 = $es->{nshard}; + is($nshards1, 1, 'correct shard count'); + my $xdb_str = "$es->{xdb}"; + ok($es->cleanup_shards, 'cleanup_shards noop'); + is("$es->{xdb}", $xdb_str, '{xdb} unchanged'); + my $o = { 2 => \(my $err = '') }; ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4'); my @dirs = glob("$d/ei*/?"); for my $i (0..3) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created"); } + is($es->cleanup_shards, undef, 'cleanup_shards cleaned'); + ok(!defined($es->{xdb}), 'old {xdb} gone'); + is($es->cleanup_shards, undef, 'cleanup_shards clean idempotent'); + is($es->mset('z:0..')->size, $nresult0, 'new shards, same results'); + ok($es->cleanup_shards, 'cleanup_shards true after open'); + for my $i (4..5) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); } |