about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Admin.pm3
-rw-r--r--lib/PublicInbox/ExtSearch.pm7
-rw-r--r--lib/PublicInbox/Git.pm11
-rw-r--r--lib/PublicInbox/Inbox.pm67
-rw-r--r--lib/PublicInbox/Search.pm16
5 files changed, 53 insertions, 51 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 20964f9c..dcf17cf5 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -198,8 +198,7 @@ sub resolve_inboxes ($;$$) {
         $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
 }
 
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
+my @base_mod = ();
 my @over_mod = qw(DBD::SQLite DBI);
 my %mod_groups = (
         -index => [ @base_mod, @over_mod ],
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index cd7cba2e..7520e71c 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -112,6 +112,11 @@ sub description {
                 '$EXTINDEX_DIR/description missing';
 }
 
+sub search {
+        PublicInbox::Inbox::_cleanup_later($_[0]);
+        $_[0];
+}
+
 no warnings 'once';
 *base_url = \&PublicInbox::Inbox::base_url;
 *smsg_eml = \&PublicInbox::Inbox::smsg_eml;
@@ -121,6 +126,6 @@ no warnings 'once';
 *recent = \&PublicInbox::Inbox::recent;
 
 *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
-*isrch = *search = \&PublicInbox::Search::reopen;
+*isrch = \&search;
 
 1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index cf51239f..3c577ab3 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -400,7 +400,7 @@ sub cleanup {
         delete $self->{inflight_c};
         _destroy($self, qw(cat_rbuf in out pid));
         _destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
-        !!($self->{pid} || $self->{pid_c});
+        defined($self->{pid}) || defined($self->{pid_c});
 }
 
 
@@ -523,18 +523,25 @@ sub manifest_entry {
         $ent;
 }
 
+# returns true if there are pending cat-file processes
 sub cleanup_if_unlinked {
         my ($self) = @_;
         return cleanup($self) if $^O ne 'linux';
         # Linux-specific /proc/$PID/maps access
         # TODO: support this inside git.git
+        my $ret = 0;
         for my $fld (qw(pid pid_c)) {
                 my $pid = $self->{$fld} // next;
-                open my $fh, '<', "/proc/$pid/maps" or next;
+                open my $fh, '<', "/proc/$pid/maps" or return cleanup($self);
                 while (<$fh>) {
+                        # n.b. we do not restart for unlinked multi-pack-index
+                        # since it's not too huge, and the startup cost may
+                        # be higher.
                         return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/;
                 }
+                ++$ret;
         }
+        $ret;
 }
 
 1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 02ffc7be..c0962af9 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -16,70 +16,47 @@ use Carp qw(croak);
 # admin will attempt to replace them atomically after compact/vacuum
 # and we need to be prepared for that.
 my $cleanup_timer;
-my $cleanup_avail = -1; # 0, or 1
-my $have_devel_peek;
 my $CLEANUP = {}; # string(inbox) -> inbox
 
 sub git_cleanup ($) {
         my ($self) = @_;
-        if ($self->isa(__PACKAGE__)) {
-                # normal Inbox; low startup cost, and likely to have many
-                # many so this keeps process/pipe counts in check
-                $self->{git}->cleanup if $self->{git};
-        } else {
-                # ExtSearch, high startup cost if ->ALL, and probably
-                # only one per-daemon, so teardown only if required:
-                $self->git->cleanup_if_unlinked;
-        }
+        my $git = $self->{git} // return undef;
+        # normal inboxes have low startup cost and there may be many, so
+        # keep process+pipe counts in check.  ExtSearch may have high startup
+        # cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only
+        # if there's unlinked files
+        my $live = $self->isa(__PACKAGE__) ? $git->cleanup
+                                        : $git->cleanup_if_unlinked;
+        delete($self->{git}) unless $live;
+        $live;
 }
 
+# returns true if further checking is required
+sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef }
+
 sub cleanup_task () {
         $cleanup_timer = undef;
         my $next = {};
         for my $ibx (values %$CLEANUP) {
-                my $again;
-                if ($have_devel_peek) {
-                        foreach my $f (qw(search)) {
-                                # we bump refcnt by assigning tmp, here:
-                                my $tmp = $ibx->{$f} or next;
-                                next if Devel::Peek::SvREFCNT($tmp) > 2;
-                                delete $ibx->{$f};
-                                # refcnt is zero when tmp is out-of-scope
-                        }
-                }
-                git_cleanup($ibx);
-                if (my $gits = $ibx->{-repo_objs}) {
-                        foreach my $git (@$gits) {
-                                $again = 1 if $git->cleanup;
-                        }
+                my $again = git_cleanup($ibx);
+                $ibx->cleanup_shards and $again = 1;
+                for my $git (@{$ibx->{-repo_objs}}) {
+                        $again = 1 if $git->cleanup;
                 }
                 check_inodes($ibx);
-                if ($have_devel_peek) {
-                        $again ||= !!$ibx->{search};
-                }
                 $next->{"$ibx"} = $ibx if $again;
         }
         $CLEANUP = $next;
+        $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
 }
 
-sub cleanup_possible () {
+sub _cleanup_later ($) {
         # no need to require DS, here, if it were enabled another
         # module would've require'd it, already
-        eval { PublicInbox::DS::in_loop() } or return 0;
-
-        eval {
-                require Devel::Peek; # needs separate package in Fedora
-                $have_devel_peek = 1;
-        };
-        1;
-}
-
-sub _cleanup_later ($) {
-        my ($self) = @_;
-        $cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
-        return if $cleanup_avail != 1;
-        $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
-        $CLEANUP->{"$self"} = $self;
+        if (eval { PublicInbox::DS::in_loop() }) {
+                $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
+                $CLEANUP->{"$_[0]"} = $_[0]; # $self
+        }
 }
 
 sub _set_limiter ($$$) {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 1d1cd2f5..d285c11c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -199,7 +199,7 @@ sub xdb_shards_flat ($) {
         my (@xdb, $slow_phrase);
         load_xapian();
         $self->{qp_flags} //= $QP_FLAGS;
-        if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+        if ($xpfx =~ m!/xapian[0-9]+\z!) {
                 @xdb = ($X{Database}->new($xpfx));
                 $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
         } else {
@@ -243,6 +243,20 @@ sub xdb ($) {
         };
 }
 
+# returns true if a future rescan is desired
+sub cleanup_shards {
+        my ($self) = @_;
+        return unless exists($self->{xdb});
+        my $xpfx = $self->{xpfx};
+        return reopen($self) if $xpfx =~ m!/xapian[0-9]+\z!; # true
+        opendir(my $dh, $xpfx) or return warn("$xpfx gone: $!\n"); # true
+        my $nr = grep(/\A[0-9]+\z/, readdir($dh)) or
+                return warn("$xpfx has no shards\n"); # true
+        return reopen($self) if $nr == ($self->{nshard} // -1);
+        delete($self->{xdb});
+        undef;
+}
+
 sub new {
         my ($class, $ibx) = @_;
         ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";