From 7e5cea05f061e757f36b3eb9abcd285425365224 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 28 Sep 2021 23:11:05 +0000 Subject: inbox: rewrite cleanup to be more aggressive Avoid relying on a giant cleanup hash and instead use the new DS->add_uniq_timer API to amortize the pause times associated with having to cleanup many inboxes. We can also use smaller intervals for this, as well. We now discard SQLite DB handles at cleanup. Each of these can use several megabytes of memory, which adds up with hundreds/thousands of inboxes. Since per-inbox access intervals are unpredictable and opening an SQLite handle is relatively inexpensive, release memory more aggressively to avoid the heap having to hit swap. --- lib/PublicInbox/Git.pm | 12 +++++++----- lib/PublicInbox/Inbox.pm | 41 ++++++++++++++--------------------------- 2 files changed, 21 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 3c577ab3..d5b1d39d 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -392,8 +392,10 @@ sub async_wait_all ($) { # returns true if there are pending "git cat-file" processes sub cleanup { - my ($self) = @_; + my ($self, $lazy) = @_; local $in_cleanup = 1; + return 1 if $lazy && (scalar(@{$self->{inflight_c} // []}) || + scalar(@{$self->{inflight} // []})); delete $self->{async_cat}; async_wait_all($self); delete $self->{inflight}; @@ -403,7 +405,6 @@ sub cleanup { defined($self->{pid}) || defined($self->{pid_c}); } - # assuming a well-maintained repo, this should be a somewhat # accurate estimation of its size # TODO: show this in the WWW UI as a hint to potential cloners @@ -526,18 +527,19 @@ sub manifest_entry { # returns true if there are pending cat-file processes sub cleanup_if_unlinked { my ($self) = @_; - return cleanup($self) if $^O ne 'linux'; + return cleanup($self, 1) if $^O ne 'linux'; # Linux-specific /proc/$PID/maps access # TODO: support this inside git.git my $ret = 0; for my $fld (qw(pid pid_c)) { my $pid = $self->{$fld} // next; - open my $fh, '<', "/proc/$pid/maps" or return cleanup($self); + open my $fh, '<', "/proc/$pid/maps" or return cleanup($self, 1); while (<$fh>) { # n.b. we do not restart for unlinked multi-pack-index # since it's not too huge, and the startup cost may # be higher. - return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/; + /\.(?:idx|pack) \(deleted\)$/ and + return cleanup($self, 1); } ++$ret; } diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 1d5fc708..04b7d764 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -10,14 +10,6 @@ use PublicInbox::Eml; use List::Util qw(max); use Carp qw(croak); -# Long-running "git-cat-file --batch" processes won't notice -# unlinked packs, so we need to restart those processes occasionally. -# Xapian and SQLite file handles are mostly stable, but sometimes an -# admin will attempt to replace them atomically after compact/vacuum -# and we need to be prepared for that. -my $cleanup_timer; -my $CLEANUP = {}; # string(inbox) -> inbox - sub git_cleanup ($) { my ($self) = @_; my $git = $self->{git} // return undef; @@ -25,7 +17,7 @@ sub git_cleanup ($) { # keep process+pipe counts in check. ExtSearch may have high startup # cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only # if there's unlinked files - my $live = $self->isa(__PACKAGE__) ? $git->cleanup + my $live = $self->isa(__PACKAGE__) ? $git->cleanup(1) : $git->cleanup_if_unlinked; delete($self->{git}) unless $live; $live; @@ -34,29 +26,22 @@ sub git_cleanup ($) { # returns true if further checking is required sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef } -sub cleanup_task () { - $cleanup_timer = undef; - my $next = {}; - for my $ibx (values %$CLEANUP) { - my $again = git_cleanup($ibx); - $ibx->cleanup_shards and $again = 1; - for my $git (@{$ibx->{-repo_objs}}) { - $again = 1 if $git->cleanup; - } - check_inodes($ibx); - $next->{"$ibx"} = $ibx if $again; +sub do_cleanup { + my ($ibx) = @_; + my $live = git_cleanup($ibx); + $ibx->cleanup_shards and $live = 1; + for my $git (@{$ibx->{-repo_objs}}) { + $live = 1 if $git->cleanup(1); } - $CLEANUP = $next; - $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); + delete @$ibx{qw(over mm)}; + PublicInbox::DS::add_uniq_timer($ibx+0, 5, \&do_cleanup, $ibx) if $live; } sub _cleanup_later ($) { # no need to require DS, here, if it were enabled another # module would've require'd it, already - if (eval { PublicInbox::DS::in_loop() }) { - $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); - $CLEANUP->{"$_[0]"} = $_[0]; # $self - } + eval { PublicInbox::DS::in_loop() } and + PublicInbox::DS::add_uniq_timer($_[0]+0, 30, \&do_cleanup, @_) } sub _set_limiter ($$$) { @@ -156,6 +141,7 @@ sub mm { my ($self, $req) = @_; $self->{mm} //= eval { require PublicInbox::Msgmap; + _cleanup_later($self); my $dir = $self->{inboxdir}; if ($self->version >= 2) { PublicInbox::Msgmap->new_file("$dir/msgmap.sqlite3"); @@ -186,6 +172,7 @@ sub over { require PublicInbox::Search; PublicInbox::Search->new($self); }; + _cleanup_later($self); my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3"); $over->dbh; # may fail $self->{over} = $over; @@ -387,7 +374,7 @@ sub unsubscribe_unlock { sub check_inodes ($) { my ($self) = @_; - for (qw(over mm)) { # TODO: search + for (qw(over mm)) { $self->{$_}->check_inodes if $self->{$_}; } } -- cgit v1.2.3-24-ge0c7