diff options
author | Eric Wong <e@80x24.org> | 2021-09-22 09:45:17 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-09-23 04:52:06 +0000 |
commit | 356439a571c536eaa487031802b436d087113f4f (patch) | |
tree | cab628111eefa6f5a059cbc405d8e01a2ccb8670 /lib/PublicInbox/Gcf2.pm | |
parent | 7d2f9f7caf63256bab7b3342c52a1d97c889ada4 (diff) | |
download | public-inbox-356439a571c536eaa487031802b436d087113f4f.tar.gz |
Check for unlinked mmap-ed files via /proc/$PID/maps every 60s or so.

ExtSearch (extindex) is compatible-enough with Inbox objects to be wired into the old per-inbox code, but the startup cost is projected to be much higher down the line when there are >30K inboxes, so we scan /proc/$PID/maps for deleted files before unlinking.

With old Inbox objects, it was (and is) simpler to just kill processes w/o checking due to the low startup cost (and non-portability of checking).

Reported-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>

Link: https://public-inbox.org/meta/20210921144754.gulkneuulzo27qbw@meerkat.local/
Diffstat (limited to 'lib/PublicInbox/Gcf2.pm')
-rw-r--r-- | lib/PublicInbox/Gcf2.pm | 26 |
1 files changed, 23 insertions, 3 deletions
```diff
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 64945ca6..f546208f 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -8,6 +8,7 @@ use strict;
 use v5.10.1;
 use PublicInbox::Spawn qw(which popen_rd); # may set PERL_INLINE_DIRECTORY
 use Fcntl qw(LOCK_EX SEEK_SET);
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
 use IO::Handle; # autoflush
 BEGIN {
 	my (%CFG, $c_src);
@@ -96,11 +97,21 @@ sub add_alt ($$) {
 	1;
 }
 
-# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop
+sub have_unlinked_files () {
+	# FIXME: port gcf2-like over to git.git so we won't need to
+	# deal with libgit2
+	return 1 if $^O ne 'linux';
+	open my $fh, '<', "/proc/$$/maps" or return;
+	while (<$fh>) { return 1 if /\.(?:idx|pack) \(deleted\)$/ }
+	undef;
+}
+
+# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop [EXPIRE-TIMEOUT]
 # (see lib/PublicInbox/Gcf2Client.pm)
-sub loop () {
+sub loop (;$) {
+	my $exp = $_[0] || $ARGV[0] || 60; # seconds
 	my $gcf2 = new();
-	my %seen;
+	my (%seen, $check_at);
 	STDERR->autoflush(1);
 	STDOUT->autoflush(1);
 
@@ -116,6 +127,7 @@ sub loop () {
 
 			$gcf2 = new();
 			%seen = ($git_dir => add_alt($gcf2,"$git_dir/objects"));
+			$check_at = clock_gettime(CLOCK_MONOTONIC) + $exp;
 
 			if ($gcf2->cat_oid(1, $oid)) {
 				warn "I: $$ $oid found after retry\n";
@@ -123,6 +135,14 @@ sub loop () {
 				warn "W: $$ $oid missing after retry\n";
 				print "$oid missing\n"; # mimic git-cat-file
 			}
+		} else { # check expiry to deal with deleted pack files
+			my $now = clock_gettime(CLOCK_MONOTONIC);
+			$check_at //= $now + $exp;
+			if ($now > $check_at && have_unlinked_files()) {
+				undef $check_at;
+				$gcf2 = new();
+				%seen = ();
+			}
 		}
 	}
 }
```