* [PATCH 07/15] git: share unlinked pack checking code with gcf2
2023-11-30 11:40 5% [PATCH 00/15] various cindex fixes + speedups Eric Wong
@ 2023-11-30 11:41 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-30 11:41 UTC (permalink / raw)
To: meta
Sharing the check saves some code in case we keep libgit2 around.
---
lib/PublicInbox/Gcf2.pm | 16 ++++------------
lib/PublicInbox/Git.pm | 27 ++++++++++++++-------------
2 files changed, 18 insertions(+), 25 deletions(-)
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index dcbb201d..78392990 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -9,7 +9,7 @@ use PublicInbox::Spawn qw(which run_qx); # may set PERL_INLINE_DIRECTORY
use Fcntl qw(SEEK_SET);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
use IO::Handle; # autoflush
-use PublicInbox::Git;
+use PublicInbox::Git qw($ck_unlinked_packs);
use PublicInbox::Lock;
use autodie qw(close open seek truncate);
@@ -86,16 +86,6 @@ sub add_alt ($$) {
1;
}
-sub have_unlinked_files () {
- # FIXME: port gcf2-like over to git.git so we won't need to
- # deal with libgit2
- return 1 if $^O ne 'linux';
- if (my $s = PublicInbox::IO::try_cat("/proc/$$/maps")) {
- return 1 if /\.(?:idx|pack) \(deleted\)/s;
- }
- undef;
-}
-
# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop [EXPIRE-TIMEOUT]
# (see lib/PublicInbox/Gcf2Client.pm)
sub loop (;$) {
@@ -104,6 +94,7 @@ sub loop (;$) {
my (%seen, $check_at);
STDERR->autoflush(1);
STDOUT->autoflush(1);
+ my $pid = $$;
while (<STDIN>) {
chomp;
@@ -130,7 +121,8 @@ sub loop (;$) {
$check_at //= $now + $exp;
if ($now > $check_at) {
undef $check_at;
- if (have_unlinked_files()) {
+ if (!$ck_unlinked_packs ||
+ $ck_unlinked_packs->($pid)) {
$gcf2 = new();
%seen = ();
}
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 235a35cd..9c4d938e 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -24,7 +24,8 @@ use Carp qw(croak carp);
use PublicInbox::SHA qw(sha_all);
our %HEXLEN2SHA = (40 => 1, 64 => 256);
our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64);
-our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN);
+our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN
+ $ck_unlinked_packs);
our $in_cleanup;
our $async_warn; # true in read-only daemons
@@ -597,27 +598,27 @@ sub manifest_entry {
$ent;
}
+our $ck_unlinked_packs = $^O eq 'linux' ? sub {
+ # FIXME: port gcf2-like over to git.git so we won't need to
+ # deal with libgit2
+ my $s = try_cat "/proc/$_[0]/maps";
+ $s =~ /\.(?:idx|pack) \(deleted\)/s ? 1 : undef;
+} : undef;
+
# returns true if there are pending cat-file processes
sub cleanup_if_unlinked {
my ($self) = @_;
- return cleanup($self, 1) if $^O ne 'linux';
+ $ck_unlinked_packs or return cleanup($self, 1);
# Linux-specific /proc/$PID/maps access
# TODO: support this inside git.git
- my $ret = 0;
+ my $nr_live = 0;
for my $obj ($self, ($self->{ck} // ())) {
my $sock = $obj->{sock} // next;
my $pid = $sock->attached_pid // next;
- open my $fh, '<', "/proc/$pid/maps" or return cleanup($self, 1);
- while (<$fh>) {
- # n.b. we do not restart for unlinked multi-pack-index
- # since it's not too huge, and the startup cost may
- # be higher.
- /\.(?:idx|pack) \(deleted\)$/ and
- return cleanup($self, 1);
- }
- ++$ret;
+ $ck_unlinked_packs->($pid) and return cleanup($self, 1);
+ ++$nr_live;
}
- $ret;
+ $nr_live;
}
sub event_step {
^ permalink raw reply related [relevance 7%]
* [PATCH 00/15] various cindex fixes + speedups
@ 2023-11-30 11:40 5% Eric Wong
2023-11-30 11:41 7% ` [PATCH 07/15] git: share unlinked pack checking code with gcf2 Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-30 11:40 UTC (permalink / raw)
To: meta
Notable changes:
10/15 provides a huge speedup which will hopefully make
future developments faster.
12/15 probably obsoletes libgit2 for extindex "all" users.
13/15 can save some memory with many inboxes while making
configuration easier.
Eric Wong (15):
cindex: fix store_repo+repo_stored on no-op
codesearch: allow inbox count to exceed matches
config: reject newlines consistently in dir names
cindex: only create {-cidx_err} field on failures
cindex: keep batch pipe for pruning SHA-256 repos
cindex: store extensions.objectFormat with repo data
git: share unlinked pack checking code with gcf2
cindex: skip getpid guard for most OnDestroy use
spawn: drop IO layer support from redirects
cindex: speed up initial scan setup phase
inbox: expire resources more aggressively
git_async_cat: use git from "all" extindex if possible
www_listing: support publicInbox.nameIsUrl
inbox: shrink data structures for publicinbox.*.hide
codesearch: use retry_reopen for WWW
Documentation/public-inbox-config.pod | 19 +-
lib/PublicInbox/CodeSearch.pm | 54 +++--
lib/PublicInbox/CodeSearchIdx.pm | 286 ++++++++++++++++----------
lib/PublicInbox/Config.pm | 32 ++-
lib/PublicInbox/Gcf2.pm | 16 +-
lib/PublicInbox/Git.pm | 27 +--
lib/PublicInbox/GitAsyncCat.pm | 8 +-
lib/PublicInbox/Inbox.pm | 32 +--
lib/PublicInbox/MailDiff.pm | 3 +-
lib/PublicInbox/SearchIdx.pm | 5 +-
lib/PublicInbox/Spawn.pm | 32 +--
lib/PublicInbox/WwwListing.pm | 21 +-
12 files changed, 303 insertions(+), 232 deletions(-)
^ permalink raw reply [relevance 5%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-30 11:40 5% [PATCH 00/15] various cindex fixes + speedups Eric Wong
2023-11-30 11:41 7% ` [PATCH 07/15] git: share unlinked pack checking code with gcf2 Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).