user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
 Warning: Initial query:
 %22gcf2: require git dir with OID%22
 returned no results, used:
 "gcf2: require git dir with OID"
 instead

Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 6/7] gcf2: require git dir with OID
  2020-09-19  9:37  7% [PATCH 0/7] gcf2: libgit2-based cat-file alternative Eric Wong
@ 2020-09-19  9:37  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-09-19  9:37 UTC (permalink / raw)
  To: meta

This amortizes the cost of recreating PublicInbox::Gcf2 objects
when alternates change in v2 all.git.
---
 lib/PublicInbox/Gcf2Client.pm  | 13 -------------
 lib/PublicInbox/Git.pm         |  5 +++--
 lib/PublicInbox/GitAsyncCat.pm | 24 +++++++++++++++---------
 script/public-inbox-gcf2       | 14 +++++---------
 t/gcf2_client.t                | 11 ++++-------
 5 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index a048cd1a..30f85c71 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -23,19 +23,6 @@ sub new {
 	$self;
 }
 
-sub add_git_dir {
-	my ($self, $git_dir) = @_;
-
-	# ensure buffers are drained, length($git_dir) may exceed
-	# PIPE_BUF on platforms where PIPE_BUF is only 512 bytes
-	my $inflight = $self->{inflight};
-	while (scalar(@$inflight)) {
-		$self->cat_async_step($inflight);
-	}
-	print { $self->{out} } $git_dir, "\n" or
-				$self->fail("write error: $!");
-}
-
 # always false, since -gcf2 retries internally
 sub alternates_changed {}
 
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index b49b5bd3..6bb82b6b 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -190,7 +190,8 @@ sub cat_async_step ($$) {
 		$bref = my_read($self->{in}, $rbuf, $size + 1) or
 			fail($self, defined($bref) ? 'read EOF' : "read: $!");
 		chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
-	} elsif ($head =~ / missing$/) {
+	} elsif ($head =~ s/ missing\n//s) {
+		$oid = $head;
 		# ref($req) indicates it's already been retried
 		# -gcf2 retries internally, so it never hits this path:
 		if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
@@ -198,7 +199,7 @@ sub cat_async_step ($$) {
 						$req, $cb, $arg);
 		}
 		$type = 'missing';
-		$oid = ref($req) ? $$req : $req;
+		$oid = ref($req) ? $$req : $req if $oid eq '';
 	} else {
 		fail($self, "Unexpected result from async git cat-file: $head");
 	}
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index db1a7f94..8a54c608 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -16,21 +16,27 @@ our @EXPORT = qw(git_async_cat);
 
 sub event_step {
 	my ($self) = @_;
-	my $git = $self->{git};
-	return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
-	my $inflight = $git->{inflight};
+	my $gitish = $self->{gitish};
+	return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1);
+	my $inflight = $gitish->{inflight};
 	if ($inflight && @$inflight) {
-		$git->cat_async_step($inflight);
-		$self->requeue if @$inflight || exists $git->{cat_rbuf};
+		$gitish->cat_async_step($inflight);
+		$self->requeue if @$inflight || exists $gitish->{cat_rbuf};
 	}
 }
 
 sub git_async_cat ($$$$) {
 	my ($git, $oid, $cb, $arg) = @_;
-	$git->cat_async($oid, $cb, $arg);
-	$git->{async_cat} //= do {
-		my $self = bless { git => $git }, __PACKAGE__;
-		$self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
+	my $gitish = $git->{gcf2c}; # PublicInbox::Gcf2Client
+	if ($gitish) {
+		$oid .= " $git->{git_dir}";
+	} else {
+		$gitish = $git;
+	}
+	$gitish->cat_async($oid, $cb, $arg);
+	$gitish->{async_cat} //= do {
+		my $self = bless { gitish => $gitish }, __PACKAGE__;
+		$self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
 		\undef; # this is a true ref()
 	};
 }
diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2
index d2d2ac8b..4a44b654 100755
--- a/script/public-inbox-gcf2
+++ b/script/public-inbox-gcf2
@@ -3,7 +3,6 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 eval { require PublicInbox::Gcf2 };
 die "libgit2 development package or Inline::C missing for $0: $@\n" if $@;
-my @dirs; # may get big (30K-100K)
 my $gcf2 = PublicInbox::Gcf2::new();
 use IO::Handle; # autoflush
 STDERR->autoflush(1);
@@ -11,19 +10,16 @@ STDOUT->autoflush(1);
 
 while (<STDIN>) {
 	chomp;
-	if (m!\A/!) { # +/path/to/git-dir
-		push @dirs, $_;
-		$gcf2->add_alternate("$_/objects");
-	} elsif (!$gcf2->cat_oid(1, $_)) {
+	my ($oid, $git_dir) = split(/ /, $_, 2);
+	$gcf2->add_alternate("$git_dir/objects");
+	if (!$gcf2->cat_oid(1, $oid)) {
 		# retry once if missing.  We only get unabbreviated OIDs
 		# from SQLite or Xapian DBs, here, so malicious clients
 		# can't trigger excessive retries:
-		my $oid = $_;
-		warn "I: $$ $oid missing, retrying...\n";
+		warn "I: $$ $oid missing, retrying in $git_dir...\n";
 
-		# clients may need to wait a bit for this:
 		$gcf2 = PublicInbox::Gcf2::new();
-		$gcf2->add_alternate("$_/objects") for @dirs;
+		$gcf2->add_alternate("$git_dir/objects");
 
 		if ($gcf2->cat_oid(1, $oid)) {
 			warn "I: $$ $oid found after retry\n";
diff --git a/t/gcf2_client.t b/t/gcf2_client.t
index 0f7e7203..19462379 100644
--- a/t/gcf2_client.t
+++ b/t/gcf2_client.t
@@ -27,9 +27,7 @@ my $err_f = "$tmpdir/err";
 	local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}";
 	open my $err, '>', $err_f or BAIL_OUT $!;
 	my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
-	$gcf2c->add_git_dir($git_a);
-
-	$gcf2c->cat_async($tree, sub {
+	$gcf2c->cat_async("$tree $git_a", sub {
 		my ($bref, $oid, $type, $size, $arg) = @_;
 		is($oid, $tree, 'got expected OID');
 		is($size, 30, 'got expected length');
@@ -45,7 +43,7 @@ my $err_f = "$tmpdir/err";
 	is($estr, '', 'nothing in stderr');
 
 	my $trunc = substr($tree, 0, 39);
-	$gcf2c->cat_async($trunc, sub {
+	$gcf2c->cat_async("$trunc $git_a", sub {
 		my ($bref, $oid, $type, $size, $arg) = @_;
 		is(undef, $bref, 'missing bref is undef');
 		is($oid, $trunc, 'truncated OID printed');
@@ -63,8 +61,7 @@ my $err_f = "$tmpdir/err";
 	# try failed alternates lookup
 	open $err, '>', $err_f or BAIL_OUT $!;
 	$gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
-	$gcf2c->add_git_dir($git_b);
-	$gcf2c->cat_async($tree, sub {
+	$gcf2c->cat_async("$tree $git_b", sub {
 		my ($bref, $oid, $type, $size, $arg) = @_;
 		is(undef, $bref, 'missing bref from alt is undef');
 		$called++;
@@ -79,7 +76,7 @@ my $err_f = "$tmpdir/err";
 	print $alt "$git_a/objects\n" or BAIL_OUT $!;
 	close $alt or BAIL_OUT;
 	my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]);
-	$gcf2c->cat_async($tree, sub {
+	$gcf2c->cat_async("$tree $git_a", sub {
 		my ($bref, $oid, $type, $size, $arg) = @_;
 		is($oid, $tree, 'oid match on alternates retry');
 		is($$bref, $expect, 'tree content matched');

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/7] gcf2: libgit2-based cat-file alternative
@ 2020-09-19  9:37  7% Eric Wong
  2020-09-19  9:37  5% ` [PATCH 6/7] gcf2: require git dir with OID Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-09-19  9:37 UTC (permalink / raw)
  To: meta

This allows a single cat-file-like process to handle multiple
inboxes, instead of having a "git cat-file --batch" process for
every inbox, saving pipes and process table space.

The preliminary code was done months ago, but I struggled to put
all the pieces together in a coherent way.  My brain has been
scattered :x

I finally decided to make the gcf2 process a global singleton
(per-worker) to avoid complexity elsewhere in the config...

It doesn't detect or release unlinked packs + indices yet,
so "git gc" may not free disk space until the process is restarted.

Otherwise, it does detect new epochs and seems to be mostly
working...

Eric Wong (7):
  gcf2: libgit2-based git cat-file alternative
  t/gcf2: test changes to alternates
  add gcf2 client and executable script
  gcf2: transparently retry on missing OID
  gcf2*: more descriptive package descriptions
  gcf2: require git dir with OID
  gcf2: wire up read-only daemons and rm -gcf2 script

 MANIFEST                       |   5 +
 lib/PublicInbox/Daemon.pm      |  11 +++
 lib/PublicInbox/Gcf2.pm        |  89 ++++++++++++++++++
 lib/PublicInbox/Gcf2Client.pm  |  62 +++++++++++++
 lib/PublicInbox/Git.pm         |  41 ++++++---
 lib/PublicInbox/GitAsyncCat.pm |  73 +++++++++++++--
 lib/PublicInbox/IMAP.pm        |   2 +-
 lib/PublicInbox/gcf2_libgit2.h | 142 +++++++++++++++++++++++++++++
 script/public-inbox-httpd      |   1 +
 t/gcf2.t                       | 162 +++++++++++++++++++++++++++++++++
 t/gcf2_client.t                |  90 ++++++++++++++++++
 11 files changed, 652 insertions(+), 26 deletions(-)
 create mode 100644 lib/PublicInbox/Gcf2.pm
 create mode 100644 lib/PublicInbox/Gcf2Client.pm
 create mode 100644 lib/PublicInbox/gcf2_libgit2.h
 create mode 100644 t/gcf2.t
 create mode 100644 t/gcf2_client.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-09-19  9:37  7% [PATCH 0/7] gcf2: libgit2-based cat-file alternative Eric Wong
2020-09-19  9:37  5% ` [PATCH 6/7] gcf2: require git dir with OID Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).