user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] favor git(1) rather than libgit2 for ExtSearch
Date: Thu, 24 Jun 2021 05:50:04 +0000	[thread overview]
Message-ID: <20210624055004.27955-1-e@80x24.org> (raw)

While both git and libgit2 take around 16 minutes to load 100K
alternates, there's already a proposed patch to make git faster:

  <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>

It's also easier to patch and install git locally since the
git.git build system defaults to prefix=$HOME, and dealing with
dynamic linking with libgit2 is more difficult for end users
relying on Inline::C.

libgit2 remains in use for the non-ALL.git case, but maybe it's
not necessary (libgit2 is significantly slower than git in
Debian 10 due to SHA-1 collision checking).
---
 Documentation/technical/ds.txt |  2 +-
 lib/PublicInbox/GitAsyncCat.pm | 21 +++++++++++++--------
 lib/PublicInbox/GzipFilter.pm  |  3 +--
 lib/PublicInbox/HTTPD.pm       |  2 +-
 lib/PublicInbox/IMAP.pm        | 10 +++++-----
 lib/PublicInbox/NNTP.pm        |  4 ++--
 lib/PublicInbox/SolverGit.pm   |  3 +--
 7 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/Documentation/technical/ds.txt b/Documentation/technical/ds.txt
index a0793ca2..7bc1ad79 100644
--- a/Documentation/technical/ds.txt
+++ b/Documentation/technical/ds.txt
@@ -64,7 +64,7 @@ Augmented features:
 * ->requeue support.  An optimization of the AddTimer(0, ...) idiom
   for immediately dispatching code at the next event loop iteration.
   public-inbox uses this for fairly generating large responses
-  iteratively (see PublicInbox::NNTP::long_response or git_async_cat
+  iteratively (see PublicInbox::NNTP::long_response or ibx_async_cat
   for blob retrievals).
 
 New features
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 7d1a13db..57c194d9 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -8,7 +8,7 @@ use strict;
 use parent qw(PublicInbox::DS Exporter);
 use POSIX qw(WNOHANG);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat git_async_prefetch);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
 use PublicInbox::Git ();
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
@@ -45,12 +45,16 @@ sub event_step {
 	}
 }
 
-sub git_async_cat ($$$$) {
-	my ($git, $oid, $cb, $arg) = @_;
-	if ($GCF2C //= eval {
+sub ibx_async_cat ($$$$) {
+	my ($ibx, $oid, $cb, $arg) = @_;
+	my $git = $ibx->git;
+	# {topdir} means ExtSearch (likely [extindex "all"]) with potentially
+	# 100K alternates.  git(1) has a proposed patch for 100K alternates:
+	# <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>
+	if (!defined($ibx->{topdir}) && ($GCF2C //= eval {
 		require PublicInbox::Gcf2Client;
 		PublicInbox::Gcf2Client::new();
-	} // 0) { # 0: do not retry if libgit2 or Inline::C are missing
+	} // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
 		$GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
 		\undef;
 	} else { # read-only end of git-cat-file pipe
@@ -66,9 +70,10 @@ sub git_async_cat ($$$$) {
 
 # this is safe to call inside $cb, but not guaranteed to enqueue
 # returns true if successful, undef if not.
-sub git_async_prefetch {
-	my ($git, $oid, $cb, $arg) = @_;
-	if ($GCF2C) {
+sub ibx_async_prefetch {
+	my ($ibx, $oid, $cb, $arg) = @_;
+	my $git = $ibx->git;
+	if (!defined($ibx->{topdir}) && $GCF2C) {
 		if (!$GCF2C->{wbuf}) {
 			$oid .= " $git->{git_dir}\n";
 			return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 48ed11a5..334d6581 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -180,8 +180,7 @@ sub async_blob_cb { # git->cat_async callback
 
 sub smsg_blob {
 	my ($self, $smsg) = @_;
-	git_async_cat($self->{ibx}->git, $smsg->{blob},
-			\&async_blob_cb, $self);
+	ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
 }
 
 1;
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index b193c9ae..fb683f74 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -37,7 +37,7 @@ sub new {
 		# XXX unstable API!, only GitHTTPBackend needs
 		# this to limit git-http-backend(1) parallelism.
 		# We also check for the truthiness of this to
-		# detect when to use git_async_cat for slow blobs
+		# detect when to use async paths for slow blobs
 		'pi-httpd.async' => \&pi_httpd_async
 	);
 	bless {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index af8ce72b..9402aa41 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -612,7 +612,7 @@ sub fetch_run_ops {
 	$self->msg_more(")\r\n");
 }
 
-sub fetch_blob_cb { # called by git->cat_async via git_async_cat
+sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
 	my ($bref, $oid, $type, $size, $fetch_arg) = @_;
 	my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
 	my $ibx = $self->{ibx} or return $self->close; # client disconnected
@@ -627,8 +627,8 @@ sub fetch_blob_cb { # called by git->cat_async via git_async_cat
 	}
 	my $pre;
 	if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
-		$pre = git_async_prefetch($ibx->git, $nxt->{blob},
-						\&fetch_blob_cb, $fetch_arg);
+		$pre = ibx_async_prefetch($ibx, $nxt->{blob},
+					\&fetch_blob_cb, $fetch_arg);
 	}
 	fetch_run_ops($self, $smsg, $bref, $ops, $partial);
 	$pre ? $self->zflush : requeue_once($self);
@@ -760,7 +760,7 @@ sub fetch_blob { # long_response
 		}
 	}
 	uo2m_extend($self, $msgs->[-1]->{num});
-	git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
+	ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
 			\&fetch_blob_cb, \@_);
 }
 
@@ -1228,7 +1228,7 @@ sub long_step {
 	} elsif ($more) { # $self->{wbuf}:
 		$self->update_idle_time;
 
-		# control passed to git_async_cat if $more == \undef
+		# control passed to ibx_async_cat if $more == \undef
 		requeue_once($self) if !ref($more);
 	} else { # all done!
 		delete $self->{long_cb};
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index f7d99913..9df47133 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -515,7 +515,7 @@ found:
 		$smsg->{nntp_code} = $code;
 		set_art($self, $art);
 		# this dereferences to `undef'
-		${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
+		${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)};
 	}
 }
 
@@ -549,7 +549,7 @@ sub msg_hdr_write ($$) {
 	$smsg->{nntp}->msg_more($$hdr);
 }
 
-sub blob_cb { # called by git->cat_async via git_async_cat
+sub blob_cb { # called by git->cat_async via ibx_async_cat
 	my ($bref, $oid, $type, $size, $smsg) = @_;
 	my $self = $smsg->{nntp};
 	my $code = $smsg->{nntp_code};
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 92106e75..b0cd0f2c 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -593,8 +593,7 @@ sub resolve_patch ($$) {
 	if (my $msgs = $want->{try_smsgs}) {
 		my $smsg = shift @$msgs;
 		if ($self->{psgi_env}->{'pi-httpd.async'}) {
-			return git_async_cat($want->{cur_ibx}->git,
-						$smsg->{blob},
+			return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
 						\&extract_diff_async,
 						[$self, $want, $smsg]);
 		} else {

                 reply	other threads:[~2021-06-24  5:50 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210624055004.27955-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).