user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] favor git(1) rather than libgit2 for ExtSearch
@ 2021-06-24  5:50 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-06-24  5:50 UTC (permalink / raw)
  To: meta

While both git and libgit2 take around 16 minutes to load 100K
alternates, there's already a proposed patch to make git faster:

  <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>

It's also easier to patch and install git locally since the
git.git build system defaults to prefix=$HOME, and dealing with
dynamic linking with libgit2 is more difficult for end users
relying on Inline::C.

libgit2 remains in use for the non-ALL.git case, but maybe it's
not necessary (libgit2 is significantly slower than git in
Debian 10 due to SHA-1 collision checking).
---
 Documentation/technical/ds.txt |  2 +-
 lib/PublicInbox/GitAsyncCat.pm | 21 +++++++++++++--------
 lib/PublicInbox/GzipFilter.pm  |  3 +--
 lib/PublicInbox/HTTPD.pm       |  2 +-
 lib/PublicInbox/IMAP.pm        | 10 +++++-----
 lib/PublicInbox/NNTP.pm        |  4 ++--
 lib/PublicInbox/SolverGit.pm   |  3 +--
 7 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/Documentation/technical/ds.txt b/Documentation/technical/ds.txt
index a0793ca2..7bc1ad79 100644
--- a/Documentation/technical/ds.txt
+++ b/Documentation/technical/ds.txt
@@ -64,7 +64,7 @@ Augmented features:
 * ->requeue support.  An optimization of the AddTimer(0, ...) idiom
   for immediately dispatching code at the next event loop iteration.
   public-inbox uses this for fairly generating large responses
-  iteratively (see PublicInbox::NNTP::long_response or git_async_cat
+  iteratively (see PublicInbox::NNTP::long_response or ibx_async_cat
   for blob retrievals).
 
 New features
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 7d1a13db..57c194d9 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -8,7 +8,7 @@ use strict;
 use parent qw(PublicInbox::DS Exporter);
 use POSIX qw(WNOHANG);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat git_async_prefetch);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
 use PublicInbox::Git ();
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
@@ -45,12 +45,16 @@ sub event_step {
 	}
 }
 
-sub git_async_cat ($$$$) {
-	my ($git, $oid, $cb, $arg) = @_;
-	if ($GCF2C //= eval {
+sub ibx_async_cat ($$$$) {
+	my ($ibx, $oid, $cb, $arg) = @_;
+	my $git = $ibx->git;
+	# {topdir} means ExtSearch (likely [extindex "all"]) with potentially
+	# 100K alternates.  git(1) has a proposed patch for 100K alternates:
+	# <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>
+	if (!defined($ibx->{topdir}) && ($GCF2C //= eval {
 		require PublicInbox::Gcf2Client;
 		PublicInbox::Gcf2Client::new();
-	} // 0) { # 0: do not retry if libgit2 or Inline::C are missing
+	} // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
 		$GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
 		\undef;
 	} else { # read-only end of git-cat-file pipe
@@ -66,9 +70,10 @@ sub git_async_cat ($$$$) {
 
 # this is safe to call inside $cb, but not guaranteed to enqueue
 # returns true if successful, undef if not.
-sub git_async_prefetch {
-	my ($git, $oid, $cb, $arg) = @_;
-	if ($GCF2C) {
+sub ibx_async_prefetch {
+	my ($ibx, $oid, $cb, $arg) = @_;
+	my $git = $ibx->git;
+	if (!defined($ibx->{topdir}) && $GCF2C) {
 		if (!$GCF2C->{wbuf}) {
 			$oid .= " $git->{git_dir}\n";
 			return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 48ed11a5..334d6581 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -180,8 +180,7 @@ sub async_blob_cb { # git->cat_async callback
 
 sub smsg_blob {
 	my ($self, $smsg) = @_;
-	git_async_cat($self->{ibx}->git, $smsg->{blob},
-			\&async_blob_cb, $self);
+	ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
 }
 
 1;
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index b193c9ae..fb683f74 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -37,7 +37,7 @@ sub new {
 		# XXX unstable API!, only GitHTTPBackend needs
 		# this to limit git-http-backend(1) parallelism.
 		# We also check for the truthiness of this to
-		# detect when to use git_async_cat for slow blobs
+		# detect when to use async paths for slow blobs
 		'pi-httpd.async' => \&pi_httpd_async
 	);
 	bless {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index af8ce72b..9402aa41 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -612,7 +612,7 @@ sub fetch_run_ops {
 	$self->msg_more(")\r\n");
 }
 
-sub fetch_blob_cb { # called by git->cat_async via git_async_cat
+sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
 	my ($bref, $oid, $type, $size, $fetch_arg) = @_;
 	my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
 	my $ibx = $self->{ibx} or return $self->close; # client disconnected
@@ -627,8 +627,8 @@ sub fetch_blob_cb { # called by git->cat_async via git_async_cat
 	}
 	my $pre;
 	if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
-		$pre = git_async_prefetch($ibx->git, $nxt->{blob},
-						\&fetch_blob_cb, $fetch_arg);
+		$pre = ibx_async_prefetch($ibx, $nxt->{blob},
+					\&fetch_blob_cb, $fetch_arg);
 	}
 	fetch_run_ops($self, $smsg, $bref, $ops, $partial);
 	$pre ? $self->zflush : requeue_once($self);
@@ -760,7 +760,7 @@ sub fetch_blob { # long_response
 		}
 	}
 	uo2m_extend($self, $msgs->[-1]->{num});
-	git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
+	ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
 			\&fetch_blob_cb, \@_);
 }
 
@@ -1228,7 +1228,7 @@ sub long_step {
 	} elsif ($more) { # $self->{wbuf}:
 		$self->update_idle_time;
 
-		# control passed to git_async_cat if $more == \undef
+		# control passed to ibx_async_cat if $more == \undef
 		requeue_once($self) if !ref($more);
 	} else { # all done!
 		delete $self->{long_cb};
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index f7d99913..9df47133 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -515,7 +515,7 @@ found:
 		$smsg->{nntp_code} = $code;
 		set_art($self, $art);
 		# this dereferences to `undef'
-		${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
+		${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)};
 	}
 }
 
@@ -549,7 +549,7 @@ sub msg_hdr_write ($$) {
 	$smsg->{nntp}->msg_more($$hdr);
 }
 
-sub blob_cb { # called by git->cat_async via git_async_cat
+sub blob_cb { # called by git->cat_async via ibx_async_cat
 	my ($bref, $oid, $type, $size, $smsg) = @_;
 	my $self = $smsg->{nntp};
 	my $code = $smsg->{nntp_code};
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 92106e75..b0cd0f2c 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -593,8 +593,7 @@ sub resolve_patch ($$) {
 	if (my $msgs = $want->{try_smsgs}) {
 		my $smsg = shift @$msgs;
 		if ($self->{psgi_env}->{'pi-httpd.async'}) {
-			return git_async_cat($want->{cur_ibx}->git,
-						$smsg->{blob},
+			return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
 						\&extract_diff_async,
 						[$self, $want, $smsg]);
 		} else {

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-24  5:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-24  5:50 [PATCH] favor git(1) rather than libgit2 for ExtSearch Eric Wong

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git