about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-06-24 05:50:04 +0000
committer Eric Wong <e@80x24.org> 2021-06-24 12:59:40 +0000
commit 9c42ece6dfddf4156dc3016e2fa8835bf3d8aca1 (patch)
tree c47b1ae3c5e3817eb67337152f8d1e27e3238f2b /lib
parent 5ef37f4a13e2be711ef074dc2cd9994005273117 (diff)
download public-inbox-9c42ece6dfddf4156dc3016e2fa8835bf3d8aca1.tar.gz
While both git and libgit2 take around 16 minutes to load 100K
alternates, there's already a proposed patch to make git faster:

  <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>

It's also easier to patch and install git locally since the
git.git build system defaults to prefix=$HOME, and dealing with
dynamic linking with libgit2 is more difficult for end users
relying on Inline::C.

libgit2 remains in use for the non-ALL.git case, but maybe it's
not necessary (libgit2 is significantly slower than git in
Debian 10 due to SHA-1 collision checking).
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/GitAsyncCat.pm 21
-rw-r--r-- lib/PublicInbox/GzipFilter.pm 3
-rw-r--r-- lib/PublicInbox/HTTPD.pm 2
-rw-r--r-- lib/PublicInbox/IMAP.pm 10
-rw-r--r-- lib/PublicInbox/NNTP.pm 4
-rw-r--r-- lib/PublicInbox/SolverGit.pm 3
6 files changed, 23 insertions, 20 deletions
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 7d1a13db..57c194d9 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -8,7 +8,7 @@ use strict;
 use parent qw(PublicInbox::DS Exporter);
 use POSIX qw(WNOHANG);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat git_async_prefetch);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
 use PublicInbox::Git ();
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
@@ -45,12 +45,16 @@ sub event_step {
         }
 }
 
-sub git_async_cat ($$$$) {
-        my ($git, $oid, $cb, $arg) = @_;
-        if ($GCF2C //= eval {
+sub ibx_async_cat ($$$$) {
+        my ($ibx, $oid, $cb, $arg) = @_;
+        my $git = $ibx->git;
+        # {topdir} means ExtSearch (likely [extindex "all"]) with potentially
+        # 100K alternates.  git(1) has a proposed patch for 100K alternates:
+        # <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>
+        if (!defined($ibx->{topdir}) && ($GCF2C //= eval {
                 require PublicInbox::Gcf2Client;
                 PublicInbox::Gcf2Client::new();
-        } // 0) { # 0: do not retry if libgit2 or Inline::C are missing
+        } // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
                 $GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
                 \undef;
         } else { # read-only end of git-cat-file pipe
@@ -66,9 +70,10 @@ sub git_async_cat ($$$$) {
 
 # this is safe to call inside $cb, but not guaranteed to enqueue
 # returns true if successful, undef if not.
-sub git_async_prefetch {
-        my ($git, $oid, $cb, $arg) = @_;
-        if ($GCF2C) {
+sub ibx_async_prefetch {
+        my ($ibx, $oid, $cb, $arg) = @_;
+        my $git = $ibx->git;
+        if (!defined($ibx->{topdir}) && $GCF2C) {
                 if (!$GCF2C->{wbuf}) {
                         $oid .= " $git->{git_dir}\n";
                         return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 48ed11a5..334d6581 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -180,8 +180,7 @@ sub async_blob_cb { # git->cat_async callback
 
 sub smsg_blob {
         my ($self, $smsg) = @_;
-        git_async_cat($self->{ibx}->git, $smsg->{blob},
-                        \&async_blob_cb, $self);
+        ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
 }
 
 1;
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index b193c9ae..fb683f74 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -37,7 +37,7 @@ sub new {
                 # XXX unstable API!, only GitHTTPBackend needs
                 # this to limit git-http-backend(1) parallelism.
                 # We also check for the truthiness of this to
-                # detect when to use git_async_cat for slow blobs
+                # detect when to use async paths for slow blobs
                 'pi-httpd.async' => \&pi_httpd_async
         );
         bless {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index af8ce72b..9402aa41 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -612,7 +612,7 @@ sub fetch_run_ops {
         $self->msg_more(")\r\n");
 }
 
-sub fetch_blob_cb { # called by git->cat_async via git_async_cat
+sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
         my ($bref, $oid, $type, $size, $fetch_arg) = @_;
         my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
         my $ibx = $self->{ibx} or return $self->close; # client disconnected
@@ -627,8 +627,8 @@ sub fetch_blob_cb { # called by git->cat_async via git_async_cat
         }
         my $pre;
         if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
-                $pre = git_async_prefetch($ibx->git, $nxt->{blob},
-                                                \&fetch_blob_cb, $fetch_arg);
+                $pre = ibx_async_prefetch($ibx, $nxt->{blob},
+                                        \&fetch_blob_cb, $fetch_arg);
         }
         fetch_run_ops($self, $smsg, $bref, $ops, $partial);
         $pre ? $self->zflush : requeue_once($self);
@@ -760,7 +760,7 @@ sub fetch_blob { # long_response
                 }
         }
         uo2m_extend($self, $msgs->[-1]->{num});
-        git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
+        ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
                         \&fetch_blob_cb, \@_);
 }
 
@@ -1228,7 +1228,7 @@ sub long_step {
         } elsif ($more) { # $self->{wbuf}:
                 $self->update_idle_time;
 
-                # control passed to git_async_cat if $more == \undef
+                # control passed to ibx_async_cat if $more == \undef
                 requeue_once($self) if !ref($more);
         } else { # all done!
                 delete $self->{long_cb};
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index f7d99913..9df47133 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -515,7 +515,7 @@ found:
                 $smsg->{nntp_code} = $code;
                 set_art($self, $art);
                 # this dereferences to `undef'
-                ${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
+                ${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)};
         }
 }
 
@@ -549,7 +549,7 @@ sub msg_hdr_write ($$) {
         $smsg->{nntp}->msg_more($$hdr);
 }
 
-sub blob_cb { # called by git->cat_async via git_async_cat
+sub blob_cb { # called by git->cat_async via ibx_async_cat
         my ($bref, $oid, $type, $size, $smsg) = @_;
         my $self = $smsg->{nntp};
         my $code = $smsg->{nntp_code};
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 92106e75..b0cd0f2c 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -593,8 +593,7 @@ sub resolve_patch ($$) {
         if (my $msgs = $want->{try_smsgs}) {
                 my $smsg = shift @$msgs;
                 if ($self->{psgi_env}->{'pi-httpd.async'}) {
-                        return git_async_cat($want->{cur_ibx}->git,
-                                                $smsg->{blob},
+                        return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
                                                 \&extract_diff_async,
                                                 [$self, $want, $smsg]);
                 } else {