From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BBA631FA11; Sat, 19 Sep 2020 09:37:14 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Eric Wong Subject: [PATCH 3/7] add gcf2 client and executable script Date: Sat, 19 Sep 2020 09:37:10 +0000 Message-Id: <20200919093714.21776-4-e@80x24.org> In-Reply-To: <20200919093714.21776-1-e@80x24.org> References: <20200919093714.21776-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: From: Eric Wong This should be able to replace multiple `git cat-file' for blob retrieval, but adjustments may be needed. --- Documentation/public-inbox-gcf2.pod | 63 +++++++++++++++++++++++++++++ MANIFEST | 4 ++ Makefile.PL | 5 +++ lib/PublicInbox/Gcf2Client.pm | 35 ++++++++++++++++ script/public-inbox-gcf2 | 14 +++++++ t/gcf2_client.t | 47 +++++++++++++++++++++ 6 files changed, 168 insertions(+) create mode 100644 Documentation/public-inbox-gcf2.pod create mode 100644 lib/PublicInbox/Gcf2Client.pm create mode 100755 script/public-inbox-gcf2 create mode 100644 t/gcf2_client.t diff --git a/Documentation/public-inbox-gcf2.pod b/Documentation/public-inbox-gcf2.pod new file mode 100644 index 00000000..813fbe7f --- /dev/null +++ b/Documentation/public-inbox-gcf2.pod @@ -0,0 +1,63 @@ +=head1 NAME + +public-inbox-gcf2 - internal libgit2-based blob retriever + +=head1 SYNOPSIS + + This is an internal command used by public-inbox. + It may change unrecognizably or cease to exist at some point + +=head1 DESCRIPTION + +public-inbox-gcf2 is an optional internal process used by +public-inbox daemons for read-only access to underlying git +repositories. + +Users are NOT expected to run public-inbox-gcf2 on their own. 
+It replaces multiple C<git cat-file --batch> processes by treating +any git repos it knows about as alternates. + +None of its behaviors are stable and it is ALL subject to change +at any time. + +Any lines written to its standard input prefixed with a C</> +are interpreted as a git directory. That git directory +will be suffixed with "/objects" and treated as an alternate. +It writes nothing to stdout in this case. + +Otherwise it behaves like C<git cat-file --batch>, but only accepts +unabbreviated hexadecimal object IDs in its standard input. +Its output format is identical to C<git cat-file --batch>. It +only works for L<public-inbox-v2-format(5)> inboxes and v1 +inboxes indexed by L<public-inbox-index(1)>. + +=head1 OPTIONS + +=head1 ENVIRONMENT + +=over 8 + +=item PERL_INLINE_DIRECTORY + +This must be set unless C<~/.cache/public-inbox/inline-c> +exists. C<public-inbox-gcf2> uses L<Inline::C> and libgit2 +and compiles a small shim on its first run. + +=back + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L<https://public-inbox.org/meta/> +and L<http://hjrcffqmbrq6wope.onion/meta/> + +=head1 COPYRIGHT + +Copyright 2020 all contributors L<mailto:meta@public-inbox.org> + +License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> + +=head1 SEE ALSO + +L<public-inbox-daemon(8)> diff --git a/MANIFEST b/MANIFEST index 0d3a7073..91457dab 100644 --- a/MANIFEST +++ b/MANIFEST @@ -26,6 +26,7 @@ Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod Documentation/public-inbox-daemon.pod Documentation/public-inbox-edit.pod +Documentation/public-inbox-gcf2.pod Documentation/public-inbox-httpd.pod Documentation/public-inbox-imapd.pod Documentation/public-inbox-index.pod @@ -129,6 +130,7 @@ lib/PublicInbox/Filter/RubyLang.pm lib/PublicInbox/Filter/SubjectTag.pm lib/PublicInbox/Filter/Vger.pm lib/PublicInbox/Gcf2.pm +lib/PublicInbox/Gcf2Client.pm lib/PublicInbox/GetlineBody.pm lib/PublicInbox/Git.pm lib/PublicInbox/GitAsyncCat.pm @@ -221,6 +223,7 @@ sa_config/user/.spamassassin/user_prefs script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit +script/public-inbox-gcf2 script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index @@ -278,6 +281,7 @@
t/filter_rubylang.t t/filter_subjecttag.t t/filter_vger.t t/gcf2.t +t/gcf2_client.t t/git-http-backend.psgi t/git.fast-import-data t/git.t diff --git a/Makefile.PL b/Makefile.PL index 3fe9acf8..5a268362 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -71,6 +71,11 @@ $v->{gz_docs} = [ map { "$_.gz" } (@{$v->{docs}},@{$v->{docs_html}}) ]; $v->{rsync_docs} = [ @{$v->{gz_docs}}, @{$v->{docs}}, @{$v->{docs_html}}, qw(NEWS.atom NEWS.atom.gz)]; +# filter out public-inbox-gcf2 from the website, it's an internal command +for my $var (qw(gz_docs rsync_docs)) { + @{$v->{$var}} = grep(!/-gcf2/, @{$v->{$var}}); +} + # external manpages which we host ourselves, since some packages # (currently just Xapian) doesn't host manpages themselves. my @xman = qw(copydatabase.1 xapian-compact.1); diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm new file mode 100644 index 00000000..71fbb1d1 --- /dev/null +++ b/lib/PublicInbox/Gcf2Client.pm @@ -0,0 +1,35 @@ +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::Gcf2Client; +use strict; +use parent 'PublicInbox::Git'; +use PublicInbox::Spawn qw(popen_rd); +use IO::Handle (); + +sub new { + my $self = shift->SUPER::new('/nonexistent'); + my ($out_r, $out_w); + pipe($out_r, $out_w) or $self->fail("pipe failed: $!"); + my $cmd = [ 'public-inbox-gcf2' ]; + @$self{qw(in pid)} = popen_rd($cmd, undef, { 0 => $out_r }); + $self->{inflight} = []; + $self->{out} = $out_w; + fcntl($out_w, 1031, 4096) if $^O eq 'linux'; # 1031: F_SETPIPE_SZ + $out_w->autoflush(1); + $self; +} + +sub add_git_dir { + my ($self, $git_dir) = @_; + + # ensure buffers are drained, length($git_dir) may exceed + # PIPE_BUF on platforms where PIPE_BUF is only 512 bytes + my $inflight = $self->{inflight}; + while (scalar(@$inflight)) { + $self->cat_async_step($inflight); + } + print { $self->{out} } $git_dir, "\n" or + $self->fail("write error: $!"); +} + +1; diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2 new
file mode 100755 index 00000000..51811698 --- /dev/null +++ b/script/public-inbox-gcf2 @@ -0,0 +1,14 @@ +#!perl -w +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +eval { require PublicInbox::Gcf2 }; +die "libgit2 development package or Inline::C missing for $0: $@\n" if $@; +my $gcf2 = PublicInbox::Gcf2::new(); +while (<STDIN>) { + chomp; + if (m!\A/!) { # +/path/to/git-dir + $gcf2->add_alternate("$_/objects"); + } else { + $gcf2->cat_oid(1, $_); + } +} diff --git a/t/gcf2_client.t b/t/gcf2_client.t new file mode 100644 index 00000000..39f9f296 --- /dev/null +++ b/t/gcf2_client.t @@ -0,0 +1,47 @@ +#!perl -w +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use PublicInbox::TestCommon; +use Test::More; +use Cwd qw(getcwd); +use PublicInbox::Import; + +require_mods('PublicInbox::Gcf2'); +use_ok 'PublicInbox::Gcf2Client'; +my ($tmpdir, $for_destroy) = tmpdir(); +PublicInbox::Import::init_bare($tmpdir); +my $fi_data = './t/git.fast-import-data'; +my $rdr = {}; +open $rdr->{0}, '<', $fi_data or BAIL_OUT $!; +xsys([qw(git fast-import --quiet)], { GIT_DIR => $tmpdir }, $rdr); +is($?, 0, 'fast-import succeeded'); + +my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c'; +my $called = 0; +{ + local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}"; + my $gcf2c = PublicInbox::Gcf2Client->new; + $gcf2c->add_git_dir($tmpdir); + $gcf2c->cat_async($tree, sub { + my ($bref, $oid, $type, $size, $arg) = @_; + is($oid, $tree, 'got expected OID'); + is($size, 30, 'got expected length'); + is($type, 'tree', 'got tree type'); + is(length($$bref), 30, 'got a tree'); + is($arg, 'hi', 'arg passed'); + $called++; + }, 'hi'); + my $trunc = substr($tree, 0, 39); + $gcf2c->cat_async($trunc, sub { + my ($bref, $oid, $type, $size, $arg) = @_; + is(undef, $bref, 'missing bref is undef'); + is($oid, $trunc, 'truncated OID printed'); + is($type, 'missing', 'type is "missing"'); + is($size, undef, 'size is undef'); + is($arg, 'bye', 'arg passed when missing'); +
$called++; + }, 'bye'); +} +is($called, 2, 'cat_async callbacks hit'); +done_testing;