user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 08/10] www_coderepo: wire up snapshot support
  2022-10-04 19:12  7% [PATCH 00/10] www_coderepo: git viewer w/ search planned Eric Wong
@ 2022-10-04 19:12  3% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2022-10-04 19:12 UTC (permalink / raw)
  To: meta

The resulting snapshots should be compatible with those produced by cgit.
---
 MANIFEST                        |  1 +
 lib/PublicInbox/Git.pm          |  1 +
 lib/PublicInbox/GitAsyncCat.pm  | 49 +++++++++++++++--
 lib/PublicInbox/RepoSnapshot.pm | 95 +++++++++++++++++++++++++++++++++
 lib/PublicInbox/WwwCoderepo.pm  |  8 +++
 t/solver_git.t                  | 20 +++++++
 6 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 lib/PublicInbox/RepoSnapshot.pm

diff --git a/MANIFEST b/MANIFEST
index cf6d97e1..29f368de 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -306,6 +306,7 @@ lib/PublicInbox/PktOp.pm
 lib/PublicInbox/ProcessPipe.pm
 lib/PublicInbox/Qspawn.pm
 lib/PublicInbox/Reply.pm
+lib/PublicInbox/RepoSnapshot.pm
 lib/PublicInbox/SaPlugin/ListMirror.pm
 lib/PublicInbox/SaPlugin/ListMirror.pod
 lib/PublicInbox/Search.pm
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 691462ed..2ed3a29b 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -426,6 +426,7 @@ sub cleanup {
 				scalar(@{$self->{inflight} // []}));
 	local $in_cleanup = 1;
 	delete $self->{async_cat};
+	delete $self->{async_chk};
 	async_wait_all($self);
 	delete $self->{inflight};
 	delete $self->{inflight_c};
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 613dbf7e..2e0725a6 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -1,14 +1,14 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # internal class used by PublicInbox::Git + PublicInbox::DS
 # This parses the output pipe of "git cat-file --batch"
 package PublicInbox::GitAsyncCat;
-use strict;
+use v5.12;
 use parent qw(PublicInbox::DS Exporter);
 use POSIX qw(WNOHANG);
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch async_check);
 use PublicInbox::Git ();
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
@@ -74,6 +74,18 @@ sub ibx_async_cat ($$$$) {
 	}
 }
 
+sub async_check ($$$$) {
+	my ($ibx, $oidish, $cb, $arg) = @_;
+	my $git = $ibx->{git} // $ibx->git;
+	$git->check_async($oidish, $cb, $arg);
+	$git->{async_chk} //= do {
+		my $self = bless { git => $git }, 'PublicInbox::GitAsyncCheck';
+		$git->{in_c}->blocking(0);
+		$self->SUPER::new($git->{in_c}, EPOLLIN|EPOLLET);
+		\undef; # this is a true ref()
+	};
+}
+
 # this is safe to call inside $cb, but not guaranteed to enqueue
 # returns true if successful, undef if not.  For fairness, we only
 # prefetch if there's no in-flight requests.
@@ -96,3 +108,34 @@ sub ibx_async_prefetch {
 }
 
 1;
+package PublicInbox::GitAsyncCheck;
+use v5.12;
+our @ISA = qw(PublicInbox::GitAsyncCat);
+use POSIX qw(WNOHANG);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+
+sub event_step {
+	my ($self) = @_;
+	my $git = $self->{git} or return;
+	return $self->close if ($git->{in_c} // 0) != ($self->{sock} // 1);
+	my $inflight = $git->{inflight_c};
+	if ($inflight && @$inflight) {
+		$git->check_async_step($inflight);
+
+		# child death?
+		if (($git->{in_c} // 0) != ($self->{sock} // 1)) {
+			$self->close;
+		} elsif (@$inflight || exists $git->{rbuf_c}) {
+			# ok, more to do, requeue for fairness
+			$self->requeue;
+		}
+	} elsif ((my $pid = waitpid($git->{pid_c}, WNOHANG)) > 0) {
+		# May happen if the child process is killed by a BOFH
+		# (or segfaults)
+		delete $git->{pid_c};
+		warn "E: git $pid exited with \$?=$?\n";
+		$self->close;
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm
new file mode 100644
index 00000000..460340e6
--- /dev/null
+++ b/lib/PublicInbox/RepoSnapshot.pm
@@ -0,0 +1,95 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# cgit-compatible /snapshot/ endpoint for WWW coderepos
+package PublicInbox::RepoSnapshot;
+use v5.12;
+use PublicInbox::Git;
+use PublicInbox::Qspawn;
+use PublicInbox::GitAsyncCat;
+use PublicInbox::WwwStatic qw(r);
+
+# Not using standard mime types since the compressed tarballs are
+# special or do not match my /etc/mime.types.  Choose what gitweb
+# and cgit agree on for compatibility.
+our %FMT_TYPES = (
+	'tar' => 'application/x-tar',
+	'tar.gz' => 'application/x-gzip',
+	'tar.bz2' => 'application/x-bzip2',
+	'tar.xz' => 'application/x-xz',
+	'zip' => 'application/x-zip',
+);
+
+our %FMT_CFG = (
+	'tar.xz' => 'xz -c',
+	'tar.bz2' => 'bzip2 -c',
+	# not supporting lz nor zstd for now to avoid format proliferation
+	# and increased cache overhead required to handle extra formats.
+);
+
+my $SUFFIX = join('|', map { quotemeta } keys %FMT_TYPES);
+
+# TODO deal with tagged blobs
+
+sub archive_hdr { # parse_hdr for Qspawn
+	my ($r, $bref, $ctx) = @_;
+	$r or return [500, [qw(Content-Type text/plain Content-Length 0)], []];
+	my $fn = "$ctx->{snap_pfx}.$ctx->{snap_fmt}";
+	my $type = $FMT_TYPES{$ctx->{snap_fmt}} //
+				die "BUG: bad fmt: $ctx->{snap_fmt}";
+	[ 200, [ 'Content-Type', "$type; charset=UTF-8",
+		'Content-Disposition', qq(inline; filename="$fn"),
+		'ETag', qq("$ctx->{etag}") ] ];
+}
+
+sub archive_cb {
+	my ($ctx) = @_;
+	my @cfg;
+	if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) {
+		@cfg = ('-c', "tar.$ctx->{snap_fmt}.command=$cmd");
+	}
+	my $qsp = PublicInbox::Qspawn->new(['git', @cfg,
+			"--git-dir=$ctx->{git}->{git_dir}", 'archive',
+			"--prefix=$ctx->{snap_pfx}/",
+			"--format=$ctx->{snap_fmt}", $ctx->{treeish}]);
+	$qsp->psgi_return($ctx->{env}, undef, \&archive_hdr, $ctx);
+}
+
+sub ver_check { # git->check_async callback
+	my ($oid, $type, $size, $ctx) = @_;
+	if ($type eq 'missing') { # try 'v' and 'V' prefixes
+		my $pfx = shift @{$ctx->{try_pfx}} or return
+			delete($ctx->{env}->{'qspawn.wcb'})->(r(404));
+		my $v = $ctx->{treeish} = $pfx.$ctx->{snap_ver};
+		return $ctx->{env}->{'pi-httpd.async'} ?
+			async_check($ctx, $v, \&ver_check, $ctx) :
+			$ctx->{git}->check_async($v, \&ver_check, $ctx);
+	}
+	$ctx->{etag} = $oid;
+	archive_cb($ctx);
+}
+
+sub srv {
+	my ($ctx, $fn) = @_;
+	return if $fn =~ /["\s]/s;
+	$fn =~ s/\.($SUFFIX)\z//o or return;
+	$ctx->{snap_fmt} = $1;
+	my $pfx = $ctx->{git}->local_nick // return;
+	$pfx =~ s/(?:\.git)?\z/-/;
+	substr($fn, 0, length($pfx)) eq $pfx or return;
+	$ctx->{snap_pfx} = $fn;
+	my $v = $ctx->{snap_ver} = substr($fn, length($pfx), length($fn));
+	$ctx->{treeish} = $v; # try without [vV] prefix, first
+	@{$ctx->{try_pfx}} = qw(v V); # cf. cgit:ui-snapshot.c
+	sub {
+		$ctx->{env}->{'qspawn.wcb'} = $_[0];
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			async_check($ctx, $v, \&ver_check, $ctx);
+		} else {
+			$ctx->{git}->check_async($v, \&ver_check, $ctx);
+			$ctx->{git}->check_async_wait;
+		}
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index e0fc9045..fb510b28 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -175,6 +175,14 @@ sub srv { # endpoint called by PublicInbox::WWW
 			($ctx->{git} = $self->{"\0$1"}) and
 		return PublicInbox::ViewVCS::show($ctx, $2);
 
+	# snapshots:
+	if ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and
+			($ctx->{git} = $self->{"\0$1"})) {
+		require PublicInbox::RepoSnapshot;
+		return PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404);
+	}
+
+	# enforce trailing slash:
 	if ($path_info =~ m!\A/(.+?)\z! and ($git = $self->{"\0$1"})) {
 		my $qs = $ctx->{env}->{QUERY_STRING};
 		my $url = $git->base_url($ctx->{env});
diff --git a/t/solver_git.t b/t/solver_git.t
index d6936c47..71b9554a 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -34,6 +34,7 @@ File::Path::mkpath([map { $md.$_ } (qw(/ /cur /new /tmp))]);
 symlink(abs_path('t/solve/0001-simple-mod.patch'), "$md/cur/foo:2,") or
 	xbail "symlink: $!";
 
+my $v1_0_0_rev = '8a918a8523bc9904123460f85999d75f6d604916';
 my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d';
 my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16);
 my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
@@ -331,6 +332,25 @@ EOF
 		is($res->code, 200, 'coderepo summary (public-inbox)');
 		$res = $cb->(GET('/public-inbox'));
 		is($res->code, 301, 'redirected');
+
+		my $fn = 'public-inbox-1.0.0.tar.gz';
+		$res = $cb->(GET("/public-inbox/snapshot/$fn"));
+		is($res->code, 200, 'tar.gz snapshot');
+		is($res->header('Content-Disposition'),
+			qq'inline; filename="$fn"', 'c-d header');
+		is($res->header('ETag'), qq'"$v1_0_0_rev"', 'etag header');
+		my $exp = xqx([qw(git archive --format=tar.gz
+				--prefix=public-inbox-1.0.0/ v1.0.0)],
+				{ GIT_DIR => $git_dir });
+		my $got = $res->content;
+		is(length($got), length($exp),
+			"length matches installed `git archive' output") and
+		is(git_sha(1, \$got)->hexdigest, git_sha(1, \$exp)->hexdigest,
+			"content matches installed `git archive' output");
+
+		$fn = 'public-inbox-1.0.2.tar.gz';
+		$res = $cb->(GET("/public-inbox/snapshot/$fn"));
+		is($res->code, 404, '404 on non-existent tag');
 	};
 	test_psgi(sub { $www->call(@_) }, $client);
 	my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };

^ permalink raw reply related	[relevance 3%]

* [PATCH 00/10] www_coderepo: git viewer w/ search planned
@ 2022-10-04 19:12  7% Eric Wong
  2022-10-04 19:12  3% ` [PATCH 08/10] www_coderepo: wire up snapshot support Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2022-10-04 19:12 UTC (permalink / raw)
  To: meta

This is to eventually support M:N inbox:coderepo search via
-extindex.  For now, it's a JS-free and CSS-optional
cgit-like UI, but perhaps more brutalist :>

It's mainly for wiring /$INBOX/$OID/s/ into /$CODEREPO/$OID/s/,
but Atom feeds and such will be supported.

It'll work better with coderepos with a ".git" name suffix
to avoid conflicts with the inbox name.  So [coderepo "git"]
becomes [coderepo "git.git"] in my config file.  cgitrc usage
should continue working, I think...

It looks better out-of-the-box on w3m since cgit relies too
much on CSS :>

Example here: https://80x24.org/lore/git.git

Eric Wong (10):
  tests: use test_httpd consistently
  cgit: use Perl 5.10-isms, optimize, and golf
  git: hoist out description
  git: move cloneurl + description reading here
  www_coderepo: an alternative to cgit
  www_coderepo: wire up /$CODEREPO/$OID/s/ endpoint
  git: allow ->local_nick to return undef
  www_coderepo: wire up snapshot support
  www_stream: use git->pub_urls for coderepo links
  www_coderepo: start a top nav bar in summary view

 MANIFEST                        |   2 +
 lib/PublicInbox/Cgit.pm         |  33 ++---
 lib/PublicInbox/Config.pm       |   2 +-
 lib/PublicInbox/ExtSearch.pm    |   2 +-
 lib/PublicInbox/Git.pm          |  53 +++++++--
 lib/PublicInbox/GitAsyncCat.pm  |  66 ++++++++--
 lib/PublicInbox/Inbox.pm        |  23 +---
 lib/PublicInbox/RepoSnapshot.pm |  95 +++++++++++++++
 lib/PublicInbox/SolverGit.pm    |   8 +-
 lib/PublicInbox/TestCommon.pm   |  14 ++-
 lib/PublicInbox/ViewVCS.pm      |   1 +
 lib/PublicInbox/WWW.pm          |  12 +-
 lib/PublicInbox/WwwCoderepo.pm  | 205 ++++++++++++++++++++++++++++++++
 lib/PublicInbox/WwwStream.pm    |  33 ++---
 t/init.t                        |   2 +-
 t/lei-mirror.t                  |  10 +-
 t/psgi_attach.t                 |  13 +-
 t/solver_git.t                  |  70 ++++++++---
 t/www_altid.t                   |  13 +-
 xt/solver.t                     |  18 +--
 20 files changed, 533 insertions(+), 142 deletions(-)
 create mode 100644 lib/PublicInbox/RepoSnapshot.pm
 create mode 100644 lib/PublicInbox/WwwCoderepo.pm

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2022-10-04 19:12  7% [PATCH 00/10] www_coderepo: git viewer w/ search planned Eric Wong
2022-10-04 19:12  3% ` [PATCH 08/10] www_coderepo: wire up snapshot support Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).