user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 4/4] lei blob: aka "git-show-harder" for blobs
Date: Sat, 27 Mar 2021 11:45:51 +0000	[thread overview]
Message-ID: <20210327114551.3607-5-e@80x24.org> (raw)
In-Reply-To: <20210327114551.3607-1-e@80x24.org>

This implements blob reconstruction via SolverGit,
emulating the functionality of the /$INBOX/$OID/s/ endpoint
in PublicInbox::WWW.

It uses the current working tree as a coderepo, and
accepts any number of --git-dir=$PATH args.

Remote externals are not yet supported.
---
 MANIFEST                   |   1 +
 lib/PublicInbox/LEI.pm     |  24 ++++----
 lib/PublicInbox/LeiBlob.pm | 119 +++++++++++++++++++++++++++++++++++++
 t/lei-import.t             |   2 +
 t/solver_git.t             |  14 ++++-
 5 files changed, 146 insertions(+), 14 deletions(-)
 create mode 100644 lib/PublicInbox/LeiBlob.pm

diff --git a/MANIFEST b/MANIFEST
index 6b2b33ac..64b3626f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -181,6 +181,7 @@ lib/PublicInbox/KQNotify.pm
 lib/PublicInbox/LEI.pm
 lib/PublicInbox/LeiALE.pm
 lib/PublicInbox/LeiAuth.pm
+lib/PublicInbox/LeiBlob.pm
 lib/PublicInbox/LeiConvert.pm
 lib/PublicInbox/LeiCurl.pm
 lib/PublicInbox/LeiDedupe.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e680f5f0..478912cd 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -120,6 +120,9 @@ sub index_opt {
 }
 
 my @c_opt = qw(c=s@ C=s@ quiet|q);
+my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@
+	import-remote!  no-torsocks torsocks=s),
+	PublicInbox::LeiQuery::curl_opt());
 
 # we generate shell completion + help using %CMD and %OPTDESC,
 # see lei__complete() and PublicInbox::LeiHelp
@@ -127,16 +130,15 @@ my @c_opt = qw(c=s@ C=s@ quiet|q);
 our %CMD = ( # sorted in order of importance/use:
 'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms',
 	'stdin|', # /|\z/ must be first for lone dash
+	@lxs_opt,
 	qw(save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t+
-	sort|s=s reverse|r offset=i remote! local! external! pretty
-	include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g augment|a
-	import-remote! import-before! lock=s@ rsyncable
-	alert=s@ mua=s no-torsocks torsocks=s verbose|v+), @c_opt,
-	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
+	sort|s=s reverse|r offset=i pretty jobs|j=s globoff|g augment|a
+	import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+), @c_opt,
+	opt_dash('limit|n=i', '[0-9]+') ],
 
-'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
-	qw(type=s solve! format|f=s dedupe|d=s threads|t remote local!
-	verbose|v+), @c_opt, pass_through('git show') ],
+'blob' => [ 'OID', 'display a git blob object, solving if necessary',
+	qw(git-dir=s@ cwd! verbose|v+ oid-a|A=s path-a|a=s path-b|b=s),
+	@lxs_opt, @c_opt ],
 
 'add-external' => [ 'LOCATION',
 	'add/set priority of a publicinbox|extindex for extra matches',
@@ -350,7 +352,7 @@ my %CONFIG_KEYS = (
 	'leistore.dir' => 'top-level storage location',
 );
 
-my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q mark); # internal workers
+my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q mark sol); # internal workers
 
 # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE
 sub x_it ($$) {
@@ -726,10 +728,6 @@ sub _lei_store ($;$) {
 	};
 }
 
-sub lei_show {
-	my ($self, @argv) = @_;
-}
-
 sub _config {
 	my ($self, @argv) = @_;
 	my %env = (%{$self->{env}}, GIT_CONFIG => undef);
diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm
new file mode 100644
index 00000000..a50255aa
--- /dev/null
+++ b/lib/PublicInbox/LeiBlob.pm
@@ -0,0 +1,119 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei blob $OID" command
+package PublicInbox::LeiBlob;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::DS;
+use PublicInbox::Eml;
+
+sub sol_done_wait { # dwaitpid callback
+	my ($arg, $pid) = @_;
+	my (undef, $lei) = @$arg;
+	$lei->child_error($?) if $?;
+	$lei->dclose;
+}
+
+sub sol_done { # EOF callback for main daemon
+	my ($lei) = @_;
+	my $sol = delete $lei->{sol} or return;
+	$sol->wq_wait_old(\&sol_done_wait, $lei);
+}
+
+sub get_git_dir ($) {
+	my ($d) = @_;
+	return $d if -d "$d/objects" && -d "$d/refs" && -e "$d/HEAD";
+
+	my $cmd = [ qw(git rev-parse --git-dir) ];
+	my ($r, $pid) = popen_rd($cmd, {GIT_DIR => undef}, { '-C' => $d });
+	chomp(my $gd = do { local $/; <$r> });
+	waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd ($!)";
+	$? == 0 ? $gd : undef;
+}
+
+sub solver_user_cb { # called by solver when done
+	my ($res, $self) = @_;
+	my $lei = $self->{lei};
+	my $log_buf = delete $lei->{'log_buf'};
+	$$log_buf =~ s/^/# /sgm;
+	ref($res) eq 'ARRAY' or return $lei->fail($$log_buf);
+	$lei->qerr($$log_buf);
+	my ($git, $oid, $type, $size, $di) = @$res;
+	my $gd = $git->{git_dir};
+
+	# don't try to support all the git-show(1) options for non-blob,
+	# this is just a convenience:
+	$type ne 'blob' and
+		$lei->err("# $oid is a $type of $size bytes in:\n#\t$gd");
+
+	my $cmd = [ 'git', "--git-dir=$gd", 'show', $oid ];
+	my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} };
+	waitpid(spawn($cmd, $lei->{env}, $rdr), 0);
+	$lei->child_error($?) if $?;
+}
+
+sub do_solve_blob { # via wq_do
+	my ($self) = @_;
+	my $lei = $self->{lei};
+	my $git_dirs = $lei->{opt}->{'git-dir'};
+	my $hints = {};
+	for my $x (qw(oid-a path-a path-b)) {
+		my $v = $lei->{opt}->{$x} // next;
+		$x =~ tr/-/_/;
+		$hints->{$x} = $v;
+	}
+	open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
+	$lei->{log_buf} = \$log_buf;
+	my $git = $lei->ale->git;
+	my $solver = bless {
+		gits => [ map { PublicInbox::Git->new($_) } @$git_dirs ],
+		user_cb => \&solver_user_cb,
+		uarg => $self,
+		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
+		inboxes => [ $self->{lxs}->locals ],
+	}, 'PublicInbox::SolverGit';
+	$lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh...
+	local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+	$solver->solve($lei->{env}, $log, $self->{oid_b}, $hints);
+}
+
+sub lei_blob {
+	my ($lei, $blob) = @_;
+	$lei->start_pager if -t $lei->{1};
+
+	# first, see if it's a blob returned by "lei q" JSON output:
+	my $rdr = { 1 => $lei->{1} };
+	open $rdr->{2}, '>', '/dev/null' or die "open: $!";
+	my $cmd = [ 'git', '--git-dir='.$lei->ale->git->{git_dir},
+			'cat-file', 'blob', $blob ];
+	waitpid(spawn($cmd, $lei->{env}, $rdr), 0);
+	return if $? == 0;
+
+	# maybe it's a non-email (code) blob from a coderepo
+	my $git_dirs = $lei->{opt}->{'git-dir'} //= [];
+	if ($lei->{opt}->{'cwd'} //= 1) {
+		my $cgd = get_git_dir('.');
+		unshift(@$git_dirs, $cgd) if defined $cgd;
+	}
+	my $lxs = $lei->lxs_prepare or return;
+	require PublicInbox::SolverGit;
+	my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__;
+	my $op = $lei->workers_start($self, 'lei_solve', 1,
+		{ '' => [ \&sol_done, $lei ] });
+	$lei->{sol} = $self;
+	$self->wq_io_do('do_solve_blob', []);
+	$self->wq_close(1);
+	while ($op && $op->{sock}) { $op->event_step }
+}
+
+sub ipc_atfork_child {
+	my ($self) = @_;
+	$self->{lei}->_lei_atfork_child;
+	$SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
+	$self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/t/lei-import.t b/t/lei-import.t
index fa40ad01..33ce490d 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -54,6 +54,8 @@ is($res->[0]->{'m'}, 'x@y', 'got expected message');
 is($res->[0]->{kw}, undef, 'Status ignored for eml');
 lei_ok(qw(q -f mboxrd m:x@y));
 unlike($lei_out, qr/^Status:/, 'no Status: in imported message');
+lei_ok('blob', $res->[0]->{blob});
+is($lei_out, "From: a\@b\nMessage-ID: <x\@y>\n", 'got blob back');
 
 
 $eml->header_set('Message-ID', '<v@y>');
diff --git a/t/solver_git.t b/t/solver_git.t
index 99ffb9e3..22714ae5 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -6,6 +6,7 @@ use v5.10.1;
 use PublicInbox::TestCommon;
 use Cwd qw(abs_path);
 require_git(2.6);
+use Digest::SHA qw(sha1_hex);
 use PublicInbox::Spawn qw(popen_rd);
 require_mods(qw(DBD::SQLite Search::Xapian Plack::Util));
 my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)});
@@ -27,6 +28,18 @@ my $ibx = create_inbox 'v2', version => 2,
 };
 my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d';
 my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16);
+my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
+
+test_lei({tmpdir => $tmpdir}, sub {
+	lei_ok('blob', '69df7d5', '-I', $ibx->{inboxdir});
+	is(sha1_hex("blob ".length($lei_out)."\0".$lei_out),
+		$expect, 'blob contents output');
+
+	# fallbacks
+	lei_ok('blob', $v1_0_0_tag, '-I', $ibx->{inboxdir});
+	lei_ok('blob', $v1_0_0_tag_short, '-I', $ibx->{inboxdir});
+});
+
 my $git = PublicInbox::Git->new($git_dir);
 $ibx->{-repo_objs} = [ $git ];
 my $res;
@@ -38,7 +51,6 @@ $solver->solve($psgi_env, $log, '69df7d5', {});
 ok($res, 'solved a blob!');
 my $wt_git = $res->[0];
 is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob');
-my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
 is($res->[1], $expect, 'resolved blob to unabbreviated identifier');
 is($res->[2], 'blob', 'type specified');
 is($res->[3], 4405, 'size returned');

  parent reply	other threads:[~2021-03-27 11:45 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-27 11:45 [PATCH 0/4] lei blob (formerly known as "lei show") Eric Wong
2021-03-27 11:45 ` [PATCH 1/4] lei_ale: do not create store unnecessarily Eric Wong
2021-03-27 11:45 ` [PATCH 2/4] lei help: move "lei help" into LeiHelp.pm Eric Wong
2021-03-27 11:45 ` [PATCH 3/4] lei_query: hoist out lxs_prepare Eric Wong
2021-03-27 11:45 ` Eric Wong [this message]
2021-03-27 20:20   ` [SQUASH] lei blob: use absolute path Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210327114551.3607-5-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).