user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 10/12] lei blob: add remote external support
Date: Sun, 28 Mar 2021 09:01:22 +0000	[thread overview]
Message-ID: <20210328090124.3541-11-e@80x24.org> (raw)
In-Reply-To: <20210328090124.3541-1-e@80x24.org>

Introduce a new LeiRemote wrapper to provide an internal API
which SolverGit expects.  This lets us use HTTP/HTTPS endpoints
to reconstruct blobs off patches as we would with local
endpoints, just more slowly...
---
 MANIFEST                     |  1 +
 lib/PublicInbox/LEI.pm       |  2 +-
 lib/PublicInbox/LeiBlob.pm   | 16 +++++--
 lib/PublicInbox/LeiRemote.pm | 81 ++++++++++++++++++++++++++++++++++++
 t/solver_git.t               | 16 ++++++-
 5 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 lib/PublicInbox/LeiRemote.pm

diff --git a/MANIFEST b/MANIFEST
index 9048b900..913ce55c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -196,6 +196,7 @@ lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
 lib/PublicInbox/LeiQuery.pm
+lib/PublicInbox/LeiRemote.pm
 lib/PublicInbox/LeiSearch.pm
 lib/PublicInbox/LeiStore.pm
 lib/PublicInbox/LeiToMail.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index a4f4e58c..a94941a9 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -121,7 +121,7 @@ sub index_opt {
 
 my @c_opt = qw(c=s@ C=s@ quiet|q);
 my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@
-	import-remote!  no-torsocks torsocks=s),
+	import-remote! no-torsocks torsocks=s),
 	PublicInbox::LeiQuery::curl_opt());
 
 # we generate shell completion + help using %CMD and %OPTDESC,
diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm
index f44d8af1..8e610efd 100644
--- a/lib/PublicInbox/LeiBlob.pm
+++ b/lib/PublicInbox/LeiBlob.pm
@@ -6,7 +6,7 @@ package PublicInbox::LeiBlob;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
-use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::Spawn qw(spawn popen_rd which);
 use PublicInbox::DS;
 
 sub sol_done_wait { # dwaitpid callback
@@ -66,7 +66,10 @@ sub do_solve_blob { # via wq_do
 	}
 	open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
 	$lei->{log_buf} = \$log_buf;
-	my $git = $lei->ale->git;
+	my $git = $lei->{ale}->git;
+	my @rmt = map {
+		PublicInbox::LeiRemote->new($lei, $_)
+	} $self->{lxs}->remotes;
 	my $solver = bless {
 		gits => [ map {
 				PublicInbox::Git->new($lei->rel2abs($_))
@@ -74,7 +77,7 @@ sub do_solve_blob { # via wq_do
 		user_cb => \&solver_user_cb,
 		uarg => $self,
 		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
-		inboxes => [ $self->{lxs}->locals ],
+		inboxes => [ $self->{lxs}->locals, @rmt ],
 	}, 'PublicInbox::SolverGit';
 	$lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh...
 	local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
@@ -105,8 +108,15 @@ sub lei_blob {
 	}
 	return $lei->fail('no --git-dir to try') unless @$git_dirs;
 	my $lxs = $lei->lxs_prepare or return;
+	if ($lxs->remotes) {
+		require PublicInbox::LeiRemote;
+		$lei->{curl} //= which('curl') or return
+			$lei->fail('curl needed for', $lxs->remotes);
+		$lei->_lei_store(1)->write_prepare($lei);
+	}
 	require PublicInbox::SolverGit;
 	my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__;
+	$lei->ale;
 	my ($op_c, $ops) = $lei->workers_start($self, 'lei_solve', 1,
 		{ '' => [ \&sol_done, $lei ] });
 	$lei->{sol} = $self;
diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
new file mode 100644
index 00000000..399fc936
--- /dev/null
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -0,0 +1,81 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Make remote externals HTTP(S) inboxes behave like
+# PublicInbox::Inbox and PublicInbox::Search/ExtSearch.
+# This exists solely for SolverGit.  It is a high-latency,
+# synchronous API that is not at all fast.
+package PublicInbox::LeiRemote;
+use v5.10.1;
+use strict;
+use IO::Uncompress::Gunzip;
+use PublicInbox::OnDestroy;
+use PublicInbox::MboxReader;
+use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::LeiCurl;
+use PublicInbox::ContentHash qw(git_sha);
+
+sub new { # wraps one remote external ($uri) for SolverGit
+	my ($class, $lei, $remote) = @_;
+	bless { lei => $lei, uri => $remote }, $class;
+}
+
+sub isrch { $_[0] } # SolverGit expects this
+
+sub _each_mboxrd_eml { # MboxReader->mboxrd callback, run once per message
+	my ($eml, $self) = @_;
+	my ($lei, $smsg) = ($self->{lei}, bless({}, 'PublicInbox::Smsg'));
+	my $known = $lei->{ale}->xoids_for($eml, 1);
+	if ($known) { # ale already has it: use one of its blob OIDs
+		$smsg->{blob} = (keys(%$known))[0];
+	} else { # unseen: memoize locally if we have a store
+		$lei->{sto}->ipc_do('add_eml', $eml) if $lei->{sto};
+		$smsg->{blob} = git_sha(1, $eml)->hexdigest;
+	}
+	$smsg->populate($eml);
+	$smsg->{mid} //= '(none)';
+	push @{$self->{smsg}}, $smsg;
+}
+
+sub mset { # runs $qstr against the remote via curl; fills $self->{smsg}
+	my ($self, $qstr, undef) = @_; # $opt ($_[2]) ignored
+	my $lei = $self->{lei};
+	my $curl = PublicInbox::LeiCurl->new($lei, $lei->{curl});
+	push @$curl, '-s', '-d', ''; # silent; -d '' sends an empty POST body
+	my $uri = $self->{uri}->clone; # leave our own URI's query untouched
+	$uri->query_form(q => $qstr, x => 'm', r => 1); # r=1: relevance
+	my $cmd = $curl->for_uri($lei, $uri);
+	$lei->qerr("# $cmd");
+	my $rdr = { 2 => $lei->{2}, pgid => 0 };
+	my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
+	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
+	$self->{smsg} = []; # drop results of any previous query
+	$fh = IO::Uncompress::Gunzip->new($fh);
+	PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self);
+	my $err = waitpid($pid, 0) == $pid ? undef
+					: "BUG: waitpid($cmd): $!";
+	@$reap = (); # cancel OnDestroy
+	$lei->{sto}->ipc_do('done') if $lei->{sto}; # {sto} is optional
+	die $err if $err; # raised only after the store was told 'done'
+	$self; # we are the mset (and $ibx, and $self)
+}
+
+sub size { my ($self) = @_; scalar(@{$self->{smsg}}) } # count of last results
+
+sub mset_to_smsg { # list context: (count, @smsg); otherwise: \@smsg
+	my ($self) = @_; # $ibx ($_[1]) and $mset ($_[2]) are $self, too
+	wantarray ? ($self->size, @{$self->{smsg}}) : $self->{smsg};
+}
+
+sub base_url { my ($self) = @_; "$self->{uri}" } # stringified remote URI
+
+sub smsg_eml { # => Eml object, or undef (after E: message) if blob is gone
+	my ($self, $smsg) = @_;
+	my $lei = $self->{lei};
+	my $bref = $lei->ale->git->cat_file($smsg->{blob});
+	return PublicInbox::Eml->new($bref) if $bref;
+	$lei->err("E: $self->{uri} $smsg->{blob} gone <$smsg->{mid}>");
+	undef;
+}
+
+1;
diff --git a/t/solver_git.t b/t/solver_git.t
index 6d4b93c7..2d803d47 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -7,7 +7,7 @@ use PublicInbox::TestCommon;
 use Cwd qw(abs_path);
 require_git(2.6);
 use Digest::SHA qw(sha1_hex);
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spawn qw(popen_rd which);
 require_mods(qw(DBD::SQLite Search::Xapian Plack::Util));
 my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)});
 $? == 0 or plan skip_all => "$0 must be run from a git working tree";
@@ -227,8 +227,20 @@ EOF
 		my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
 		my $td = start_script($cmd, $env, { 3 => $sock });
 		my ($h, $p) = tcp_host_port($sock);
-		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
+		my $url = "http://$h:$p";
+		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $url;
 		Plack::Test::ExternalServer::test_psgi(client => $client);
+		skip 'no curl', 1 unless which('curl');
+		# parens + "or": mkdir is a list operator and is false on error
+		mkdir("$tmpdir/ext") or xbail "mkdir $!";
+		test_lei({tmpdir => "$tmpdir/ext"}, sub {
+			my $rurl = "$url/$name";
+			lei_ok(qw(blob --no-mail 69df7d5 -I), $rurl);
+			is(sha1_hex("blob ".length($lei_out)."\0".$lei_out),
+				$expect, 'blob contents output');
+			ok(!lei(qw(blob -I), $rurl, $non_existent),
+					'non-existent blob fails');
+		});
 	}
 }
 

  parent reply	other threads:[~2021-03-28  9:01 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-28  9:01 [PATCH 00/12] lei blob and some yak-shaving Eric Wong
2021-03-28  9:01 ` [PATCH 01/12] lei: simplify PktOp callers Eric Wong
2021-03-28  9:01 ` [PATCH 02/12] lei init: split out into separate file Eric Wong
2021-03-28  9:01 ` [PATCH 03/12] lei blob: dclose if already failed Eric Wong
2021-03-28  9:01 ` [PATCH 04/12] lei blob: support --no-mail switch Eric Wong
2021-03-28  9:01 ` [PATCH 05/12] lei blob: fail early if no git dirs Eric Wong
2021-03-28  9:01 ` [PATCH 06/12] lei blob: some extra tests Eric Wong
2021-03-28  9:01 ` [PATCH 07/12] lei help: show "NAME=VALUE" properly for -c Eric Wong
2021-03-28  9:01 ` [PATCH 08/12] lei blob: flesh out help text Eric Wong
2021-03-28  9:01 ` [PATCH 09/12] t/lei_store: ensure LeiSearch responds to ->isrch Eric Wong
2021-03-28  9:01 ` Eric Wong [this message]
2021-03-28  9:01 ` [PATCH 11/12] lei: drop coderepo placeholders, submodule TODO Eric Wong
2021-03-28  9:31   ` Eric Wong
2021-03-28  9:01 ` [PATCH 12/12] treewide: shorten temporary filename Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210328090124.3541-11-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).