user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 00/10] lei: externals more stuff
@ 2021-01-23 10:27 Eric Wong
  2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
                   ` (9 more replies)
  0 siblings, 10 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

I don't know what I'm doing anymore, and maybe I never did.

Eric Wong (10):
  lei: move external vivification to xsearch
  lei: support remote externals
  lei_to_mail: drop cyclic reference if not using IPC
  lei: oneshot: preserve stdout if writing mbox
  lei: default "-f $mfolder" args for common MUAs
  lei completion: handle URLs with port numbers
  lei forget-external: just show the location
  lei q: support a bunch of curl(1) options
  lei forget-external: do not show redundant "not found" lines
  lei add-external: don't allow non-existent directories

 lib/PublicInbox/LEI.pm         |  46 +++++++----
 lib/PublicInbox/LeiExternal.pm |  41 ++++++++--
 lib/PublicInbox/LeiOverview.pm |  10 ++-
 lib/PublicInbox/LeiQuery.pm    |  68 ++++++++++++-----
 lib/PublicInbox/LeiToMail.pm   |  24 ++++--
 lib/PublicInbox/LeiXSearch.pm  | 136 ++++++++++++++++++++++++++++-----
 lib/PublicInbox/ProcessPipe.pm |   2 +
 script/lei                     |   2 +
 t/lei.t                        |  43 +++++++++++
 t/lei_xsearch.t                |   5 +-
 10 files changed, 309 insertions(+), 68 deletions(-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 01/10] lei: move external vivification to xsearch
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 02/10] lei: support remote externals Eric Wong
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

This seems like a better place to put it given upcoming
URI support, which starts in this commit.
---
 lib/PublicInbox/LeiQuery.pm   | 27 +++++------------
 lib/PublicInbox/LeiXSearch.pm | 57 ++++++++++++++++++++++++-----------
 t/lei_xsearch.t               |  5 ++-
 3 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 7d634b5e..eebf217b 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -7,19 +7,6 @@ use strict;
 use v5.10.1;
 use PublicInbox::DS qw(dwaitpid);
 
-sub _vivify_external { # _externals_each callback
-	my ($src, $dir) = @_;
-	if (-f "$dir/ei.lock") {
-		require PublicInbox::ExtSearch;
-		push @$src, PublicInbox::ExtSearch->new($dir);
-	} elsif (-f "$dir/inbox.lock" || -d "$dir/public-inbox") { # v2, v1
-		require PublicInbox::Inbox;
-		push @$src, bless { inboxdir => $dir }, 'PublicInbox::Inbox';
-	} else {
-		warn "W: ignoring $dir, unable to determine type\n";
-	}
-}
-
 # the main "lei q SEARCH_TERMS" method
 sub lei_q {
 	my ($self, @argv) = @_;
@@ -27,19 +14,19 @@ sub lei_q {
 	require PublicInbox::LeiOverview;
 	PublicInbox::Config->json; # preload before forking
 	my $opt = $self->{opt};
-	my @srcs; # any number of LeiXSearch || LeiSearch || Inbox
+	my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
+	# any number of LeiXSearch || LeiSearch || Inbox
 	if ($opt->{'local'} //= 1) { # --local is enabled by default
 		my $sto = $self->_lei_store(1);
-		push @srcs, $sto->search;
+		$lxs->prepare_external($sto->search);
 	}
 
-	my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
 	# --external is enabled by default, but allow --no-external
 	if ($opt->{external} //= 1) {
-		$self->_externals_each(\&_vivify_external, \@srcs);
+		my $cb = $lxs->can('prepare_external');
+		$self->_externals_each($cb, $lxs);
 	}
-	my $xj = $opt->{jobs} // (scalar(@srcs) > 3 ? 3 : scalar(@srcs));
-	$xj = 1 if !$opt->{thread};
+	my $xj = $opt->{thread} ? $lxs->locals : ($lxs->remotes + 1);
 	my $ovv = PublicInbox::LeiOverview->new($self) or return;
 	$self->atfork_prepare_wq($lxs);
 	$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
@@ -76,7 +63,7 @@ sub lei_q {
 	$mset_opt{relevance} //= -2 if $opt->{thread};
 	$self->{mset_opt} = \%mset_opt;
 	$ovv->ovv_begin($self);
-	$lxs->do_query($self, \@srcs);
+	$lxs->do_query($self);
 }
 
 1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 987a9896..10c25246 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -26,10 +26,6 @@ sub new {
 
 sub attach_external {
 	my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
-
-	if (!$ibxish->can('over') || !$ibxish->over) {
-		return push(@{$self->{remotes}}, $ibxish)
-	}
 	my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
 	my $srch = $ibxish->search or
 		return warn("$desc not indexed for Xapian\n");
@@ -59,10 +55,9 @@ sub attach_external {
 }
 
 # returns a list of local inboxes (or count in scalar context)
-sub locals {
-	my %uniq = map {; "$_" => $_ } @{$_[0]->{shard2ibx} // []};
-	values %uniq;
-}
+sub locals { @{$_[0]->{locals} // []} }
+
+sub remotes { @{$_[0]->{remotes} // []} }
 
 # called by PublicInbox::Search::xdb
 sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
@@ -148,14 +143,16 @@ sub query_thread_mset { # for --thread
 }
 
 sub query_mset { # non-parallel for non-"--thread" users
-	my ($self, $lei, $srcs) = @_;
+	my ($self, $lei) = @_;
 	local $0 = "$0 query_mset";
 	my $startq = delete $self->{5};
 	my %sig = $lei->atfork_child_wq($self);
 	local @SIG{keys %sig} = values %sig;
 	my $mo = { %{$lei->{mset_opt}} };
 	my $mset;
-	$self->attach_external($_) for @$srcs;
+	for my $loc (locals($self)) {
+		attach_external($self, $loc);
+	}
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
 	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	$dedupe->prepare_dedupe;
@@ -172,6 +169,10 @@ sub query_mset { # non-parallel for non-"--thread" users
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
+sub query_remote_mboxrd {
+	my ($self, $lei, $uri) = @_;
+}
+
 sub git {
 	my ($self) = @_;
 	my (%seen, @dirs);
@@ -221,18 +222,17 @@ sub do_post_augment {
 }
 
 sub start_query { # always runs in main (lei-daemon) process
-	my ($self, $io, $lei, $srcs) = @_;
-	my $remotes = $self->{remotes} // [];
+	my ($self, $io, $lei) = @_;
 	if ($lei->{opt}->{thread}) {
-		for my $ibxish (@$srcs) {
+		for my $ibxish (locals($self)) {
 			$self->wq_do('query_thread_mset', $io, $lei, $ibxish);
 		}
 	} else {
-		$self->wq_do('query_mset', $io, $lei, $srcs);
+		$self->wq_do('query_mset', $io, $lei);
 	}
 	# TODO
-	for my $rmt (@$remotes) {
-		$self->wq_do('query_thread_mbox', $io, $lei, $rmt);
+	for my $uri (remotes($self)) {
+		$self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
 	}
 	@$io = ();
 }
@@ -259,7 +259,7 @@ sub sigpipe_handler { # handles SIGPIPE from l2m/lxs workers
 }
 
 sub do_query {
-	my ($self, $lei_orig, $srcs) = @_;
+	my ($self, $lei_orig) = @_;
 	my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
 	$io[0] = undef;
 	pipe(my $done, $io[0]) or die "pipe $!";
@@ -286,7 +286,7 @@ sub do_query {
 		$io[5] = $startq;
 		$io[1] = $zpipe->[1] if $zpipe;
 	}
-	start_query($self, \@io, $lei, $srcs);
+	start_query($self, \@io, $lei);
 	$self->wq_close(1);
 	unless ($in_loop) {
 		# for the $lei->atfork_child_wq PIPE handler:
@@ -302,4 +302,25 @@ sub ipc_atfork_prepare {
 	$self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
 }
 
+sub prepare_external {
+	my ($self, $loc, $boost) = @_; # n.b. already ordered by boost
+	if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object
+		return push(@{$self->{remotes}}, $loc) if $loc->can('scheme');
+	} elsif ($loc =~ m!\Ahttps?://!) {
+		require URI;
+		return push(@{$self->{remotes}}, URI->new($loc));
+	} elsif (-f "$loc/ei.lock") {
+		require PublicInbox::ExtSearch;
+		$loc = PublicInbox::ExtSearch->new($loc);
+	} elsif (-f "$loc/inbox.lock" || -d "$loc/public-inbox") {
+		require PublicInbox::Inbox; # v2, v1
+		$loc = bless { inboxdir => $loc }, 'PublicInbox::Inbox';
+	} else {
+		warn "W: ignoring $loc, unable to determine type\n";
+		return;
+	}
+	push @{$self->{locals}}, $loc;
+}
+
+
 1;
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
index 8b03c1f2..f745ea3e 100644
--- a/t/lei_xsearch.t
+++ b/t/lei_xsearch.t
@@ -49,7 +49,10 @@ $eidx->eidx_sync({fsync => 0});
 my $es = PublicInbox::ExtSearch->new("$home/eidx");
 my $lxs = PublicInbox::LeiXSearch->new;
 for my $ibxish (shuffle($es, @ibx)) {
-	$lxs->attach_external($ibxish);
+	$lxs->prepare_external($ibxish);
+}
+for my $loc ($lxs->locals) {
+	$lxs->attach_external($loc);
 }
 my $nr = $lxs->xdb->get_doccount;
 my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr });

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 02/10] lei: support remote externals
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
  2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-24  6:01   ` Kyle Meyer
  2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

Remote externals are queried via curl(1), since that lets us
easily use Tor on a per-connection basis via LD_PRELOAD
(torsocks) or a proxy.
We'll eventually support more curl options which can allow
users to get past firewalls and deal with other odd network
configurations.
---
 lib/PublicInbox/LEI.pm         | 19 ++++++++++--
 lib/PublicInbox/LeiOverview.pm | 10 +++++-
 lib/PublicInbox/LeiToMail.pm   | 20 +++++++-----
 lib/PublicInbox/LeiXSearch.pm  | 57 +++++++++++++++++++++++++++++++++-
 lib/PublicInbox/ProcessPipe.pm |  2 ++
 script/lei                     |  2 ++
 t/lei.t                        | 39 +++++++++++++++++++++++
 7 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ef3f90fc..f6bc920d 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -84,6 +84,7 @@ our %CMD = ( # sorted in order of importance/use:
 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
 	sort|s=s reverse|r offset=i remote local! external! pretty mua-cmd=s
+	verbose|v
 	since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ],
 
 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
@@ -278,6 +279,16 @@ sub fail ($$;$) {
 	undef;
 }
 
+sub child_error { # passes non-fatal curl exit codes to user
+	my ($self, $child_error) = @_; # child_error is $?
+	if (my $sock = $self->{sock}) { # send to lei(1) client
+		send($sock, "child_error $child_error", MSG_EOR);
+	} else { # oneshot
+		$self->{child_error} = $child_error;
+	}
+	undef;
+}
+
 sub atfork_prepare_wq {
 	my ($self, $wq) = @_;
 	my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ];
@@ -959,19 +970,21 @@ sub lazy_start {
 	exit($exit_code // 0);
 }
 
-# for users w/o Socket::Msghdr
+# for users w/o Socket::Msghdr installed or Inline::C enabled
 sub oneshot {
 	my ($main_pkg) = @_;
 	my $exit = $main_pkg->can('exit'); # caller may override exit()
 	local $quit = $exit if $exit;
 	local %PATH2CFG;
 	umask(077) // die("umask(077): $!");
-	dispatch((bless {
+	my $self = bless {
 		0 => *STDIN{GLOB},
 		1 => *STDOUT{GLOB},
 		2 => *STDERR{GLOB},
 		env => \%ENV
-	}, __PACKAGE__), @ARGV);
+	}, __PACKAGE__;
+	dispatch($self, @ARGV);
+	x_it($self, $self->{child_error}) if $self->{child_error};
 }
 
 # ensures stdout hits the FS before sock disconnects so a client
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 7a4fa857..49538a60 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -209,7 +209,15 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$json->ascii(1) if $lei->{opt}->{ascii};
 	}
 	my $l2m = $lei->{l2m};
-	if ($l2m && $l2m->{-wq_s1}) {
+	if ($l2m && $ibxish->can('scheme')) { # remote https?:// mboxrd
+		delete $l2m->{-wq_s1};
+		my $g2m = $l2m->can('git_to_mail');
+		my $wcb = $l2m->write_cb($lei);
+		sub {
+			my ($smsg, undef, $eml) = @_; # no mitem in $_[1]
+			$wcb->(undef, $smsg, $eml);
+		};
+	} elsif ($l2m && $l2m->{-wq_s1}) {
 		my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
 		# n.b. $io[0] = qry_status_wr, $io[1] = mbox|stdout,
 		# $io[4] becomes a notification pipe that triggers EOF
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index cea68319..43c59da0 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -251,9 +251,9 @@ sub _mbox_write_cb ($$) {
 	my $dedupe = $lei->{dedupe};
 	$dedupe->prepare_dedupe;
 	sub { # for git_to_mail
-		my ($buf, $smsg) = @_;
+		my ($buf, $smsg, $eml) = @_;
 		return unless $out;
-		my $eml = PublicInbox::Eml->new($buf);
+		$eml //= PublicInbox::Eml->new($buf);
 		if (!$dedupe->is_dup($eml, $smsg->{blob})) {
 			$buf = $eml2mbox->($eml, $smsg);
 			my $lk = $ovv->lock_for_scope;
@@ -286,18 +286,23 @@ sub _augment_file { # _maildir_each_file cb
 # _maildir_each_file callback, \&CORE::unlink doesn't work with it
 sub _unlink { unlink($_[0]) }
 
+sub _rand () {
+	state $seq = 0;
+	sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq);
+}
+
 sub _buf2maildir {
 	my ($dst, $buf, $smsg) = @_;
 	my $kw = $smsg->{kw} // [];
 	my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw));
 	my $rand = ''; # chosen by die roll :P
 	my ($tmp, $fh, $final);
-	my $common = $smsg->{blob};
+	my $common = $smsg->{blob} // _rand;
 	if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
 	do {
 		$tmp = $dst.'tmp/'.$rand.$common;
 	} while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY) &&
-		$! == EEXIST && ($rand = int(rand 0x7fffffff).','));
+		$! == EEXIST && ($rand = _rand.','));
 	if (print $fh $$buf and close($fh)) {
 		# ignore new/ and write only to cur/, otherwise MUAs
 		# with R/W access to the Maildir will end up doing
@@ -308,7 +313,7 @@ sub _buf2maildir {
 		do {
 			$final = $dst.$rand.$common.':2,'.$sfx;
 		} while (!link($tmp, $final) && $! == EEXIST &&
-			($rand = int(rand 0x7fffffff).','));
+			($rand = _rand.','));
 		unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
 	} else {
 		my $err = $!;
@@ -323,9 +328,10 @@ sub _maildir_write_cb ($$) {
 	$dedupe->prepare_dedupe;
 	my $dst = $lei->{ovv}->{dst};
 	sub { # for git_to_mail
-		my ($buf, $smsg) = @_;
+		my ($buf, $smsg, $eml) = @_;
+		$buf //= \($eml->as_string);
 		return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
-		my $eml = PublicInbox::Eml->new($$buf); # copy buf
+		$eml //= PublicInbox::Eml->new($$buf); # copy buf
 		return if $dedupe->is_dup($eml, $smsg->{blob});
 		undef $eml;
 		_buf2maildir($dst, $buf, $smsg);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 10c25246..d32fe09a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -14,6 +14,7 @@ use PublicInbox::Import;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Spawn qw(popen_rd);
 
 sub new {
 	my ($class) = @_;
@@ -169,8 +170,58 @@ sub query_mset { # non-parallel for non-"--thread" users
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
+sub each_eml { # callback for MboxReader->mboxrd
+	my ($eml, $self, $lei, $each_smsg) = @_;
+	my $smsg = bless {}, 'PublicInbox::Smsg';
+	$smsg->populate($eml);
+	$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
+	delete @$smsg{qw(From Subject -ds -ts)};
+	if (my $startq = delete($self->{5})) { wait_startq($startq) }
+	return if !$lei->{l2m} && $lei->{dedupe}->is_smsg_dup($smsg);
+	$each_smsg->($smsg, undef, $eml);
+}
+
 sub query_remote_mboxrd {
 	my ($self, $lei, $uri) = @_;
+	local $0 = "$0 query_remote_mboxrd";
+	my %sig = $lei->atfork_child_wq($self); # keep $self->{5} startq
+	local @SIG{keys %sig} = values %sig;
+	my $opt = $lei->{opt};
+	$uri->query_form(q => $lei->{mset_opt}->{qstr}, x => 'm',
+			$opt->{thread} ? (t => 1) : ());
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
+	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
+	$dedupe->prepare_dedupe;
+	my @cmd = qw(curl -XPOST -sSf);
+	my $tor = $opt->{torsocks} //= 'auto';
+	if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+			(($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
+		unshift @cmd, 'torsocks';
+	} elsif (PublicInbox::Config::git_bool($tor)) {
+		unshift @cmd, 'torsocks';
+	}
+	my $verbose = $opt->{verbose};
+	push @cmd, '-v' if $verbose;
+	push @cmd, $uri->as_string;
+	$lei->err("# @cmd") if $verbose;
+	$? = 0;
+	my $fh = popen_rd(\@cmd, $lei->{env}, { 2 => $lei->{2} });
+	$fh = IO::Uncompress::Gunzip->new($fh);
+	eval {
+		PublicInbox::MboxReader->mboxrd($fh, \&each_eml,
+						$self, $lei, $each_smsg);
+	};
+	return $lei->fail("E: @cmd: $@") if $@;
+	if (($? >> 8) == 22) { # HTTP 404 from curl(1)
+		$uri->query_form(q => $lei->{mset_opt}->{qstr});
+		$lei->err('# no results from '.$uri->as_string);
+	} elsif ($?) {
+		$uri->query_form(q => $lei->{mset_opt}->{qstr});
+		$lei->err('E: '.$uri->as_string);
+		$lei->child_error($?);
+	}
+	undef $each_smsg;
+	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
 sub git {
@@ -230,7 +281,6 @@ sub start_query { # always runs in main (lei-daemon) process
 	} else {
 		$self->wq_do('query_mset', $io, $lei);
 	}
-	# TODO
 	for my $uri (remotes($self)) {
 		$self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
 	}
@@ -263,6 +313,7 @@ sub do_query {
 	my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
 	$io[0] = undef;
 	pipe(my $done, $io[0]) or die "pipe $!";
+	$lei_orig->{1}->autoflush(1);
 
 	$lei_orig->event_step_init; # wait for shutdowns
 	my $done_op = {
@@ -296,6 +347,10 @@ sub do_query {
 
 sub ipc_atfork_prepare {
 	my ($self) = @_;
+	if (exists $self->{remotes}) {
+		require PublicInbox::MboxReader;
+		require IO::Uncompress::Gunzip;
+	}
 	# (0: done_wr, 1: stdout|mbox, 2: stderr,
 	#  3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
 	$self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= +<&= <&=]);
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
index e540dc22..97e9c268 100644
--- a/lib/PublicInbox/ProcessPipe.pm
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -13,6 +13,8 @@ sub TIEHANDLE {
 		$class;
 }
 
+sub BINMODE { binmode(shift->{fh}) } # for IO::Uncompress::Gunzip
+
 sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
 
 sub READLINE { readline($_[0]->{fh}) }
diff --git a/script/lei b/script/lei
index 8dcea562..8c40bf12 100755
--- a/script/lei
+++ b/script/lei
@@ -93,6 +93,8 @@ Falling back to (slow) one-shot mode
 		if ($buf =~ /\Ax_it ([0-9]+)\z/) {
 			$x_it_code = $1 + 0;
 			last;
+		} elsif ($buf =~ /\Achild_error ([0-9]+)\z/) {
+			$x_it_code = $1 + 0;
 		} elsif ($buf =~ /\Aexec (.+)\z/) {
 			exec_cmd(\@fds, split(/\0/, $1));
 		} else {
diff --git a/t/lei.t b/t/lei.t
index 50ad2bb1..6b45f5b7 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -8,11 +8,15 @@ use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use File::Path qw(rmtree);
 use Fcntl qw(SEEK_SET);
+use PublicInbox::Spawn qw(which);
 require_git 2.6;
 require_mods(qw(json DBD::SQLite Search::Xapian));
 my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
 my ($home, $for_destroy) = tmpdir();
 my $err_filter;
+my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
+	http://czquwvybam4bgbro.onion/meta/
+	http://ou63pmih66umazou.onion/meta/);
 my $lei = sub {
 	my ($cmd, $env, $xopt) = @_;
 	$out = $err = '';
@@ -155,6 +159,32 @@ my $setup_publicinboxes = sub {
 	$seen || BAIL_OUT 'no imports';
 };
 
+my $test_external_remote = sub {
+	my ($url, $k) = @_;
+SKIP: {
+	my $nr = 4;
+	skip "$k unset", $nr if !$url;
+	which('curl') or skip 'no curl', $nr;
+	which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!;
+	$lei->('ls-external');
+	for my $e (split(/^/ms, $out)) {
+		$e =~ s/\s+boost.*//s;
+		$lei->('forget-external', '-q', $e) or
+			fail "error forgetting $e: $err"
+	}
+	$lei->('add-external', $url);
+	my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
+	ok($lei->('q', "m:$mid"), "query $url");
+	is($err, '', "no errors on $url");
+	my $res = PublicInbox::Config->json->decode($out);
+	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
+	ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
+	like($err, qr/404/, 'noted 404');
+	is($out, "[null]\n", 'got null results');
+	$lei->('forget-external', $url);
+} # /SKIP
+}; # /sub
+
 my $test_external = sub {
 	$setup_publicinboxes->();
 	$cleanup->();
@@ -243,6 +273,15 @@ my $test_external = sub {
 	}
 	ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
 			'fails if mbox format unspecified');
+	my %e = (
+		TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/',
+		TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))],
+	);
+	for my $k (keys %e) {
+		my $url = $ENV{$k} // '';
+		$url = $e{$k} if $url eq '1';
+		$test_external_remote->($url, $k);
+	}
 };
 
 my $test_lei_common = sub {

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
  2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
  2021-01-23 10:27 ` [PATCH 02/10] lei: support remote externals Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

This may fix another interrupt-related segfault I'm occasionally
seeing (but so far unable to reproduce).
---
 lib/PublicInbox/LeiToMail.pm | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 43c59da0..438fb175 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -483,11 +483,14 @@ sub ipc_atfork_prepare {
 # ordering is unstable at worker exit and may cause segfaults
 sub reap_gits {
 	my ($self) = @_;
+	delete $self->{wcb};
 	for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) {
 		$git->async_wait_all;
 	}
 }
 
+sub DESTROY { delete $_[0]->{wcb} }
+
 sub ipc_atfork_child { # runs after IPC::wq_worker_loop
 	my ($self) = @_;
 	$self->SUPER::ipc_atfork_child;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (2 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

We still need stdout if launching an MUA.
---
 lib/PublicInbox/LEI.pm        | 5 ++++-
 lib/PublicInbox/LeiToMail.pm  | 1 +
 lib/PublicInbox/LeiXSearch.pm | 9 ++++++++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index f6bc920d..ba744ef3 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -298,6 +298,9 @@ sub atfork_prepare_wq {
 	if (my $pgr = $self->{pgr}) {
 		push @$tcafc, @$pgr[1,2];
 	}
+	if (my $old_1 = $self->{old_1}) {
+		push @$tcafc, $old_1;
+	}
 	for my $f (qw(lxs l2m)) {
 		my $ipc = $self->{$f} or next;
 		push @$tcafc, grep { defined }
@@ -340,7 +343,7 @@ sub atfork_parent_wq {
 		$ret->{dedupe} = $wq->deep_clone($dedupe);
 	}
 	$self->{env} = $env;
-	delete @$ret{qw(-lei_store cfg pgr lxs)}; # keep l2m
+	delete @$ret{qw(-lei_store cfg old_1 pgr lxs)}; # keep l2m
 	my @io = delete @$ret{0..2};
 	$io[3] = delete($ret->{sock}) // $io[2];
 	my $l2m = $ret->{l2m};
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 438fb175..5f38add1 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -405,6 +405,7 @@ sub _pre_augment_mbox {
 			$! == ENOENT or die "unlink($dst): $!";
 		}
 		open my $out, $mode, $dst or die "open($dst): $!";
+		$lei->{old_1} = $lei->{1};
 		$lei->{1} = $out;
 	}
 	# Perl does SEEK_END even with O_APPEND :<
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index d32fe09a..8d36bca9 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -252,7 +252,14 @@ sub query_done { # EOF callback
 	}
 	$lei->{ovv}->ovv_end($lei);
 	if ($has_l2m) { # close() calls LeiToMail reap_compress
-		close(delete($lei->{1})) if $lei->{1};
+		if (my $out = delete $lei->{old_1}) {
+			if (my $mbout = $lei->{1}) {
+				close($mbout) or return $lei->fail(<<"");
+Error closing $lei->{ovv}->{dst}: $!
+
+			}
+			$lei->{1} = $out;
+		}
 		$lei->start_mua;
 	}
 	$lei->dclose;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (3 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

At least mail, mailx, mutt, and neomutt follow this convention.
Heirloom mailx doesn't support Maildir (our default), but GNU
mailutils mail/mailx does.
---
 lib/PublicInbox/LEI.pm | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ba744ef3..890be575 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -698,17 +698,21 @@ sub exec_buf ($$) {
 }
 
 sub start_mua {
-	my ($self, $sock) = @_;
+	my ($self) = @_;
 	my $mua = $self->{opt}->{'mua-cmd'} // return;
 	my $mfolder = $self->{ovv}->{dst};
-	require Text::ParseWords;
-	my $replaced;
-	my @cmd = Text::ParseWords::shellwords($mua);
-	# mutt uses '%f' for open-hook with compressed folders, so we use %f
-	@cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd;
+	my (@cmd, $replaced);
+	if ($mua =~ /\A(?:mutt|mailx|mail|neomutt)\z/) {
+		@cmd = ($mua, '-f');
+	# TODO: help wanted: other common FOSS MUAs
+	} else {
+		require Text::ParseWords;
+		my @cmd = Text::ParseWords::shellwords($mua);
+		# mutt uses '%f' for open-hook with compressed mbox, we follow
+		@cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd;
+	}
 	push @cmd, $mfolder unless defined($replaced);
-	$sock //= $self->{sock};
-	if ($sock) { # lei(1) client process runs it
+	if (my $sock = $self->{sock}) { # lei(1) client process runs it
 		send($sock, exec_buf(\@cmd, {}), MSG_EOR);
 	} else { # oneshot
 		$self->{"mua.pid.$self.$$"} = spawn(\@cmd);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 06/10] lei completion: handle URLs with port numbers
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (4 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

This improves the experience for developers running local
instances of PublicInbox::WWW without permissions to bind
port 80 or 443.
---
 lib/PublicInbox/LeiExternal.pm | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 59c3c367..a4e644ee 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -101,12 +101,36 @@ sub _complete_forget_external {
 	# Workaround bash word-splitting URLs to ['https', ':', '//' ...]
 	# Maybe there's a better way to go about this in
 	# contrib/completion/lei-completion.bash
-	my $colon = ($argv[-1] // '') eq ':';
-	my $re = $cur =~ /\A[\w-]/ ? '' : '.*';
+	my $re = '';
+	if (@argv) {
+		my @x = @argv;
+		if ($cur eq ':' && @x) {
+			push @x, $cur;
+			$cur = '';
+		}
+		while (@x > 2 && $x[0] !~ /\Ahttps?\z/ && $x[1] ne ':') {
+			shift @x;
+		}
+		if (@x >= 2) { # qw(https : hostname : 443) or qw(http :)
+			$re = join('', @x);
+		} else { # just filter out the flags and hope for the best
+			$re = join('', grep(!/^-/, @argv));
+		}
+		$re = quotemeta($re);
+	}
+	# FIXME: bash completion off "http:" or "https:" when the last
+	# character is a colon doesn't work properly even if we're
+	# returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could
+	# be a bash issue.
 	map {
 		my $x = substr($_, length('external.'));
 		# only return the part specified on the CLI
-		$colon && $x =~ /(\Q$cur\E.*)/ ? $1 : $x;
+		if ($x =~ /\A$re(\Q$cur\E.*)/) {
+			# don't duplicate if already 100% completed
+			$cur eq $1 ? () : $1;
+		} else {
+			();
+		}
 	} grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}});
 }
 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 07/10] lei forget-external: just show the location
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (5 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

No need to show the full key name since the user mainly
uses the location.
---
 lib/PublicInbox/LeiExternal.pm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index a4e644ee..5b5f08d1 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -76,9 +76,9 @@ sub lei_forget_external {
 			delete($cfg->{$key});
 			$self->_config('--unset', $key);
 			if ($? == 0) {
-				push @unset, $key;
+				push @unset, $l;
 			} elsif (($? >> 8) == 5) {
-				push @not_found, $key;
+				push @not_found, $l;
 			} else {
 				$self->err("# --unset $key error");
 				return $self->x_it($?);
@@ -86,7 +86,7 @@ sub lei_forget_external {
 		}
 		if (@unset) {
 			next if $quiet;
-			$self->err("# $_ unset") for @unset;
+			$self->err("# $_ gone") for @unset;
 		} elsif (@not_found) {
 			$self->err("# $_ not found") for @not_found;
 		} # else { already exited

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 08/10] lei q: support a bunch of curl(1) options
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (6 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
  2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

Some of these options will make sense when on weird networks
(behind firewalls, etc.)  Some of these options may not make
sense at all.

This accommodates users who prefer the SOCKS5 proxy support in
curl over torsocks(1), but we'll still support torsocks
by default since some Tor instances aren't on the default
127.0.0.1:9050.
---
 lib/PublicInbox/LEI.pm        |  4 ++--
 lib/PublicInbox/LeiQuery.pm   | 41 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiXSearch.pm | 13 +++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 890be575..a9123c6e 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -84,8 +84,8 @@ our %CMD = ( # sorted in order of importance/use:
 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
 	sort|s=s reverse|r offset=i remote local! external! pretty mua-cmd=s
-	verbose|v
-	since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ],
+	torsocks=s no-torsocks verbose|v since|after=s until|before=s),
+	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
 
 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
 	qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!),
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index eebf217b..acab3c2c 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -66,4 +66,45 @@ sub lei_q {
 	$lxs->do_query($self);
 }
 
+# Stuff we may pass through to curl (as of 7.64.0); see the curl manpage for
+# details.  It covers most options that make sense for HTTP/HTTPS (including
+# proxy support for Tor and other methods of getting past weird networks).
+# Most of these are untested by us, some may not make sense for our use case
+# and typos below are likely.
+# n.b. some short options (-$NUMBER) are not supported since they conflict
+# with other "lei q" switches.
+# FIXME: Getopt::Long doesn't easily let us support options with
+# '.' in them (e.g. --http1.1)
+sub curl_opt { qw(
+	abstract-unix-socket=s anyauth basic cacert=s capath=s
+	cert-status cert-type cert|E=s ciphers=s config|K=s@
+	connect-timeout=s connect-to=s cookie-jar|c=s cookie|b=s crlfile=s
+	digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s
+	dns-servers=s doh-url=s egd-file=s engine=s false-start
+	happy-eyeballs-timeout-ms=s haproxy-protocol header|H=s@
+	http2-prior-knowledge http2 insecure|k
+	interface=s ipv4 ipv6 junk-session-cookies
+	key-type=s key=s limit-rate=s local-port=s location-trusted location|L
+	max-redirs=i max-time=s negotiate netrc-file=s netrc-optional netrc
+	no-alpn no-buffer|N no-npn no-sessionid noproxy=s ntlm-wb ntlm
+	pass=s pinnedpubkey=s post301 post302 post303 preproxy=s
+	proxy-anyauth proxy-basic proxy-cacert=s proxy-capath=s
+	proxy-cert-type=s proxy-cert=s proxy-ciphers=s proxy-crlfile=s
+	proxy-digest proxy-header=s@ proxy-insecure
+	proxy-key-type=s proxy-key proxy-negotiate proxy-ntlm proxy-pass=s
+	proxy-pinnedpubkey=s proxy-service-name=s proxy-ssl-allow-beast
+	proxy-tls13-ciphers=s proxy-tlsauthtype=s proxy-tlspassword=s
+	proxy-tlsuser=s proxy-tlsv1 proxy-user|U=s proxy=s
+	proxytunnel=s pubkey=s random-file=s referer=s resolve=s
+	retry-connrefused retry-delay=s retry-max-time=s retry=i
+	sasl-ir service-name=s socks4=s socks4a=s socks5-basic
+	socks5-gssapi-service-name=s socks5-gssapi socks5-hostname=s socks5=s
+	speed-limit|Y speed-type|y ssl-allow-beast sslv2 sslv3
+	suppress-connect-headers tcp-fastopen tls-max=s
+	tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s
+	tlsv1 trace-ascii=s trace-time trace=s
+	unix-socket=s user-agent|A=s user|u=s
+)
+}
+
 1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 8d36bca9..defe5e67 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -193,6 +193,7 @@ sub query_remote_mboxrd {
 	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	$dedupe->prepare_dedupe;
 	my @cmd = qw(curl -XPOST -sSf);
+	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
 	my $tor = $opt->{torsocks} //= 'auto';
 	if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
 			(($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
@@ -202,6 +203,18 @@ sub query_remote_mboxrd {
 	}
 	my $verbose = $opt->{verbose};
 	push @cmd, '-v' if $verbose;
+	for my $o ($lei->curl_opt) {
+		$o =~ s/\|[a-z0-9]\b//i; # remove single char short option
+		if ($o =~ s/=[is]@\z//) {
+			my $ary = $opt->{$o} or next;
+			push @cmd, map { ("--$o", $_) } @$ary;
+		} elsif ($o =~ s/=[is]\z//) {
+			my $val = $opt->{$o} // next;
+			push @cmd, "--$o", $val;
+		} elsif ($opt->{$o}) {
+			push @cmd, "--$o";
+		}
+	}
 	push @cmd, $uri->as_string;
 	$lei->err("# @cmd") if $verbose;
 	$? = 0;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 09/10] lei forget-external: don't show redundant "not found"
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (7 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

Pathname/URL canonicalization may not change the result at
all, so there's no point in trying (and failing) the same
form twice if pre and post-canonicalization are identical.
---
 lib/PublicInbox/LeiExternal.pm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 5b5f08d1..e7693e09 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -69,9 +69,11 @@ sub lei_forget_external {
 	my ($self, @locations) = @_;
 	my $cfg = $self->_lei_cfg(1);
 	my $quiet = $self->{opt}->{quiet};
+	my %seen;
 	for my $loc (@locations) {
 		my (@unset, @not_found);
 		for my $l ($loc, _canonicalize($loc)) {
+			next if $seen{$l}++;
 			my $key = "external.$l.boost";
 			delete($cfg->{$key});
 			$self->_config('--unset', $key);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 10/10] lei add-external: don't allow non-existent directories
  2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
                   ` (8 preceding siblings ...)
  2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
@ 2021-01-23 10:27 ` Eric Wong
  9 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-23 10:27 UTC (permalink / raw)
  To: meta

At least not yet, though we may support mirroring via git.
---
 lib/PublicInbox/LeiExternal.pm | 3 +++
 t/lei.t                        | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index e7693e09..bf07c41c 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -58,6 +58,9 @@ sub lei_add_external {
 	my $cfg = $self->_lei_cfg(1);
 	my $new_boost = $self->{opt}->{boost} // 0;
 	$location = _canonicalize($location);
+	if ($location !~ m!\Ahttps?://! && !-d $location) {
+		return $self->fail("$location not a directory");
+	}
 	my $key = "external.$location.boost";
 	my $cur_boost = $cfg->{$key};
 	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
diff --git a/t/lei.t b/t/lei.t
index 6b45f5b7..60ca75c5 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -193,6 +193,10 @@ my $test_external = sub {
 	ok(!-e $config_file && !-e $store_dir,
 		'nothing created by ls-external');
 
+	ok(!$lei->('add-external', "$home/nonexistent"),
+		"fails on non-existent dir");
+	$lei->('ls-external');
+	is($out.$err, '', 'ls-external still has no output');
 	my $cfg = PublicInbox::Config->new;
 	$cfg->each_inbox(sub {
 		my ($ibx) = @_;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] lei: support remote externals
  2021-01-23 10:27 ` [PATCH 02/10] lei: support remote externals Eric Wong
@ 2021-01-24  6:01   ` Kyle Meyer
  2021-01-24 12:02     ` Eric Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Kyle Meyer @ 2021-01-24  6:01 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

Eric Wong writes:

>  sub query_remote_mboxrd {
[...]
> +	my @cmd = qw(curl -XPOST -sSf);

I've been playing around with lei locally (wow :>).

The one snag I've hit is hooking up the http archives that I host
(<https://yhetil.org>).  It seems to boil down to the `curl -XPOST'
command failing.  For example, this works fine with public-inbox.org:

  $ curl -sSf -XPOST 'https://public-inbox.org/meta/?q=s:lei&x=m' | zless

But it fails with the mirror of meta at yhetil.org:

  $ curl -sSf -XPOST 'https://yhetil.org/meta/?q=s:lei&x=m' | zless
  curl: (22) The requested URL returned error: 400 Bad Request

If I add -d'' to the call, it works and produces the same output as the
above call against public-inbox.org/meta.  And if I add this option to
query_remote_mboxrd (i.e. applying the change at the end), `lei q' works
for me as expected.

yhetil.org uses nginx and varnish, and I'm _very_ far from being an
expert in either of those, so I have no doubt that the above error could
be the result of me configuring something incorrectly.  However, despite
a fair amount of time and effort, I couldn't figure out how to tweak
things to make the above command work without --data.

I quickly checked <https://lore.kernel.org>, and it seems like it would
give a similar response to -XPOST without data:

  $ curl -fSs -XPOST 'https://lore.kernel.org/git/?q=get-urlmatch&x=m'
  curl: (22) The requested URL returned error: 400
  $  curl -d'' -fSs -XPOST 'https://lore.kernel.org/git/?q=get-urlmatch&x=m' | zless
  # works


diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index defe5e67..766e9f5f 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -192,7 +192,7 @@ sub query_remote_mboxrd {
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
 	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	$dedupe->prepare_dedupe;
-	my @cmd = qw(curl -XPOST -sSf);
+	my @cmd = qw(curl -XPOST -d'' -sSf);
 	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
 	my $tor = $opt->{torsocks} //= 'auto';
 	if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] lei: support remote externals
  2021-01-24  6:01   ` Kyle Meyer
@ 2021-01-24 12:02     ` Eric Wong
  2021-01-24 12:12       ` Eric Wong
  2021-01-24 22:11       ` Kyle Meyer
  0 siblings, 2 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-24 12:02 UTC (permalink / raw)
  To: Kyle Meyer; +Cc: meta

Kyle Meyer <kyle@kyleam.com> wrote:
> I've been playing around with lei locally (wow :>).

Glad you're enjoying it.  I'm still not completely happy with
some of the internals (the IPC stuff is a bit adventurous and
perhaps overkill), but functionality's getting slowly fleshed
out.

Btw, since you seem to be figuring things out without existing
docs, could I convince you to start manpages for lei?

Don't feel obligated, but it might be better for everybody since
my brain tends to skip over stuff that's only obvious because I
designed it :x (and my brain feels "off" :<)

I'm envisioning git-style manpages, with subcommands each having
their own manpage and an lei-overview(7) with common examples
as quick-start for beginners.

<snip>

> yhetil.org uses nginx and varnish, and I'm _very_ far from being an
> expert in either of those, so I have no doubt that the above error could
> be the result of me configuring something incorrectly.  However, despite
> a fair amount of time and effort, I couldn't figure out how to tweak
> things to make the above command work without --data.

Thanks, it may be nginx-specific behavior, but
https://public-inbox.org/meta/20210124114655.12815-7-e@80x24.org/
should do the trick.

> -	my @cmd = qw(curl -XPOST -sSf);
> +	my @cmd = qw(curl -XPOST -d'' -sSf);

That '' is a syntax error for me, and curl nags on -XPOST
with -d, so I've omitted -XPOST from my patch.  Thanks again for
the report.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] lei: support remote externals
  2021-01-24 12:02     ` Eric Wong
@ 2021-01-24 12:12       ` Eric Wong
  2021-01-24 22:11       ` Kyle Meyer
  1 sibling, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-24 12:12 UTC (permalink / raw)
  To: Kyle Meyer; +Cc: meta

Eric Wong <e@80x24.org> wrote:
> Kyle Meyer <kyle@kyleam.com> wrote:
> > -	my @cmd = qw(curl -XPOST -sSf);
> > +	my @cmd = qw(curl -XPOST -d'' -sSf);
> 
> That '' is a syntax error for me, and curl nags on -XPOST
> with -d, so I've omitted -XPOST from my patch.  Thanks again for
> the report.

Nevermind :x  Way too sleepy :<

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] lei: support remote externals
  2021-01-24 12:02     ` Eric Wong
  2021-01-24 12:12       ` Eric Wong
@ 2021-01-24 22:11       ` Kyle Meyer
  2021-01-25 18:37         ` Eric Wong
  1 sibling, 1 reply; 16+ messages in thread
From: Kyle Meyer @ 2021-01-24 22:11 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

Eric Wong writes:

> Btw, since you seem to be figuring things out without existing
> docs, could I convince you to start manpages for lei?

Sure, happy for a way to contribute.  I'm currently a bit behind with
some other volunteer work, but I should be able to carve out time for
this next weekend.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] lei: support remote externals
  2021-01-24 22:11       ` Kyle Meyer
@ 2021-01-25 18:37         ` Eric Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Wong @ 2021-01-25 18:37 UTC (permalink / raw)
  To: Kyle Meyer; +Cc: meta

Kyle Meyer <kyle@kyleam.com> wrote:
> Eric Wong writes:
> 
> > Btw, since you seem to be figuring things out without existing
> > docs, could I convince you to start manpages for lei?
> 
> Sure, happy for a way to contribute.  I'm currently a bit behind with
> some other volunteer work, but I should be able to carve out time for
> this next weekend.

No worries and thanks in advance!

I think most of the stuff implemented for lei is stable so far,
but more features will appear :)  And please let us know if
there's anything that's too surprising or bad.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-01-25 18:37 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
2021-01-23 10:27 ` [PATCH 02/10] lei: support remote externals Eric Wong
2021-01-24  6:01   ` Kyle Meyer
2021-01-24 12:02     ` Eric Wong
2021-01-24 12:12       ` Eric Wong
2021-01-24 22:11       ` Kyle Meyer
2021-01-25 18:37         ` Eric Wong
2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).