user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 03/19] lei add-external: handle interrupts with --mirror
Date: Sun,  7 Feb 2021 08:51:45 +0000	[thread overview]
Message-ID: <20210207085201.13871-4-e@80x24.org> (raw)
In-Reply-To: <20210207085201.13871-1-e@80x24.org>

This also updates lei_xsearch to follow the same pattern for
stopping curl(1) and tail(1) processes it spawns.
---
 lib/PublicInbox/IPC.pm        |  5 +--
 lib/PublicInbox/LEI.pm        |  6 ++++
 lib/PublicInbox/LeiMirror.pm  | 66 +++++++++++++++++++++++------------
 lib/PublicInbox/LeiXSearch.pm | 21 +++++------
 lib/PublicInbox/OnDestroy.pm  |  2 +-
 t/lei-mirror.t                | 12 +++++++
 6 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 0dee2a92..b936c27a 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -150,9 +150,10 @@ sub ipc_worker_reap { # dwaitpid callback
 }
 
 sub wq_wait_old {
-	my ($self, $args) = @_;
+	my ($self, @args) = @_;
+	my $cb = ref($args[0]) eq 'CODE' ? shift(@args) : \&ipc_worker_reap;
 	my $pids = delete $self->{"-wq_old_pids.$$"} or return;
-	dwaitpid($_, \&ipc_worker_reap, [$self, $args]) for @$pids;
+	dwaitpid($_, $cb, [$self, @args]) for @$pids;
 }
 
 # for base class, override in sub classes
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 3098ade7..515bc2a3 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -370,6 +370,12 @@ sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers
 	fail_handler($_[0], 13, delete $_[0]->{1});
 }
 
+# PublicInbox::OnDestroy callback for SIGINT to take out the entire pgid
+sub sigint_reap {
+	my ($pgid) = @_;
+	dwaitpid($pgid) if kill('-INT', $pgid);
+}
+
 sub fail ($$;$) {
 	my ($self, $buf, $exit_code) = @_;
 	err($self, $buf) if defined $buf;
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index bb172e6a..13795a58 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -10,13 +10,19 @@ use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use PublicInbox::Spawn qw(popen_rd spawn);
 use PublicInbox::PktOp;
 
+sub do_finish_mirror { # dwaitpid callback
+	my ($arg, $pid) = @_;
+	my ($mrr, $lei) = @$arg;
+	if ($? == 0 && unlink("$mrr->{dst}/mirror.done")) {
+		$lei->add_external_finish($mrr->{dst});
+	}
+	$lei->dclose;
+}
+
 sub mirror_done { # EOF callback for main daemon
 	my ($lei) = @_;
-	my $mrr = delete $lei->{mrr};
-	$mrr->wq_wait_old($lei) if $mrr;
-	# FIXME: check $? before finish
-	$lei->add_external_finish($mrr->{dst});
-	$lei->dclose;
+	my $mrr = delete $lei->{mrr} or return;
+	$mrr->wq_wait_old(\&do_finish_mirror, $lei);
 }
 
 # for old installations without manifest.js.gz
@@ -59,8 +65,9 @@ E: confused by scraping <$uri>, got ambiguous results:
 }
 
 sub clone_cmd {
-	my ($lei) = @_;
+	my ($lei, $opt) = @_;
 	my @cmd = qw(git);
+	$opt->{$_} = $lei->{$_} for (0..2);
 	# we support "-c $key=$val" for arbitrary git config options
 	# e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
 	push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
@@ -92,14 +99,12 @@ sub _try_config {
 	my $f = "$ce-$$.tmp";
 	open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)");
 	my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} };
-	$lei->qerr("# @$cmd");
-	my $pid = spawn($cmd, $lei->{env}, $opt);
-	waitpid($pid, 0) == $pid or return $lei->err("waitpid @$cmd: $!");
-	if (($? >> 8) == 22) { # 404 missing
+	my $cerr = run_reap($lei, $cmd, $opt) // return;
+	if (($cerr >> 8) == 22) { # 404 missing
 		unlink($f) if -s $fh == 0;
 		return;
 	}
-	return $lei->err("# @$cmd failed (non-fatal)") if $?;
+	return $lei->err("# @$cmd failed (non-fatal)") if $cerr;
 	rename($f, $ce) or return $lei->err("link($f, $ce): $! (non-fatal)");
 	my $cfg = PublicInbox::Config::git_config_dump($f);
 	my $ibx = $self->{ibx} = {};
@@ -132,6 +137,18 @@ sub index_cloned_inbox {
 	local %ENV = (%ENV, %$env) if $env;
 	PublicInbox::Admin::progress_prepare($opt, $lei->{2});
 	PublicInbox::Admin::index_inbox($ibx, undef, $opt);
+	open my $x, '>', "$self->{dst}/mirror.done"; # for do_finish_mirror
+}
+
+sub run_reap {
+	my ($lei, $cmd, $opt) = @_;
+	$lei->qerr("# @$cmd");
+	$opt->{pgid} = 0;
+	my $pid = spawn($cmd, $lei->{env}, $opt);
+	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
+	my $err = waitpid($pid, 0) == $pid ? undef : "waitpid @$cmd: $!";
+	@$reap = (); # cancel reap
+	$err ? $lei->err($err) : $?
 }
 
 sub clone_v1 {
@@ -140,11 +157,10 @@ sub clone_v1 {
 	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
 	my $uri = URI->new($self->{src});
 	my $pfx = $curl->torsocks($lei, $uri) or return;
-	my $cmd = [ @$pfx, clone_cmd($lei), $uri->as_string, $self->{dst} ];
-	$lei->qerr("# @$cmd");
-	my $pid = spawn($cmd, $lei->{env}, $lei);
-	waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
-	$? == 0 or return $lei->child_error($?, "@$cmd failed");
+	my $cmd = [ @$pfx, clone_cmd($lei, my $opt = {}),
+			$uri->as_string, $self->{dst} ];
+	my $cerr = run_reap($lei, $cmd, $opt) // return;
+	return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	_try_config($self);
 	index_cloned_inbox($self, 1);
 }
@@ -170,13 +186,11 @@ failed to extract epoch number from $src
 	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
 	_try_config($self);
 	my $on_destroy = $lk->lock_for_scope($$);
-	my @cmd = clone_cmd($lei);
+	my @cmd = clone_cmd($lei, my $opt = {});
 	while (my $pair = shift(@src_edst)) {
 		my $cmd = [ @$pfx, @cmd, @$pair ];
-		$lei->qerr("# @$cmd");
-		my $pid = spawn($cmd, $lei->{env}, $lei);
-		waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
-		$? == 0 or return $lei->child_error($?, "@$cmd failed");
+		my $cerr = run_reap($lei, $cmd, $opt) // return;
+		return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	}
 	undef $on_destroy; # unlock
 	index_cloned_inbox($self, 2);
@@ -193,9 +207,14 @@ sub try_manifest {
 	my $cmd = $curl->for_uri($lei, $uri);
 	$lei->qerr("# @$cmd");
 	my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
-	my $fh = popen_rd($cmd, $lei->{env}, $opt);
+	my ($fh, $pid) = popen_rd($cmd, $lei->{env}, $opt);
+	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
 	my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!";
-	unless (close $fh) {
+	close $fh;
+	my $err = waitpid($pid, 0) == $pid ? undef : "waitpid @$cmd: $!";
+	@$reap = ();
+	return $lei->err($err) if $err;
+	if ($?) {
 		return try_scrape($self) if ($? >> 8) == 22; # 404 missing
 		return $lei->child_error($?, "@$cmd failed");
 	}
@@ -282,6 +301,7 @@ sub start {
 sub ipc_atfork_child {
 	my ($self) = @_;
 	$self->{lei}->lei_atfork_child;
+	$SIG{TERM} = sub { exit(128 + 15) }; # trigger OnDestroy $reap
 	$self->SUPER::ipc_atfork_child;
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 1e5d7ca6..6a1b107b 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -197,13 +197,6 @@ sub each_eml { # callback for MboxReader->mboxrd
 	$each_smsg->($smsg, undef, $eml);
 }
 
-# PublicInbox::OnDestroy callback
-sub kill_reap {
-	my ($pid) = @_;
-	kill('KILL', $pid); # spawn() blocks other signals
-	waitpid($pid, 0);
-}
-
 sub query_remote_mboxrd {
 	my ($self, $uris) = @_;
 	local $0 = "$0 query_remote_mboxrd";
@@ -213,18 +206,19 @@ sub query_remote_mboxrd {
 	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
 	push(@qform, t => 1) if $opt->{thread};
 	my $verbose = $opt->{verbose};
-	my $reap;
+	my ($reap_tail, $reap_curl);
 	my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
 	fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!";
-	my $rdr = { 2 => $cerr };
+	my $rdr = { 2 => $cerr, pgid => 0 };
 	my $coff = 0;
+	my $sigint_reap = $lei->can('sigint_reap');
 	if ($verbose) {
 		# spawn a process to force line-buffering, otherwise curl
 		# will write 1 character at-a-time and parallel outputs
 		# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
-		my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
+		my $o = { 1 => $lei->{2}, 2 => $lei->{2}, pgid => 0 };
 		my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
-		$reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid);
+		$reap_tail = PublicInbox::OnDestroy->new($sigint_reap, $pid);
 	}
 	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
 	push @$curl, '-s', '-d', '';
@@ -236,10 +230,13 @@ sub query_remote_mboxrd {
 		my $cmd = $curl->for_uri($lei, $uri);
 		$lei->err("# @$cmd") if $verbose;
 		my ($fh, $pid) = popen_rd($cmd, $env, $rdr);
+		$reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid);
 		$fh = IO::Uncompress::Gunzip->new($fh);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
 						$lei, $each_smsg);
-		waitpid($pid, 0) == $pid or die "BUG: waitpid (curl): $!";
+		my $err = waitpid($pid, 0) == $pid ? undef : "BUG: waitpid: $!";
+		@$reap_curl = (); # cancel OnDestroy
+		die $err if $err;
 		if ($? == 0) {
 			my $nr = $lei->{-nr_remote_eml};
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm
index 0ae4c4c9..615bc450 100644
--- a/lib/PublicInbox/OnDestroy.pm
+++ b/lib/PublicInbox/OnDestroy.pm
@@ -10,7 +10,7 @@ sub new {
 
 sub DESTROY {
 	my ($cb, @args) = @{$_[0]};
-	if (!ref($cb)) {
+	if (!ref($cb) && $cb) {
 		my $pid = $cb;
 		return if $pid != $$;
 		$cb = shift @args;
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 6af49678..2373b370 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -13,15 +13,27 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	my $t1 = "$home/t1-mirror";
 	ok($lei->('add-external', $t1, '--mirror', "$http/t1/"), '--mirror v1');
 	ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed');
+
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t1\E!, 't1 added to ls-externals');
+
 	my $t2 = "$home/t2-mirror";
 	ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2');
 	ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed');
 
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t2\E!, 't2 added to ls-externals');
+
 	ok(!$lei->('add-external', $t2, '--mirror', "$http/t2/"),
 		'--mirror fails if reused');
 
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t2\E!, 'still in ls-externals');
+
 	ok(!$lei->('add-external', "$t2-fail", '-Lmedium'), '--mirror v2');
 	ok(!-d "$t2-fail", 'destination not created on failure');
+	ok($lei->('ls-external'), 'ls-external');
+	unlike($lei_out, qr!\Q$t2-fail\E!, 'not added to ls-external');
 });
 
 ok($td->kill, 'killed -httpd');

  parent reply	other threads:[~2021-02-07  8:52 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-07  8:51 [PATCH 00/19] lei import Maildir, remote mboxrd fixes Eric Wong
2021-02-07  8:51 ` [PATCH 01/19] spawn: pi_fork_exec: restore parent sigmask in child Eric Wong
2021-02-07  8:51 ` [PATCH 02/19] spawn: pi_fork_exec: support "pgid" Eric Wong
2021-02-07 23:10   ` dprintf(3) portability? [was [02/19] spawn: pi_fork_exec: support "pgid"] Eric Wong
2021-02-07  8:51 ` Eric Wong [this message]
2021-02-07  8:51 ` [PATCH 04/19] spawn_pp: die more consistently in child Eric Wong
2021-02-07  8:51 ` [PATCH 05/19] ipc: do not die inside wq_worker child process Eric Wong
2021-02-07  8:51 ` [PATCH 06/19] ipc: trim down the Storable checks Eric Wong
2021-02-07  8:51 ` [PATCH 07/19] Makefile.PL: depend on IO::Uncompress::Gunzip Eric Wong
2021-02-07  8:51 ` [PATCH 08/19] xapcmd: avoid potential die surprise in children Eric Wong
2021-02-07  8:51 ` [PATCH 09/19] tests: guard setup_public_inboxes for SQLite and Xapian Eric Wong
2021-02-07  8:51 ` [PATCH 10/19] Revert "ipc: add support for asynchronous callbacks" Eric Wong
2021-02-07  8:51 ` [PATCH 11/19] ipc: wq_do => wq_io_do Eric Wong
2021-02-07  8:51 ` [PATCH 12/19] lei: more consistent IPC exit and error handling Eric Wong
2021-02-07  8:51 ` [PATCH 13/19] lei: remove --mua-cmd alias for --mua Eric Wong
2021-02-07  8:51 ` [PATCH 14/19] lei: replace --thread with --threads Eric Wong
2021-02-07  8:51 ` [PATCH 15/19] lei q: improve remote mboxrd UX Eric Wong
2021-02-07  9:32   ` [PATCH 20/19] lei_xsearch: allow quieting regular mset progress, too Eric Wong
2021-02-07  8:51 ` [PATCH 16/19] lei q: SIGWINCH process group with the terminal Eric Wong
2021-02-07  8:51 ` [PATCH 17/19] lei import: support Maildirs Eric Wong
2021-02-07  8:52 ` [PATCH 18/19] imap: avoid unnecessary on-stack delete Eric Wong
2021-02-07  8:52 ` [PATCH 19/19] httpd/async: " Eric Wong
2021-02-07 10:40 ` [PATCH 21/19] lei q: fix arbitrary --mua command handling Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210207085201.13871-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).