user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 00/19] lei import Maildir, remote mboxrd fixes
@ 2021-02-07  8:51  7% Eric Wong
  2021-02-07  8:51  4% ` [PATCH 03/19] lei add-external: handle interrupts with --mirror Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-02-07  8:51 UTC (permalink / raw)
  To: meta

"lei q" with remote mboxrd + early MUA spawning is
nicer, too.  Several risky constructs were eliminated.

Interrupting "add-external --mirror" is less bad now,
though it could probably support indexlevel=none in
case somebody wants to run index themselves.

Eric Wong (19):
  spawn: pi_fork_exec: restore parent sigmask in child
  spawn: pi_fork_exec: support "pgid"
  lei add-external: handle interrupts with --mirror
  spawn_pp: die more consistently in child
  ipc: do not die inside wq_worker child process
  ipc: trim down the Storable checks
  Makefile.PL: depend on IO::Uncompress::Gunzip
  xapcmd: avoid potential die surprise in children
  tests: guard setup_public_inboxes for SQLite and Xapian
  Revert "ipc: add support for asynchronous callbacks"
  ipc: wq_do => wq_io_do
  lei: more consistent IPC exit and error handling
  lei: remove --mua-cmd alias for --mua
  lei: replace --thread with --threads
  lei q: improve remote mboxrd UX
  lei q: SIGWINCH process group with the terminal
  lei import: support Maildirs
  imap: avoid unnecessary delete on stack
  httpd/async: avoid unnecessary on-stack delete

 Documentation/lei-q.pod        |   4 +-
 MANIFEST                       |   1 +
 Makefile.PL                    |   1 +
 lib/PublicInbox/HTTPD/Async.pm |   2 +-
 lib/PublicInbox/IMAP.pm        |   6 +-
 lib/PublicInbox/IPC.pm         | 105 +++++++-----------------
 lib/PublicInbox/LEI.pm         |  49 +++++++----
 lib/PublicInbox/LeiCurl.pm     |  11 ++-
 lib/PublicInbox/LeiHelp.pm     |   6 +-
 lib/PublicInbox/LeiImport.pm   |  38 ++++++---
 lib/PublicInbox/LeiMirror.pm   |  75 ++++++++++-------
 lib/PublicInbox/LeiOverview.pm |   7 +-
 lib/PublicInbox/LeiQuery.pm    |   4 +-
 lib/PublicInbox/LeiStore.pm    |   8 +-
 lib/PublicInbox/LeiToMail.pm   |  37 ++++-----
 lib/PublicInbox/LeiXSearch.pm  | 143 ++++++++++++++++++++-------------
 lib/PublicInbox/Mbox.pm        |   2 +-
 lib/PublicInbox/OnDestroy.pm   |   2 +-
 lib/PublicInbox/Search.pm      |   2 +-
 lib/PublicInbox/SearchView.pm  |   2 +-
 lib/PublicInbox/Spawn.pm       |  63 +++++++++------
 lib/PublicInbox/SpawnPP.pm     |  44 +++++-----
 lib/PublicInbox/Xapcmd.pm      |  11 +--
 script/lei                     |   8 +-
 t/ipc.t                        |  39 ++-------
 t/lei-externals.t              |   2 +
 t/lei-import-maildir.t         |  33 ++++++++
 t/lei-mirror.t                 |  14 ++++
 t/lei.t                        |   2 +-
 t/lei_to_mail.t                |   6 +-
 t/spawn.t                      |  18 +++++
 xt/stress-sharedkv.t           |   6 +-
 32 files changed, 433 insertions(+), 318 deletions(-)
 create mode 100644 t/lei-import-maildir.t


^ permalink raw reply	[relevance 7%]

* [PATCH 03/19] lei add-external: handle interrupts with --mirror
  2021-02-07  8:51  7% [PATCH 00/19] lei import Maildir, remote mboxrd fixes Eric Wong
@ 2021-02-07  8:51  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-02-07  8:51 UTC (permalink / raw)
  To: meta

This also updates lei_xsearch to follow the same pattern for
stopping curl(1) and tail(1) processes it spawns.
---
 lib/PublicInbox/IPC.pm        |  5 +--
 lib/PublicInbox/LEI.pm        |  6 ++++
 lib/PublicInbox/LeiMirror.pm  | 66 +++++++++++++++++++++++------------
 lib/PublicInbox/LeiXSearch.pm | 21 +++++------
 lib/PublicInbox/OnDestroy.pm  |  2 +-
 t/lei-mirror.t                | 12 +++++++
 6 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 0dee2a92..b936c27a 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -150,9 +150,10 @@ sub ipc_worker_reap { # dwaitpid callback
 }
 
 sub wq_wait_old {
-	my ($self, $args) = @_;
+	my ($self, @args) = @_;
+	my $cb = ref($args[0]) eq 'CODE' ? shift(@args) : \&ipc_worker_reap;
 	my $pids = delete $self->{"-wq_old_pids.$$"} or return;
-	dwaitpid($_, \&ipc_worker_reap, [$self, $args]) for @$pids;
+	dwaitpid($_, $cb, [$self, @args]) for @$pids;
 }
 
 # for base class, override in sub classes
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 3098ade7..515bc2a3 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -370,6 +370,12 @@ sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers
 	fail_handler($_[0], 13, delete $_[0]->{1});
 }
 
+# PublicInbox::OnDestroy callback for SIGINT to take out the entire pgid
+sub sigint_reap {
+	my ($pgid) = @_;
+	dwaitpid($pgid) if kill('-INT', $pgid);
+}
+
 sub fail ($$;$) {
 	my ($self, $buf, $exit_code) = @_;
 	err($self, $buf) if defined $buf;
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index bb172e6a..13795a58 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -10,13 +10,19 @@ use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use PublicInbox::Spawn qw(popen_rd spawn);
 use PublicInbox::PktOp;
 
+sub do_finish_mirror { # dwaitpid callback
+	my ($arg, $pid) = @_;
+	my ($mrr, $lei) = @$arg;
+	if ($? == 0 && unlink("$mrr->{dst}/mirror.done")) {
+		$lei->add_external_finish($mrr->{dst});
+	}
+	$lei->dclose;
+}
+
 sub mirror_done { # EOF callback for main daemon
 	my ($lei) = @_;
-	my $mrr = delete $lei->{mrr};
-	$mrr->wq_wait_old($lei) if $mrr;
-	# FIXME: check $? before finish
-	$lei->add_external_finish($mrr->{dst});
-	$lei->dclose;
+	my $mrr = delete $lei->{mrr} or return;
+	$mrr->wq_wait_old(\&do_finish_mirror, $lei);
 }
 
 # for old installations without manifest.js.gz
@@ -59,8 +65,9 @@ E: confused by scraping <$uri>, got ambiguous results:
 }
 
 sub clone_cmd {
-	my ($lei) = @_;
+	my ($lei, $opt) = @_;
 	my @cmd = qw(git);
+	$opt->{$_} = $lei->{$_} for (0..2);
 	# we support "-c $key=$val" for arbitrary git config options
 	# e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
 	push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
@@ -92,14 +99,12 @@ sub _try_config {
 	my $f = "$ce-$$.tmp";
 	open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)");
 	my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} };
-	$lei->qerr("# @$cmd");
-	my $pid = spawn($cmd, $lei->{env}, $opt);
-	waitpid($pid, 0) == $pid or return $lei->err("waitpid @$cmd: $!");
-	if (($? >> 8) == 22) { # 404 missing
+	my $cerr = run_reap($lei, $cmd, $opt) // return;
+	if (($cerr >> 8) == 22) { # 404 missing
 		unlink($f) if -s $fh == 0;
 		return;
 	}
-	return $lei->err("# @$cmd failed (non-fatal)") if $?;
+	return $lei->err("# @$cmd failed (non-fatal)") if $cerr;
 	rename($f, $ce) or return $lei->err("link($f, $ce): $! (non-fatal)");
 	my $cfg = PublicInbox::Config::git_config_dump($f);
 	my $ibx = $self->{ibx} = {};
@@ -132,6 +137,18 @@ sub index_cloned_inbox {
 	local %ENV = (%ENV, %$env) if $env;
 	PublicInbox::Admin::progress_prepare($opt, $lei->{2});
 	PublicInbox::Admin::index_inbox($ibx, undef, $opt);
+	open my $x, '>', "$self->{dst}/mirror.done"; # for do_finish_mirror
+}
+
+sub run_reap {
+	my ($lei, $cmd, $opt) = @_;
+	$lei->qerr("# @$cmd");
+	$opt->{pgid} = 0;
+	my $pid = spawn($cmd, $lei->{env}, $opt);
+	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
+	my $err = waitpid($pid, 0) == $pid ? undef : "waitpid @$cmd: $!";
+	@$reap = (); # cancel reap
+	$err ? $lei->err($err) : $?
 }
 
 sub clone_v1 {
@@ -140,11 +157,10 @@ sub clone_v1 {
 	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
 	my $uri = URI->new($self->{src});
 	my $pfx = $curl->torsocks($lei, $uri) or return;
-	my $cmd = [ @$pfx, clone_cmd($lei), $uri->as_string, $self->{dst} ];
-	$lei->qerr("# @$cmd");
-	my $pid = spawn($cmd, $lei->{env}, $lei);
-	waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
-	$? == 0 or return $lei->child_error($?, "@$cmd failed");
+	my $cmd = [ @$pfx, clone_cmd($lei, my $opt = {}),
+			$uri->as_string, $self->{dst} ];
+	my $cerr = run_reap($lei, $cmd, $opt) // return;
+	return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	_try_config($self);
 	index_cloned_inbox($self, 1);
 }
@@ -170,13 +186,11 @@ failed to extract epoch number from $src
 	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
 	_try_config($self);
 	my $on_destroy = $lk->lock_for_scope($$);
-	my @cmd = clone_cmd($lei);
+	my @cmd = clone_cmd($lei, my $opt = {});
 	while (my $pair = shift(@src_edst)) {
 		my $cmd = [ @$pfx, @cmd, @$pair ];
-		$lei->qerr("# @$cmd");
-		my $pid = spawn($cmd, $lei->{env}, $lei);
-		waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
-		$? == 0 or return $lei->child_error($?, "@$cmd failed");
+		my $cerr = run_reap($lei, $cmd, $opt) // return;
+		return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	}
 	undef $on_destroy; # unlock
 	index_cloned_inbox($self, 2);
@@ -193,9 +207,14 @@ sub try_manifest {
 	my $cmd = $curl->for_uri($lei, $uri);
 	$lei->qerr("# @$cmd");
 	my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
-	my $fh = popen_rd($cmd, $lei->{env}, $opt);
+	my ($fh, $pid) = popen_rd($cmd, $lei->{env}, $opt);
+	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
 	my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!";
-	unless (close $fh) {
+	close $fh;
+	my $err = waitpid($pid, 0) == $pid ? undef : "waitpid @$cmd: $!";
+	@$reap = ();
+	return $lei->err($err) if $err;
+	if ($?) {
 		return try_scrape($self) if ($? >> 8) == 22; # 404 missing
 		return $lei->child_error($?, "@$cmd failed");
 	}
@@ -282,6 +301,7 @@ sub start {
 sub ipc_atfork_child {
 	my ($self) = @_;
 	$self->{lei}->lei_atfork_child;
+	$SIG{TERM} = sub { exit(128 + 15) }; # trigger OnDestroy $reap
 	$self->SUPER::ipc_atfork_child;
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 1e5d7ca6..6a1b107b 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -197,13 +197,6 @@ sub each_eml { # callback for MboxReader->mboxrd
 	$each_smsg->($smsg, undef, $eml);
 }
 
-# PublicInbox::OnDestroy callback
-sub kill_reap {
-	my ($pid) = @_;
-	kill('KILL', $pid); # spawn() blocks other signals
-	waitpid($pid, 0);
-}
-
 sub query_remote_mboxrd {
 	my ($self, $uris) = @_;
 	local $0 = "$0 query_remote_mboxrd";
@@ -213,18 +206,19 @@ sub query_remote_mboxrd {
 	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
 	push(@qform, t => 1) if $opt->{thread};
 	my $verbose = $opt->{verbose};
-	my $reap;
+	my ($reap_tail, $reap_curl);
 	my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
 	fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!";
-	my $rdr = { 2 => $cerr };
+	my $rdr = { 2 => $cerr, pgid => 0 };
 	my $coff = 0;
+	my $sigint_reap = $lei->can('sigint_reap');
 	if ($verbose) {
 		# spawn a process to force line-buffering, otherwise curl
 		# will write 1 character at-a-time and parallel outputs
 		# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
-		my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
+		my $o = { 1 => $lei->{2}, 2 => $lei->{2}, pgid => 0 };
 		my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
-		$reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid);
+		$reap_tail = PublicInbox::OnDestroy->new($sigint_reap, $pid);
 	}
 	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
 	push @$curl, '-s', '-d', '';
@@ -236,10 +230,13 @@ sub query_remote_mboxrd {
 		my $cmd = $curl->for_uri($lei, $uri);
 		$lei->err("# @$cmd") if $verbose;
 		my ($fh, $pid) = popen_rd($cmd, $env, $rdr);
+		$reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid);
 		$fh = IO::Uncompress::Gunzip->new($fh);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
 						$lei, $each_smsg);
-		waitpid($pid, 0) == $pid or die "BUG: waitpid (curl): $!";
+		my $err = waitpid($pid, 0) == $pid ? undef : "BUG: waitpid: $!";
+		@$reap_curl = (); # cancel OnDestroy
+		die $err if $err;
 		if ($? == 0) {
 			my $nr = $lei->{-nr_remote_eml};
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm
index 0ae4c4c9..615bc450 100644
--- a/lib/PublicInbox/OnDestroy.pm
+++ b/lib/PublicInbox/OnDestroy.pm
@@ -10,7 +10,7 @@ sub new {
 
 sub DESTROY {
 	my ($cb, @args) = @{$_[0]};
-	if (!ref($cb)) {
+	if (!ref($cb) && $cb) {
 		my $pid = $cb;
 		return if $pid != $$;
 		$cb = shift @args;
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 6af49678..2373b370 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -13,15 +13,27 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	my $t1 = "$home/t1-mirror";
 	ok($lei->('add-external', $t1, '--mirror', "$http/t1/"), '--mirror v1');
 	ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed');
+
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t1\E!, 't1 added to ls-externals');
+
 	my $t2 = "$home/t2-mirror";
 	ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2');
 	ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed');
 
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t2\E!, 't2 added to ls-externals');
+
 	ok(!$lei->('add-external', $t2, '--mirror', "$http/t2/"),
 		'--mirror fails if reused');
 
+	ok($lei->('ls-external'), 'ls-external');
+	like($lei_out, qr!\Q$t2\E!, 'still in ls-externals');
+
 	ok(!$lei->('add-external', "$t2-fail", '-Lmedium'), '--mirror v2');
 	ok(!-d "$t2-fail", 'destination not created on failure');
+	ok($lei->('ls-external'), 'ls-external');
+	unlike($lei_out, qr!\Q$t2-fail\E!, 'not added to ls-external');
 });
 
 ok($td->kill, 'killed -httpd');

^ permalink raw reply related	[relevance 4%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-02-07  8:51  7% [PATCH 00/19] lei import Maildir, remote mboxrd fixes Eric Wong
2021-02-07  8:51  4% ` [PATCH 03/19] lei add-external: handle interrupts with --mirror Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).