user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 06/95] lei_mirror: rely on global process reaper
Date: Mon, 28 Nov 2022 05:31:03 +0000	[thread overview]
Message-ID: <20221128053232.291618-7-e@80x24.org> (raw)
In-Reply-To: <20221128053232.291618-1-e@80x24.org>

For predictability, we no longer rely on SIGCHLD and instead
call waitpid at safe points.  This will make it easier for us to
do parallel mirroring of multiple inboxes while preserving
proper dependencies via ->DESTROY callbacks.
---
 lib/PublicInbox/LeiMirror.pm | 54 +++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 0603dd48..7dc47ab8 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -14,7 +14,7 @@ use PublicInbox::Spawn qw(popen_rd spawn);
 use File::Temp ();
 use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
 use Carp qw(croak);
-use POSIX qw(WNOHANG);
+our %LIVE;
 
 sub _wq_done_wait { # dwaitpid callback (via wq_eof)
 	my ($arg, $pid) = @_;
@@ -61,7 +61,9 @@ sub try_scrape {
 			my ($n) = (m!/([0-9]+)\z!);
 			$n => [ URI->new($_), '' ]
 		} @v2_urls; # uniq
-		return clone_v2($self, \%v2_epochs);
+		clone_v2($self, \%v2_epochs);
+		reap_live() while keys(%LIVE);
+		return;
 	}
 
 	# filter out common URLs served by WWW (e.g /$MSGID/T/)
@@ -311,16 +313,16 @@ EOM
 }
 
 sub reap_clone { # async, called via SIGCHLD
-	my ($lei, $cmd, $live) = @_;
+	my ($lei, $cmd) = @_;
 	my $cerr = $?;
 	$? = 0; # don't let it influence normal exit
 	if ($cerr) {
-		kill('TERM', keys %$live);
+		kill('TERM', keys %LIVE);
 		$lei->child_error($cerr, "@$cmd failed");
 	}
 }
 
-sub v2_done {
+sub v2_done { # called via OnDestroy
 	my ($self) = @_;
 	require PublicInbox::MultiGit;
 	my $dst = $self->{cur_dst} // $self->{dst};
@@ -336,6 +338,16 @@ sub v2_done {
 	index_cloned_inbox($self, 2);
 }
 
+sub reap_live {
+	my $pid = waitpid(-1, 0) // die "waitpid(-1): $!";
+	if (my $x = delete $LIVE{$pid}) {
+		my $cb = shift @$x;
+		$cb->(@$x);
+	} else {
+		warn "reaped unknown PID=$pid ($?)\n";
+	}
+}
+
 sub clone_v2 ($$;$) {
 	my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
 	my $lei = $self->{lei};
@@ -366,37 +378,21 @@ failed to extract epoch number from $src
 	# filter out the epochs we skipped
 	$self->{-culled_manifest} = 1 if delete(@$m{@skip});
 	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
-	my %live;
 	my $fini = PublicInbox::OnDestroy->new($$, \&v2_done, $task);
-	$live{_try_config_start($task)} = [ \&_try_config_done, $task, $fini ];
+	$LIVE{_try_config_start($task)} = [ \&_try_config_done, $task, $fini ];
 	$task->{-locked} = $lk->lock_for_scope($$);
 	my @cmd = clone_cmd($lei, my $opt = {});
 	my $jobs = $self->{lei}->{opt}->{jobs} // 2;
-	my $sigchld = sub {
-		my ($sig) = @_;
-		my $flags = $sig ? WNOHANG : 0;
-		while (1) {
-			my $pid = waitpid(-1, $flags) or return;
-			return if $pid < 0;
-			if (my $x = delete $live{$pid}) {
-				my $cb = shift @$x;
-				$cb->(@$x, \%live);
-			} else {
-				warn "reaped unknown PID=$pid ($?)\n";
-			}
-		}
-	};
 	do {
-		$sigchld->(0) while keys(%live) >= $jobs;
-		while (keys(%live) < $jobs && @src_edst &&
+		reap_live() while keys(%LIVE) >= $jobs;
+		while (keys(%LIVE) < $jobs && @src_edst &&
 				!$lei->{child_error}) {
 			my $cmd = [ @$pfx, @cmd, splice(@src_edst, 0, 2) ];
 			$lei->qerr("# @$cmd");
-			my $pid = spawn($cmd, undef, $opt);
-			$live{$pid} = [ \&reap_clone, $lei, $cmd, $fini ];
+			$LIVE{spawn($cmd, undef, $opt)} = [ \&reap_clone,
+							$lei, $cmd, $fini ];
 		}
 	} while (@src_edst && !$lei->{child_error});
-	$sigchld->(0) while keys(%live);
 }
 
 sub decode_manifest ($$$) {
@@ -487,6 +483,7 @@ sub try_manifest {
 	my $opt = { -C => $pdir };
 	$opt->{$_} = $lei->{$_} for (0..2);
 	my $cerr = run_reap($lei, $cmd, $opt);
+	local %LIVE;
 	if ($cerr) {
 		return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
 		return $lei->child_error($cerr, "@$cmd failed");
@@ -498,6 +495,7 @@ sub try_manifest {
 	}
 	my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
 	return $lei->child_error(1, $multi) if !ref($multi);
+	my $jobs = $self->{lei}->{opt}->{jobs} // 2;
 	if (my $v2 = delete $multi->{v2}) {
 		for my $name (sort keys %$v2) {
 			my $epochs = delete $v2->{$name};
@@ -520,6 +518,8 @@ sub try_manifest {
 E: `$self->{cur_dst}' must not contain newline
 EOM
 			clone_v2($self, \%v2_epochs, $m);
+			reap_live() while keys(%LIVE) >= $jobs;
+			return if $self->{lei}->{child_error};
 		}
 	}
 	if (my $v1 = delete $multi->{v1}) {
@@ -540,6 +540,7 @@ EOM
 			clone_v1($self, 1);
 		}
 	}
+	reap_live() while keys(%LIVE);
 	if (delete $self->{-culled_manifest}) { # set by clone_v2/-I/--exclude
 		# write the smaller manifest if epochs were skipped so
 		# users won't have to delete manifest if they +w an
@@ -566,6 +567,7 @@ sub do_mirror { # via wq_io_do
 	eval {
 		my $iv = $lei->{opt}->{'inbox-version'};
 		if (defined $iv) {
+			local %LIVE;
 			return clone_v1($self) if $iv == 1;
 			return try_scrape($self) if $iv == 2;
 			die "bad --inbox-version=$iv\n";

  parent reply	other threads:[~2022-11-28  5:32 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-28  5:30 [PATCH 00/95] clone: multi-inbox/repo support Eric Wong
2022-11-28  5:30 ` [PATCH 01/95] clone: support multi-inbox clone Eric Wong
2022-11-28  5:30 ` [PATCH 02/95] clone: support --include and --exclude with multi-clone Eric Wong
2022-11-28  5:31 ` [PATCH 03/95] clone: parallelize v2 epoch clones Eric Wong
2022-11-28  5:31 ` [PATCH 04/95] lei_mirror: async config retrieval for v2 w/ manifest Eric Wong
2022-11-28  5:31 ` [PATCH 05/95] lei_mirror: rely on DESTROY to index v2 inbox Eric Wong
2022-11-28  5:31 ` Eric Wong [this message]
2022-11-28  5:31 ` [PATCH 07/95] clone: support parallel v1 clones Eric Wong
2022-11-28  5:31 ` [PATCH 08/95] lei_mirror: default to single job by default Eric Wong
2022-11-28  5:31 ` [PATCH 09/95] lei_mirror: move directory creation to v2-only path Eric Wong
2022-11-28  5:31 ` [PATCH 10/95] lei_mirror: retrieve description text asynchronously, too Eric Wong
2022-11-28  5:31 ` [PATCH 11/95] switch inotify/kevent stuff to v5.12 Eric Wong
2022-11-28  5:31 ` [PATCH 12/95] manifest: update module blurb + v5.12 Eric Wong
2022-11-28  5:31 ` [PATCH 13/95] lei_mirror: simplify _get_txt_start callers Eric Wong
2022-11-28  5:31 ` [PATCH 14/95] lei_mirror: elide description retrieval for v1|coderepo Eric Wong
2022-11-28  5:31 ` [PATCH 15/95] lei_mirror: add a hint for skipped epoch permissions Eric Wong
2022-11-28  5:31 ` [PATCH 16/95] lei_mirror: consolidate clone process management Eric Wong
2022-11-28  5:31 ` [PATCH 17/95] lei_mirror: load File::Path unconditionally Eric Wong
2022-11-28  5:31 ` [PATCH 18/95] lei_mirror: load most modules up-front Eric Wong
2022-11-28  5:31 ` [PATCH 19/95] lei_mirror: set gitweb.owner from manifest Eric Wong
2022-11-28  5:31 ` [PATCH 20/95] clone: support --dry-run / -n flag Eric Wong
2022-11-28  5:31 ` [PATCH 21/95] lei_mirror: initialize placeholders with "head" from manifest Eric Wong
2022-11-28  5:31 ` [PATCH 22/95] lei_mirror: support {reference} for v1 manifest clones Eric Wong
2022-11-28  5:31 ` [PATCH 23/95] lei_mirror: reduce noise on interrupted clones Eric Wong
2022-11-28  5:31 ` [PATCH 24/95] clone: support --inbox-config option Eric Wong
2022-11-28  5:31 ` [PATCH 25/95] lei_mirror: retrieve v2 description properly Eric Wong
2022-11-28  5:31 ` [PATCH 26/95] lei_mirror: reduce scope of v2 lock Eric Wong
2022-11-28  5:31 ` [PATCH 27/95] lei_mirror: allow --epoch on mixed v1/v2 clones Eric Wong
2022-11-28  5:31 ` [PATCH 28/95] lei_mirror: fix infinite loop in dependency resolution Eric Wong
2022-11-28  5:31 ` [PATCH 29/95] lei_mirror: defend against infinite loops Eric Wong
2022-11-28  5:31 ` [PATCH 30/95] lei_mirror: do not fetch descriptions if using manifest Eric Wong
2022-11-28  5:31 ` [PATCH 31/95] lei_mirror: require PublicInbox::Lock at use Eric Wong
2022-11-28  5:31 ` [PATCH 32/95] lei_mirror: fix glob semantics to match end-of-path Eric Wong
2022-11-28  5:31 ` [PATCH 33/95] lei_mirror: differentiate -entv vs -ent Eric Wong
2022-11-28  5:31 ` [PATCH 34/95] lei_mirror: support manifest {references} for v2 epochs Eric Wong
2022-11-28  5:31 ` [PATCH 35/95] lei_mirror: simplify v2 code paths Eric Wong
2022-11-28  5:31 ` [PATCH 36/95] clone: support --inbox-version Eric Wong
2022-11-28  5:31 ` [PATCH 37/95] lei_mirror: require Perl v5.12+ Eric Wong
2022-11-28  5:31 ` [PATCH 38/95] lei_mirror: ensure curl exits 22 on HTTP 404 responses Eric Wong
2022-11-28  5:31 ` [PATCH 39/95] lei_mirror: cleanup File::Temp OO usage Eric Wong
2022-11-28  5:31 ` [PATCH 40/95] lei_mirror: add `index' target to generated Makefile Eric Wong
2022-11-28  5:31 ` [PATCH 41/95] lei_mirror: do not write Makefile for --inbox-config=never Eric Wong
2022-11-28  5:31 ` [PATCH 42/95] lei_mirror: hoist out dump_manifest sub Eric Wong
2022-11-28  5:31 ` [PATCH 43/95] lei_mirror: avoid convoluted lazy_cb usage Eric Wong
2022-11-28  5:31 ` [PATCH 44/95] lei_mirror: simplify clone_v2_prep Eric Wong
2022-11-28  5:31 ` [PATCH 45/95] lei_mirror: support --objstore and forkgroups Eric Wong
2022-11-28  5:31 ` [PATCH 46/95] lei_mirror: cleanup process reaping logic Eric Wong
2022-11-28  5:31 ` [PATCH 47/95] lei_mirror: ensure git <1.8.5 fallback can use torsocks Eric Wong
2022-11-28  5:31 ` [PATCH 48/95] clone: flesh out --objstore behavior and document Eric Wong
2022-11-28  5:31 ` [PATCH 49/95] lei_mirror: always pack refs for coderepos Eric Wong
2022-11-28  5:31 ` [PATCH 50/95] lei_mirror: set description for non-inboxes, too Eric Wong
2022-11-28  5:31 ` [PATCH 51/95] lei_mirror: force --no-tags when fetching forkgroups Eric Wong
2022-11-28  5:31 ` [PATCH 52/95] lei_mirror: preserve permissions of existing alternates file Eric Wong
2022-11-28  5:31 ` [PATCH 53/95] lei_mirror: do not show ref updates w/o --verbose Eric Wong
2022-11-28  5:31 ` [PATCH 54/95] lei_mirror: drop git <1.8.5 support Eric Wong
2022-11-28  5:31 ` [PATCH 55/95] lei_mirror: make basename more descriptive Eric Wong
2022-11-28  5:31 ` [PATCH 56/95] lei_mirror: fix --dry-run for forkgroups Eric Wong
2022-11-28  5:31 ` [PATCH 57/95] lei_mirror: forkgroups use `git fetch --multiple' Eric Wong
2022-11-28  5:31 ` [PATCH 58/95] clone: move --dry-run handling to lei_mirror Eric Wong
2022-11-28  5:31 ` [PATCH 59/95] clone: drop unnecessary requires Eric Wong
2022-11-28  5:31 ` [PATCH 60/95] clone: use v5.12 Eric Wong
2022-11-28  5:31 ` [PATCH 61/95] clone: require `--objstore=' for default location Eric Wong
2022-11-28  5:31 ` [PATCH 62/95] lei_mirror: shorten remote names Eric Wong
2022-11-28  5:32 ` [PATCH 63/95] fetch: use v5.12 Eric Wong
2022-11-28  5:32 ` [PATCH 64/95] fetch: eliminate File::Temp->filename var Eric Wong
2022-11-28  5:32 ` [PATCH 65/95] lei_mirror: properly pack-refs in non-forkgroup repos Eric Wong
2022-11-28  5:32 ` [PATCH 66/95] lei_mirror: show child error error code Eric Wong
2022-11-28  5:32 ` [PATCH 67/95] on_destroy: support ->cancel callback Eric Wong
2022-11-28  5:32 ` [PATCH 68/95] lei_mirror: support resuming multi-repo clones Eric Wong
2022-11-28  5:32 ` [PATCH 69/95] lei_mirror: check fingerprints before fetching Eric Wong
2022-11-28  5:32 ` [PATCH 70/95] clone: support loading manifest.js.gz from destination Eric Wong
2022-11-28  5:32 ` [PATCH 71/95] lei_mirror: delay configuring forkgroups Eric Wong
2022-11-28  5:32 ` [PATCH 72/95] clone: canonicalize destination path from CLI Eric Wong
2022-11-28  5:32 ` [PATCH 73/95] clone|fetch: support passing --prune(-tags) to `git fetch' Eric Wong
2022-11-28  5:32 ` [PATCH 74/95] lei_mirror: avoid needless FD passing Eric Wong
2022-11-28  5:32 ` [PATCH 75/95] clone: support --keep-going/-k like make(1) Eric Wong
2022-11-28  5:32 ` [PATCH 76/95] lei_mirror: don't warn on missing manifest on initial clone Eric Wong
2022-11-28  5:32 ` [PATCH 77/95] lei_mirror: respect `./' and `../' prefixes for CLI args Eric Wong
2022-11-28  5:32 ` [PATCH 78/95] lei_mirror: --manifest= affects destination, too Eric Wong
2022-11-28  5:32 ` [PATCH 79/95] lei_mirror: update fingerprints when writing local manifest.js.gz Eric Wong
2022-11-28  5:32 ` [PATCH 80/95] lei_mirror: remove janky mirror.done stamp file Eric Wong
2022-11-28  5:32 ` [PATCH 81/95] lei_mirror: simplify most process spawning Eric Wong
2022-11-28  5:32 ` [PATCH 82/95] lei_mirror: run v1_done earlier on forkgroup done Eric Wong
2022-11-28  5:32 ` [PATCH 83/95] lei_mirror: simplify forkgroup-related subs Eric Wong
2022-11-28  5:32 ` [PATCH 84/95] lei_mirror: shorten scope mirror objects Eric Wong
2022-11-28  5:32 ` [PATCH 85/95] lei_mirror: set {head} from manifest Eric Wong
2022-11-28  5:32 ` [PATCH 86/95] lei_mirror: support {symlinks} " Eric Wong
2022-11-28  5:32 ` [PATCH 87/95] lei_mirror: eliminate circular references Eric Wong
2022-11-28  5:32 ` [PATCH 88/95] lei_mirror: use curl -z/--timecond if manifest exists Eric Wong
2022-11-28  5:32 ` [PATCH 89/95] lei_mirror: avoid redundant curl `-f' use Eric Wong
2022-11-28  5:32 ` [PATCH 90/95] lei_mirror: omit trailing slash for git remote.*.url Eric Wong
2022-11-28  5:32 ` [PATCH 91/95] lei_mirror: set info/web/last-modified from manifest Eric Wong
2022-11-28  5:32 ` [PATCH 92/95] lei_mirror: don't clobber inbox.config.example if it exists Eric Wong
2022-11-28  5:32 ` [PATCH 93/95] lei_mirror: break out of fgrp fetch iteration early Eric Wong
2022-11-28  5:32 ` [PATCH 94/95] clone: support --project-list= for cgit Eric Wong
2022-11-28  5:32 ` [PATCH 95/95] lei_mirror: handle forkgroup changes Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221128053232.291618-7-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).