user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 25/26] cindex: use run_await wrapper for git commands
Date: Wed, 25 Oct 2023 00:29:48 +0000	[thread overview]
Message-ID: <20231025002949.3092193-26-e@80x24.org> (raw)
In-Reply-To: <20231025002949.3092193-1-e@80x24.org>

Instead of keeping track of live processes ourselves in a hash
table, we'll rely on OnDestroy here to notify us of git command
completions.
---
 lib/PublicInbox/CodeSearchIdx.pm | 101 +++++++++++++++----------------
 1 file changed, 48 insertions(+), 53 deletions(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 2356164b..e17cba39 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -58,14 +58,14 @@ use PublicInbox::Compat qw(uniqstr);
 use PublicInbox::Aspawn qw(run_await);
 use Carp ();
 use autodie qw(pipe open seek sysseek send);
+our $DO_QUIT = 15; # signal number
 our (
-	$LIVE, # pid => cmd
 	$LIVE_JOBS, # integer
+	$GITS_NR, # number of coderepos
 	$MY_SIG, # like %SIG
 	$SIGSET,
 	$TXN_BYTES, # number of bytes in current shard transaction
 	$BATCH_BYTES,
-	$DO_QUIT, # signal number
 	@RDONLY_XDB, # Xapian::Database
 	@IDX_SHARDS, # clones of self
 	$MAX_SIZE,
@@ -359,39 +359,31 @@ sub docids_by_postlist ($$) { # consider moving to PublicInbox::Search
 	@ids;
 }
 
-sub cidx_await_cb { # awaitpid cb
-	my ($pid, $cb, $self, $git, @args) = @_;
-	return if !$LIVE || $DO_QUIT;
-	my $cmd = delete $LIVE->{$pid} // die 'BUG: no $cmd';
-	PublicInbox::DS::enqueue_reap() if !keys(%$LIVE); # once more for PLC
-	if ($?) {
-		$git->{-cidx_err} = 1;
-		return warn("@$cmd error: \$?=$?\n");
-	}
-	$cb->($self, $git, @args);
+sub _cb { # run_await cb
+	my ($pid, $cmd, undef, $opt, $cb, $self, $git, @arg) = @_;
+	return if $DO_QUIT;
+	($git->{-cidx_err} = $?) ? warn("@$cmd error: \$?=$?\n") :
+				$cb->($opt, $self, $git, @arg);
 }
 
-sub cidx_await ($$$$$@) {
-	my ($pid, $cmd, $cb, $self, $git, @args) = @_;
-	$LIVE->{$pid} = $cmd;
-	awaitpid($pid, \&cidx_await_cb, $cb, $self, $git, @args);
+sub run_git {
+	my ($cmd, $opt, $cb, $self, $git, @arg) = @_;
+	run_await($git->cmd(@$cmd), undef, $opt, \&_cb, $cb, $self, $git, @arg)
 }
 
 # this is different from the grokmirror-compatible fingerprint since we
 # only care about --heads (branches) and --tags, and not even their names
 sub fp_start ($$$) {
 	my ($self, $git, $prep_repo) = @_;
-	return if !$LIVE || $DO_QUIT;
+	return if $DO_QUIT;
 	open my $refs, '+>', undef;
-	my $cmd = ['git', "--git-dir=$git->{git_dir}",
-		qw(show-ref --heads --tags --hash)];
-	my $pid = spawn($cmd, undef, { 1 => $refs });
 	$git->{-repo}->{refs} = $refs;
-	cidx_await($pid, $cmd, \&fp_fini, $self, $git, $prep_repo);
+	run_git([qw(show-ref --heads --tags --hash)], { 1 => $refs },
+		\&fp_fini, $self, $git, $prep_repo);
 }
 
-sub fp_fini { # cidx_await cb
-	my ($self, $git, $prep_repo) = @_;
+sub fp_fini { # run_git cb
+	my (undef, $self, $git, $prep_repo) = @_;
 	my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}';
 	seek($refs, 0, SEEK_SET);
 	my $buf;
@@ -402,26 +394,23 @@ sub fp_fini { # cidx_await cb
 
 sub ct_start ($$$) {
 	my ($self, $git, $prep_repo) = @_;
-	return if !$LIVE || $DO_QUIT;
-	my $cmd = [ 'git', "--git-dir=$git->{git_dir}",
-		qw[for-each-ref --sort=-committerdate
+	return if $DO_QUIT;
+	run_git([ qw[for-each-ref --sort=-committerdate
 		--format=%(committerdate:raw) --count=1
-		refs/heads/ refs/tags/] ];
-	my ($rd, $pid) = popen_rd($cmd);
-	cidx_await($pid, $cmd, \&ct_fini, $self, $git, $rd, $prep_repo);
+		refs/heads/ refs/tags/] ], undef, # capture like qx
+		\&ct_fini, $self, $git, $prep_repo);
 }
 
-sub ct_fini { # cidx_await cb
-	my ($self, $git, $rd, $prep_repo) = @_;
-	defined(my $ct = <$rd>) or return;
-	$ct =~ s/\s+.*\z//s; # drop TZ + LF
+sub ct_fini { # run_git cb
+	my ($opt, $self, $git, $prep_repo) = @_;
+	my ($ct) = split(/\s+/, ${$opt->{1}}); # drop TZ + LF
 	$git->{-repo}->{ct} = $ct + 0;
 }
 
 # TODO: also index gitweb.owner and the full fingerprint for grokmirror?
 sub prep_repo ($$) {
 	my ($self, $git) = @_;
-	return if !$LIVE || $DO_QUIT;
+	return if $DO_QUIT;
 	return index_next($self) if $git->{-cidx_err};
 	my $repo = $git->{-repo} // die 'BUG: no {-repo}';
 	if (!defined($repo->{ct})) {
@@ -578,9 +567,9 @@ sub index_next ($) {
 	my ($self) = @_;
 	return if $DO_QUIT;
 	if ($IDX_TODO && @$IDX_TODO) {
-		index_repo($self, shift @$IDX_TODO);
+		index_repo(undef, $self, shift @$IDX_TODO);
 	} elsif ($GIT_TODO && @$GIT_TODO) {
-		my $git = PublicInbox::Git->new(shift @$GIT_TODO);
+		my $git = shift @$GIT_TODO;
 		my $prep_repo = PublicInbox::OnDestroy->new($$, \&prep_repo,
 							$self, $git);
 		fp_start($self, $git, $prep_repo);
@@ -589,7 +578,8 @@ sub index_next ($) {
 		return if delete($TODO{dump_roots_start});
 		delete $TODO{dump_ibx_start}; # runs OnDestroy once
 		return dump_ibx($self, shift @IBXQ) if @IBXQ;
-		undef $DUMP_IBX_WPIPE; # done dumping inboxes, dump roots
+		undef $DUMP_IBX_WPIPE; # done dumping inboxes
+		undef $XHC;
 		delete $TODO{associate};
 	}
 	# else: wait for shards_active (post_loop_do) callback
@@ -630,8 +620,8 @@ sub commit_shard { # OnDestroy cb
 	# shard_done fires when all shards are committed
 }
 
-sub index_repo { # cidx_await cb
-	my ($self, $git) = @_;
+sub index_repo { # run_git cb
+	my (undef, $self, $git) = @_;
 	return if $DO_QUIT;
 	return index_next($self) if $git->{-cidx_err};
 	return push(@$IDX_TODO, $git) if $REPO_CTX; # busy
@@ -649,6 +639,7 @@ sub index_repo { # cidx_await cb
 	my @shard_in = partition_refs($self, $git, delete($repo->{refs}));
 	$repo->{git_dir} = $git->{git_dir};
 	my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo };
+	delete $git->{-cidx_gits_fini}; # may fire gits_fini
 	my $commit_shard = PublicInbox::OnDestroy->new($$, \&commit_shard,
 							$repo_ctx);
 	my ($c, $p) = PublicInbox::PktOp->pair;
@@ -667,15 +658,13 @@ sub index_repo { # cidx_await cb
 
 sub get_roots ($$) {
 	my ($self, $git) = @_;
-	return if !$LIVE || $DO_QUIT;
+	return if $DO_QUIT;
 	my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}';
 	sysseek($refs, 0, SEEK_SET);
 	open my $roots_fh, '+>', undef;
-	my $cmd = [ 'git', "--git-dir=$git->{git_dir}",
-			qw(rev-list --stdin --max-parents=0) ];
-	my $pid = spawn($cmd, undef, { 0 => $refs, 1 => $roots_fh });
 	$git->{-repo}->{roots_fh} = $roots_fh;
-	cidx_await($pid, $cmd, \&index_repo, $self, $git);
+	run_git([ qw(rev-list --stdin --max-parents=0) ],
+		{ 0 => $refs, 1 => $roots_fh }, \&index_repo, $self, $git)
 }
 
 # for PublicInbox::SearchIdx::patch_id and with_umask
@@ -751,10 +740,18 @@ EOM
 	@shards;
 }
 
+sub gits_fini {
+	undef $GITS_NR;
+	PublicInbox::DS::enqueue_reap(); # kick @post_loop_do
+}
+
 sub scan_git_dirs ($) {
 	my ($self) = @_;
-	my $n = @$GIT_TODO = @{$self->{git_dirs}};
-	progress($self, "scanning $n code repositories...");
+	@$GIT_TODO = map { PublicInbox::Git->new($_) } @{$self->{git_dirs}};
+	$GITS_NR = @$GIT_TODO;
+	my $gits_fini = PublicInbox::OnDestroy->new($$, \&gits_fini);
+	$_->{-cidx_gits_fini} = $gits_fini for @$GIT_TODO;
+	progress($self, "scanning $GITS_NR code repositories...");
 }
 
 sub prune_init { # via wq_io_do in IDX_SHARDS
@@ -786,10 +783,10 @@ sub prune_commit { # via wq_io_do in IDX_SHARDS
 
 sub shards_active { # post_loop_do
 	return if $DO_QUIT;
-	return if grep(defined, $PRUNE_DONE, $GIT_TODO, $IDX_TODO, $LIVE) != 4;
+	return if grep(defined, $PRUNE_DONE, $GIT_TODO, $IDX_TODO) != 3;
 	return 1 if grep(defined, @$PRUNE_DONE) != @IDX_SHARDS;
-	return 1 if scalar(@$GIT_TODO) || scalar(@$IDX_TODO) || $REPO_CTX;
-	return 1 if keys(%$LIVE) || @IBXQ || keys(%TODO);
+	return 1 if $GITS_NR || scalar(@$IDX_TODO) || $REPO_CTX;
+	return 1 if @IBXQ || keys(%TODO);
 	for my $s (grep { $_->{-wq_s1} } @IDX_SHARDS) {
 		$s->{-cidx_quit} = 1 if defined($s->{-wq_s1});
 		$s->wq_close; # may recurse via awaitpid outside of event_loop
@@ -1105,14 +1102,13 @@ sub cidx_run { # main entry point
 					POSIX::SIGTSTP, POSIX::SIGCONT);
 	my $restore = PublicInbox::OnDestroy->new($$,
 		\&PublicInbox::DS::sig_setmask, $SIGSET);
-	local $LIVE = {};
 	local $PRUNE_DONE = [];
 	local $IDX_TODO = [];
 	local $GIT_TODO = [];
 	local ($DO_QUIT, $REINDEX, $TXN_BYTES, @GIT_DIR_GONE, @PRUNE_QUEUE,
 		$REPO_CTX, %ALT_FH, $TMPDIR, @AWK, @COMM, $CMD_ENV,
 		%TODO, @IBXQ, @IBX, @JOIN, @ASSOC_PFX, $DUMP_IBX_WPIPE,
-		@ID2ROOT, $XHC, @SORT);
+		@ID2ROOT, $XHC, @SORT, $GITS_NR);
 	local $BATCH_BYTES = $self->{-opt}->{batch_size} //
 				$PublicInbox::SearchIdx::BATCH_BYTES;
 	local $self->{ASSOC_PFX} = \@ASSOC_PFX;
@@ -1202,14 +1198,13 @@ sub ipc_atfork_child { # @IDX_SHARDS
 sub shard_done_wait { # awaitpid cb via ipc_worker_reap
 	my ($pid, $shard, $self) = @_;
 	my $quit_req = delete($shard->{-cidx_quit});
-	return if $DO_QUIT || !$LIVE;
+	return if $DO_QUIT;
 	if ($? == 0) { # success
 		$quit_req // warn 'BUG: {-cidx_quit} unset';
 	} else {
 		warn "PID:$pid $shard->{shard} exited with \$?=$?\n";
 		++$self->{shard_err} if defined($self->{shard_err});
 	}
-	PublicInbox::DS::enqueue_reap() if !shards_active(); # once more for PLC
 }
 
 1;

  parent reply	other threads:[~2023-10-25  0:29 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-25  0:29 [PATCH 00/26] process management simplifications Eric Wong
2023-10-25  0:29 ` [PATCH 01/26] limiter: split out from qspawn Eric Wong
2023-10-25  0:29 ` [PATCH 02/26] spawn: support synchronous run_qx Eric Wong
2023-10-25  0:29 ` [PATCH 03/26] psgi_qx: use a temporary file rather than pipe Eric Wong
2023-10-25  0:29 ` [PATCH 04/26] www_coderepo: capture uses a flattened list Eric Wong
2023-10-25  0:29 ` [PATCH 05/26] qspawn: psgi_return allows list for callback args Eric Wong
2023-10-25  0:29 ` [PATCH 06/26] qspawn: drop unused err arg for ->event_step Eric Wong
2023-10-25  0:29 ` [PATCH 07/26] httpd/async: require IO arg Eric Wong
2023-10-25  0:29 ` [PATCH 08/26] xt/check-run: call DS->Reset after all tests Eric Wong
2023-10-25  0:29 ` [PATCH 09/26] qspawn: introduce new psgi_yield API Eric Wong
2023-10-25  0:29 ` [PATCH 10/26] repo_atom: switch to psgi_yield Eric Wong
2023-10-25  0:29 ` [PATCH 11/26] repo_snapshot: psgi_yield Eric Wong
2023-10-25  0:29 ` [PATCH 12/26] viewvcs: psgi_yield Eric Wong
2023-10-25  0:29 ` [PATCH 13/26] www_altid: switch to psgi_yield Eric Wong
2023-10-25  0:29 ` [PATCH 14/26] cgit: " Eric Wong
2023-10-25  0:29 ` [PATCH 15/26] www_coderepo: use psgi_yield Eric Wong
2023-10-25  0:29 ` [PATCH 16/26] drop psgi_return, httpd/async and GetlineBody Eric Wong
2023-10-25  0:29 ` [PATCH 17/26] qspawn: use WwwStatic for fallbacks and error code Eric Wong
2023-10-25  0:29 ` [PATCH 18/26] qspawn: simplify internal argument passing Eric Wong
2023-10-25  0:29 ` [PATCH 19/26] cidx_log_p: don't bother with F_SETPIPE_SZ Eric Wong
2023-10-25  0:29 ` [PATCH 20/26] cindex: avoid awaitpid for popen Eric Wong
2023-10-25  0:29 ` [PATCH 21/26] cindex: use timer for inits Eric Wong
2023-10-25  0:29 ` [PATCH 22/26] cindex: start using run_await to simplify code Eric Wong
2023-10-25  0:29 ` [PATCH 23/26] cindex: use run_await to read extensions.objectFormat Eric Wong
2023-10-25  0:29 ` [PATCH 24/26] cindex: drop XH_PID global Eric Wong
2023-10-25  0:29 ` Eric Wong [this message]
2023-10-25  0:29 ` [PATCH 26/26] cindex: use sysread for generating fingerprint Eric Wong
2023-10-25  6:33   ` [PATCH 27/26] lei_mirror+fetch: don't slurp `git show-ref' output Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231025002949.3092193-26-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).