user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] cindex: store coderepo data asynchronously
Date: Wed, 25 Oct 2023 15:33:47 +0000	[thread overview]
Message-ID: <20231025153349.3247178-2-e@80x24.org> (raw)
In-Reply-To: <20231025153349.3247178-1-e@80x24.org>

Storing coderepo data is typically fast, but latency on HDDs can
be pathological.  Storing it asynchronously lets us use that delay
to get other work done.
---
 lib/PublicInbox/CodeSearchIdx.pm | 69 +++++++++++++++++---------------
 1 file changed, 36 insertions(+), 33 deletions(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index aeee37c0..f2fd28e3 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -193,8 +193,9 @@ sub progress {
 	$pr->($self->{git} ? ("$self->{git}->{git_dir}: ") : (), @msg, "\n");
 }
 
-sub store_repo { # wq_do - returns docid
+sub store_repo { # wq_io_do, sends docid back
 	my ($self, $repo) = @_;
+	my $op_p = delete($self->{0}) // die 'BUG: no {0} op_p';
 	$self->begin_txn_lazy;
 	$self->{xdb}->delete_document($_) for @{$repo->{to_delete}};
 	my $doc = $PublicInbox::Search::X{Document}->new;
@@ -203,12 +204,10 @@ sub store_repo { # wq_do - returns docid
 	$doc->add_boolean_term('T'.'r');
 	$doc->add_boolean_term('G'.$_) for @{$repo->{roots}};
 	$doc->set_data($repo->{fp}); # \n delimited
-	if ($repo->{docid}) {
-		$self->{xdb}->replace_document($repo->{docid}, $doc);
-		$repo->{docid};
-	} else {
-		$self->{xdb}->add_document($doc);
-	}
+	my $did = $repo->{docid};
+	$did ? $self->{xdb}->replace_document($did, $doc)
+		: ($did = $self->{xdb}->add_document($doc));
+	send($op_p, "repo_stored $did", 0);
 }
 
 sub cidx_ckpoint ($;$) {
@@ -322,6 +321,17 @@ sub shard_done { # called via PktOp on shard_index completion
 	$repo_ctx->{shard_ok}->{$n} = 1;
 }
 
+sub repo_stored {
+	my ($self, $repo_ctx, $did) = @_;
+	$did > 0 or die "BUG: $repo_ctx->{repo}->{git_dir}: docid=$did";
+	my $next = PublicInbox::OnDestroy->new($$, \&next_repos, $repo_ctx);
+	my ($c, $p) = PublicInbox::PktOp->pair;
+	$c->{ops}->{shard_done} = [ $self, $repo_ctx, $next ];
+	# shard_done fires when all shards are committed
+	my @active = keys %{$repo_ctx->{active}};
+	$IDX_SHARDS[$_]->wq_io_do('shard_commit', [ $p->{op_p} ]) for @active;
+}
+
 sub prune_done { # called via prune_do completion
 	my ($self, $n) = @_;
 	return if $DO_QUIT || !$PRUNE_DONE;
@@ -584,37 +594,30 @@ sub index_next ($) {
 
 sub next_repos { # OnDestroy cb
 	my ($repo_ctx) = @_;
-	progress($repo_ctx->{self}, "$repo_ctx->{repo}->{git_dir}: done");
-	return if $DO_QUIT;
-	if ($REPO_CTX) {
-		$REPO_CTX == $repo_ctx or die "BUG: $REPO_CTX != $repo_ctx";
-		$REPO_CTX = undef;
-		index_next($repo_ctx->{self});
-	}
+	my ($self, $repo, $active) = @$repo_ctx{qw(self repo active)};
+	progress($self, "$repo->{git_dir}: done");
+	return if $DO_QUIT || !$REPO_CTX;
+	my $n = grep { ! $repo_ctx->{shard_ok}->{$_} } keys %$active;
+	die "E: $repo->{git_dir} $n shards failed" if $n;
+	$REPO_CTX == $repo_ctx or die "BUG: $REPO_CTX != $repo_ctx";
+	$REPO_CTX = undef;
+	index_next($self);
 }
 
-sub commit_shard { # OnDestroy cb
+sub index_done { # OnDestroy cb called when done indexing each code repo
 	my ($repo_ctx) = @_;
 	my ($self, $repo, $active) = @$repo_ctx{qw(self repo active)};
 
+	return if $DO_QUIT;
 	my $n = grep { ! $repo_ctx->{shard_ok}->{$_} } keys %$active;
-	die "E: $repo->{git_dir} $n shards failed" if $n && !$DO_QUIT;
-
-	$repo_ctx->{shard_ok} = {};
-	if (!$DO_QUIT) {
-		my $id = $IDX_SHARDS[$repo->{shard_n}]->wq_do('store_repo',
-								$repo);
-		(!defined($id) || $id <= 0) and
-			die "E: store_repo $repo->{git_dir}: id=$id";
-		$active->{$repo->{shard_n}} = undef;
-	}
-	my $next = PublicInbox::OnDestroy->new($$, \&next_repos, $repo_ctx);
+	die "E: $repo->{git_dir} $n shards failed" if $n;
+	$repo_ctx->{shard_ok} = {}; # reset for future shard_done
+	$n = $repo->{shard_n};
+	$active->{$n} = undef;
 	my ($c, $p) = PublicInbox::PktOp->pair;
-	$c->{ops}->{shard_done} = [ $repo_ctx->{self}, $repo_ctx, $next ];
-	for my $n (keys %$active) {
-		$IDX_SHARDS[$n]->wq_io_do('shard_commit', [ $p->{op_p} ]);
-	}
-	# shard_done fires when all shards are committed
+	$c->{ops}->{repo_stored} = [ $self, $repo_ctx ];
+	$IDX_SHARDS[$n]->wq_io_do('store_repo', [ $p->{op_p} ], $repo);
+	# repo_stored will fire once store_repo is done
 }
 
 sub index_repo { # run_git cb
@@ -637,10 +640,10 @@ sub index_repo { # run_git cb
 	$repo->{git_dir} = $git->{git_dir};
 	my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo };
 	delete $git->{-cidx_gits_fini}; # may fire gits_fini
-	my $commit_shard = PublicInbox::OnDestroy->new($$, \&commit_shard,
+	my $index_done = PublicInbox::OnDestroy->new($$, \&index_done,
 							$repo_ctx);
 	my ($c, $p) = PublicInbox::PktOp->pair;
-	$c->{ops}->{shard_done} = [ $self, $repo_ctx, $commit_shard ];
+	$c->{ops}->{shard_done} = [ $self, $repo_ctx, $index_done ];
 	for my $n (0..$#shard_in) {
 		$shard_in[$n]->flush or die "flush shard[$n]: $!";
 		-s $shard_in[$n] or next;

  reply	other threads:[~2023-10-25 15:33 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-25 15:33 [PATCH 0/3] cindex: prune fixes Eric Wong
2023-10-25 15:33 ` Eric Wong [this message]
2023-10-25 15:33 ` [PATCH 2/3] cindex: quiet --prune when checking objectFormat Eric Wong
2023-10-25 15:33 ` [PATCH 3/3] cindex: fix large prunes Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231025153349.3247178-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).