user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 19/20] searchidx: $batch_cb => v1_checkpoint
Date: Fri, 24 Jul 2020 05:56:05 +0000	[thread overview]
Message-ID: <20200724055606.27332-20-e@yhbt.net> (raw)
In-Reply-To: <20200724055606.27332-1-e@yhbt.net>

Another closure gone, and we may be able to share more
code with v2 in upcoming commits.
---
 lib/PublicInbox/SearchIdx.pm | 90 ++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 39dc1f874..fe089c8e8 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -573,9 +573,48 @@ sub ck_size { # check_async cb for -index --max-size=...
 	}
 }
 
+sub v1_checkpoint ($$;$) {
+	my ($self, $sync, $stk) = @_;
+	$self->{ibx}->git->check_async_wait;
+	$self->{ibx}->git->cat_async_wait;
+
+	# latest_cmt may be undef
+	my $newest = $stk ? $stk->{latest_cmt} : undef;
+	if ($newest) {
+		my $cur = $self->{mm}->last_commit || '';
+		if (need_update($self, $cur, $newest)) {
+			$self->{mm}->last_commit($newest);
+		}
+	} else {
+		${$sync->{max}} = $BATCH_BYTES;
+	}
+
+	$self->{mm}->{dbh}->commit;
+	if ($newest && need_xapian($self)) {
+		my $cur = $self->{xdb}->get_metadata('last_commit');
+		if (need_update($self, $cur, $newest)) {
+			$self->{xdb}->set_metadata('last_commit', $newest);
+		}
+	}
+
+	$self->{over}->rethread_done($sync->{-opt}) if $newest; # all done
+	commit_txn_lazy($self);
+	$self->{ibx}->git->cleanup;
+	my $nr = ${$sync->{nr}};
+	idx_release($self, $nr);
+	# let another process do some work...
+	if (my $pr = $sync->{-opt}->{-progress}) {
+		$pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
+	}
+	if (!$stk) { # more to come
+		begin_txn_lazy($self);
+		$self->{mm}->{dbh}->begin_work;
+	}
+}
+
 # only for v1
 sub process_stack {
-	my ($self, $stk, $sync, $batch_cb) = @_;
+	my ($self, $sync, $stk) = @_;
 	my $git = $self->{ibx}->git;
 	my $max = $BATCH_BYTES;
 	my $nr = 0;
@@ -583,6 +622,7 @@ sub process_stack {
 	$sync->{max} = \$max;
 	$sync->{sidx} = $self;
 
+	$self->{mm}->{dbh}->begin_work;
 	if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
 		warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
 		for my $oid (@leftovers) {
@@ -599,19 +639,12 @@ sub process_stack {
 			} else {
 				$git->cat_async($oid, \&index_both, $arg);
 			}
-			if ($max <= 0) {
-				$git->check_async_wait;
-				$git->cat_async_wait;
-				$max = $BATCH_BYTES;
-				$batch_cb->($nr);
-			}
+			v1_checkpoint($self, $sync) if $max <= 0;
 		} elsif ($f eq 'd') {
 			$git->cat_async($oid, \&unindex_both, $self);
 		}
 	}
-	$git->check_async_wait;
-	$git->cat_async_wait;
-	$batch_cb->($nr, $stk);
+	v1_checkpoint($self, $sync, $stk);
 }
 
 sub log2stack ($$$$) {
@@ -729,7 +762,7 @@ sub _index_sync {
 	my $git = $self->{ibx}->git;
 	$git->batch_prepare;
 	my $pr = $opts->{-progress};
-	my $sync = { reindex => $opts->{reindex} };
+	my $sync = { reindex => $opts->{reindex}, -opt => $opts };
 	my $xdb = $self->begin_txn_lazy;
 	$self->{over}->rethread_prepare($opts);
 	my $mm = _msgmap_init($self);
@@ -750,40 +783,7 @@ sub _index_sync {
 	my $stk = prepare_stack($self, $sync, $range);
 	$sync->{ntodo} = $stk ? $stk->num_records : 0;
 	$pr->("$sync->{ntodo}\n") if $pr; # continue previous line
-
-	my $dbh = $mm->{dbh};
-	my $batch_cb = sub {
-		my ($nr, $stk) = @_;
-		# latest_cmt may be undef
-		my $newest = $stk ? $stk->{latest_cmt} : undef;
-		if ($newest) {
-			my $cur = $mm->last_commit || '';
-			if (need_update($self, $cur, $newest)) {
-				$mm->last_commit($newest);
-			}
-		}
-		$dbh->commit;
-		if ($newest && need_xapian($self)) {
-			my $cur = $xdb->get_metadata('last_commit');
-			if (need_update($self, $cur, $newest)) {
-				$xdb->set_metadata('last_commit', $newest);
-			}
-		}
-
-		$self->{over}->rethread_done($opts) if $newest; # all done
-		$self->commit_txn_lazy;
-		$git->cleanup;
-		$xdb = idx_release($self, $nr);
-		# let another process do some work...
-		$pr->("indexed $nr/$sync->{ntodo}\n") if $pr && $nr;
-		if (!$stk) { # more to come
-			$xdb = $self->begin_txn_lazy;
-			$dbh->begin_work;
-		}
-	};
-
-	$dbh->begin_work;
-	process_stack($self, $stk, $sync, $batch_cb);
+	process_stack($self, $sync, $stk);
 }
 
 sub DESTROY {

  parent reply	other threads:[~2020-07-24  5:56 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-24  5:55 [PATCH 00/20] indexing changes and new features Eric Wong
2020-07-24  5:55 ` [PATCH 01/20] index: support --rethread switch to fix old indices Eric Wong
2020-07-24  5:55 ` [PATCH 02/20] v2: index forwards (via `git log --reverse') Eric Wong
2020-07-24  5:55 ` [PATCH 03/20] v2writable: introduce idx_stack Eric Wong
2020-07-24  5:55 ` [PATCH 04/20] v2writable: index_sync: reduce fill_alternates calls Eric Wong
2020-07-24  5:55 ` [PATCH 05/20] v2writable: move {autime} and {cotime} into $sync state Eric Wong
2020-07-24  5:55 ` [PATCH 06/20] v2writable: allow >= 40 byte git object IDs Eric Wong
2020-07-24  5:55 ` [PATCH 07/20] v2writable: drop "EPOCH.git indexing $RANGE" progress Eric Wong
2020-07-24  5:55 ` [PATCH 08/20] use consistent {ibx} field for writable code paths Eric Wong
2020-07-24  5:55 ` [PATCH 09/20] search: avoid copying {inboxdir} Eric Wong
2020-07-24  5:55 ` [PATCH 10/20] v2writable: use read-only PublicInbox::Git for cat_file Eric Wong
2020-07-24  5:55 ` [PATCH 11/20] v2writable: get rid of {reindex_pipe} field Eric Wong
2020-07-24  5:55 ` [PATCH 12/20] v2writable: clarify "epoch" comment Eric Wong
2020-07-24  5:55 ` [PATCH 13/20] xapcmd: set {from} properly for v1 inboxes Eric Wong
2020-07-24  5:56 ` [PATCH 14/20] searchidx: rename _xdb_{acquire,release} => idx_ Eric Wong
2020-07-24  5:56 ` [PATCH 15/20] searchidx: make v1 indexing closer to v2 Eric Wong
2020-07-24  5:56 ` [PATCH 16/20] index+xcpdb: support --no-sync flag Eric Wong
2020-07-24  5:56 ` [PATCH 17/20] v2writable: share log2stack code with v1 Eric Wong
2020-07-24  5:56 ` [PATCH 18/20] searchidx: support async git check Eric Wong
2020-07-24  5:56 ` Eric Wong [this message]
2020-07-24  5:56 ` [PATCH 20/20] v2writable: {unindexed} belongs in $sync state Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724055606.27332-20-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 19/20] searchidx: $batch_cb => v1_checkpoint' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox. See the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for URLs of the read-only IMAP folder(s) and NNTP newsgroup(s).