From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 19/20] searchidx: $batch_cb => v1_checkpoint
Date: Fri, 24 Jul 2020 05:56:05 +0000 [thread overview]
Message-ID: <20200724055606.27332-20-e@yhbt.net> (raw)
In-Reply-To: <20200724055606.27332-1-e@yhbt.net>
Replace the $batch_cb closure in _index_sync with a named
v1_checkpoint sub.  That is another closure gone, and we may be
able to share more code with v2 in upcoming commits.
---
lib/PublicInbox/SearchIdx.pm | 90 ++++++++++++++++++------------------
1 file changed, 45 insertions(+), 45 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 39dc1f874..fe089c8e8 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -573,9 +573,48 @@ sub ck_size { # check_async cb for -index --max-size=...
}
}
+sub v1_checkpoint ($$;$) {
+ my ($self, $sync, $stk) = @_;
+ $self->{ibx}->git->check_async_wait;
+ $self->{ibx}->git->cat_async_wait;
+
+ # latest_cmt may be undef
+ my $newest = $stk ? $stk->{latest_cmt} : undef;
+ if ($newest) {
+ my $cur = $self->{mm}->last_commit || '';
+ if (need_update($self, $cur, $newest)) {
+ $self->{mm}->last_commit($newest);
+ }
+ } else {
+ ${$sync->{max}} = $BATCH_BYTES;
+ }
+
+ $self->{mm}->{dbh}->commit;
+ if ($newest && need_xapian($self)) {
+ my $cur = $self->{xdb}->get_metadata('last_commit');
+ if (need_update($self, $cur, $newest)) {
+ $self->{xdb}->set_metadata('last_commit', $newest);
+ }
+ }
+
+ $self->{over}->rethread_done($sync->{-opt}) if $newest; # all done
+ commit_txn_lazy($self);
+ $self->{ibx}->git->cleanup;
+ my $nr = ${$sync->{nr}};
+ idx_release($self, $nr);
+ # let another process do some work...
+ if (my $pr = $sync->{-opt}->{-progress}) {
+ $pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
+ }
+ if (!$stk) { # more to come
+ begin_txn_lazy($self);
+ $self->{mm}->{dbh}->begin_work;
+ }
+}
+
# only for v1
sub process_stack {
- my ($self, $stk, $sync, $batch_cb) = @_;
+ my ($self, $sync, $stk) = @_;
my $git = $self->{ibx}->git;
my $max = $BATCH_BYTES;
my $nr = 0;
@@ -583,6 +622,7 @@ sub process_stack {
$sync->{max} = \$max;
$sync->{sidx} = $self;
+ $self->{mm}->{dbh}->begin_work;
if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
for my $oid (@leftovers) {
@@ -599,19 +639,12 @@ sub process_stack {
} else {
$git->cat_async($oid, \&index_both, $arg);
}
- if ($max <= 0) {
- $git->check_async_wait;
- $git->cat_async_wait;
- $max = $BATCH_BYTES;
- $batch_cb->($nr);
- }
+ v1_checkpoint($self, $sync) if $max <= 0;
} elsif ($f eq 'd') {
$git->cat_async($oid, \&unindex_both, $self);
}
}
- $git->check_async_wait;
- $git->cat_async_wait;
- $batch_cb->($nr, $stk);
+ v1_checkpoint($self, $sync, $stk);
}
sub log2stack ($$$$) {
@@ -729,7 +762,7 @@ sub _index_sync {
my $git = $self->{ibx}->git;
$git->batch_prepare;
my $pr = $opts->{-progress};
- my $sync = { reindex => $opts->{reindex} };
+ my $sync = { reindex => $opts->{reindex}, -opt => $opts };
my $xdb = $self->begin_txn_lazy;
$self->{over}->rethread_prepare($opts);
my $mm = _msgmap_init($self);
@@ -750,40 +783,7 @@ sub _index_sync {
my $stk = prepare_stack($self, $sync, $range);
$sync->{ntodo} = $stk ? $stk->num_records : 0;
$pr->("$sync->{ntodo}\n") if $pr; # continue previous line
-
- my $dbh = $mm->{dbh};
- my $batch_cb = sub {
- my ($nr, $stk) = @_;
- # latest_cmt may be undef
- my $newest = $stk ? $stk->{latest_cmt} : undef;
- if ($newest) {
- my $cur = $mm->last_commit || '';
- if (need_update($self, $cur, $newest)) {
- $mm->last_commit($newest);
- }
- }
- $dbh->commit;
- if ($newest && need_xapian($self)) {
- my $cur = $xdb->get_metadata('last_commit');
- if (need_update($self, $cur, $newest)) {
- $xdb->set_metadata('last_commit', $newest);
- }
- }
-
- $self->{over}->rethread_done($opts) if $newest; # all done
- $self->commit_txn_lazy;
- $git->cleanup;
- $xdb = idx_release($self, $nr);
- # let another process do some work...
- $pr->("indexed $nr/$sync->{ntodo}\n") if $pr && $nr;
- if (!$stk) { # more to come
- $xdb = $self->begin_txn_lazy;
- $dbh->begin_work;
- }
- };
-
- $dbh->begin_work;
- process_stack($self, $stk, $sync, $batch_cb);
+ process_stack($self, $sync, $stk);
}
sub DESTROY {
next prev parent reply other threads:[~2020-07-24 5:56 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-24 5:55 [PATCH 00/20] indexing changes and new features Eric Wong
2020-07-24 5:55 ` [PATCH 01/20] index: support --rethread switch to fix old indices Eric Wong
2020-07-24 5:55 ` [PATCH 02/20] v2: index forwards (via `git log --reverse') Eric Wong
2020-07-24 5:55 ` [PATCH 03/20] v2writable: introduce idx_stack Eric Wong
2020-07-24 5:55 ` [PATCH 04/20] v2writable: index_sync: reduce fill_alternates calls Eric Wong
2020-07-24 5:55 ` [PATCH 05/20] v2writable: move {autime} and {cotime} into $sync state Eric Wong
2020-07-24 5:55 ` [PATCH 06/20] v2writable: allow >= 40 byte git object IDs Eric Wong
2020-07-24 5:55 ` [PATCH 07/20] v2writable: drop "EPOCH.git indexing $RANGE" progress Eric Wong
2020-07-24 5:55 ` [PATCH 08/20] use consistent {ibx} field for writable code paths Eric Wong
2020-07-24 5:55 ` [PATCH 09/20] search: avoid copying {inboxdir} Eric Wong
2020-07-24 5:55 ` [PATCH 10/20] v2writable: use read-only PublicInbox::Git for cat_file Eric Wong
2020-07-24 5:55 ` [PATCH 11/20] v2writable: get rid of {reindex_pipe} field Eric Wong
2020-07-24 5:55 ` [PATCH 12/20] v2writable: clarify "epoch" comment Eric Wong
2020-07-24 5:55 ` [PATCH 13/20] xapcmd: set {from} properly for v1 inboxes Eric Wong
2020-07-24 5:56 ` [PATCH 14/20] searchidx: rename _xdb_{acquire,release} => idx_ Eric Wong
2020-07-24 5:56 ` [PATCH 15/20] searchidx: make v1 indexing closer to v2 Eric Wong
2020-07-24 5:56 ` [PATCH 16/20] index+xcpdb: support --no-sync flag Eric Wong
2020-07-24 5:56 ` [PATCH 17/20] v2writable: share log2stack code with v1 Eric Wong
2020-07-24 5:56 ` [PATCH 18/20] searchidx: support async git check Eric Wong
2020-07-24 5:56 ` Eric Wong [this message]
2020-07-24 5:56 ` [PATCH 20/20] v2writable: {unindexed} belongs in $sync state Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200724055606.27332-20-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).