From 6a2805beea98eb52b8ed866758fd2c416e22fdfb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 30 May 2019 03:59:40 +0000 Subject: v2writable: avoid mm_tmp creation without regen Creating mm_tmp is an expensive operation with large inboxes and can be avoided if there are no new messages to process. Since git-fetch(1) currently lacks an --exit-code option(*), mirrors will run `public-inbox-index' unconditionally after fetch, which is an expensive op if it needs to duplicate a large SQLite DB. This speeds up the mirror case of: git --git-dir=git/$EPOCH.git fetch && public-inbox-index This reduces the no-op `public-inbox-index' time from over 8s to ~0.5s on a (currently) 7-epoch clone of https://lore.kernel.org/lkml/ on my system. (*) WIP --exit-code for git-fetch: https://public-inbox.org/git/87ftphw7mv.fsf@evledraar.gmail.com/ --- lib/PublicInbox/V2Writable.pm | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib/PublicInbox/V2Writable.pm') diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 375f12fa..fd93ac27 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -900,6 +900,9 @@ sub sync_prepare ($$$) { $pr->("$n\n") if $pr; $regen_max += $n; } + + return 0 if (!$regen_max && !keys(%{$self->{unindex_range}})); + # reindex should NOT see new commits anymore, if we do, # it's a problem and we need to notice it via die() my $pad = length($regen_max) + 1; @@ -1027,7 +1030,6 @@ sub index_sync { return unless defined $latest; $self->idx_init($opt); # acquire lock my $sync = { - mm_tmp => $self->{mm}->tmp_clone, D => {}, # "$mid\0$cid" => $oid unindex_range => {}, # EPOCH => oid_old..oid_new reindex => $opt->{reindex}, @@ -1036,6 +1038,16 @@ sub index_sync { $sync->{ranges} = sync_ranges($self, $sync, $epoch_max); $sync->{regen} = sync_prepare($self, $sync, $epoch_max); + if ($sync->{regen}) { + # tmp_clone seems to fail if inside a transaction, so + # we rollback here (because we opened {mm} for reading) + # Note: we do NOT rely on DBI transactions for atomicity; + # only for batch performance. + $self->{mm}->{dbh}->rollback; + $self->{mm}->{dbh}->begin_work; + $sync->{mm_tmp} = $self->{mm}->tmp_clone; + } + # work backwards through history for (my $i = $epoch_max; $i >= 0; $i--) { index_epoch($self, $sync, $i); @@ -1049,8 +1061,10 @@ sub index_sync { $git->cleanup; } $self->done; - if (my $pr = $sync->{-opt}->{-progress}) { - $pr->('all.git '.sprintf($sync->{-regen_fmt}, $sync->{nr})); + + if (my $nr = $sync->{nr}) { + my $pr = $sync->{-opt}->{-progress}; + $pr->('all.git '.sprintf($sync->{-regen_fmt}, $nr)) if $pr; } # reindex does not pick up new changes, so we rerun w/o it: -- cgit v1.2.3-24-ge0c7