From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 94B1D1F97E for ; Mon, 27 May 2019 18:45:46 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] v2: fix reindex skipping NNTP article numbers Date: Mon, 27 May 2019 18:45:45 +0000 Message-Id: <20190527184545.536-4-e@80x24.org> In-Reply-To: <20190527184545.536-1-e@80x24.org> References: <20190527184545.536-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: `public-inbox-index --reindex' could cause NNTP article number gaps to form when it also has to deal with new, never-before-seen commits in mirrors running off `git fetch'. Fix this by running two distinct invocations of ->index_sync: once to only reindex old commits, and a second time to index new commits. This does not appear to be a problem on v1 at the moment, but I'll need more time to analyze this. --- lib/PublicInbox/V2Writable.pm | 25 ++++++++++++++++++++++++- t/indexlevels-mirror.t | 25 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index cd08acd..331c4f4 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -850,11 +850,19 @@ sub index_prepare { my $pr = $opts->{-progress}; my $regen_max = 0; my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master'; + + # reindex stops at the current heads and we later rerun index_sync + # without {reindex} + my $reindex_heads = last_commits($self, $epoch_max) if $opts->{reindex}; + for (my $i = $epoch_max; $i >= 0; $i--) { die 'BUG: already indexing!' 
if $self->{reindex_pipe}; my $git_dir = git_dir_n($self, $i); -d $git_dir or next; # missing parts are fine my $git = PublicInbox::Git->new($git_dir); + if ($reindex_heads) { + $head = $reindex_heads->[$i] or next; + } chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head)); next if $?; # new repo @@ -959,7 +967,14 @@ sub index_sync { my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $high if $high; + if ($opts->{reindex}) { + # reindex should NOT see new commits anymore, if we do, + # it's a problem and we need to notice it via die() + $$regen = -1; + } else { + $$regen += $high; + } + my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); @@ -1001,6 +1016,14 @@ sub index_sync { $git->cleanup; } $self->done; + + # reindex does not pick up new changes, so we rerun w/o it: + if ($opts->{reindex}) { + my %again = %$opts; + $mm_tmp = undef; + delete @again{qw(reindex -skip_lock)}; + index_sync($self, \%again); + } } 1; diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index ce138fe..1251136 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -105,9 +105,17 @@ sub import_index_incremental { is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); + my @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, [1, 2], 'master has expected NNTP articles'); + + my @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; + is_deeply(\@ro_nums, [1, 2], 'mirror has expected NNTP articles'); + # remove message from master ok($im->remove($mime), '2nd message removed'); $im->done; + @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, [1], 'unindex NNTP article'.$v.$level); if ($level ne 'basic') { is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK"); @@ -132,6 +140,23 @@ sub import_index_incremental { 
($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2'); is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level"); } + + # add another message to master and have the mirror + # sync and reindex it + my @expect = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + foreach my $i (3..5) { + $mime->header_set('Message-ID', ""); + ok($im->add($mime), "#$i message added"); + push @expect, $i; + } + $im->done; + is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(system($index, '--reindex', $mirror), 0, + "v$v index --reindex mirror OK"); + @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; + @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; + is_deeply(\@rw_nums, \@expect, "v$v master has expected NNTP articles"); + is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles"); } # we can probably cull some other tests and put full/medium tests, here -- EW