From 9294716ecc685c7b21ec21e2dbbbeb2c8f62c477 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:32 -0500 Subject: Import.pm: Don't assume {in} and {out} always exist While working on one of the tests I did: my $im = PublicInbox::V2Writable->new($ibx, 1); my $im0 = $im->importer(); $im->add($mime); Which resulted in a warning of the use of an undefined value from atfork_child, and the test failing nastily. Inspection of the code reveals this can happen anytime gfi_start has not been called. So just fix atfork_child to skip closing file descriptors that have not yet been setup. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/Import.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 4e3b4c55..bfa7a805 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -451,6 +451,7 @@ sub done { sub atfork_child { my ($self) = @_; foreach my $f (qw(in out)) { + next unless defined($self->{$f}); close $self->{$f} or die "failed to close import[$f]: $!\n"; } } -- cgit v1.2.3-24-ge0c7 From 61d33536cdda6a9e6267161b25e7f8701501b34c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:33 -0500 Subject: t/v1reindex.t: Isolate the test cases While inspecting the tests I realized that because we have been reusing variables there can be a memory between one test case and another. Add scopes and local variables to prevent an unintended memory between one test and another. Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 111 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 45 deletions(-) diff --git a/t/v1reindex.t b/t/v1reindex.t index 75380f0f..fdffdaee 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -22,7 +22,6 @@ my $ibx_config = { -primary_address => 'test@example.com', indexlevel => 'full', }; -my $ibx = PublicInbox::Inbox->new($ibx_config); my $mime = PublicInbox::MIME->create( header => [ From => 'a@example.com', @@ -32,55 +31,72 @@ my $mime = PublicInbox::MIME->create( ], body => "hello world\n", ); -my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); -foreach my $i (1..10) { - $mime->header_set('Message-Id', "<$i\@example.com>"); - ok($im->add($mime), "message $i added"); - if ($i == 4) { - $im->remove($mime); +my $minmax; +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + foreach my $i (1..10) { + $mime->header_set('Message-Id', "<$i\@example.com>"); + ok($im->add($mime), "message $i added"); + if ($i == 4) { + $im->remove($mime); + } } -} -if ('test remove later') { - $mime->header_set('Message-Id', "<5\@example.com>"); - $im->remove($mime); -} + if ('test remove later') { + $mime->header_set('Message-Id', "<5\@example.com>"); + $im->remove($mime); + } -$im->done; -my $rw = PublicInbox::SearchIdx->new($ibx, 1); -eval { $rw->index_sync() }; -is($@, '', 'no error from indexing'); + $im->done; + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + eval { $rw->index_sync() }; + is($@, '', 'no error from indexing'); -my $minmax = [ $ibx->mm->minmax ]; -ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); -is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + $minmax = [ $ibx->mm->minmax ]; + ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); + is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); +} -$rw = PublicInbox::SearchIdx->new($ibx, 1); -eval { $rw->index_sync({reindex => 1}) }; -is($@, '', 'no error from reindexing'); -$im->done; +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + eval { $rw->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing'); + $im->done; +} my $xap = "$mainrepo/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); -$rw = PublicInbox::SearchIdx->new($ibx, 1); +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); -eval { $rw->index_sync({reindex => 1}) }; -is($@, '', 'no error from reindexing'); -$im->done; -ok(-d $xap, 'Xapian directories recreated'); + eval { $rw->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing'); + $im->done; + ok(-d $xap, 'Xapian directories recreated'); -delete $ibx->{mm}; -is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + delete $ibx->{mm}; + is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); +} ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$rw = PublicInbox::SearchIdx->new($ibx, 1); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); @@ -93,11 +109,13 @@ $rw = PublicInbox::SearchIdx->new($ibx, 1); ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$rw = PublicInbox::SearchIdx->new($ibx, 1); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); @@ -110,13 +128,14 @@ $rw = PublicInbox::SearchIdx->new($ibx, 1); ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$ibx_config->{indexlevel} = 'medium'; -$ibx = PublicInbox::Inbox->new($ibx_config); -$rw = PublicInbox::SearchIdx->new($ibx, 1); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + $config{indexlevel} = 'medium'; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); @@ -131,13 +150,14 @@ $rw = PublicInbox::SearchIdx->new($ibx, 1); ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$ibx_config->{indexlevel} = 'basic'; -$ibx = PublicInbox::Inbox->new($ibx_config); -$rw = PublicInbox::SearchIdx->new($ibx, 1); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + $config{indexlevel} = 'basic'; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); @@ -152,13 +172,14 @@ $rw = PublicInbox::SearchIdx->new($ibx, 1); # upgrade existing basic to medium # note: changing indexlevels is not yet supported in v2, # and may not be without more effort -$ibx_config->{indexlevel} = 'medium'; -$ibx = PublicInbox::Inbox->new($ibx_config); -$rw = PublicInbox::SearchIdx->new($ibx, 1); # no removals { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + $config{indexleve} = 'medium'; + my $ibx = PublicInbox::Inbox->new(\%config); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync }; is($@, '', 'no error from indexing'); is_deeply(\@warn, [], 'no warnings'); -- cgit v1.2.3-24-ge0c7 From fd50b4bb72711f6a9e1bc911bb5e63b3732cb74e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:34 -0500 Subject: t/v2reindex.t: Isolate the test cases more While inspecting the tests I realized that because we have been reusing variables there can be a memory between one test case and another. Add scopes and local variables to prevent an unintended memory between one test cases. Signed-off-by: "Eric W. Biederman" --- t/v2reindex.t | 85 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/t/v2reindex.t b/t/v2reindex.t index 1543309c..4d06c6ce 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -21,7 +21,6 @@ my $ibx_config = { -primary_address => 'test@example.com', indexlevel => 'full', }; -my $ibx = PublicInbox::Inbox->new($ibx_config); my $mime = PublicInbox::MIME->create( header => [ From => 'a@example.com', @@ -32,39 +31,57 @@ my $mime = PublicInbox::MIME->create( body => "hello world\n", ); local $ENV{NPROC} = 2; -my $im = PublicInbox::V2Writable->new($ibx, 1); -foreach my $i (1..10) { - $mime->header_set('Message-Id', "<$i\@example.com>"); - ok($im->add($mime), "message $i added"); - if ($i == 4) { +my $minmax; +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx, 1); + foreach my $i (1..10) { + $mime->header_set('Message-Id', "<$i\@example.com>"); + ok($im->add($mime), "message $i added"); + if ($i == 4) { + $im->remove($mime); + } + } + + if ('test remove later') { + $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); } -} -if ('test remove later') { - $mime->header_set('Message-Id', "<5\@example.com>"); - $im->remove($mime); + $im->done; + $minmax = [ $ibx->mm->minmax ]; + ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); + is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); } -$im->done; -my $minmax = [ $ibx->mm->minmax ]; -ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); -is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx, 1); + eval { $im->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing'); + $im->done; -eval { $im->index_sync({reindex => 1}) }; -is($@, '', 'no error from reindexing'); -$im->done; + delete $ibx->{mm}; + is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); +} my $xap = "$mainrepo/xap".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); -eval { $im->index_sync({reindex => 1}) }; -is($@, '', 'no error from reindexing'); -$im->done; -ok(-d $xap, 'Xapian directories recreated'); +{ + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx, 1); + eval { $im->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing'); + $im->done; + ok(-d $xap, 'Xapian directories recreated'); -delete $ibx->{mm}; -is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + delete $ibx->{mm}; + is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); +} ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); @@ -72,6 +89,9 @@ ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); @@ -88,6 +108,9 @@ ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); @@ -103,13 +126,13 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$ibx_config->{indexlevel} = 'medium'; -$ibx = PublicInbox::Inbox->new($ibx_config); -$im = PublicInbox::V2Writable->new($ibx); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + $config{indexlevel} = 'medium'; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); @@ -135,13 +158,13 @@ $im = PublicInbox::V2Writable->new($ibx); ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); - -$ibx_config->{indexlevel} = 'basic'; -$ibx = PublicInbox::Inbox->new($ibx_config); -$im = PublicInbox::V2Writable->new($ibx); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + $config{indexlevel} = 'basic'; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); -- cgit v1.2.3-24-ge0c7 From f785db0909bff5b8ba2eab473815f81192d283ab Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:35 -0500 Subject: t/v[12]reindex.t: Place expected second in Xapian tests Place the expected value second in is and isnt tests because when these tests fail they report the second value as the expected value. A report saying got 0 expected 8 'no Xapian search results' can be confusing. Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 6 +++--- t/v2reindex.t | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/t/v1reindex.t b/t/v1reindex.t index fdffdaee..de4fafda 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -144,7 +144,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->query('hello world', {mset=>1}); - isnt(0, $mset->size, 'got Xapian search results'); + isnt($mset->size, 0, 'got Xapian search results'); } ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); @@ -166,7 +166,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); - is(0, $mset->size, "no Xapian search results"); + is($mset->size, 0, "no Xapian search results"); } # upgrade existing basic to medium @@ -184,7 +184,7 @@ ok(!-d $xap, 'Xapian directories removed again'); is($@, '', 'no error from indexing'); is_deeply(\@warn, [], 'no warnings'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); - isnt(0, $mset->size, 'search OK after basic -> medium'); + isnt($mset->size, 0, 'search OK after basic -> medium'); } done_testing(); diff --git a/t/v2reindex.t b/t/v2reindex.t index 4d06c6ce..67d8be78 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -119,7 +119,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->query('"hello world"', {mset=>1}); - isnt(0, $mset->size, "phrase search succeeds on indexlevel=full"); + isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } } @@ -146,11 +146,11 @@ ok(!-d $xap, 'Xapian directories removed again'); # phrase searches still work delete $ibx->{search}; my $mset = $ibx->search->query('"hello world"', {mset=>1}); - is(0, $mset->size, 'phrase search does not work on medium'); + is($mset->size, 0, 'phrase search does not work on medium'); } my $mset = $ibx->search->query('hello world', {mset=>1}); - isnt(0, $mset->size, "normal search works on indexlevel=medium"); + isnt($mset->size, 0, "normal search works on indexlevel=medium"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{full} > $sizes{medium}, 'medium is smaller than full'); } @@ -173,7 +173,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->query('hello', {mset=>1}); - is(0, $mset->size, "search fails on indexlevel='basic'"); + is($mset->size, 0, "search fails on indexlevel='basic'"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium'); } -- cgit v1.2.3-24-ge0c7 From 344684fc77a999ac071f99803a0bff1a96dc5f7c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:36 -0500 Subject: t/v[12]reindex.t: Test that the resulting msgmap is as expected Deeply inspect the entire message map in the reindexing tests as the actual message order is significant and can result in surprises. Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 35 +++++++++++++++++++++++++++++++++++ t/v2reindex.t | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/t/v1reindex.t b/t/v1reindex.t index de4fafda..877e7039 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -32,6 +32,7 @@ my $mime = PublicInbox::MIME->create( body => "hello world\n", ); my $minmax; +my $msgmap; { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); @@ -57,6 +58,19 @@ my $minmax; $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + + my ($min, $max) = @$minmax; + $msgmap = $ibx->mm->msg_range(\$min, $max); + is_deeply($msgmap, [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected'); } { @@ -67,6 +81,9 @@ my $minmax; eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$mainrepo/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION(); @@ -85,6 +102,9 @@ ok(!-d $xap, 'Xapian directories removed'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); @@ -104,6 +124,9 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); @@ -123,6 +146,9 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = @$minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); @@ -145,6 +171,9 @@ ok(!-d $xap, 'Xapian directories removed again'); is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->query('hello world', {mset=>1}); isnt($mset->size, 0, 'got Xapian search results'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); @@ -167,6 +196,9 @@ ok(!-d $xap, 'Xapian directories removed again'); is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); is($mset->size, 0, "no Xapian search results"); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # upgrade existing basic to medium @@ -185,6 +217,9 @@ ok(!-d $xap, 'Xapian directories removed again'); is_deeply(\@warn, [], 'no warnings'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); isnt($mset->size, 0, 'search OK after basic -> medium'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } done_testing(); diff --git a/t/v2reindex.t b/t/v2reindex.t index 67d8be78..31e61d5b 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -32,6 +32,7 @@ my $mime = PublicInbox::MIME->create( ); local $ENV{NPROC} = 2; my $minmax; +my $msgmap; { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); @@ -53,6 +54,19 @@ my $minmax; $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + + my ($min, $max) = @$minmax; + $msgmap = $ibx->mm->msg_range(\$min, $max); + is_deeply($msgmap, [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected'); } { @@ -65,6 +79,9 @@ my $minmax; delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$mainrepo/xap".PublicInbox::Search::SCHEMA_VERSION(); @@ -81,6 +98,9 @@ ok(!-d $xap, 'Xapian directories removed'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); @@ -99,6 +119,9 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my %sizes; @@ -121,6 +144,9 @@ ok(!-d $xap, 'Xapian directories removed again'); my $mset = $ibx->search->query('"hello world"', {mset=>1}); isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); @@ -153,6 +179,10 @@ ok(!-d $xap, 'Xapian directories removed again'); isnt($mset->size, 0, "normal search works on indexlevel=medium"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{full} > $sizes{medium}, 'medium is smaller than full'); + + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); @@ -176,6 +206,9 @@ ok(!-d $xap, 'Xapian directories removed again'); is($mset->size, 0, "search fails on indexlevel='basic'"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium'); + + my ($min, $max) = $ibx->mm->minmax; + is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } done_testing(); -- cgit v1.2.3-24-ge0c7 From 075d15d10333ea9127170b8c7635e4286777c005 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:37 -0500 Subject: t/v[12]reindex.t: Test incremental indexing works Capture interesting commits of the test repository in mark variables. Use those marks to build interesting scenarios where index_sync proceeds as if those marks are the heads of the repositor. Use this capability to test what happens when adds and deletes are mixed within a repository. Be sad because things don't yet work as they should. Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ t/v2reindex.t | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 389 insertions(+) diff --git a/t/v1reindex.t b/t/v1reindex.t index 877e7039..8e78aa76 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -33,6 +33,7 @@ my $mime = PublicInbox::MIME->create( ); my $minmax; my $msgmap; +my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); @@ -41,13 +42,17 @@ my $msgmap; $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { + $mark1 = $im->get_mark($im->{tip}); $im->remove($mime); + $mark2 = $im->get_mark($im->{tip}); } } if ('test remove later') { + $mark3 = $im->get_mark($im->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); + $mark4 = $im->get_mark($im->{tip}); } $im->done; @@ -222,4 +227,193 @@ ok(!-d $xap, 'Xapian directories removed again'); is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } +# An incremental indexing test +ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark1 4 simple additions in the same index_sync + eval { $rw->index_sync({ref => $mark1}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 4, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [4, '4@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark2 A delete separated form and add in the same index_sync + eval { $rw->index_sync({ref => $mark2}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 3, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark3 adds following the delete at mark2 + eval { $rw->index_sync({ref => $mark3}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [5, '5@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark4 A delete of an older message + eval { $rw->index_sync({ref => $mark4}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} + + +# Another incremental indexing test +ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark2 an add and it's delete in the same index_sync + eval { $rw->index_sync({ref => $mark2}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 3, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark3 adds following the delete at mark2 + eval { $rw->index_sync({ref => $mark3}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [5, '5@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + # mark4 A delete of an older message + eval { $rw->index_sync({ref => $mark4}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} + + done_testing(); diff --git a/t/v2reindex.t b/t/v2reindex.t index 31e61d5b..ce13a07c 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -33,21 +33,27 @@ my $mime = PublicInbox::MIME->create( local $ENV{NPROC} = 2; my $minmax; my $msgmap; +my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); + my $im0 = $im->importer(); foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { + $mark1 = $im0->get_mark($im0->{tip}); $im->remove($mime); + $mark2 = $im0->get_mark($im0->{tip}); } } if ('test remove later') { + $mark3 = $im0->get_mark($im0->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); + $mark4 = $im0->get_mark($im0->{tip}); } $im->done; @@ -211,4 +217,193 @@ ok(!-d $xap, 'Xapian directories removed again'); is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } + +# An incremental indexing test +ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark1 4 simple additions in the same index_sync + $ibx->{ref_head} = $mark1; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 4, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [4, '4@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark2 A delete separated from an add in the same index_sync + $ibx->{ref_head} = $mark2; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 3, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark3 adds following the delete at mark2 + $ibx->{ref_head} = $mark3; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [5, '5@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark4 A delete of an older message + $ibx->{ref_head} = $mark4; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} + + +# Another incremental indexing test +ok(unlink "$mainrepo/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark2 an add and it's delete in the same index_sync + $ibx->{ref_head} = $mark2; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 3, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark3 adds following the delete at mark2 + $ibx->{ref_head} = $mark3; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [5, '5@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + # mark4 A delete of an older message + $ibx->{ref_head} = $mark4; + my $im = PublicInbox::V2Writable->new($ibx); + eval { $im->index_sync() }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + my ($min, $max) = $ibx->mm->minmax; + is($min, 1, 'min as expected'); + is($max, 10, 'max as expected'); + is_deeply($ibx->mm->msg_range(\$min, $max), + [ + [1, '1@example.com' ], + [2, '2@example.com' ], + [3, '3@example.com' ], + [6, '6@example.com' ], + [7, '7@example.com' ], + [8, '8@example.com' ], + [9, '9@example.com' ], + [10, '10@example.com' ], + ], 'msgmap as expected' ); +} + done_testing(); -- cgit v1.2.3-24-ge0c7 From ad2080deb8102a75d2f26b448267c209bea4b4e2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:38 -0500 Subject: SearchIdx.pm: Always assign numbers backwards during incremental indexing When walking messages newest to oldest, assigning the larger numbers before smaller numbers ensures older messages get smaller numbers. This leads to the possibility of a msgmap that can be regenerated when needed. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/SearchIdx.pm | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1d259a86..ac821ac0 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -619,23 +619,28 @@ sub _git_log { my ($self, $range) = @_; my $git = $self->{git}; + # Count the new files so they can be added newest to oldest + # and still have numbers increasing from oldest to newest + my $fcount = 0; + # can't use 'rev-list --count' if we use --diff-filter + my $fh = $git->popen(qw(log --pretty=tformat:%h + --no-notes --no-color --no-renames + --diff-filter=AM), $range); + ++$fcount while <$fh>; + my (undef, $max) = $self->{mm}->minmax; + if (index($range, '..') < 0) { - my $regen_max = 0; - # can't use 'rev-list --count' if we use --diff-filter - my $fh = $git->popen(qw(log --pretty=tformat:%h - --no-notes --no-color --no-renames - --diff-filter=AM), $range); - ++$regen_max while <$fh>; - my (undef, $max) = $self->{mm}->minmax; - - if ($max && $max == $regen_max) { + if ($max && $max == $fcount) { # fix up old bugs in full indexes which caused messages to # not appear in Msgmap $self->{regen_up} = $max; } else { # normal regen is for for fresh data - $self->{regen_down} = $regen_max; + $self->{regen_down} = $fcount; } + } else { + # Give oldest messages the smallest numbers + $self->{regen_down} = $max + $fcount; } $git->popen(qw/log --no-notes --no-color --no-renames -- cgit v1.2.3-24-ge0c7 From 5e6d44673ba5e9aaeb6d8e63f27adf542c2760a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:39 -0500 Subject: Msgmap.pm: Track the largest value of num ever assigned Today the only thing that prevents public-inbox not reusing the message numbers of deleted messages is the sqlite autoincrement magic and that only works part of the time. The new incremental indexing test has revealed areas where today public-inbox does try to reuse numbers of deleted messages. Reusing the message numbers of existing messages is a problem because if a client ever sees messages that are subsequently deleted the client will not see the new messages with their old numbers. In practice this is difficult to trigger because it requires the most recently added message to be removed and have the removal show up in a separate pull request. Still it can happen and it should be handled. Instead of infering the highset number ever used by finding the maximum number in the message map, track the largest number ever assigned directly. Update Msgmap to track this value and update the indexers to use this value. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/Msgmap.pm | 23 +++++++++++++++++++++-- lib/PublicInbox/SearchIdx.pm | 8 ++++---- lib/PublicInbox/V2Writable.pm | 4 ++-- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index fdc71e46..d474bade 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -50,6 +50,10 @@ sub new_file { create_tables($dbh); $dbh->begin_work; $self->created_at(time) unless $self->created_at; + + my (undef, $max) = $self->minmax(); + $max ||= 0; + $self->num_highwater($max); $dbh->commit; } $self; @@ -107,6 +111,17 @@ sub created_at { $self->meta_accessor('created_at', $second); } +sub num_highwater { + my ($self, $num) = @_; + my $high = $self->{num_highwater} ||= + $self->meta_accessor('num_highwater'); + if (defined($num) && (!defined($high) || ($num > $high))) { + $self->{num_highwater} = $num; + $self->meta_accessor('num_highwater', $num); + } + $self->{num_highwater}; +} + sub mid_insert { my ($self, $mid) = @_; my $dbh = $self->{dbh}; @@ -114,7 +129,9 @@ sub mid_insert { INSERT OR IGNORE INTO msgmap (mid) VALUES (?) return if $sth->execute($mid) == 0; - $dbh->last_insert_id(undef, undef, 'msgmap', 'num'); + my $num = $dbh->last_insert_id(undef, undef, 'msgmap', 'num'); + $self->num_highwater($num) unless !defined($num); + $num; } sub mid_for { @@ -213,7 +230,9 @@ sub mid_set { $self->{dbh}->prepare( 'INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)'); }; - $sth->execute($num, $mid); + my $result = $sth->execute($num, $mid); + $self->num_highwater($num) if (defined($result) && $result == 1); + $result; } sub DESTROY { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index ac821ac0..2532c8df 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -627,20 +627,20 @@ sub _git_log { --no-notes --no-color --no-renames --diff-filter=AM), $range); ++$fcount while <$fh>; - my (undef, $max) = $self->{mm}->minmax; + my $high = $self->{mm}->num_highwater; if (index($range, '..') < 0) { - if ($max && $max == $fcount) { + if ($high && $high == $fcount) { # fix up old bugs in full indexes which caused messages to # not appear in Msgmap - $self->{regen_up} = $max; + $self->{regen_up} = $high; } else { # normal regen is for for fresh data $self->{regen_down} = $fcount; } } else { # Give oldest messages the smallest numbers - $self->{regen_down} = $max + $fcount; + $self->{regen_down} = $high + $fcount; } $git->popen(qw/log --no-notes --no-color --no-renames diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 934640eb..c450980c 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -879,9 +879,9 @@ sub index_sync { my $mm_tmp = $self->{mm}->tmp_clone; my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); - my ($min, $max) = $mm_tmp->minmax; + my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $max if $max; + $$regen += $high if $high; my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); -- cgit v1.2.3-24-ge0c7 From 1597d1a5fff6c1090cc65f439d1773cb3e98d382 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:40 -0500 Subject: t/v[12]reindex.t Verify num_highwater Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 7 +++++++ t/v2reindex.t | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/t/v1reindex.t b/t/v1reindex.t index 8e78aa76..876c9db3 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -246,6 +246,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -269,6 +270,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -291,6 +293,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -319,6 +322,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -352,6 +356,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -374,6 +379,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -402,6 +408,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], diff --git a/t/v2reindex.t b/t/v2reindex.t index ce13a07c..3d217aff 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -237,6 +237,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -260,6 +261,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -282,6 +284,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -310,6 +313,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -343,6 +347,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); + is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -365,6 +370,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], @@ -393,6 +399,7 @@ ok(!-d $xap, 'Xapian directories removed again'); my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], -- cgit v1.2.3-24-ge0c7 From 0aa5baa369e8ecced14773ae62dc14ec305ee1b9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:41 -0500 Subject: t/v[12]reindex.t: Verify the num highwater is as expected Instrument the tests to verify the highwater num highwater mark is where it is expected. Signed-off-by: "Eric W. Biederman" --- t/v1reindex.t | 10 ++++++++++ t/v2reindex.t | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/t/v1reindex.t b/t/v1reindex.t index 876c9db3..8be95149 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -63,6 +63,7 @@ my ($mark1, $mark2, $mark3, $mark4); $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); @@ -87,6 +88,8 @@ my ($mark1, $mark2, $mark3, $mark4); is($@, '', 'no error from reindexing'); $im->done; + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); + my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } @@ -107,6 +110,7 @@ ok(!-d $xap, 'Xapian directories removed'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -129,6 +133,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -151,6 +156,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -174,6 +180,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->query('hello world', {mset=>1}); isnt($mset->size, 0, 'got Xapian search results'); @@ -199,6 +206,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); is($mset->size, 0, "no Xapian search results"); @@ -223,6 +231,8 @@ ok(!-d $xap, 'Xapian directories removed again'); my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); isnt($mset->size, 0, 'search OK after basic -> medium'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); + my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } diff --git a/t/v2reindex.t b/t/v2reindex.t index 3d217aff..a5454a22 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -60,6 +60,7 @@ my ($mark1, $mark2, $mark3, $mark4); $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); @@ -85,6 +86,7 @@ my ($mark1, $mark2, $mark3, $mark4); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -104,6 +106,7 @@ ok(!-d $xap, 'Xapian directories removed'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -125,6 +128,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); @@ -147,6 +151,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->query('"hello world"', {mset=>1}); isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } @@ -172,6 +177,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); if (0) { # not sure why, but Xapian seems to fallback to terms and @@ -208,6 +214,7 @@ ok(!-d $xap, 'Xapian directories removed again'); ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->query('hello', {mset=>1}); is($mset->size, 0, "search fails on indexlevel='basic'"); for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } -- cgit v1.2.3-24-ge0c7 From 3f189032fb2d7c8a9dda732e0263e697ce1cb0ac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:42 -0500 Subject: SearchIdx,V2Writeable: Update num_highwater on optimized deletes When performing an incremental index update with index_sync if a message is seen to be both added and deleted update the num_highwater mark even though the message is not otherwise indexed. This ensures index_sync generates the same msgmap no matter which commit it stops at during incremental syncs. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/SearchIdx.pm | 3 ++- lib/PublicInbox/V2Writable.pm | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 2532c8df..54f82aa8 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -583,7 +583,8 @@ sub read_log { my $blob = $1; if (delete $D{$blob}) { if (defined $self->{regen_down}) { - $self->{regen_down}--; + my $num = $self->{regen_down}--; + $self->{mm}->num_highwater($num); } next; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index c450980c..92d2672c 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -658,7 +658,7 @@ sub mark_deleted { } sub reindex_oid { - my ($self, $mm_tmp, $D, $git, $oid, $regen) = @_; + my ($self, $mm_tmp, $D, $git, $oid, $regen, $reindex) = @_; my $len; my $msgref = $git->cat_file($oid, \$len); my $mime = PublicInbox::MIME->new($$msgref); @@ -700,7 +700,8 @@ sub reindex_oid { if (!defined($mid0) || $del) { if (!defined($mid0) && $del) { # expected for deletes - $$regen--; + $num = $$regen--; + $self->{mm}->num_highwater($num) unless $reindex; return } @@ -877,7 +878,8 @@ sub index_sync { return unless defined $latest; $self->idx_init; # acquire lock my $mm_tmp = $self->{mm}->tmp_clone; - my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); + my $reindex = $opts->{reindex}; + my $ranges = $reindex ? [] : $self->last_commits($epoch_max); my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); @@ -903,7 +905,7 @@ sub index_sync { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { $self->reindex_oid($mm_tmp, $D, $git, $1, - $regen); + $regen, $reindex); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) { $self->mark_deleted($D, $git, $1); } -- cgit v1.2.3-24-ge0c7 From 65820d88976bc2fa83e9acd3b460ac304578d40a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:43 -0500 Subject: V2Writeable.pm: Ensure that a found message number is in the msgmap The lookup to see if a num has already been assigned to a message happens in a temporary copy of message map. It is possible that the number has been removed from the current message map. The unindex/reindex after a history rewrite triggered by a purge should be one such case. Therefore add the number to the msgmap in case it is not currently present. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 92d2672c..4dd14331 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -676,6 +676,7 @@ sub reindex_oid { if (defined $n && $n > $num) { $mid0 = $mid; $num = $n; + $self->{mm}->mid_set($num, $mid0); } } if (!defined($mid0) && $regen && !$del) { -- cgit v1.2.3-24-ge0c7 From 72fa722146912781230c54d7282bf7c1147e0455 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:44 -0500 Subject: V2Writeable.pm: In unindex_oid delete the message from msgmap Now that we track the num highwater mark it is safe to remove messages from msgmap that have been previously allocated. Removing even the highest numbered article will no longer cause new message numbers to move backwards. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 4dd14331..0396d9f5 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -842,7 +842,10 @@ sub unindex_oid { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } - $self->{unindexed}->{$_}++ foreach keys %gone; + foreach my $num (keys %gone) { + $self->{unindexed}->{$_}++; + $self->{mm}->num_delete($num); + } $self->unindex_oid_remote($oid, $mid); } } -- cgit v1.2.3-24-ge0c7