diff options
author | Eric Wong <e@80x24.org> | 2018-08-03 20:05:24 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2018-08-03 20:05:24 +0000 |
commit | 861bec7bec5908871e5b0ede244cb1e990a47403 (patch) | |
tree | 8d116f0c9ad6a3af4d1b4d4041c2be5bbdf42065 /lib/PublicInbox/V2Writable.pm | |
parent | 7808b18c63f9d754a56ad7b2bd2385545d3521fb (diff) | |
parent | 72fa722146912781230c54d7282bf7c1147e0455 (diff) | |
download | public-inbox-861bec7bec5908871e5b0ede244cb1e990a47403.tar.gz |
Incremental indexing fixes from Eric W. Biederman. These prevents the highest message number in msgmap from being reassigned after deletes in rare cases and ensures messages are deleted from msgmap in v2. * eb/index-incremental: V2Writeable.pm: In unindex_oid delete the message from msgmap V2Writeable.pm: Ensure that a found message number is in the msgmap SearchIdx,V2Writeable: Update num_highwater on optimized deletes t/v[12]reindex.t: Verify the num highwater is as expected t/v[12]reindex.t Verify num_highwater Msgmap.pm: Track the largest value of num ever assigned SearchIdx.pm: Always assign numbers backwards during incremental indexing t/v[12]reindex.t: Test incremental indexing works t/v[12]reindex.t: Test that the resulting msgmap is as expected t/v[12]reindex.t: Place expected second in Xapian tests t/v2reindex.t: Isolate the test cases more t/v1reindex.t: Isolate the test cases Import.pm: Don't assume {in} and {out} always exist
Diffstat (limited to 'lib/PublicInbox/V2Writable.pm')
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 934640eb..0396d9f5 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -658,7 +658,7 @@ sub mark_deleted { } sub reindex_oid { - my ($self, $mm_tmp, $D, $git, $oid, $regen) = @_; + my ($self, $mm_tmp, $D, $git, $oid, $regen, $reindex) = @_; my $len; my $msgref = $git->cat_file($oid, \$len); my $mime = PublicInbox::MIME->new($$msgref); @@ -676,6 +676,7 @@ sub reindex_oid { if (defined $n && $n > $num) { $mid0 = $mid; $num = $n; + $self->{mm}->mid_set($num, $mid0); } } if (!defined($mid0) && $regen && !$del) { @@ -700,7 +701,8 @@ sub reindex_oid { if (!defined($mid0) || $del) { if (!defined($mid0) && $del) { # expected for deletes - $$regen--; + $num = $$regen--; + $self->{mm}->num_highwater($num) unless $reindex; return } @@ -840,7 +842,10 @@ sub unindex_oid { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } - $self->{unindexed}->{$_}++ foreach keys %gone; + foreach my $num (keys %gone) { + $self->{unindexed}->{$_}++; + $self->{mm}->num_delete($num); + } $self->unindex_oid_remote($oid, $mid); } } @@ -877,11 +882,12 @@ sub index_sync { return unless defined $latest; $self->idx_init; # acquire lock my $mm_tmp = $self->{mm}->tmp_clone; - my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); + my $reindex = $opts->{reindex}; + my $ranges = $reindex ? [] : $self->last_commits($epoch_max); - my ($min, $max) = $mm_tmp->minmax; + my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $max if $max; + $$regen += $high if $high; my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); @@ -903,7 +909,7 @@ sub index_sync { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { $self->reindex_oid($mm_tmp, $D, $git, $1, - $regen); + $regen, $reindex); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) { $self->mark_deleted($D, $git, $1); } |