From 5e6d44673ba5e9aaeb6d8e63f27adf542c2760a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:39 -0500 Subject: Msgmap.pm: Track the largest value of num ever assigned Today the only thing that prevents public-inbox not reusing the message numbers of deleted messages is the sqlite autoincrement magic and that only works part of the time. The new incremental indexing test has revealed areas where today public-inbox does try to reuse numbers of deleted messages. Reusing the message numbers of existing messages is a problem because if a client ever sees messages that are subsequently deleted the client will not see the new messages with their old numbers. In practice this is difficult to trigger because it requires the most recently added message to be removed and have the removal show up in a separate pull request. Still it can happen and it should be handled. Instead of infering the highset number ever used by finding the maximum number in the message map, track the largest number ever assigned directly. Update Msgmap to track this value and update the indexers to use this value. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/PublicInbox/V2Writable.pm') diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 934640eb..c450980c 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -879,9 +879,9 @@ sub index_sync { my $mm_tmp = $self->{mm}->tmp_clone; my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); - my ($min, $max) = $mm_tmp->minmax; + my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $max if $max; + $$regen += $high if $high; my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); -- cgit v1.2.3-24-ge0c7 From 3f189032fb2d7c8a9dda732e0263e697ce1cb0ac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:42 -0500 Subject: SearchIdx,V2Writeable: Update num_highwater on optimized deletes When performing an incremental index update with index_sync if a message is seen to be both added and deleted update the num_highwater mark even though the message is not otherwise indexed. This ensures index_sync generates the same msgmap no matter which commit it stops at during incremental syncs. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib/PublicInbox/V2Writable.pm') diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index c450980c..92d2672c 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -658,7 +658,7 @@ sub mark_deleted { } sub reindex_oid { - my ($self, $mm_tmp, $D, $git, $oid, $regen) = @_; + my ($self, $mm_tmp, $D, $git, $oid, $regen, $reindex) = @_; my $len; my $msgref = $git->cat_file($oid, \$len); my $mime = PublicInbox::MIME->new($$msgref); @@ -700,7 +700,8 @@ sub reindex_oid { if (!defined($mid0) || $del) { if (!defined($mid0) && $del) { # expected for deletes - $$regen--; + $num = $$regen--; + $self->{mm}->num_highwater($num) unless $reindex; return } @@ -877,7 +878,8 @@ sub index_sync { return unless defined $latest; $self->idx_init; # acquire lock my $mm_tmp = $self->{mm}->tmp_clone; - my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); + my $reindex = $opts->{reindex}; + my $ranges = $reindex ? [] : $self->last_commits($epoch_max); my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); @@ -903,7 +905,7 @@ sub index_sync { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { $self->reindex_oid($mm_tmp, $D, $git, $1, - $regen); + $regen, $reindex); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) { $self->mark_deleted($D, $git, $1); } -- cgit v1.2.3-24-ge0c7 From 65820d88976bc2fa83e9acd3b460ac304578d40a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:43 -0500 Subject: V2Writeable.pm: Ensure that a found message number is in the msgmap The lookup to see if a num has already been assigned to a message happens in a temporary copy of message map. It is possible that the number has been removed from the current message map. The unindex/reindex after a history rewrite triggered by a purge should be one such case. Therefore add the number to the msgmap in case it is not currently present. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/PublicInbox/V2Writable.pm') diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 92d2672c..4dd14331 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -676,6 +676,7 @@ sub reindex_oid { if (defined $n && $n > $num) { $mid0 = $mid; $num = $n; + $self->{mm}->mid_set($num, $mid0); } } if (!defined($mid0) && $regen && !$del) { -- cgit v1.2.3-24-ge0c7 From 72fa722146912781230c54d7282bf7c1147e0455 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2018 11:43:44 -0500 Subject: V2Writeable.pm: In unindex_oid delete the message from msgmap Now that we track the num highwater mark it is safe to remove messages from msgmap that have been previously allocated. Removing even the highest numbered article will no longer cause new message numbers to move backwards. Signed-off-by: "Eric W. Biederman" --- lib/PublicInbox/V2Writable.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/V2Writable.pm') diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 4dd14331..0396d9f5 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -842,7 +842,10 @@ sub unindex_oid { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } - $self->{unindexed}->{$_}++ foreach keys %gone; + foreach my $num (keys %gone) { + $self->{unindexed}->{$_}++; + $self->{mm}->num_delete($num); + } $self->unindex_oid_remote($oid, $mid); } } -- cgit v1.2.3-24-ge0c7