about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2018-08-01 11:43:39 -0500
committerEric Wong <e@80x24.org>2018-08-03 20:03:49 +0000
commit5e6d44673ba5e9aaeb6d8e63f27adf542c2760a0 (patch)
tree33d46fb424fdc89dad277a87d6a4c93ea37a4a9c
parentad2080deb8102a75d2f26b448267c209bea4b4e2 (diff)
downloadpublic-inbox-5e6d44673ba5e9aaeb6d8e63f27adf542c2760a0.tar.gz
Today the only thing that prevents public-inbox from reusing the
message numbers of deleted messages is the sqlite autoincrement magic
and that only works part of the time.  The new incremental indexing
test has revealed areas where today public-inbox does try to reuse
numbers of deleted messages.

Reusing the message numbers of existing messages is a problem because
if a client ever sees messages that are subsequently deleted the
client will not see the new messages with their old numbers.

In practice this is difficult to trigger because it requires the most
recently added message to be removed and have the removal show up in a
separate pull request.  Still it can happen and it should be handled.

Instead of inferring the highest number ever used by finding the maximum
number in the message map, track the largest number ever assigned directly.

Update Msgmap to track this value and update the indexers to use this
value.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r--lib/PublicInbox/Msgmap.pm23
-rw-r--r--lib/PublicInbox/SearchIdx.pm8
-rw-r--r--lib/PublicInbox/V2Writable.pm4
3 files changed, 27 insertions, 8 deletions
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index fdc71e46..d474bade 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -50,6 +50,10 @@ sub new_file {
                 create_tables($dbh);
                 $dbh->begin_work;
                 $self->created_at(time) unless $self->created_at;
+
+                my (undef, $max) = $self->minmax();
+                $max ||= 0;
+                $self->num_highwater($max);
                 $dbh->commit;
         }
         $self;
@@ -107,6 +111,17 @@ sub created_at {
         $self->meta_accessor('created_at', $second);
 }
 
+sub num_highwater {
+        my ($self, $num) = @_;
+        my $high = $self->{num_highwater} ||=
+            $self->meta_accessor('num_highwater');
+        if (defined($num) && (!defined($high) || ($num > $high))) {
+                $self->{num_highwater} = $num;
+                $self->meta_accessor('num_highwater', $num);
+        }
+        $self->{num_highwater};
+}
+
 sub mid_insert {
         my ($self, $mid) = @_;
         my $dbh = $self->{dbh};
@@ -114,7 +129,9 @@ sub mid_insert {
 INSERT OR IGNORE INTO msgmap (mid) VALUES (?)
 
         return if $sth->execute($mid) == 0;
-        $dbh->last_insert_id(undef, undef, 'msgmap', 'num');
+        my $num = $dbh->last_insert_id(undef, undef, 'msgmap', 'num');
+        $self->num_highwater($num) unless !defined($num);
+        $num;
 }
 
 sub mid_for {
@@ -213,7 +230,9 @@ sub mid_set {
                 $self->{dbh}->prepare(
                         'INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)');
         };
-        $sth->execute($num, $mid);
+        my $result = $sth->execute($num, $mid);
+        $self->num_highwater($num) if (defined($result) && $result == 1);
+        $result;
 }
 
 sub DESTROY {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ac821ac0..2532c8df 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -627,20 +627,20 @@ sub _git_log {
                              --no-notes --no-color --no-renames
                              --diff-filter=AM), $range);
         ++$fcount while <$fh>;
-        my (undef, $max) = $self->{mm}->minmax;
+        my $high = $self->{mm}->num_highwater;
 
         if (index($range, '..') < 0) {
-                if ($max && $max == $fcount) {
+                if ($high && $high == $fcount) {
                         # fix up old bugs in full indexes which caused messages to
                         # not appear in Msgmap
-                        $self->{regen_up} = $max;
+                        $self->{regen_up} = $high;
                 } else {
                         # normal regen is for for fresh data
                         $self->{regen_down} = $fcount;
                 }
         } else {
                 # Give oldest messages the smallest numbers
-                $self->{regen_down} = $max + $fcount;
+                $self->{regen_down} = $high + $fcount;
         }
 
         $git->popen(qw/log --no-notes --no-color --no-renames
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 934640eb..c450980c 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -879,9 +879,9 @@ sub index_sync {
         my $mm_tmp = $self->{mm}->tmp_clone;
         my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max);
 
-        my ($min, $max) = $mm_tmp->minmax;
+        my $high = $self->{mm}->num_highwater();
         my $regen = $self->index_prepare($opts, $epoch_max, $ranges);
-        $$regen += $max if $max;
+        $$regen += $high if $high;
         my $D = {}; # "$mid\0$cid" => $oid
         my @cmd = qw(log --raw -r --pretty=tformat:%H
                         --no-notes --no-color --no-abbrev --no-renames);