about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2018-08-03 20:05:24 +0000
committerEric Wong <e@80x24.org>2018-08-03 20:05:24 +0000
commit861bec7bec5908871e5b0ede244cb1e990a47403 (patch)
tree8d116f0c9ad6a3af4d1b4d4041c2be5bbdf42065 /lib/PublicInbox/SearchIdx.pm
parent7808b18c63f9d754a56ad7b2bd2385545d3521fb (diff)
parent72fa722146912781230c54d7282bf7c1147e0455 (diff)
downloadpublic-inbox-861bec7bec5908871e5b0ede244cb1e990a47403.tar.gz
Incremental indexing fixes from Eric W. Biederman.

These prevent the highest message number in msgmap from
being reassigned after deletes in rare cases and ensure
messages are deleted from msgmap in v2.

* eb/index-incremental:
  V2Writeable.pm: In unindex_oid delete the message from msgmap
  V2Writeable.pm: Ensure that a found message number is in the msgmap
  SearchIdx,V2Writeable: Update num_highwater on optimized deletes
  t/v[12]reindex.t: Verify the num highwater is as expected
  t/v[12]reindex.t Verify num_highwater
  Msgmap.pm: Track the largest value of num ever assigned
  SearchIdx.pm: Always assign numbers backwards during incremental indexing
  t/v[12]reindex.t: Test incremental indexing works
  t/v[12]reindex.t: Test that the resulting msgmap is as expected
  t/v[12]reindex.t: Place expected second in Xapian tests
  t/v2reindex.t: Isolate the test cases more
  t/v1reindex.t: Isolate the test cases
  Import.pm: Don't assume {in} and {out} always exist
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r--lib/PublicInbox/SearchIdx.pm30
1 files changed, 18 insertions, 12 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 29868d99..ca832ad3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -582,7 +582,8 @@ sub read_log {
                         my $blob = $1;
                         if (delete $D{$blob}) {
                                 if (defined $self->{regen_down}) {
-                                        $self->{regen_down}--;
+                                        my $num = $self->{regen_down}--;
+                                        $self->{mm}->num_highwater($num);
                                 }
                                 next;
                         }
@@ -618,23 +619,28 @@ sub _git_log {
         my ($self, $range) = @_;
         my $git = $self->{git};
 
+        # Count the new files so they can be added newest to oldest
+        # and still have numbers increasing from oldest to newest
+        my $fcount = 0;
+        # can't use 'rev-list --count' if we use --diff-filter
+        my $fh = $git->popen(qw(log --pretty=tformat:%h
+                             --no-notes --no-color --no-renames
+                             --diff-filter=AM), $range);
+        ++$fcount while <$fh>;
+        my $high = $self->{mm}->num_highwater;
+
         if (index($range, '..') < 0) {
-                my $regen_max = 0;
-                # can't use 'rev-list --count' if we use --diff-filter
-                my $fh = $git->popen(qw(log --pretty=tformat:%h
-                                --no-notes --no-color --no-renames
-                                --diff-filter=AM), $range);
-                ++$regen_max while <$fh>;
-                my (undef, $max) = $self->{mm}->minmax;
-
-                if ($max && $max == $regen_max) {
+                if ($high && $high == $fcount) {
                         # fix up old bugs in full indexes which caused messages to
                         # not appear in Msgmap
-                        $self->{regen_up} = $max;
+                        $self->{regen_up} = $high;
                 } else {
                         # normal regen is for for fresh data
-                        $self->{regen_down} = $regen_max;
+                        $self->{regen_down} = $fcount;
                 }
+        } else {
+                # Give oldest messages the smallest numbers
+                $self->{regen_down} = $high + $fcount;
         }
 
         $git->popen(qw/log --no-notes --no-color --no-renames