diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Import.pm | 1 | ||||
-rw-r--r-- | lib/PublicInbox/Msgmap.pm | 23 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 30 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 20 |
4 files changed, 53 insertions, 21 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 4e3b4c55..bfa7a805 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -451,6 +451,7 @@ sub done { sub atfork_child { my ($self) = @_; foreach my $f (qw(in out)) { + next unless defined($self->{$f}); close $self->{$f} or die "failed to close import[$f]: $!\n"; } } diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index fdc71e46..d474bade 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -50,6 +50,10 @@ sub new_file { create_tables($dbh); $dbh->begin_work; $self->created_at(time) unless $self->created_at; + + my (undef, $max) = $self->minmax(); + $max ||= 0; + $self->num_highwater($max); $dbh->commit; } $self; @@ -107,6 +111,17 @@ sub created_at { $self->meta_accessor('created_at', $second); } +sub num_highwater { + my ($self, $num) = @_; + my $high = $self->{num_highwater} ||= + $self->meta_accessor('num_highwater'); + if (defined($num) && (!defined($high) || ($num > $high))) { + $self->{num_highwater} = $num; + $self->meta_accessor('num_highwater', $num); + } + $self->{num_highwater}; +} + sub mid_insert { my ($self, $mid) = @_; my $dbh = $self->{dbh}; @@ -114,7 +129,9 @@ sub mid_insert { INSERT OR IGNORE INTO msgmap (mid) VALUES (?) return if $sth->execute($mid) == 0; - $dbh->last_insert_id(undef, undef, 'msgmap', 'num'); + my $num = $dbh->last_insert_id(undef, undef, 'msgmap', 'num'); + $self->num_highwater($num) unless !defined($num); + $num; } sub mid_for { @@ -213,7 +230,9 @@ sub mid_set { $self->{dbh}->prepare( 'INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)'); }; - $sth->execute($num, $mid); + my $result = $sth->execute($num, $mid); + $self->num_highwater($num) if (defined($result) && $result == 1); + $result; } sub DESTROY { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 29868d99..ca832ad3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -582,7 +582,8 @@ sub read_log { my $blob = $1; if (delete $D{$blob}) { if (defined $self->{regen_down}) { - $self->{regen_down}--; + my $num = $self->{regen_down}--; + $self->{mm}->num_highwater($num); } next; } @@ -618,23 +619,28 @@ sub _git_log { my ($self, $range) = @_; my $git = $self->{git}; + # Count the new files so they can be added newest to oldest + # and still have numbers increasing from oldest to newest + my $fcount = 0; + # can't use 'rev-list --count' if we use --diff-filter + my $fh = $git->popen(qw(log --pretty=tformat:%h + --no-notes --no-color --no-renames + --diff-filter=AM), $range); + ++$fcount while <$fh>; + my $high = $self->{mm}->num_highwater; + if (index($range, '..') < 0) { - my $regen_max = 0; - # can't use 'rev-list --count' if we use --diff-filter - my $fh = $git->popen(qw(log --pretty=tformat:%h - --no-notes --no-color --no-renames - --diff-filter=AM), $range); - ++$regen_max while <$fh>; - my (undef, $max) = $self->{mm}->minmax; - - if ($max && $max == $regen_max) { + if ($high && $high == $fcount) { # fix up old bugs in full indexes which caused messages to # not appear in Msgmap - $self->{regen_up} = $max; + $self->{regen_up} = $high; } else { # normal regen is for for fresh data - $self->{regen_down} = $regen_max; + $self->{regen_down} = $fcount; } + } else { + # Give oldest messages the smallest numbers + $self->{regen_down} = $high + $fcount; } $git->popen(qw/log --no-notes --no-color --no-renames diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 934640eb..0396d9f5 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -658,7 +658,7 @@ sub mark_deleted { } sub reindex_oid { - my ($self, $mm_tmp, $D, $git, $oid, $regen) = @_; + my ($self, $mm_tmp, $D, $git, $oid, $regen, $reindex) = @_; my $len; my $msgref = $git->cat_file($oid, \$len); my $mime = PublicInbox::MIME->new($$msgref); @@ -676,6 +676,7 @@ sub reindex_oid { if (defined $n && $n > $num) { $mid0 = $mid; $num = $n; + $self->{mm}->mid_set($num, $mid0); } } if (!defined($mid0) && $regen && !$del) { @@ -700,7 +701,8 @@ sub reindex_oid { if (!defined($mid0) || $del) { if (!defined($mid0) && $del) { # expected for deletes - $$regen--; + $num = $$regen--; + $self->{mm}->num_highwater($num) unless $reindex; return } @@ -840,7 +842,10 @@ sub unindex_oid { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } - $self->{unindexed}->{$_}++ foreach keys %gone; + foreach my $num (keys %gone) { + $self->{unindexed}->{$_}++; + $self->{mm}->num_delete($num); + } $self->unindex_oid_remote($oid, $mid); } } @@ -877,11 +882,12 @@ sub index_sync { return unless defined $latest; $self->idx_init; # acquire lock my $mm_tmp = $self->{mm}->tmp_clone; - my $ranges = $opts->{reindex} ? [] : $self->last_commits($epoch_max); + my $reindex = $opts->{reindex}; + my $ranges = $reindex ? [] : $self->last_commits($epoch_max); - my ($min, $max) = $mm_tmp->minmax; + my $high = $self->{mm}->num_highwater(); my $regen = $self->index_prepare($opts, $epoch_max, $ranges); - $$regen += $max if $max; + $$regen += $high if $high; my $D = {}; # "$mid\0$cid" => $oid my @cmd = qw(log --raw -r --pretty=tformat:%H --no-notes --no-color --no-abbrev --no-renames); @@ -903,7 +909,7 @@ sub index_sync { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { $self->reindex_oid($mm_tmp, $D, $git, $1, - $regen); + $regen, $reindex); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) { $self->mark_deleted($D, $git, $1); } |