diff options
author | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-03-22 03:39:30 +0000 |
---|---|---|
committer | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-03-22 07:57:27 +0000 |
commit | 4c5fa84eec85cf406cbcd900f92dc478ed14ea30 (patch) | |
tree | 5e4b2b970d510f7910a31f91322fa8d54b7900dc /lib | |
parent | 4048e3c000ee6659a5754dea697a1b77a8022bbd (diff) | |
download | public-inbox-4c5fa84eec85cf406cbcd900f92dc478ed14ea30.tar.gz |
Allow best-effort regeneration of NNTP article numbers from cloned git repositories in addition to indexing Xapian. Article numbers will not remain consistent when we add purge support, though.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 61 |
1 file changed, 54 insertions, 7 deletions
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 550a74d4..605f6887 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -494,7 +494,7 @@ sub mark_deleted { } sub reindex_oid { - my ($self, $mm_tmp, $D, $git, $oid) = @_; + my ($self, $mm_tmp, $D, $git, $oid, $regen) = @_; my $len; my $msgref = $git->cat_file($oid, \$len); my $mime = PublicInbox::MIME->new($$msgref); @@ -514,8 +514,27 @@ sub reindex_oid { $num = $n; } } + if (!defined($mid0) && $regen && !$del) { + $num = $$regen--; + die "BUG: ran out of article numbers\n" if $num <= 0; + my $mm = $self->{skel}->{mm}; + foreach my $mid (@$mids) { + if ($mm->mid_set($num, $mid) == 1) { + $mid0 = $mid; + last; + } + } + if (!defined($mid0)) { + my $id = '<' . join('> <', @$mids) . '>'; + warn "Message-Id $id unusable for $num\n"; + } + } + if (!defined($mid0) || $del) { - return if (!defined($mid0) && $del); # expected for deletes + if (!defined($mid0) && $del) { # expected for deletes + $$regen--; + return + } my $id = '<' . join('> <', @$mids) . 
'>'; defined($mid0) or @@ -546,19 +565,45 @@ sub reindex_oid { } sub reindex { - my ($self) = @_; + my ($self, $regen) = @_; my $ibx = $self->{-inbox}; my $pfx = "$ibx->{mainrepo}/git"; my $max_git; my $latest = git_dir_latest($self, \$max_git); return unless defined $latest; - my @cmd = qw(log --raw -r --pretty=tformat:%h - --no-notes --no-color --no-abbrev); my $head = $ibx->{ref_head} || 'refs/heads/master'; $self->idx_init; # acquire lock my $x40 = qr/[a-f0-9]{40}/; my $mm_tmp = $self->{skel}->{mm}->tmp_clone; + if (!$regen) { + my (undef, $max) = $mm_tmp->minmax; + unless (defined $max) { + $regen = 1; + warn +"empty msgmap.sqlite3, regenerating article numbers\n"; + } + } + my $tip; # latest commit out of all git repos + if ($regen) { + my $regen_max = 0; + for (my $cur = $max_git; $cur >= 0; $cur--) { + die "already reindexing!\n" if $self->{reindex_pipe}; + my $git = PublicInbox::Git->new("$pfx/$cur.git"); + chomp($tip = $git->qx('rev-parse', $head)) unless $tip; + my $h = $cur == $max_git ? $tip : $head; + my @count = ('rev-list', '--count', $h, '--', 'm'); + $regen_max += $git->qx(@count); + } + die "No messages found in $pfx/*.git, bug?\n" unless $regen_max; + $regen = \$regen_max; + } my $D = {}; + my @cmd = qw(log --raw -r --pretty=tformat:%h + --no-notes --no-color --no-abbrev); + + # if we are regenerating, we must not use a newer tip commit than what + # the regeneration counter used: + $tip ||= $head; # work backwards through history for (my $cur = $max_git; $cur >= 0; $cur--) { @@ -566,12 +611,14 @@ sub reindex { my $cmt; my $git_dir = "$pfx/$cur.git"; my $git = PublicInbox::Git->new($git_dir); - my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $head); + my $h = $cur == $max_git ? 
$tip : $head; + my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $h); while (<$fh>) { if (/\A$x40$/o) { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { - $self->reindex_oid($mm_tmp, $D, $git, $1); + $self->reindex_oid($mm_tmp, $D, $git, $1, + $regen); } elsif (m!\A:\d{6} 100644 $x40 ($x40) [AM]\t_/D$!o) { $self->mark_deleted($D, $git, $1); } |