diff options
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 24 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 10 | ||||
-rw-r--r-- | lib/PublicInbox/Xapcmd.pm | 8 |
3 files changed, 29 insertions, 13 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 0aeeb6bc..9c291066 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -549,12 +549,12 @@ sub index_sync { $self->{-inbox}->with_umask(sub { $self->_index_sync($opts) }) } -sub batch_adjust ($$$$) { - my ($max, $bytes, $batch_cb, $latest) = @_; +sub batch_adjust ($$$$$) { + my ($max, $bytes, $batch_cb, $latest, $nr) = @_; $$max -= $bytes; if ($$max <= 0) { $$max = BATCH_BYTES; - $batch_cb->($latest); + $batch_cb->($nr, $latest); } } @@ -573,6 +573,7 @@ sub read_log { my %D; my $line; my $newest; + my $nr = 0; while (defined($line = <$log>)) { if ($line =~ /$addmsg/o) { my $blob = $1; @@ -584,7 +585,7 @@ sub read_log { next; } my $mime = do_cat_mail($git, $blob, \$bytes) or next; - batch_adjust(\$max, $bytes, $batch_cb, $latest); + batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); $add_cb->($self, $mime, $bytes, $blob); } elsif ($line =~ /$delmsg/o) { my $blob = $1; @@ -599,7 +600,7 @@ sub read_log { my $mime = do_cat_mail($git, $blob, \$bytes) or next; $del_cb->($self, $mime); } - $batch_cb->($latest, $newest); + $batch_cb->($nr, $latest, $newest); } sub _msgmap_init { @@ -612,7 +613,7 @@ sub _msgmap_init { } sub _git_log { - my ($self, $range) = @_; + my ($self, $opts, $range) = @_; my $git = $self->{git}; if (index($range, '..') < 0) { @@ -629,12 +630,17 @@ sub _git_log { # Count the new files so they can be added newest to oldest # and still have numbers increasing from oldest to newest my $fcount = 0; + my $pr = $opts->{-progress}; + $pr->("counting changes\n\t$range ... ") if $pr; # can't use 'rev-list --count' if we use --diff-filter my $fh = $git->popen(qw(log --pretty=tformat:%h --no-notes --no-color --no-renames --diff-filter=AM), $range); ++$fcount while <$fh>; + close $fh; my $high = $self->{mm}->num_highwater; + $pr->("$fcount\n") if $pr; # continue previous line + $self->{ntodo} = $fcount; if (index($range, '..') < 0) { if ($high && $high == $fcount) { @@ -707,6 +713,7 @@ sub _index_sync { my ($last_commit, $lx, $xlog); my $git = $self->{git}; $git->batch_prepare; + my $pr = $opts->{-progress}; my $xdb = $self->begin_txn_lazy; my $mm = _msgmap_init($self); @@ -724,14 +731,14 @@ sub _index_sync { # ensure we leak no FDs to "git log" with Xapian <= 1.2 my $range = $lx eq '' ? $tip : "$lx..$tip"; - $xlog = _git_log($self, $range); + $xlog = _git_log($self, $opts, $range); $xdb = $self->begin_txn_lazy; } while (_last_x_commit($self, $mm) ne $last_commit); my $dbh = $mm->{dbh} if $mm; my $cb = sub { - my ($commit, $newest) = @_; + my ($nr, $commit, $newest) = @_; if ($dbh) { if ($newest) { my $cur = $mm->last_commit || ''; @@ -751,6 +758,7 @@ sub _index_sync { $git->cleanup; $xdb = _xdb_release($self); # let another process do some work... < + $pr->("indexed $nr/$self->{ntodo}\n") if $pr && $nr; if (!$newest) { $xdb = $self->begin_txn_lazy; $dbh->begin_work if $dbh; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 1ee19b21..1170f32c 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -777,6 +777,9 @@ sub reindex_oid { $git->cleanup; $mm_tmp->atfork_prepare; $self->done; # release lock + + # TODO: print progress info, here + # allow -watch or -mda to write... $self->idx_init; # reacquire lock $mm_tmp->atfork_parent; @@ -844,6 +847,7 @@ $range sub index_prepare { my ($self, $opts, $epoch_max, $ranges) = @_; + my $pr = $opts->{-progress}; my $regen_max = 0; my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master'; for (my $i = $epoch_max; $i >= 0; $i--) { @@ -858,10 +862,14 @@ sub index_prepare { $ranges->[$i] = $range; # can't use 'rev-list --count' if we use --diff-filter + $pr->("$i.git counting changes\n\t$range ... ") if $pr; + my $n = 0; my $fh = $git->popen(qw(log --pretty=tformat:%H --no-notes --no-color --no-renames --diff-filter=AM), $range, '--', 'm'); - ++$regen_max while <$fh>; + ++$n while <$fh>; + $pr->("$n\n") if $pr; + $regen_max += $n; } \$regen_max; } diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index aa3e4c09..0e448047 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -222,10 +222,11 @@ sub cpdb { $it = $src->postlist_begin(''); $end = $src->postlist_end(''); - $pfx = (split('/', $old))[-1].':'; if ($pr) { $nr = 0; $tot = $src->get_doccount; + my @p = split('/', $old); + $pfx = "$p[-2]/$p[-1]:"; $fmt = "$pfx % ".length($tot)."u/$tot\n"; $pr->("$pfx copying $tot documents\n"); } @@ -255,7 +256,6 @@ sub cpdb { return unless $opt->{compact}; $src = $dst = undef; # flushes and closes - $pfx = undef unless $fmt; $pr->("$pfx compacting...\n") if $pr; # this is probably the best place to do xapian-compact @@ -268,11 +268,11 @@ sub cpdb { } my ($r, $w); - if ($pfx && pipe($r, $w)) { + if ($pr && pipe($r, $w)) { $rdr->{1} = fileno($w); } my $pid = spawn($cmd, $env, $rdr); - if ($pfx) { + if ($pr) { close $w or die "close: \$w: $!"; foreach (<$r>) { s/\r/\r$pfx /g; |