From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E052C1FB06 for ; Sat, 7 Nov 2020 10:57:06 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/10] v2writable: more accurate {current_info} warnings/progress Date: Sat, 7 Nov 2020 10:56:58 +0000 Message-Id: <20201107105700.12586-9-e@80x24.org> In-Reply-To: <20201107105700.12586-1-e@80x24.org> References: <20201107105700.12586-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: With async git blob retrievals, the OID being enqueued and the OID being processed can be totally unrelated and misleading. So append the OID to {current_info} inside the cat_async callbacks, where it names the blob actually being processed, instead of setting it when the request is enqueued. We'll also prefix $INBOX_DIR for v2, and not just the epoch, since we could be indexing multiple inboxes via both -index and -extindex. 
--- lib/PublicInbox/ExtSearchIdx.pm | 7 +++++++ lib/PublicInbox/V2Writable.pm | 24 ++++++++++++++++++------ script/public-inbox-extindex | 1 + 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 3e7f5604..50342802 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -250,17 +250,22 @@ sub cur_ibx_xnum ($$) { sub index_oid { # git->cat_async callback for 'm' my ($bref, $oid, $type, $size, $req) = @_; + my $self = $req->{self}; + local $self->{current_info} = "$self->{current_info} $oid"; return if is_bad_blob($oid, $type, $size, $req->{oid}); my $new_smsg = $req->{new_smsg} = bless { blob => $oid, }, 'PublicInbox::Smsg'; $new_smsg->{bytes} = $size + crlf_adjust($$bref); defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return; + ++${$req->{nr}}; do_step($req); } sub unindex_oid { # git->cat_async callback for 'd' my ($bref, $oid, $type, $size, $req) = @_; + my $self = $req->{self}; + local $self->{current_info} = "$self->{current_info} $oid"; return if is_bad_blob($oid, $type, $size, $req->{oid}); return if defined(cur_ibx_xnum($req, $bref)); # was re-added do_step($req); @@ -286,6 +291,8 @@ sub _sync_inbox ($$$) { -opt => $opt, self => $self, ibx => $ibx, + nr => \(my $nr = 0), + -regen_fmt => "%u/?\n", }; my $v = $ibx->version; my $ekey = $ibx->eidx_key; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 224675ab..18f33655 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -882,12 +882,13 @@ sub reindex_checkpoint ($$) { sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; + my $self = $arg->{self}; + local $self->{current_info} = "$self->{current_info} $oid"; return if $size == 0; # purged my ($num, $mid0); my $eml = PublicInbox::Eml->new($$bref); my $mids = mids($eml); my $chash = content_hash($eml); - my $self = $arg->{self}; if (scalar(@$mids) == 0) { warn 
"E: $oid has no Message-ID, skipping\n"; @@ -1047,6 +1048,11 @@ sub sync_prepare ($$) { my $pr = $sync->{-opt}->{-progress}; my $regen_max = 0; my $head = $sync->{ibx}->{ref_head} || 'HEAD'; + my $pfx; + if ($pr) { + ($pfx) = ($sync->{ibx}->{inboxdir} =~ m!([^/]+)\z!g); + $pfx //= $sync->{ibx}->{inboxdir}; + } # reindex stops at the current heads and we later rerun index_sync # without {reindex} @@ -1068,7 +1074,7 @@ sub sync_prepare ($$) { my $range = log_range($sync, $unit, $tip) or next; # can't use 'rev-list --count' if we use --diff-filter - $pr->("$i.git counting $range ... ") if $pr; + $pr->("$pfx $i.git counting $range ... ") if $pr; # Don't bump num_highwater on --reindex by using {D}. # We intentionally do NOT use {D} in the non-reindex case # because we want NNTP article number gaps from unindexed @@ -1086,10 +1092,10 @@ sub sync_prepare ($$) { # our code and blindly injects "d" file history into git repos if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); + local $self->{current_info} = 'leftover '; my $unindex_oid = $self->can('unindex_oid'); for my $oid (@leftovers) { $oid = unpack('H*', $oid); - $self->{current_info} = "leftover $oid"; my $req = { %$sync, oid => $oid }; $self->git->cat_async($oid, $unindex_oid, $req); } @@ -1121,6 +1127,7 @@ sub unindex_oid_aux ($$$) { sub unindex_oid ($$;$) { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; my $self = $sync->{self}; + local $self->{current_info} = "$self->{current_info} $oid"; my $unindexed = $sync->{in_unindex} ? 
$sync->{unindexed} : undef; my $mm = $self->{mm}; my $mids = mids(PublicInbox::Eml->new($bref)); @@ -1230,10 +1237,15 @@ sub index_todo ($$$) { my $all = $self->git; my $index_oid = $self->can('index_oid'); my $unindex_oid = $self->can('unindex_oid'); - my ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g); - $pfx //= $unit->{git}->{git_dir}; + my $pfx; + if ($unit->{git}->{git_dir} =~ m!/([^/]+)/git/([0-9]+\.git)\z!) { + $pfx = "$1 $2"; # v2 + } else { # v1 + ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g); + $pfx //= $unit->{git}->{git_dir}; + } + local $self->{current_info} = "$pfx "; while (my ($f, $at, $ct, $oid) = $stk->pop_rec) { - $self->{current_info} = "$pfx $oid"; my $req = { %$sync, autime => $at, cotime => $ct, oid => $oid }; if ($f eq 'm') { if ($sync->{max_size}) { diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index a58f35ca..bb1e174a 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -37,6 +37,7 @@ require PublicInbox::Admin; my $cfg = PublicInbox::Config->new; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die(qw(-search)); +PublicInbox::Admin::progress_prepare($opt); require PublicInbox::ExtSearchIdx; my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); $eidx->attach_inbox($_) for @ibxs;