about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-08-24 01:22:32 +0000
committerEric Wong <e@80x24.org>2023-08-24 07:47:50 +0000
commitcf96412eb8f193ebd334fae340b2d91b6b7f2afe (patch)
treee71b3af27231e3315aa23e47b2e907cedc337dad
parent2f41101df13a82befc24edefe6946b2d52d2c96c (diff)
downloadpublic-inbox-cf96412eb8f193ebd334fae340b2d91b6b7f2afe.tar.gz
The --show-roots switch aids in development, but I'm not sure whether
it's going to stay or be moved into another interface.
-rw-r--r--lib/PublicInbox/CodeSearchIdx.pm32
-rwxr-xr-xscript/public-inbox-cindex2
2 files changed, 33 insertions, 1 deletions
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 2480dbd2..e795c2b3 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -1058,6 +1058,37 @@ sub _prep_ibx { # each_inbox callback
                 push @{$self->{IBX}}, $ibx;
 }
 
+# diagnostics: for each 'G'-prefixed term, print "commit <term sans 'G'>"
+# and xap_terms('P', ...) of every document matching both it and 'Tr'
+sub show_roots {
+        my ($self) = @_;
+        local $self->{xdb}; # previous value is restored when we return
+        my $cur = $self->xdb->allterms_begin('G');
+        my $end = $self->{xdb}->allterms_end('G');
+        my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r');
+        my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+        $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+        $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+        for (; $cur != $end; $cur++) {
+                my $G_oidhex = $cur->get_termname;
+                my $qry = $PublicInbox::Search::X{Query}->new(
+                                PublicInbox::Search::OP_FILTER(),
+                                $qrepo, $G_oidhex);
+                $enq->set_query($qry);
+                my ($off, $lim) = (0, 10000); # fetch matches in pages
+                say 'commit ',substr($G_oidhex, 1), ' appears in:';
+                while (1) {
+                        my $mset = $enq->get_mset($off, $lim);
+                        my $size = $mset->size or last; # empty page: done
+                        for my $x ($mset->items) {
+                                say '- /', substr($_, 1)
+                                    for xap_terms('P', $x->get_document);
+                        }
+                        $off += $size;
+                }
+        }
+}
+
 sub cidx_run { # main entry point
         my ($self) = @_;
         my $restore_umask = prep_umask($self);
@@ -1150,6 +1181,7 @@ sub cidx_run { # main entry point
         PublicInbox::DS::event_loop($MY_SIG, $SIGSET) if shards_active();
         PublicInbox::DS->Reset;
         $self->lock_release(!!$NCHANGE);
+        show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics
 }
 
 sub ipc_atfork_child { # @IDX_SHARDS
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
index 888c8b10..0526434c 100755
--- a/script/public-inbox-cindex
+++ b/script/public-inbox-cindex
@@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
                 indexlevel|index-level|L=s associate associate-max=i
                 associate-date-range=s associate-prefixes=s@
                 batch_size|batch-size=s max_size|max-size=s
-                include|I=s@ only=s@ all
+                include|I=s@ only=s@ all show-roots
                 project-list=s exclude=s@
                 sort-parallel=s sort-compress-program=s sort-buffer-size=s
                 d=s update|u scan! prune dry-run|n C=s@ help|h))