user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/14] www: load and use cindex join data
Date: Tue, 28 Nov 2023 14:56:21 +0000	[thread overview]
Message-ID: <20231128145628.1455176-9-e@80x24.org> (raw)
In-Reply-To: <20231128145628.1455176-1-e@80x24.org>

This is a major step in solving the problem of having to
manually associate hundreds/thousands of coderepos with
hundreds/thousands of public-inboxes to power solver
(and more).
---
 lib/PublicInbox/CodeSearch.pm    | 153 +++++++++++++++++++++++++++++--
 lib/PublicInbox/CodeSearchIdx.pm |  42 ++++-----
 lib/PublicInbox/Config.pm        |  39 +++++++-
 lib/PublicInbox/Search.pm        |  17 ++++
 lib/PublicInbox/SearchIdx.pm     |  10 +-
 lib/PublicInbox/SolverGit.pm     |   6 +-
 lib/PublicInbox/View.pm          |   7 +-
 lib/PublicInbox/WWW.pm           |   1 +
 lib/PublicInbox/WwwCoderepo.pm   |  41 ++++++++-
 lib/PublicInbox/WwwText.pm       |  19 +++-
 t/cindex.t                       |  28 +++++-
 xt/solver.t                      |   3 +-
 12 files changed, 312 insertions(+), 54 deletions(-)

diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index eb057525..7d7f6df6 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -21,7 +21,7 @@ use constant {
 our @CODE_NRP;
 our @CODE_VMAP = (
 	[ AT, 'd:' ], # mairix compat
-	[ AT, 'dt:' ], # mail compat
+	[ AT, 'dt:' ], # public-inbox mail compat
 	[ CT, 'ct:' ],
 );
 
@@ -51,7 +51,7 @@ my %prob_prefix = ( # copied from PublicInbox::Search
 sub new {
 	my ($cls, $dir, $cfg) = @_;
 	# can't have a PublicInbox::Config here due to circular refs
-	bless { xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
+	bless { topdir => $dir, xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
 		-cfg_f => $cfg->{-f} }, $cls;
 }
 
@@ -63,7 +63,20 @@ sub join_data {
 	my $cur = $self->xdb->get_metadata($key) or return;
 	$cur = eval { PublicInbox::Config::json()->decode(uncompress($cur)) };
 	warn "E: $@ (corrupt metadata in `$key' key?)" if $@;
-	$cur;
+	my @m = grep { ref($cur->{$_}) ne 'ARRAY' } qw(ekeys roots ibx2root);
+	if (@m) {
+		warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} missing: @m
+EOM
+		undef;
+	} elsif (@{$cur->{ekeys}} != @{$cur->{ibx2root}}) {
+		warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} mismatched ekeys and ibx2root
+EOM
+		undef;
+	} else {
+		$cur;
+	}
 }
 
 sub qparse_new ($) {
@@ -196,16 +209,136 @@ sub roots2paths { # for diagnostics
 	\%ret;
 }
 
-sub paths2roots { # for diagnostics
-	my ($self) = @_;
+sub root_oids ($$) {
+	my ($self, $git_dir) = @_;
+	my @ids = $self->docids_by_postlist('P'.$git_dir);
+	@ids or warn <<"";
+BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
+
+	warn <<"" if @ids > 1;
+BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir}
+
 	my %ret;
-	my $tmp = roots2paths($self);
-	for my $root_oidhex (keys %$tmp) {
-		my $paths = delete $tmp->{$root_oidhex};
-		push @{$ret{$_}}, $root_oidhex for @$paths;
+	for my $docid (@ids) {
+		my @oids = xap_terms('G', $self->xdb, $docid);
+		@ret{@oids} = @oids;
+	}
+	sort keys %ret;
+}
+
+sub paths2roots {
+	my ($self, $paths) = @_;
+	my %ret;
+	if ($paths) {
+		for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) }
+	} else {
+		my $tmp = roots2paths($self);
+		for my $root_oidhex (keys %$tmp) {
+			my $paths = delete $tmp->{$root_oidhex};
+			push @{$ret{$_}}, $root_oidhex for @$paths;
+		}
+		@$_ = sort(@$_) for values %ret;
 	}
-	@$_ = sort(@$_) for values %ret;
 	\%ret;
 }
 
+sub load_commit_times { # each_cindex callback
+	my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...]
+	my (@pending, $rec, $dir, @ids, $doc);
+	while ($rec = shift @$todo) {
+		@ids = $self->docids_by_postlist('P'.$rec->[1]->{git_dir});
+		if (@ids) {
+			warn <<EOM if @ids > 1;
+W: $rec->[1]->{git_dir} indexed multiple times in $self->{topdir}
+EOM
+			for (@ids) {
+				$doc = $self->get_doc($_) // next;
+				$rec->[0] = int_val($doc, CT);
+				last;
+			}
+		} else { # may be in another cindex:
+			push @pending, $rec;
+		}
+	}
+	@$todo = @pending;
+}
+
+sub load_coderepos { # each_cindex callback
+	my ($self, $pi_cfg) = @_;
+	my $name = $self->{name};
+	my $cfg_f = $pi_cfg->{-f};
+	my $lpfx = $self->{localprefix} or return warn <<EOM;
+W: cindex.$name.localprefix unset in $cfg_f, ignoring cindex.$name
+EOM
+	my $lre = join('|', map { $_ .= '/'; tr!/!/!s; quotemeta } @$lpfx);
+	$lre = qr!\A(?:$lre)!;
+	my $coderepos = $pi_cfg->{-coderepos};
+	my $nick_pfx = $name eq '' ? '' : "$name/";
+	my %dir2cr;
+	for my $p ($self->all_terms('P')) {
+		my $nick = $p;
+		$nick =~ s!$lre!$nick_pfx!s or next;
+		$dir2cr{$p} = $coderepos->{$nick} //= do {
+			my $git = PublicInbox::Git->new($p);
+			$git->{nick} = $nick; # for git->pub_urls
+			$git;
+		};
+	}
+	my $jd = join_data($self) or return warn <<EOM;
+W: cindex.$name.topdir=$self->{topdir} has no usable join data for $cfg_f
+EOM
+	my ($ekeys, $roots, $ibx2root) = @$jd{qw(ekeys roots ibx2root)};
+	my $roots2paths = roots2paths($self);
+	for my $root_offs (@$ibx2root) {
+		my $ekey = shift(@$ekeys) // die 'BUG: {ekeys} empty';
+		scalar(@$root_offs) or next;
+		my $ibx = $pi_cfg->lookup_eidx_key($ekey) // do {
+			warn "W: `$ekey' gone from $cfg_f\n";
+			next;
+		};
+		my $gits = $ibx->{-repo_objs} //= [];
+		my $cr_score = $ibx->{-cr_score} //= {};
+		my %ibx_p2g = map { $_->{git_dir} => $_ } @$gits;
+		my $ibx2self; # cindex has an association w/ inbox?
+		for (@$root_offs) { # sorted by $nr descending
+			my ($nr, $root_off) = @$_;
+			my $root_oid = $roots->[$root_off] // do {
+				warn <<EOM;
+BUG: root #$root_off invalid in join data for `$ekey' with $cfg_f
+EOM
+				next;
+			};
+			my $git_dirs = $roots2paths->{$root_oid};
+			my @gits = map { $dir2cr{$_} // () } @$git_dirs;
+			$cr_score->{$_->{nick}} //= $nr for @gits;
+			@$git_dirs = grep { !$ibx_p2g{$_} } @$git_dirs;
+			# @$git_dirs or warn "W: no matches for $root_oid\n";
+			for (@$git_dirs) {
+				if (my $git = $dir2cr{$_}) {
+					$ibx_p2g{$_} = $git;
+					$ibx2self = 1;
+					$ibx->{-hide}->{www} or
+						push @{$git->{ibx_score}},
+							[ $nr, $ibx->{name} ];
+					push @$gits, $git;
+				} else {
+					warn <<EOM;
+W: no coderepo available for $_ (localprefix=@$lpfx)
+EOM
+				}
+			}
+		}
+		if (@$gits) {
+			push @{$ibx->{-csrch}}, $self if $ibx2self;
+		} else {
+			delete $ibx->{-repo_objs};
+			delete $ibx->{-cr_score};
+		}
+	}
+	for my $git (values %dir2cr) {
+		my $s = $git->{ibx_score};
+		@$s = sort { $b->[0] <=> $a->[0] } @$s if $s;
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index bb1d698b..a6cbe0b0 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -172,7 +172,7 @@ sub count_shards { scalar($_[0]->xdb_shards_flat) }
 sub update_commit ($$$) {
 	my ($self, $cmt, $roots) = @_; # fields from @FMT
 	my $x = 'Q'.$cmt->{H};
-	my ($docid, @extra) = sort { $a <=> $b } docids_by_postlist($self, $x);
+	my ($docid, @extra) = sort { $a <=> $b } $self->docids_by_postlist($x);
 	@extra and warn "W: $cmt->{H} indexed multiple times, pruning ",
 			join(', ', map { "#$_" } @extra), "\n";
 	$self->{xdb}->delete_document($_) for @extra;
@@ -377,15 +377,6 @@ sub seen ($$) {
 # used to select the shard for a GIT_DIR
 sub git_dir_hash ($) { hex(substr(sha256_hex($_[0]), 0, 8)) }
 
-sub docids_by_postlist ($$) { # consider moving to PublicInbox::Search
-	my ($self, $q) = @_;
-	my $cur = $self->{xdb}->postlist_begin($q);
-	my $end = $self->{xdb}->postlist_end($q);
-	my @ids;
-	for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
-	@ids;
-}
-
 sub _cb { # run_await cb
 	my ($pid, $cmd, undef, $opt, $cb, $self, $git, @arg) = @_;
 	return if $DO_QUIT;
@@ -452,7 +443,7 @@ sub prep_repo ($$) {
 
 sub check_existing { # retry_reopen callback
 	my ($shard, $self, $git) = @_;
-	my @docids = docids_by_postlist($shard, 'P'.$git->{git_dir});
+	my @docids = $shard->docids_by_postlist('P'.$git->{git_dir});
 	my $docid = shift(@docids) // return get_roots($self, $git);
 	my $doc = $shard->get_doc($docid) //
 			die "BUG: no #$docid ($git->{git_dir})";
@@ -778,7 +769,7 @@ sub prune_init { # via wq_io_do in IDX_SHARDS
 
 sub prune_one { # via wq_io_do in IDX_SHARDS
 	my ($self, $term) = @_;
-	my @docids = docids_by_postlist($self, $term);
+	my @docids = $self->docids_by_postlist($term);
 	for (@docids) {
 		$TXN_BYTES -= $self->{xdb}->get_doclength($_) * 42;
 		$self->{xdb}->delete_document($_);
@@ -894,10 +885,9 @@ sub current_join_data ($) {
 sub score_old_join_data ($$$) {
 	my ($self, $score, $ekeys_new) = @_;
 	my $old = ($JOIN{reset} ? undef : current_join_data($self)) or return;
-	my @old = @$old{qw(ekeys roots ibx2root)};
-	@old == 3 or return warn "W: ekeys/roots missing from old JOIN data\n";
 	progress($self, 'merging old join data...');
-	my ($ekeys_old, $roots_old, $ibx2root_old) = @old;
+	my ($ekeys_old, $roots_old, $ibx2root_old) =
+					@$old{qw(ekeys roots ibx2root)};
 	# score: "ibx_off root_off" => nr
 	my $i = -1;
 	my %root2id_new = map { $_ => ++$i } @OFF2ROOT;
@@ -905,16 +895,24 @@ sub score_old_join_data ($$$) {
 	my %ekey2id_new = map { $_ => ++$i } @$ekeys_new;
 	for my $ibx_off_old (0..$#$ibx2root_old) {
 		my $root_offs_old = $ibx2root_old->[$ibx_off_old];
-		my $ekey = $ekeys_old->[$ibx_off_old] //
-			warn "W: no ibx #$ibx_off_old in old JOIN data\n";
-		my $ibx_off_new = $ekey2id_new{$ekey // next} //
+		my $ekey = $ekeys_old->[$ibx_off_old] // do {
+			warn "W: no ibx #$ibx_off_old in old join data\n";
+			next;
+		};
+		my $ibx_off_new = $ekey2id_new{$ekey} // do {
 			warn "W: `$ekey' no longer exists\n";
+			next;
+		};
 		for (@$root_offs_old) {
 			my ($nr, $rid_old) = @$_;
-			my $root_old = $roots_old->[$rid_old] //
-				warn "W: no root #$rid_old in old JOIN data\n";
-			my $rid_new = $root2id_new{$root_old // next} //
+			my $root_old = $roots_old->[$rid_old] // do {
+				warn "W: no root #$rid_old in old data\n";
+				next;
+			};
+			my $rid_new = $root2id_new{$root_old} // do {
 				warn "W: root `$root_old' no longer exists\n";
+				next;
+			};
 			$score->{"$ibx_off_new $rid_new"} += $nr;
 		}
 	}
@@ -963,7 +961,7 @@ sub do_join {
 		progress($self, "$ekey => $root has $nr matches");
 		push @{$new->{ibx2root}->[$ibx_off]}, [ $nr, $root_off ];
 	}
-	for my $ary (values %$new) { # sort by nr
+	for my $ary (values %$new) { # sort by nr (largest first)
 		for (@$ary) { @$_ = sort { $b->[0] <=> $a->[0] } @$_ }
 	}
 	$new->{ekeys} = \@ekeys;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 9bee94b8..779e3140 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -412,8 +412,8 @@ sub get_1 {
 
 sub repo_objs {
 	my ($self, $ibxish) = @_;
-	my $ibx_coderepos = $ibxish->{coderepo} // return;
 	$ibxish->{-repo_objs} // do {
+		my $ibx_coderepos = $ibxish->{coderepo} // return;
 		parse_cgitrc($self, undef, 0);
 		my $coderepos = $self->{-coderepos};
 		my @repo_objs;
@@ -568,6 +568,43 @@ sub _fill_ei ($$) {
 	$es;
 }
 
+sub _fill_csrch ($$) {
+	my ($self, $name) = @_; # "" is a valid name for cindex
+	return if $name ne '' && !valid_foo_name($name, 'cindex');
+	eval { require PublicInbox::CodeSearch } or return;
+	my $pfx = "cindex.$name";
+	my $d = $self->{"$pfx.topdir"} // return;
+	-d $d or return;
+	if (index($d, "\n") >= 0) {
+		warn "E: `$d' must not contain `\\n'\n";
+		return;
+	}
+	my $csrch = PublicInbox::CodeSearch->new($d, $self);
+	for my $k (qw(localprefix)) {
+		my $v = $self->{"$pfx.$k"} // next;
+		$csrch->{$k} = _array($v);
+	}
+	$csrch->{name} = $name;
+	$csrch;
+}
+
+sub lookup_cindex ($$) {
+	my ($self, $name) = @_;
+	$self->{-csrch_by_name}->{$name} //= _fill_csrch($self, $name);
+}
+
+sub each_cindex {
+	my ($self, $cb, @arg) = @_;
+	my @csrch = map {
+		lookup_cindex($self, substr($_, length('cindex.'))) // ()
+	} grep(m!\Acindex\.[^\./]*\z!, @{$self->{-section_order}});
+	if (ref($cb) eq 'CODE') {
+		$cb->($_, @arg) for @csrch;
+	} else { # string function
+		$_->$cb(@arg) for @csrch;
+	}
+}
+
 sub config_cmd {
 	my ($self, $env, $opt) = @_;
 	my $f = $self->{-f} // default_file();
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 6145b027..8ef17d58 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -649,4 +649,21 @@ sub xh_args { # prep getopt args to feed to xap_helper.h socket
 	map { ('-d', $_) } shard_dirs($_[0]);
 }
 
+sub docids_by_postlist ($$) {
+	my ($self, $q) = @_;
+	my $cur = $self->xdb->postlist_begin($q);
+	my $end = $self->{xdb}->postlist_end($q);
+	my @ids;
+	for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
+	@ids;
+}
+
+sub get_doc ($$) {
+	my ($self, $docid) = @_;
+	eval { $self->{xdb}->get_document($docid) } // do {
+		die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
+		undef;
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f569428c..17538027 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -545,17 +545,9 @@ sub add_message {
 	$smsg->{num};
 }
 
-sub get_doc ($$) {
-	my ($self, $docid) = @_;
-	eval { $self->{xdb}->get_document($docid) } // do {
-		die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
-		undef;
-	}
-}
-
 sub _get_doc ($$) {
 	my ($self, $docid) = @_;
-	get_doc($self, $docid) // do {
+	$self->get_doc($docid) // do {
 		warn "E: #$docid missing in Xapian\n";
 		undef;
 	}
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 7cc10198..4e79f750 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -643,9 +643,13 @@ sub resolve_patch ($$) {
 # so user_cb never references the SolverGit object
 sub new {
 	my ($class, $ibx, $user_cb, $uarg) = @_;
+	my $gits = $ibx ? $ibx->{-repo_objs} : undef;
+
+	# FIXME: cindex --join= is super-aggressive and may hit too many
+	$gits = [ @$gits[0..2] ] if $gits && @$gits > 3;
 
 	bless { # $ibx is undef if coderepo only (see WwwCoderepo)
-		gits => $ibx ? $ibx->{-repo_objs} : undef,
+		gits => $gits,
 		user_cb => $user_cb,
 		uarg => $uarg,
 		# -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e5f748f7..d81c66b7 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -80,7 +80,7 @@ sub msg_page {
 	# allow user to easily browse the range around this message if
 	# they have ->over
 	$ctx->{-t_max} = $smsg->{ts};
-	$ctx->{-spfx} = '../' if $ibx->{coderepo};
+	$ctx->{-spfx} = '../' if $ibx->{-repo_objs};
 	PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
 }
 
@@ -443,7 +443,7 @@ sub thread_html {
 	my $ibx = $ctx->{ibx};
 	my ($nr, $msgs) = $ibx->over->get_thread($mid);
 	return missing_thread($ctx) if $nr == 0;
-	$ctx->{-spfx} = '../../' if $ibx->{coderepo};
+	$ctx->{-spfx} = '../../' if $ibx->{-repo_objs};
 
 	# link $INBOX_DIR/description text to "index_topics" view around
 	# the newest message in this thread
@@ -779,6 +779,9 @@ href=#t>this message</a>:
 <input type=submit value=search
 />\t(<a href=${upfx}_/text/help/#search>help</a>)</pre></form>
 EOM
+		# TODO: related codesearch
+		# my $csrchv = $ctx->{ibx}->{-csrch} // [];
+		# push @related, '<pre>'.ascii_html(Dumper($csrchv)).'</pre>';
 	}
 	if ($ctx->{ibx}->over) {
 		my $t = ts2str($ctx->{-t_max});
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6b616bd4..289599b8 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -189,6 +189,7 @@ sub preload {
 		}
 		$pi_cfg->ALL and require PublicInbox::Isearch;
 		$self->cgit;
+		$self->coderepo;
 		$self->stylesheets_prepare($_) for ('', '../', '../../');
 		$self->news_www;
 	}
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 0eb4a2d6..8ab4911f 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -14,12 +14,14 @@ use PublicInbox::ViewVCS;
 use PublicInbox::WwwStatic qw(r);
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html utf8_maybe);
+use PublicInbox::Hval qw(prurl ascii_html utf8_maybe);
 use PublicInbox::ViewDiff qw(uri_escape_path);
 use PublicInbox::RepoSnapshot;
 use PublicInbox::RepoAtom;
 use PublicInbox::RepoTree;
 use PublicInbox::OnDestroy;
+use URI::Escape qw(uri_escape_utf8);
+use File::Spec;
 
 my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
 		"--format=%(HEAD)%00".join('%00', map { "%($_)" }
@@ -62,6 +64,7 @@ sub prepare_coderepos {
 		my $eidx = $pi_cfg->lookup_ei($k) // next;
 		$pi_cfg->repo_objs($eidx);
 	}
+	$pi_cfg->each_cindex('load_coderepos', $pi_cfg);
 }
 
 sub new {
@@ -119,6 +122,41 @@ sub _refs_tags_link {
 		"</a>$align ", ascii_html($s), " ($cd)", @snap_fmt, "\n");
 }
 
+sub emit_joined_inboxes ($) {
+	my ($ctx) = @_;
+	my $names = $ctx->{git}->{ibx_names}; # coderepo directives in config
+	my $score = $ctx->{git}->{ibx_score}; # generated w/ cindex --join
+	($names || $score) or return;
+	my $pi_cfg = $ctx->{wcr}->{pi_cfg};
+	my ($u, $h);
+	my $zfh = $ctx->zfh;
+	print $zfh "\n# associated public inboxes:",
+		"\n# (number on the left is used for dev purposes)";
+	my @ns = map { [ 0, $_ ] } @$names;
+	my $env = $ctx->{env};
+	for (@ns, @$score) {
+		my ($nr, $name) = @$_;
+		my $ibx = $pi_cfg->lookup_name($name) // do {
+			warn "W: inbox `$name' gone for $ctx->{git}->{git_dir}";
+			say $zfh '# ', ascii_html($name), ' (missing inbox?)';
+			next;
+		};
+		if (scalar(@{$ibx->{url} // []})) {
+			$u = $h = ascii_html(prurl($env, $ibx->{url}));
+		} else {
+			$h = ascii_html(prurl($env, uri_escape_utf8($name)));
+			$h .= '/';
+			$u = ascii_html($name);
+		}
+		if ($nr) {
+			printf $zfh "\n% 11u", $nr;
+		} else {
+			print $zfh "\n", ' 'x11;
+		}
+		print $zfh qq{ <a\nhref="$h">$u</a>};
+	}
+}
+
 sub summary_END { # called via OnDestroy
 	my ($ctx) = @_;
 	my $wcb = delete($ctx->{-wcb}) or return; # already done
@@ -174,6 +212,7 @@ EOM
 	for (@r) { print $zfh _refs_tags_link($_, './', $snap_pfx, @snap_fmt) }
 	print $zfh $NO_TAGS if !@r;
 	print $zfh qq(<a href="refs/tags/">...</a>\n) if $last;
+	emit_joined_inboxes $ctx;
 	$wcb->($ctx->html_done('</pre>'));
 }
 
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index f4508b3f..4b4b2e4c 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -7,7 +7,7 @@ use strict;
 use v5.10.1;
 use PublicInbox::Linkify;
 use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html prurl);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
 use HTTP::Date qw(time2str);
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::GzipFilter qw(gzf_maybe);
@@ -248,14 +248,23 @@ EOS
 
 sub coderepos_raw ($$) {
 	my ($ctx, $top_url) = @_;
-	my $cr = $ctx->{ibx}->{coderepo} // return ();
 	my $cfg = $ctx->{www}->{pi_cfg};
+	my $cr = $cfg->repo_objs($ctx->{ibx}) or return ();
 	my $buf = 'Code repositories for project(s) associated with this '.
-		$ctx->{ibx}->thing_type . "\n";
-	for my $git (@{$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx})}) {
+		$ctx->{ibx}->thing_type . ":\n";
+	my @recs = map { [ 0, $_ ] } @$cr;
+	my @todo = @recs;
+	$cfg->each_cindex('load_commit_times', \@todo);
+	@recs = sort { $b->[0] <=> $a->[0] } @recs;
+	my $cr_score = $ctx->{ibx}->{-cr_score};
+	for (@recs) {
+		my ($t, $git) = @$_;
 		for ($git->pub_urls($ctx->{env})) {
 			my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_;
-			$buf .= "\n\t" . prurl($ctx->{env}, $u);
+			my $nr = $cr_score->{$git->{nick}};
+			$buf .= "\n";
+			$buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9);
+			$buf .= ' '.fmt_ts($t).' '.prurl($ctx->{env}, $u);
 		}
 	}
 	($buf);
diff --git a/t/cindex.t b/t/cindex.t
index a9075092..29d88ca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -5,7 +5,7 @@ use v5.12;
 use PublicInbox::TestCommon;
 use Cwd qw(getcwd abs_path);
 use List::Util qw(sum);
-use autodie qw(close open rename);
+use autodie qw(close mkdir open rename);
 require_mods(qw(json Xapian +SCM_RIGHTS));
 use_ok 'PublicInbox::CodeSearchIdx';
 use PublicInbox::Import;
@@ -227,7 +227,7 @@ SKIP: { # --prune
 }
 
 File::Path::remove_tree("$tmp/ext");
-ok(mkdir("$tmp/ext", 0707), 'create $tmp/ext with odd permissions');
+mkdir("$tmp/ext", 0707);
 ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp]),
 	'external on existing dir');
 {
@@ -265,4 +265,28 @@ EOM
 		'non-Xapian-enabled inbox noted');
 }
 
+# we need to support blank sections for top-level repos
+# (e.g. <https://example.com/my-project>).
+# git.kernel.org could use "pub" as section name, though, since all git repos
+# are currently under //git.kernel.org/pub/**/*
+{
+	mkdir(my $d = "$tmp/blanksection");
+	my $cfg = cfg_new($d, <<EOM);
+[cindex ""]
+	topdir = $tmp/ext
+	localprefix = $tmp
+EOM
+	my $csrch = $cfg->lookup_cindex('');
+	is ref($csrch), 'PublicInbox::CodeSearch', 'codesearch w/ blank name';
+	is_deeply $csrch->{localprefix}, [ "$tmp" ], 'localprefix respected';
+	my $nr = 0;
+	$cfg->each_cindex(sub {
+		my ($cs, @rest) = @_;
+		is $cs->{topdir}, $csrch->{topdir}, 'each_cindex works';
+		is_deeply \@rest, [ '.' ], 'got expected arg';
+		++$nr;
+	}, '.');
+	is $nr, 1, 'iterated through cindices';
+}
+
 done_testing;
diff --git a/xt/solver.t b/xt/solver.t
index 51b4144c..372d003b 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -10,6 +10,7 @@ use_ok($_) for @psgi;
 use_ok 'PublicInbox::WWW';
 my $cfg = PublicInbox::Config->new;
 my $www = PublicInbox::WWW->new($cfg);
+$www->preload;
 my $app = sub {
 	my $env = shift;
 	$env->{'psgi.errors'} = \*STDERR;
@@ -63,7 +64,7 @@ while (my ($ibx_name, $urls) = each %$todo) {
 			skip(qq{[publicinbox "$ibx_name"] not configured},
 				scalar(@$urls));
 		}
-		if (!defined($ibx->{coderepo})) {
+		if (!defined($ibx->{-repo_objs})) {
 			push @gone, $ibx_name;
 			skip(qq{publicinbox.$ibx_name.coderepo not configured},
 				scalar(@$urls));

  parent reply	other threads:[~2023-11-28 14:56 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-28 14:56 [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
2023-11-28 14:56 ` [PATCH 01/14] test_common: create_*: detect changes all parameters Eric Wong
2023-11-28 14:56 ` [PATCH 02/14] t/cindex*: require SCM_RIGHTS for these tests Eric Wong
2024-01-29 21:23   ` [PATCH 0/2] pure Perl sendmsg/recvmsg on *BSD Eric Wong
2024-01-29 21:23     ` [PATCH 1/2] syscall: update formatting to match our codebase Eric Wong
2024-01-29 21:23     ` [PATCH 2/2] syscall: use pure Perl sendmsg/recvmsg on *BSD Eric Wong
2024-04-06  0:43       ` Gaelan Steele
2024-04-08  9:48         ` [RFT] syscall: set default constants for Inline::C platforms Eric Wong
2024-04-08 12:12           ` Gaelan Steele
2024-04-08 20:11             ` Eric Wong
2023-11-28 14:56 ` [PATCH 03/14] codesearch: eliminate redundant substitutions Eric Wong
2023-11-28 14:56 ` [PATCH 04/14] solver: schedule cleanup after synchronous git->check Eric Wong
2023-11-28 14:56 ` [PATCH 05/14] xap_helper.h: move cindex endpoints to separate file Eric Wong
2023-11-28 14:56 ` [PATCH 06/14] xap_helper: implement mset endpoint for WWW, IMAP, etc Eric Wong
2023-11-28 14:56 ` [PATCH 07/14] hval: use File::Spec to make relative paths for href Eric Wong
2023-11-28 14:56 ` Eric Wong [this message]
2023-11-28 14:56 ` [PATCH 09/14] git: speed up ->git_path for non-worktrees Eric Wong
2023-11-28 14:56 ` [PATCH 10/14] cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT' Eric Wong
2023-11-28 14:56 ` [PATCH 11/14] git: speed up Git->new by 5% or so Eric Wong
2023-11-28 14:56 ` [PATCH 12/14] admin: resolve_git_dir respects symlinks Eric Wong
2023-11-28 14:56 ` [PATCH 13/14] cindex: extra quit checks Eric Wong
2023-11-28 14:56 ` [PATCH 14/14] www: start working on a repo listing Eric Wong
2023-11-28 17:55 ` [PATCH 15/14] www: load cindex join data for ->ALL, too Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231128145628.1455176-9-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).