From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/14] www: load and use cindex join data
Date: Tue, 28 Nov 2023 14:56:21 +0000 [thread overview]
Message-ID: <20231128145628.1455176-9-e@80x24.org> (raw)
In-Reply-To: <20231128145628.1455176-1-e@80x24.org>
This is a major step in solving the problem of having to
manually associate hundreds/thousands of coderepos with
hundreds/thousands of public-inboxes to power solver
(and more).
---
lib/PublicInbox/CodeSearch.pm | 153 +++++++++++++++++++++++++++++--
lib/PublicInbox/CodeSearchIdx.pm | 42 ++++-----
lib/PublicInbox/Config.pm | 39 +++++++-
lib/PublicInbox/Search.pm | 17 ++++
lib/PublicInbox/SearchIdx.pm | 10 +-
lib/PublicInbox/SolverGit.pm | 6 +-
lib/PublicInbox/View.pm | 7 +-
lib/PublicInbox/WWW.pm | 1 +
lib/PublicInbox/WwwCoderepo.pm | 41 ++++++++-
lib/PublicInbox/WwwText.pm | 19 +++-
t/cindex.t | 28 +++++-
xt/solver.t | 3 +-
12 files changed, 312 insertions(+), 54 deletions(-)
diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index eb057525..7d7f6df6 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -21,7 +21,7 @@ use constant {
our @CODE_NRP;
our @CODE_VMAP = (
[ AT, 'd:' ], # mairix compat
- [ AT, 'dt:' ], # mail compat
+ [ AT, 'dt:' ], # public-inbox mail compat
[ CT, 'ct:' ],
);
@@ -51,7 +51,7 @@ my %prob_prefix = ( # copied from PublicInbox::Search
sub new {
my ($cls, $dir, $cfg) = @_;
# can't have a PublicInbox::Config here due to circular refs
- bless { xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
+ bless { topdir => $dir, xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
-cfg_f => $cfg->{-f} }, $cls;
}
@@ -63,7 +63,20 @@ sub join_data {
my $cur = $self->xdb->get_metadata($key) or return;
$cur = eval { PublicInbox::Config::json()->decode(uncompress($cur)) };
warn "E: $@ (corrupt metadata in `$key' key?)" if $@;
- $cur;
+ my @m = grep { ref($cur->{$_}) ne 'ARRAY' } qw(ekeys roots ibx2root);
+ if (@m) {
+ warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} missing: @m
+EOM
+ undef;
+ } elsif (@{$cur->{ekeys}} != @{$cur->{ibx2root}}) {
+ warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} mismatched ekeys and ibx2root
+EOM
+ undef;
+ } else {
+ $cur;
+ }
}
sub qparse_new ($) {
@@ -196,16 +209,136 @@ sub roots2paths { # for diagnostics
\%ret;
}
-sub paths2roots { # for diagnostics
- my ($self) = @_;
+sub root_oids ($$) {
+ my ($self, $git_dir) = @_;
+ my @ids = $self->docids_by_postlist('P'.$git_dir);
+ @ids or warn <<"";
+BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
+
+ warn <<"" if @ids > 1;
+BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir}
+
my %ret;
- my $tmp = roots2paths($self);
- for my $root_oidhex (keys %$tmp) {
- my $paths = delete $tmp->{$root_oidhex};
- push @{$ret{$_}}, $root_oidhex for @$paths;
+ for my $docid (@ids) {
+ my @oids = xap_terms('G', $self->xdb, $docid);
+ @ret{@oids} = @oids;
+ }
+ sort keys %ret;
+}
+
+sub paths2roots {
+ my ($self, $paths) = @_;
+ my %ret;
+ if ($paths) {
+ for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) }
+ } else {
+ my $tmp = roots2paths($self);
+ for my $root_oidhex (keys %$tmp) {
+ my $paths = delete $tmp->{$root_oidhex};
+ push @{$ret{$_}}, $root_oidhex for @$paths;
+ }
+ @$_ = sort(@$_) for values %ret;
}
- @$_ = sort(@$_) for values %ret;
\%ret;
}
+sub load_commit_times { # each_cindex callback
+ my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...]
+ my (@pending, $rec, $dir, @ids, $doc);
+ while ($rec = shift @$todo) {
+ @ids = $self->docids_by_postlist('P'.$rec->[1]->{git_dir});
+ if (@ids) {
+ warn <<EOM if @ids > 1;
+W: $rec->[1]->{git_dir} indexed multiple times in $self->{topdir}
+EOM
+ for (@ids) {
+ $doc = $self->get_doc($_) // next;
+ $rec->[0] = int_val($doc, CT);
+ last;
+ }
+ } else { # may be in another cindex:
+ push @pending, $rec;
+ }
+ }
+ @$todo = @pending;
+}
+
+sub load_coderepos { # each_cindex callback
+ my ($self, $pi_cfg) = @_;
+ my $name = $self->{name};
+ my $cfg_f = $pi_cfg->{-f};
+ my $lpfx = $self->{localprefix} or return warn <<EOM;
+W: cindex.$name.localprefix unset in $cfg_f, ignoring cindex.$name
+EOM
+ my $lre = join('|', map { $_ .= '/'; tr!/!/!s; quotemeta } @$lpfx);
+ $lre = qr!\A(?:$lre)!;
+ my $coderepos = $pi_cfg->{-coderepos};
+ my $nick_pfx = $name eq '' ? '' : "$name/";
+ my %dir2cr;
+ for my $p ($self->all_terms('P')) {
+ my $nick = $p;
+ $nick =~ s!$lre!$nick_pfx!s or next;
+ $dir2cr{$p} = $coderepos->{$nick} //= do {
+ my $git = PublicInbox::Git->new($p);
+ $git->{nick} = $nick; # for git->pub_urls
+ $git;
+ };
+ }
+ my $jd = join_data($self) or return warn <<EOM;
+W: cindex.$name.topdir=$self->{topdir} has no usable join data for $cfg_f
+EOM
+ my ($ekeys, $roots, $ibx2root) = @$jd{qw(ekeys roots ibx2root)};
+ my $roots2paths = roots2paths($self);
+ for my $root_offs (@$ibx2root) {
+ my $ekey = shift(@$ekeys) // die 'BUG: {ekeys} empty';
+ scalar(@$root_offs) or next;
+ my $ibx = $pi_cfg->lookup_eidx_key($ekey) // do {
+ warn "W: `$ekey' gone from $cfg_f\n";
+ next;
+ };
+ my $gits = $ibx->{-repo_objs} //= [];
+ my $cr_score = $ibx->{-cr_score} //= {};
+ my %ibx_p2g = map { $_->{git_dir} => $_ } @$gits;
+ my $ibx2self; # cindex has an association w/ inbox?
+ for (@$root_offs) { # sorted by $nr descending
+ my ($nr, $root_off) = @$_;
+ my $root_oid = $roots->[$root_off] // do {
+ warn <<EOM;
+BUG: root #$root_off invalid in join data for `$ekey' with $cfg_f
+EOM
+ next;
+ };
+ my $git_dirs = $roots2paths->{$root_oid};
+ my @gits = map { $dir2cr{$_} // () } @$git_dirs;
+ $cr_score->{$_->{nick}} //= $nr for @gits;
+ @$git_dirs = grep { !$ibx_p2g{$_} } @$git_dirs;
+ # @$git_dirs or warn "W: no matches for $root_oid\n";
+ for (@$git_dirs) {
+ if (my $git = $dir2cr{$_}) {
+ $ibx_p2g{$_} = $git;
+ $ibx2self = 1;
+ $ibx->{-hide}->{www} or
+ push @{$git->{ibx_score}},
+ [ $nr, $ibx->{name} ];
+ push @$gits, $git;
+ } else {
+ warn <<EOM;
+W: no coderepo available for $_ (localprefix=@$lpfx)
+EOM
+ }
+ }
+ }
+ if (@$gits) {
+ push @{$ibx->{-csrch}}, $self if $ibx2self;
+ } else {
+ delete $ibx->{-repo_objs};
+ delete $ibx->{-cr_score};
+ }
+ }
+ for my $git (values %dir2cr) {
+ my $s = $git->{ibx_score};
+ @$s = sort { $b->[0] <=> $a->[0] } @$s if $s;
+ }
+}
+
1;
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index bb1d698b..a6cbe0b0 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -172,7 +172,7 @@ sub count_shards { scalar($_[0]->xdb_shards_flat) }
sub update_commit ($$$) {
my ($self, $cmt, $roots) = @_; # fields from @FMT
my $x = 'Q'.$cmt->{H};
- my ($docid, @extra) = sort { $a <=> $b } docids_by_postlist($self, $x);
+ my ($docid, @extra) = sort { $a <=> $b } $self->docids_by_postlist($x);
@extra and warn "W: $cmt->{H} indexed multiple times, pruning ",
join(', ', map { "#$_" } @extra), "\n";
$self->{xdb}->delete_document($_) for @extra;
@@ -377,15 +377,6 @@ sub seen ($$) {
# used to select the shard for a GIT_DIR
sub git_dir_hash ($) { hex(substr(sha256_hex($_[0]), 0, 8)) }
-sub docids_by_postlist ($$) { # consider moving to PublicInbox::Search
- my ($self, $q) = @_;
- my $cur = $self->{xdb}->postlist_begin($q);
- my $end = $self->{xdb}->postlist_end($q);
- my @ids;
- for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
- @ids;
-}
-
sub _cb { # run_await cb
my ($pid, $cmd, undef, $opt, $cb, $self, $git, @arg) = @_;
return if $DO_QUIT;
@@ -452,7 +443,7 @@ sub prep_repo ($$) {
sub check_existing { # retry_reopen callback
my ($shard, $self, $git) = @_;
- my @docids = docids_by_postlist($shard, 'P'.$git->{git_dir});
+ my @docids = $shard->docids_by_postlist('P'.$git->{git_dir});
my $docid = shift(@docids) // return get_roots($self, $git);
my $doc = $shard->get_doc($docid) //
die "BUG: no #$docid ($git->{git_dir})";
@@ -778,7 +769,7 @@ sub prune_init { # via wq_io_do in IDX_SHARDS
sub prune_one { # via wq_io_do in IDX_SHARDS
my ($self, $term) = @_;
- my @docids = docids_by_postlist($self, $term);
+ my @docids = $self->docids_by_postlist($term);
for (@docids) {
$TXN_BYTES -= $self->{xdb}->get_doclength($_) * 42;
$self->{xdb}->delete_document($_);
@@ -894,10 +885,9 @@ sub current_join_data ($) {
sub score_old_join_data ($$$) {
my ($self, $score, $ekeys_new) = @_;
my $old = ($JOIN{reset} ? undef : current_join_data($self)) or return;
- my @old = @$old{qw(ekeys roots ibx2root)};
- @old == 3 or return warn "W: ekeys/roots missing from old JOIN data\n";
progress($self, 'merging old join data...');
- my ($ekeys_old, $roots_old, $ibx2root_old) = @old;
+ my ($ekeys_old, $roots_old, $ibx2root_old) =
+ @$old{qw(ekeys roots ibx2root)};
# score: "ibx_off root_off" => nr
my $i = -1;
my %root2id_new = map { $_ => ++$i } @OFF2ROOT;
@@ -905,16 +895,24 @@ sub score_old_join_data ($$$) {
my %ekey2id_new = map { $_ => ++$i } @$ekeys_new;
for my $ibx_off_old (0..$#$ibx2root_old) {
my $root_offs_old = $ibx2root_old->[$ibx_off_old];
- my $ekey = $ekeys_old->[$ibx_off_old] //
- warn "W: no ibx #$ibx_off_old in old JOIN data\n";
- my $ibx_off_new = $ekey2id_new{$ekey // next} //
+ my $ekey = $ekeys_old->[$ibx_off_old] // do {
+ warn "W: no ibx #$ibx_off_old in old join data\n";
+ next;
+ };
+ my $ibx_off_new = $ekey2id_new{$ekey} // do {
warn "W: `$ekey' no longer exists\n";
+ next;
+ };
for (@$root_offs_old) {
my ($nr, $rid_old) = @$_;
- my $root_old = $roots_old->[$rid_old] //
- warn "W: no root #$rid_old in old JOIN data\n";
- my $rid_new = $root2id_new{$root_old // next} //
+ my $root_old = $roots_old->[$rid_old] // do {
+ warn "W: no root #$rid_old in old data\n";
+ next;
+ };
+ my $rid_new = $root2id_new{$root_old} // do {
warn "W: root `$root_old' no longer exists\n";
+ next;
+ };
$score->{"$ibx_off_new $rid_new"} += $nr;
}
}
@@ -963,7 +961,7 @@ sub do_join {
progress($self, "$ekey => $root has $nr matches");
push @{$new->{ibx2root}->[$ibx_off]}, [ $nr, $root_off ];
}
- for my $ary (values %$new) { # sort by nr
+ for my $ary (values %$new) { # sort by nr (largest first)
for (@$ary) { @$_ = sort { $b->[0] <=> $a->[0] } @$_ }
}
$new->{ekeys} = \@ekeys;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 9bee94b8..779e3140 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -412,8 +412,8 @@ sub get_1 {
sub repo_objs {
my ($self, $ibxish) = @_;
- my $ibx_coderepos = $ibxish->{coderepo} // return;
$ibxish->{-repo_objs} // do {
+ my $ibx_coderepos = $ibxish->{coderepo} // return;
parse_cgitrc($self, undef, 0);
my $coderepos = $self->{-coderepos};
my @repo_objs;
@@ -568,6 +568,43 @@ sub _fill_ei ($$) {
$es;
}
+sub _fill_csrch ($$) {
+ my ($self, $name) = @_; # "" is a valid name for cindex
+ return if $name ne '' && !valid_foo_name($name, 'cindex');
+ eval { require PublicInbox::CodeSearch } or return;
+ my $pfx = "cindex.$name";
+ my $d = $self->{"$pfx.topdir"} // return;
+ -d $d or return;
+ if (index($d, "\n") >= 0) {
+ warn "E: `$d' must not contain `\\n'\n";
+ return;
+ }
+ my $csrch = PublicInbox::CodeSearch->new($d, $self);
+ for my $k (qw(localprefix)) {
+ my $v = $self->{"$pfx.$k"} // next;
+ $csrch->{$k} = _array($v);
+ }
+ $csrch->{name} = $name;
+ $csrch;
+}
+
+sub lookup_cindex ($$) {
+ my ($self, $name) = @_;
+ $self->{-csrch_by_name}->{$name} //= _fill_csrch($self, $name);
+}
+
+sub each_cindex {
+ my ($self, $cb, @arg) = @_;
+ my @csrch = map {
+ lookup_cindex($self, substr($_, length('cindex.'))) // ()
+ } grep(m!\Acindex\.[^\./]*\z!, @{$self->{-section_order}});
+ if (ref($cb) eq 'CODE') {
+ $cb->($_, @arg) for @csrch;
+ } else { # string function
+ $_->$cb(@arg) for @csrch;
+ }
+}
+
sub config_cmd {
my ($self, $env, $opt) = @_;
my $f = $self->{-f} // default_file();
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 6145b027..8ef17d58 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -649,4 +649,21 @@ sub xh_args { # prep getopt args to feed to xap_helper.h socket
map { ('-d', $_) } shard_dirs($_[0]);
}
+sub docids_by_postlist ($$) {
+ my ($self, $q) = @_;
+ my $cur = $self->xdb->postlist_begin($q);
+ my $end = $self->{xdb}->postlist_end($q);
+ my @ids;
+ for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
+ @ids;
+}
+
+sub get_doc ($$) {
+ my ($self, $docid) = @_;
+ eval { $self->{xdb}->get_document($docid) } // do {
+ die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
+ undef;
+ }
+}
+
1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f569428c..17538027 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -545,17 +545,9 @@ sub add_message {
$smsg->{num};
}
-sub get_doc ($$) {
- my ($self, $docid) = @_;
- eval { $self->{xdb}->get_document($docid) } // do {
- die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
- undef;
- }
-}
-
sub _get_doc ($$) {
my ($self, $docid) = @_;
- get_doc($self, $docid) // do {
+ $self->get_doc($docid) // do {
warn "E: #$docid missing in Xapian\n";
undef;
}
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 7cc10198..4e79f750 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -643,9 +643,13 @@ sub resolve_patch ($$) {
# so user_cb never references the SolverGit object
sub new {
my ($class, $ibx, $user_cb, $uarg) = @_;
+ my $gits = $ibx ? $ibx->{-repo_objs} : undef;
+
+ # FIXME: cindex --join= is super-aggressive and may hit too many coderepos
+ $gits = [ @$gits[0..2] ] if $gits && @$gits > 3;
bless { # $ibx is undef if coderepo only (see WwwCoderepo)
- gits => $ibx ? $ibx->{-repo_objs} : undef,
+ gits => $gits,
user_cb => $user_cb,
uarg => $uarg,
# -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e5f748f7..d81c66b7 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -80,7 +80,7 @@ sub msg_page {
# allow user to easily browse the range around this message if
# they have ->over
$ctx->{-t_max} = $smsg->{ts};
- $ctx->{-spfx} = '../' if $ibx->{coderepo};
+ $ctx->{-spfx} = '../' if $ibx->{-repo_objs};
PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
}
@@ -443,7 +443,7 @@ sub thread_html {
my $ibx = $ctx->{ibx};
my ($nr, $msgs) = $ibx->over->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
- $ctx->{-spfx} = '../../' if $ibx->{coderepo};
+ $ctx->{-spfx} = '../../' if $ibx->{-repo_objs};
# link $INBOX_DIR/description text to "index_topics" view around
# the newest message in this thread
@@ -779,6 +779,9 @@ href=#t>this message</a>:
<input type=submit value=search
/>\t(<a href=${upfx}_/text/help/#search>help</a>)</pre></form>
EOM
+ # TODO: related codesearch
+ # my $csrchv = $ctx->{ibx}->{-csrch} // [];
+ # push @related, '<pre>'.ascii_html(Dumper($csrchv)).'</pre>';
}
if ($ctx->{ibx}->over) {
my $t = ts2str($ctx->{-t_max});
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6b616bd4..289599b8 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -189,6 +189,7 @@ sub preload {
}
$pi_cfg->ALL and require PublicInbox::Isearch;
$self->cgit;
+ $self->coderepo;
$self->stylesheets_prepare($_) for ('', '../', '../../');
$self->news_www;
}
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 0eb4a2d6..8ab4911f 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -14,12 +14,14 @@ use PublicInbox::ViewVCS;
use PublicInbox::WwwStatic qw(r);
use PublicInbox::GitHTTPBackend;
use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html utf8_maybe);
+use PublicInbox::Hval qw(prurl ascii_html utf8_maybe);
use PublicInbox::ViewDiff qw(uri_escape_path);
use PublicInbox::RepoSnapshot;
use PublicInbox::RepoAtom;
use PublicInbox::RepoTree;
use PublicInbox::OnDestroy;
+use URI::Escape qw(uri_escape_utf8);
+use File::Spec;
my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
"--format=%(HEAD)%00".join('%00', map { "%($_)" }
@@ -62,6 +64,7 @@ sub prepare_coderepos {
my $eidx = $pi_cfg->lookup_ei($k) // next;
$pi_cfg->repo_objs($eidx);
}
+ $pi_cfg->each_cindex('load_coderepos', $pi_cfg);
}
sub new {
@@ -119,6 +122,41 @@ sub _refs_tags_link {
"</a>$align ", ascii_html($s), " ($cd)", @snap_fmt, "\n");
}
+sub emit_joined_inboxes ($) {
+ my ($ctx) = @_;
+ my $names = $ctx->{git}->{ibx_names}; # coderepo directives in config
+ my $score = $ctx->{git}->{ibx_score}; # generated w/ cindex --join
+ ($names || $score) or return;
+ my $pi_cfg = $ctx->{wcr}->{pi_cfg};
+ my ($u, $h);
+ my $zfh = $ctx->zfh;
+ print $zfh "\n# associated public inboxes:",
+ "\n# (number on the left is used for dev purposes)";
+ my @ns = map { [ 0, $_ ] } @$names;
+ my $env = $ctx->{env};
+ for (@ns, @$score) {
+ my ($nr, $name) = @$_;
+ my $ibx = $pi_cfg->lookup_name($name) // do {
+ warn "W: inbox `$name' gone for $ctx->{git}->{git_dir}";
+ say $zfh '# ', ascii_html($name), ' (missing inbox?)';
+ next;
+ };
+ if (scalar(@{$ibx->{url} // []})) {
+ $u = $h = ascii_html(prurl($env, $ibx->{url}));
+ } else {
+ $h = ascii_html(prurl($env, uri_escape_utf8($name)));
+ $h .= '/';
+ $u = ascii_html($name);
+ }
+ if ($nr) {
+ printf $zfh "\n% 11u", $nr;
+ } else {
+ print $zfh "\n", ' 'x11;
+ }
+ print $zfh qq{ <a\nhref="$h">$u</a>};
+ }
+}
+
sub summary_END { # called via OnDestroy
my ($ctx) = @_;
my $wcb = delete($ctx->{-wcb}) or return; # already done
@@ -174,6 +212,7 @@ EOM
for (@r) { print $zfh _refs_tags_link($_, './', $snap_pfx, @snap_fmt) }
print $zfh $NO_TAGS if !@r;
print $zfh qq(<a href="refs/tags/">...</a>\n) if $last;
+ emit_joined_inboxes $ctx;
$wcb->($ctx->html_done('</pre>'));
}
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index f4508b3f..4b4b2e4c 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -7,7 +7,7 @@ use strict;
use v5.10.1;
use PublicInbox::Linkify;
use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html prurl);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
use HTTP::Date qw(time2str);
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::GzipFilter qw(gzf_maybe);
@@ -248,14 +248,23 @@ EOS
sub coderepos_raw ($$) {
my ($ctx, $top_url) = @_;
- my $cr = $ctx->{ibx}->{coderepo} // return ();
my $cfg = $ctx->{www}->{pi_cfg};
+ my $cr = $cfg->repo_objs($ctx->{ibx}) or return ();
my $buf = 'Code repositories for project(s) associated with this '.
- $ctx->{ibx}->thing_type . "\n";
- for my $git (@{$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx})}) {
+ $ctx->{ibx}->thing_type . ":\n";
+ my @recs = map { [ 0, $_ ] } @$cr;
+ my @todo = @recs;
+ $cfg->each_cindex('load_commit_times', \@todo);
+ @recs = sort { $b->[0] <=> $a->[0] } @recs;
+ my $cr_score = $ctx->{ibx}->{-cr_score};
+ for (@recs) {
+ my ($t, $git) = @$_;
for ($git->pub_urls($ctx->{env})) {
my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_;
- $buf .= "\n\t" . prurl($ctx->{env}, $u);
+ my $nr = $cr_score->{$git->{nick}};
+ $buf .= "\n";
+ $buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9);
+ $buf .= ' '.fmt_ts($t).' '.prurl($ctx->{env}, $u);
}
}
($buf);
diff --git a/t/cindex.t b/t/cindex.t
index a9075092..29d88ca8 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -5,7 +5,7 @@ use v5.12;
use PublicInbox::TestCommon;
use Cwd qw(getcwd abs_path);
use List::Util qw(sum);
-use autodie qw(close open rename);
+use autodie qw(close mkdir open rename);
require_mods(qw(json Xapian +SCM_RIGHTS));
use_ok 'PublicInbox::CodeSearchIdx';
use PublicInbox::Import;
@@ -227,7 +227,7 @@ SKIP: { # --prune
}
File::Path::remove_tree("$tmp/ext");
-ok(mkdir("$tmp/ext", 0707), 'create $tmp/ext with odd permissions');
+mkdir("$tmp/ext", 0707);
ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp]),
'external on existing dir');
{
@@ -265,4 +265,28 @@ EOM
'non-Xapian-enabled inbox noted');
}
+# we need to support blank sections for top-level repos
+# (e.g. <https://example.com/my-project>).
+# git.kernel.org could use "pub" as section name, though, since all git repos
+# are currently under //git.kernel.org/pub/**/*
+{
+ mkdir(my $d = "$tmp/blanksection");
+ my $cfg = cfg_new($d, <<EOM);
+[cindex ""]
+ topdir = $tmp/ext
+ localprefix = $tmp
+EOM
+ my $csrch = $cfg->lookup_cindex('');
+ is ref($csrch), 'PublicInbox::CodeSearch', 'codesearch w/ blank name';
+ is_deeply $csrch->{localprefix}, [ "$tmp" ], 'localprefix respected';
+ my $nr = 0;
+ $cfg->each_cindex(sub {
+ my ($cs, @rest) = @_;
+ is $cs->{topdir}, $csrch->{topdir}, 'each_cindex works';
+ is_deeply \@rest, [ '.' ], 'got expected arg';
+ ++$nr;
+ }, '.');
+ is $nr, 1, 'iterated through cindices';
+}
+
done_testing;
diff --git a/xt/solver.t b/xt/solver.t
index 51b4144c..372d003b 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -10,6 +10,7 @@ use_ok($_) for @psgi;
use_ok 'PublicInbox::WWW';
my $cfg = PublicInbox::Config->new;
my $www = PublicInbox::WWW->new($cfg);
+$www->preload;
my $app = sub {
my $env = shift;
$env->{'psgi.errors'} = \*STDERR;
@@ -63,7 +64,7 @@ while (my ($ibx_name, $urls) = each %$todo) {
skip(qq{[publicinbox "$ibx_name"] not configured},
scalar(@$urls));
}
- if (!defined($ibx->{coderepo})) {
+ if (!defined($ibx->{-repo_objs})) {
push @gone, $ibx_name;
skip(qq{publicinbox.$ibx_name.coderepo not configured},
scalar(@$urls));
next prev parent reply other threads:[~2023-11-28 14:56 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-28 14:56 [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
2023-11-28 14:56 ` [PATCH 01/14] test_common: create_*: detect changes all parameters Eric Wong
2023-11-28 14:56 ` [PATCH 02/14] t/cindex*: require SCM_RIGHTS for these tests Eric Wong
2024-01-29 21:23 ` [PATCH 0/2] pure Perl sendmsg/recvmsg on *BSD Eric Wong
2024-01-29 21:23 ` [PATCH 1/2] syscall: update formatting to match our codebase Eric Wong
2024-01-29 21:23 ` [PATCH 2/2] syscall: use pure Perl sendmsg/recvmsg on *BSD Eric Wong
2024-04-06 0:43 ` Gaelan Steele
2024-04-08 9:48 ` [RFT] syscall: set default constants for Inline::C platforms Eric Wong
2024-04-08 12:12 ` Gaelan Steele
2024-04-08 20:11 ` Eric Wong
2023-11-28 14:56 ` [PATCH 03/14] codesearch: eliminate redundant substitutions Eric Wong
2023-11-28 14:56 ` [PATCH 04/14] solver: schedule cleanup after synchronous git->check Eric Wong
2023-11-28 14:56 ` [PATCH 05/14] xap_helper.h: move cindex endpoints to separate file Eric Wong
2023-11-28 14:56 ` [PATCH 06/14] xap_helper: implement mset endpoint for WWW, IMAP, etc Eric Wong
2023-11-28 14:56 ` [PATCH 07/14] hval: use File::Spec to make relative paths for href Eric Wong
2023-11-28 14:56 ` Eric Wong [this message]
2023-11-28 14:56 ` [PATCH 09/14] git: speed up ->git_path for non-worktrees Eric Wong
2023-11-28 14:56 ` [PATCH 10/14] cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT' Eric Wong
2023-11-28 14:56 ` [PATCH 11/14] git: speed up Git->new by 5% or so Eric Wong
2023-11-28 14:56 ` [PATCH 12/14] admin: resolve_git_dir respects symlinks Eric Wong
2023-11-28 14:56 ` [PATCH 13/14] cindex: extra quit checks Eric Wong
2023-11-28 14:56 ` [PATCH 14/14] www: start working on a repo listing Eric Wong
2023-11-28 17:55 ` [PATCH 15/14] www: load cindex join data for ->ALL, too Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231128145628.1455176-9-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).