From fe3883762faf67fd6c4624ee721000e1f36bc59b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Nov 2023 12:43:15 +0000 Subject: cindex: rename --associate to --join, test w/ real repos The association data is just stored as deflated JSON in Xapian metadata keys of shard[0] for now. It should be reasonably compact and fit in memory, since we'll assume sane, non-malicious git coderepo history. The new cindex-join.t test requires TEST_REMOTE_JOIN=1 to be set in the environment and tests the joins against the inboxes and coderepos of two small projects with a common history. Internally, we'll use `ibx_off', `root_off' instead of `ibx_id' and `root_id' since `_id' may be mistaken for columns in an SQL database, which they are not. --- lib/PublicInbox/XapHelper.pm | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/PublicInbox/XapHelper.pm') diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index 428b732e..fe831b8f 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -93,10 +93,10 @@ sub cmd_dump_ibx { } sub dump_roots_iter ($$$) { - my ($req, $root2id, $it) = @_; + my ($req, $root2off, $it) = @_; eval { my $doc = $it->get_document; - my $G = join(' ', map { $root2id->{$_} } xap_terms('G', $doc)); + my $G = join(' ', map { $root2off->{$_} } xap_terms('G', $doc)); for my $p (@{$req->{A}}) { for (xap_terms($p, $doc)) { $req->{wbuf} .= "$_ $G\n"; @@ -118,14 +118,14 @@ sub dump_roots_flush ($$) { } sub cmd_dump_roots { - my ($req, $root2id_file, $qry_str) = @_; + my ($req, $root2off_file, $qry_str) = @_; $qry_str // die 'usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR'; $req->{A} or die 'dump_roots requires -A PREFIX'; - open my $fh, '<', $root2id_file; - my $root2id; # record format: $OIDHEX "\0" uint32_t + open my $fh, '<', $root2off_file; + my $root2off; # record format: $OIDHEX "\0" uint32_t my @x = split(/\0/, read_all $fh); while (defined(my $oidhex = shift @x)) { - 
$root2id->{$oidhex} = shift @x; + $root2off->{$oidhex} = shift @x; } my $opt = { relevance => -1, limit => $req->{'m'}, offset => $req->{o} // 0 }; @@ -134,7 +134,7 @@ sub cmd_dump_roots { $req->{wbuf} = ''; for my $it ($mset->items) { for (my $t = 10; $t > 0; --$t) { - $t = dump_roots_iter($req, $root2id, $it) // $t; + $t = dump_roots_iter($req, $root2off, $it) // $t; } if (!($req->{nr_out} & 0x3fff)) { dump_roots_flush($req, $fh); -- cgit v1.2.3-24-ge0c7