* [PATCH 1/3] cindex: avoid unneeded and redundant `local' calls
2023-11-21 12:43 6% [PATCH 0/3] cindex: rename `associate' to `join' Eric Wong
@ 2023-11-21 12:43 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-21 12:43 UTC (permalink / raw)
To: meta
We only set $MAX_SIZE at startup, and there's no need to
use a local $self->{roots} for the per-repo roots array.
---
lib/PublicInbox/CodeSearchIdx.pm | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 54a2ba22..bbccc0e3 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -152,8 +152,8 @@ EOM
# TODO: may be used for reshard/compact
sub count_shards { scalar($_[0]->xdb_shards_flat) }
-sub update_commit ($$) {
- my ($self, $cmt) = @_; # fields from @FMT
+sub update_commit ($$$) {
+ my ($self, $cmt, $roots) = @_; # fields from @FMT
my $x = 'Q'.$cmt->{H};
my ($docid, @extra) = sort { $a <=> $b } docids_by_postlist($self, $x);
@extra and warn "W: $cmt->{H} indexed multiple times, pruning ",
@@ -161,7 +161,7 @@ sub update_commit ($$) {
$self->{xdb}->delete_document($_) for @extra;
my $doc = $PublicInbox::Search::X{Document}->new;
$doc->add_boolean_term($x);
- $doc->add_boolean_term('G'.$_) for @{$self->{roots}};
+ $doc->add_boolean_term('G'.$_) for @$roots;
$doc->add_boolean_term('XP'.$_) for split(/ /, $cmt->{P});
$doc->add_boolean_term('T'.'c');
@@ -277,9 +277,7 @@ sub cidx_read_log_p {
my ($self, $log_p, $rd) = @_;
my $git = delete $log_p->{git} // die 'BUG: no {git}';
local $self->{current_info} = "$git->{git_dir} [$self->{shard}]";
- local $self->{roots} = delete $log_p->{roots} // die 'BUG: no {roots}';
-
- local $MAX_SIZE = $self->{-opt}->{max_size};
+ my $roots = delete $log_p->{roots} // die 'BUG: no {roots}';
# local-ized in parent before fork
$TXN_BYTES = $BATCH_BYTES;
local $self->{git} = $git; # for patchid
@@ -308,7 +306,7 @@ sub cidx_read_log_p {
cidx_ckpoint($self, "[$self->{shard}] $nr");
$TXN_BYTES -= $len; # len may be huge, >TXN_BYTES;
}
- update_commit($self, $cmt);
+ update_commit($self, $cmt, $roots);
++$nr;
cidx_ckpoint($self, "[$self->{shard}] $nr") if $TXN_BYTES <= 0;
$/ = $FS;
@@ -1143,6 +1141,7 @@ sub cidx_run { # main entry point
@ID2ROOT, $XHC, @SORT, $GITS_NR);
local $BATCH_BYTES = $self->{-opt}->{batch_size} //
$PublicInbox::SearchIdx::BATCH_BYTES;
+ local $MAX_SIZE = $self->{-opt}->{max_size};
local $self->{ASSOC_PFX} = \@ASSOC_PFX;
local $self->{PENDING} = {};
local $self->{-pi_cfg};
^ permalink raw reply related [relevance 7%]
* [PATCH 0/3] cindex: rename `associate' to `join'
@ 2023-11-21 12:43 6% Eric Wong
2023-11-21 12:43 7% ` [PATCH 1/3] cindex: avoid unneeded and redundant `local' calls Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-21 12:43 UTC (permalink / raw)
To: meta
3/3 fleshes out more join functionality, including storing the
join data in compressed JSON as Xapian metadata.  Loading it
as a Perl hash won't be excessive (compared to having 30-50k
inbox names+paths in memory).
Eric Wong (3):
cindex: avoid unneeded and redundant `local' calls
doc/cindex: point no-fsync+dangerous to -index(1)
cindex: rename --associate to --join, test w/ real repos
Documentation/public-inbox-cindex.pod | 7 +-
MANIFEST | 1 +
lib/PublicInbox/CodeSearch.pm | 62 ++++-
lib/PublicInbox/CodeSearchIdx.pm | 383 ++++++++++++++++----------
lib/PublicInbox/TestCommon.pm | 9 +-
lib/PublicInbox/XapHelper.pm | 14 +-
lib/PublicInbox/xap_helper.h | 59 ++--
script/public-inbox-cindex | 8 +-
t/cindex-join.t | 83 ++++++
t/cindex.t | 8 +-
10 files changed, 428 insertions(+), 206 deletions(-)
create mode 100644 t/cindex-join.t
^ permalink raw reply [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-21 12:43 6% [PATCH 0/3] cindex: rename `associate' to `join' Eric Wong
2023-11-21 12:43 7% ` [PATCH 1/3] cindex: avoid unneeded and redundant `local' calls Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).