From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 11A261F4EA for ; Tue, 21 Mar 2023 23:07:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1679440065; bh=grGFXh6N+I+UlHnrqUskfyLYR10vMRqAMIFVnGrOJoY=; h=From:To:Subject:Date:In-Reply-To:References:From; b=trhgMCHdCBUD9j1cDHBZmLEWi1UhX9J76kkWXYrRMyfkGLFTqgNgEBVPyoMArgVZi yssTpS1NWx9gjutJ6ERrjPAgyNyXc6ABtnJVRPOJFU1azwe/APBDAzqWJ2p2JXNQof qW+7ZHnj6kZ7zquLBvDUyVgFRZBBD4TeT5ujY8T8= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/28] cindex: use read-only shards during prep phases Date: Tue, 21 Mar 2023 23:07:23 +0000 Message-Id: <20230321230743.3020032-8-e@80x24.org> In-Reply-To: <20230321230743.3020032-1-e@80x24.org> References: <20230321230701.3019936-1-e@80x24.org> <20230321230743.3020032-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: No need to open shards for read/write access when read-only will do. Since we also control how a document gets sharded, we'll also access the shard directly instead of letting Xapian do the mappings. --reindex didn't work properly before this change since it was over-indexing. It is now broken in the opposite way in that it doesn't do reindexing at all. --reindex will be implemented properly in the future. --- lib/PublicInbox/CodeSearchIdx.pm | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index a926886e..02c9ed84 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -30,6 +30,7 @@ use PublicInbox::Spawn qw(spawn); use PublicInbox::OnDestroy; our $LIVE; # pid => callback our $LIVE_JOBS; +our @XDB_SHARDS_FLAT; # stop walking history if we see >$SEEN_MAX existing commits, this assumes # branches don't diverge by more than this number of commits... @@ -273,9 +274,9 @@ sub prep_repo ($$) { my $n = git_dir_hash($git_dir) % $self->{nshard}; my $shard = $repo->{shard} = bless { %$self, shard => $n }, ref($self); delete @$shard{qw(lockfh lock_path)}; - local $shard->{xdb}; - my $xdb = $shard->idx_acquire; - my @docids = docids_by_postlist($shard, 'P'.$git_dir); + my $xdb = $XDB_SHARDS_FLAT[$n] // die "BUG: shard[$n] undef"; + $xdb->reopen; + my @docids = docids_by_postlist({ xdb => $xdb }, 'P'.$git_dir); my $docid = shift(@docids) // return get_roots($self, $git); if (@docids) { warn "BUG: $git_dir indexed multiple times, culling\n"; @@ -298,19 +299,19 @@ sub partition_refs ($$$) { sysseek($refs, 0, SEEK_SET) or die "seek: $!"; # for rev-list --stdin my $fh = $git->popen(qw(rev-list --stdin), undef, { 0 => $refs }); close $refs or die "close: $!"; - local $self->{xdb}; - my $xdb = $self->{-opt}->{reindex} ? undef : $self->xdb; - my ($seen, $nchange, $nshard) = (0, 0, $self->{nshard}); - my @shard_in; - for (0..($nshard - 1)) { - open $shard_in[$_], '+>', undef or die "open: $!"; - } + my ($seen, $nchange) = (0, 0); + my @shard_in = map { + $_->reopen; + open my $fh, '+>', undef or die "open: $!"; + $fh; + } @XDB_SHARDS_FLAT; + while (defined(my $cmt = <$fh>)) { chomp $cmt; - if ($xdb && seen($xdb, 'Q'.$cmt)) { + my $n = hex(substr($cmt, 0, 8)) % scalar(@XDB_SHARDS_FLAT); + if (seen($XDB_SHARDS_FLAT[$n], 'Q'.$cmt)) { last if ++$seen > $SEEN_MAX; } else { - my $n = hex(substr($cmt, 0, 8)) % $nshard; say { $shard_in[$n] } $cmt or die "say: $!"; ++$nchange; $seen = 0; @@ -450,6 +451,7 @@ sub scan_git_dirs ($) { local $LIVE_JOBS = $self->{-opt}->{jobs} // PublicInbox::IPC::detect_nproc() // 2; local $LIVE = {}; + local @XDB_SHARDS_FLAT = $self->xdb_shards_flat; for (@{$self->{git_dirs}}) { my $git = PublicInbox::Git->new($_); my $prep_repo = PublicInbox::OnDestroy->new($$, \&prep_repo,