* [PATCH 2/2] compact: support codesearch indices
2023-05-03 3:11 7% [PATCH 0/2] compaction support for codesearch indices Eric Wong
@ 2023-05-03 3:11 5% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-05-03 3:11 UTC (permalink / raw)
To: meta
This is much easier to support than xcpdb since it's 1:1 and
doesn't follow a different sharding scheme than the inboxes and
extindices.
---
lib/PublicInbox/Admin.pm | 21 +++++++++++++++++----
lib/PublicInbox/Xapcmd.pm | 21 ++++++++++++++-------
script/public-inbox-compact | 20 +++++++++++---------
t/cindex.t | 7 +++++++
4 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 96c6652c..72ac9420 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) {
}
sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
sub resolve_inboxdir {
my ($cd, $ver) = @_;
@@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) {
$cfg or die "--all specified, but $cfgfile not readable\n";
@$argv and die "--all specified, but directories specified\n";
}
- my (@old, @ibxs, @eidx);
+ my (@old, @ibxs, @eidx, @cidx);
+ if ($opt->{-cidx_ok}) {
+ require PublicInbox::CodeSearchIdx;
+ @$argv = grep {
+ if (defined(my $d = resolve_cidxdir($_))) {
+ push @cidx, PublicInbox::CodeSearchIdx->new(
+ $d, $opt);
+ undef;
+ } else {
+ 1;
+ }
+ } @$argv;
+ }
if ($opt->{-eidx_ok}) {
require PublicInbox::ExtSearchIdx;
- my $i = -1;
@$argv = grep {
- $i++;
if (defined(my $ei = resolve_eidxdir($_))) {
$ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
push @eidx, $ei;
@@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) {
warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
}
});
+ # TODO: no way to configure cindex in config file, yet
} else { # directories specified on the command-line
my @dirs = @$argv;
push @dirs, '.' if !@dirs && $opt->{-use_cwd};
@@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) {
die "-V$min_ver inboxes not supported by $0\n\t",
join("\n\t", @old), "\n";
}
- $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
+ ($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+ : @ibxs;
}
my @base_mod = ();
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3a4c5622..f3eb8e4e 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,7 +1,7 @@
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::Xapcmd;
-use strict;
+use v5.12;
use PublicInbox::Spawn qw(which popen_rd);
use PublicInbox::Syscall;
use PublicInbox::Admin qw(setup_signals);
@@ -75,7 +75,7 @@ sub commit_changes ($$$$) {
$tmp = undef;
if (!$opt->{-coarse_lock}) {
$opt->{-skip_lock} = 1;
- $im //= $ibx if $ibx->can('eidx_sync');
+ $im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
if ($im->can('count_shards')) { # v2w or eidx
my $pr = $opt->{-progress};
my $n = $im->count_shards;
@@ -93,6 +93,8 @@ sub commit_changes ($$$$) {
local %ENV = (%ENV, %$env) if $env;
if ($ibx->can('eidx_sync')) {
$ibx->eidx_sync($opt);
+ } elsif ($ibx->can('cidx_run')) {
+ $ibx->cidx_run($opt);
} else {
PublicInbox::Admin::index_inbox($ibx, $im, $opt);
}
@@ -117,7 +119,8 @@ sub runnable_or_die ($) {
sub prepare_reindex ($$) {
my ($ibx, $opt) = @_;
- if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx
+ if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+ # no prep needed for ExtSearchIdx nor CodeSearchIdx
} elsif ($ibx->version == 1) {
my $dir = $ibx->search->xdir(1);
my $xdb = $PublicInbox::Search::X{Database}->new($dir);
@@ -186,7 +189,9 @@ sub prepare_run {
my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
my ($old, $misc_ok);
- if ($ibx->can('eidx_sync')) {
+ if ($ibx->can('cidx_run')) {
+ $old = $ibx->xdir(1);
+ } elsif ($ibx->can('eidx_sync')) {
$misc_ok = 1;
$old = $ibx->xdir(1);
} elsif (my $srch = $ibx->search) {
@@ -261,15 +266,17 @@ sub run {
my $cb = \&$task;
PublicInbox::Admin::progress_prepare($opt ||= {});
my $dir;
- for my $fld (qw(inboxdir topdir)) {
+ for my $fld (qw(inboxdir topdir cidx_dir)) {
my $d = $ibx->{$fld} // next;
-d $d or die "$fld=$d does not exist\n";
$dir = $d;
last;
}
- check_compact() if $opt->{compact} && $ibx->search;
+ check_compact() if $opt->{compact} &&
+ ($ibx->can('cidx_run') || $ibx->search);
- if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) {
+ if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+ !$opt->{-coarse_lock}) {
# per-epoch ranges for v2
# v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
$opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 80d0224b..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,12 +1,12 @@
#!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 };
+my $opt = { compact => 1, -coarse_lock => 1,
+ -eidx_ok => 1, -cidx_ok => 1 };
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR>
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
Compact Xapian DBs in an inbox
@@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt);
require PublicInbox::InboxWritable;
require PublicInbox::Xapcmd;
my $cfg = PublicInbox::Config->new;
-my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
-unless ($ibxs) { print STDERR $help; exit 1 }
+my ($ibxs, $eidxs, $cidxs) =
+ PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
for my $ibx (@$ibxs) {
$ibx = PublicInbox::InboxWritable->new($ibx);
PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
}
-for my $eidx (@$eidxs) {
- PublicInbox::Xapcmd::run($eidx, 'compact', $opt);
+for my $ibxish (@$eidxs, @$cidxs) {
+ my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+ PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
}
diff --git a/t/cindex.t b/t/cindex.t
index 8b89ebff..b0d6f204 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
ok(run_script([qw(-cindex -L medium --dangerous -q -d),
"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
+
+SKIP: {
+ have_xapian_compact;
+ ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full');
+ ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium');
+}
+
my $no_metadata_set = sub {
my ($i, $extra, $xdb) = @_;
for my $xdb (@$xdb) {
^ permalink raw reply related [relevance 5%]
* [PATCH 0/2] compaction support for codesearch indices
@ 2023-05-03 3:11 7% Eric Wong
2023-05-03 3:11 5% ` [PATCH 2/2] compact: support " Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-05-03 3:11 UTC (permalink / raw)
To: meta
I'm not 100% sure I like the cindex name, but oh well...
Compaction is important for the space-challenged.
Eric Wong (2):
admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir}
compact: support codesearch indices
lib/PublicInbox/Admin.pm | 57 +++++++++++++++++++------------------
lib/PublicInbox/Xapcmd.pm | 21 +++++++++-----
script/public-inbox-compact | 20 +++++++------
t/cindex.t | 7 +++++
4 files changed, 61 insertions(+), 44 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-05-03 3:11 7% [PATCH 0/2] compaction support for codesearch indices Eric Wong
2023-05-03 3:11 5% ` [PATCH 2/2] compact: support " Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).