user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/2] compact: support codesearch indices
  2023-05-03  3:11  7% [PATCH 0/2] compaction support for codesearch indices Eric Wong
@ 2023-05-03  3:11  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-05-03  3:11 UTC (permalink / raw)
  To: meta

This is much easier to support than xcpdb since it's 1:1 and
doesn't follow a different sharding scheme than the inboxes and
extindices.
---
 lib/PublicInbox/Admin.pm    | 21 +++++++++++++++++----
 lib/PublicInbox/Xapcmd.pm   | 21 ++++++++++++++-------
 script/public-inbox-compact | 20 +++++++++++---------
 t/cindex.t                  |  7 +++++++
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 96c6652c..72ac9420 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) {
 }
 
 sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
 
 sub resolve_inboxdir {
 	my ($cd, $ver) = @_;
@@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) {
 		$cfg or die "--all specified, but $cfgfile not readable\n";
 		@$argv and die "--all specified, but directories specified\n";
 	}
-	my (@old, @ibxs, @eidx);
+	my (@old, @ibxs, @eidx, @cidx);
+	if ($opt->{-cidx_ok}) {
+		require PublicInbox::CodeSearchIdx;
+		@$argv = grep {
+			if (defined(my $d = resolve_cidxdir($_))) {
+				push @cidx, PublicInbox::CodeSearchIdx->new(
+							$d, $opt);
+				undef;
+			} else {
+				1;
+			}
+		} @$argv;
+	}
 	if ($opt->{-eidx_ok}) {
 		require PublicInbox::ExtSearchIdx;
-		my $i = -1;
 		@$argv = grep {
-			$i++;
 			if (defined(my $ei = resolve_eidxdir($_))) {
 				$ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
 				push @eidx, $ei;
@@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) {
 				warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
 			}
 		});
+		# TODO: no way to configure cindex in config file, yet
 	} else { # directories specified on the command-line
 		my @dirs = @$argv;
 		push @dirs, '.' if !@dirs && $opt->{-use_cwd};
@@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) {
 		die "-V$min_ver inboxes not supported by $0\n\t",
 		    join("\n\t", @old), "\n";
 	}
-	$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
+	($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+						: @ibxs;
 }
 
 my @base_mod = ();
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3a4c5622..f3eb8e4e 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Xapcmd;
-use strict;
+use v5.12;
 use PublicInbox::Spawn qw(which popen_rd);
 use PublicInbox::Syscall;
 use PublicInbox::Admin qw(setup_signals);
@@ -75,7 +75,7 @@ sub commit_changes ($$$$) {
 	$tmp = undef;
 	if (!$opt->{-coarse_lock}) {
 		$opt->{-skip_lock} = 1;
-		$im //= $ibx if $ibx->can('eidx_sync');
+		$im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
 		if ($im->can('count_shards')) { # v2w or eidx
 			my $pr = $opt->{-progress};
 			my $n = $im->count_shards;
@@ -93,6 +93,8 @@ sub commit_changes ($$$$) {
 		local %ENV = (%ENV, %$env) if $env;
 		if ($ibx->can('eidx_sync')) {
 			$ibx->eidx_sync($opt);
+		} elsif ($ibx->can('cidx_run')) {
+			$ibx->cidx_run($opt);
 		} else {
 			PublicInbox::Admin::index_inbox($ibx, $im, $opt);
 		}
@@ -117,7 +119,8 @@ sub runnable_or_die ($) {
 
 sub prepare_reindex ($$) {
 	my ($ibx, $opt) = @_;
-	if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx
+	if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+		# no prep needed for ExtSearchIdx nor CodeSearchIdx
 	} elsif ($ibx->version == 1) {
 		my $dir = $ibx->search->xdir(1);
 		my $xdb = $PublicInbox::Search::X{Database}->new($dir);
@@ -186,7 +189,9 @@ sub prepare_run {
 	my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
 	my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
 	my ($old, $misc_ok);
-	if ($ibx->can('eidx_sync')) {
+	if ($ibx->can('cidx_run')) {
+		$old = $ibx->xdir(1);
+	} elsif ($ibx->can('eidx_sync')) {
 		$misc_ok = 1;
 		$old = $ibx->xdir(1);
 	} elsif (my $srch = $ibx->search) {
@@ -261,15 +266,17 @@ sub run {
 	my $cb = \&$task;
 	PublicInbox::Admin::progress_prepare($opt ||= {});
 	my $dir;
-	for my $fld (qw(inboxdir topdir)) {
+	for my $fld (qw(inboxdir topdir cidx_dir)) {
 		my $d = $ibx->{$fld} // next;
 		-d $d or die "$fld=$d does not exist\n";
 		$dir = $d;
 		last;
 	}
-	check_compact() if $opt->{compact} && $ibx->search;
+	check_compact() if $opt->{compact} &&
+				($ibx->can('cidx_run') || $ibx->search);
 
-	if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) {
+	if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+					!$opt->{-coarse_lock}) {
 		# per-epoch ranges for v2
 		# v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
 		$opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 80d0224b..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,12 +1,12 @@
 #!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 };
+my $opt = { compact => 1, -coarse_lock => 1,
+	-eidx_ok => 1, -cidx_ok => 1 };
 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR>
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
 
   Compact Xapian DBs in an inbox
 
@@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt);
 require PublicInbox::InboxWritable;
 require PublicInbox::Xapcmd;
 my $cfg = PublicInbox::Config->new;
-my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
-unless ($ibxs) { print STDERR $help; exit 1 }
+my ($ibxs, $eidxs, $cidxs) =
+	PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
 for my $ibx (@$ibxs) {
 	$ibx = PublicInbox::InboxWritable->new($ibx);
 	PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
 }
-for my $eidx (@$eidxs) {
-	PublicInbox::Xapcmd::run($eidx, 'compact', $opt);
+for my $ibxish (@$eidxs, @$cidxs) {
+	my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+	PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
 }
diff --git a/t/cindex.t b/t/cindex.t
index 8b89ebff..b0d6f204 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
 ok(run_script([qw(-cindex -L medium --dangerous -q -d),
 	"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
 
+
+SKIP: {
+	have_xapian_compact;
+	ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full');
+	ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium');
+}
+
 my $no_metadata_set = sub {
 	my ($i, $extra, $xdb) = @_;
 	for my $xdb (@$xdb) {

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/2] compaction support for codesearch indices
@ 2023-05-03  3:11  7% Eric Wong
  2023-05-03  3:11  5% ` [PATCH 2/2] compact: support " Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-05-03  3:11 UTC (permalink / raw)
  To: meta

I'm not 100% sure I like the cindex name, but oh well...
Compaction is important for the space-challenged.

Eric Wong (2):
  admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir}
  compact: support codesearch indices

 lib/PublicInbox/Admin.pm    | 57 +++++++++++++++++++------------------
 lib/PublicInbox/Xapcmd.pm   | 21 +++++++++-----
 script/public-inbox-compact | 20 +++++++------
 t/cindex.t                  |  7 +++++
 4 files changed, 61 insertions(+), 44 deletions(-)

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-05-03  3:11  7% [PATCH 0/2] compaction support for codesearch indices Eric Wong
2023-05-03  3:11  5% ` [PATCH 2/2] compact: support " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).