user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 1/2] doc: -cindex: correct and unify -g GIT_DIR usage string and man page
@ 2023-11-29 12:02  7% Štěpán Němec
  0 siblings, 0 replies; 3+ results
From: Štěpán Němec @ 2023-11-29 12:02 UTC (permalink / raw)
  To: meta

Fixes: c76a20d75200 ("cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT'")
---
 Documentation/public-inbox-cindex.pod | 2 +-
 script/public-inbox-cindex            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/public-inbox-cindex.pod b/Documentation/public-inbox-cindex.pod
index 0c9c4bdbff03..02c2d80f7ae5 100644
--- a/Documentation/public-inbox-cindex.pod
+++ b/Documentation/public-inbox-cindex.pod
@@ -4,7 +4,7 @@ public-inbox-cindex - create and update search for code repositories
 
 =head1 SYNOPSIS
 
-public-inbox-cindex [OPTIONS] -g GIT_DIR [-g GIT_DIR]
+public-inbox-cindex [OPTIONS] -g GIT_DIR [-g GIT_DIR]...
 
 public-inbox-cindex [OPTIONS] --update
 
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
index a015d7a4f067..d61d0244833f 100755
--- a/script/public-inbox-cindex
+++ b/script/public-inbox-cindex
@@ -4,7 +4,7 @@
 use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-cindex [options] -g GIT_DIR...
+usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]...
 usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT
 
   Create and update search indices for code repos

base-commit: 59e3e12f23994538383f54f44e92e8a0d63070b2
-- 
2.43.0


^ permalink raw reply related	[relevance 7%]

* [PATCH 10/14] cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT'
  2023-11-28 14:56  6% [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
@ 2023-11-28 14:56  5% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2023-11-28 14:56 UTC (permalink / raw)
  To: meta

Accepting @ARGV without switches ends up being ambiguous with
optional parameters for --join and --show.  Requiring users to
specify `--join=' or `--show=' is a bit awkward (as it is with
-clone --objstore= and the like, but that is historical baggage
we need to carry at this point...)
---
 Documentation/public-inbox-cindex.pod |  2 +-
 lib/PublicInbox/CodeSearchIdx.pm      |  5 ++--
 script/public-inbox-cindex            | 38 ++++++++++++++++++---------
 t/cindex-join.t                       |  7 ++++-
 t/cindex.t                            |  9 ++++---
 t/xap_helper.t                        |  4 +--
 6 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/Documentation/public-inbox-cindex.pod b/Documentation/public-inbox-cindex.pod
index 3ff394be..0c9c4bdb 100644
--- a/Documentation/public-inbox-cindex.pod
+++ b/Documentation/public-inbox-cindex.pod
@@ -4,7 +4,7 @@ public-inbox-cindex - create and update search for code repositories
 
 =head1 SYNOPSIS
 
-public-inbox-cindex [OPTIONS] GIT_DIR...
+public-inbox-cindex [OPTIONS] -g GIT_DIR [-g GIT_DIR]
 
 public-inbox-cindex [OPTIONS] --update
 
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index a6cbe0b0..d49e9a8d 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -1131,8 +1131,6 @@ sub init_join_prefork ($) {
 	} split(/,/, join(',', @$subopt));
 	require PublicInbox::CidxXapHelperAux;
 	require PublicInbox::XapClient;
-	my $cfg = $self->{-opt}->{-pi_cfg} // die 'BUG: -pi_cfg unset';
-	$self->{-cfg_f} = $cfg->{-f} = rel2abs_collapsed($cfg->{-f});
 	my @unknown;
 	my $pfx = $JOIN{prefixes} // 'patchid';
 	for (split /\+/, $pfx) {
@@ -1223,7 +1221,8 @@ sub cidx_run { # main entry point
 				$PublicInbox::SearchIdx::BATCH_BYTES;
 	local $MAX_SIZE = $self->{-opt}->{max_size};
 	local $self->{PENDING} = {}; # used by PublicInbox::CidxXapHelperAux
-	local $self->{-cfg_f};
+	my $cfg = $self->{-opt}->{-pi_cfg} // die 'BUG: -pi_cfg unset';
+	$self->{-cfg_f} = $cfg->{-f} = rel2abs_collapsed($cfg->{-f});
 	if (grep { $_ } @{$self->{-opt}}{qw(prune join)}) {
 		require File::Temp;
 		$TMPDIR = File::Temp->newdir('cidx-all-git-XXXX', TMPDIR => 1);
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
index 97890c1b..a015d7a4 100755
--- a/script/public-inbox-cindex
+++ b/script/public-inbox-cindex
@@ -4,8 +4,8 @@
 use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-cindex [options] GIT_DIR...
-usage: public-inbox-cindex [options] --project-list=FILE PROJECT_ROOT
+usage: public-inbox-cindex [options] -g GIT_DIR...
+usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT
 
   Create and update search indices for code repos
 
@@ -29,7 +29,8 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
 		indexlevel|index-level|L=s join:s@
 		batch_size|batch-size=s max_size|max-size=s
 		include|I=s@ only=s@ all show:s@
-		project-list=s exclude=s@
+		project-list=s exclude=s@ project-root|r=s
+		git-dir|g=s@
 		sort-parallel=s sort-compress-program=s sort-buffer-size=s
 		d=s update|u scan! prune dry-run|n C=s@ help|h))
 	or die $help;
@@ -50,23 +51,36 @@ PublicInbox::Admin::progress_prepare($opt);
 my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
 %ENV = (%ENV, %$env) if $env;
 
-require PublicInbox::CodeSearchIdx; # unstable internal API
 my @git_dirs;
-if (defined(my $pl = $opt->{'project-list'})) {
-	my $pfx = shift @ARGV // die <<EOM;
+require PublicInbox::CodeSearchIdx; # unstable internal API
+if (@ARGV) {
+	my @g = map { "-g $_" } @ARGV;
+	die <<EOM;
+Specify git directories with `-g' (or --git-dir=): @g
+Or use --project-list=... and --project-root=...
+EOM
+} elsif (defined(my $pl = $opt->{'project-list'})) {
+	my $pfx = $opt->{'project-root'} // die <<EOM;
 PROJECTS_ROOT required for --project-list
 EOM
-	@ARGV and die <<EOM;
---project-list does not accept additional directories
-(@ARGV)
-beyond `$pfx'
+	$opt->{'git-dir'} and die <<EOM;
+--project-list does not accept additional --git-dir directories
+(@{$opt->{'git-dir'}})
 EOM
 	open my $fh, '<', $pl or die "open($pl): $!\n";
 	chomp(@git_dirs = <$fh>);
-	$_ = PublicInbox::Admin::resolve_git_dir("$pfx/$_") for @git_dirs;
+	$pfx .= '/';
+	$pfx =~ tr!/!/!s;
+	substr($_, 0, 0, $pfx) for @git_dirs;
+} elsif (my $gd = $opt->{'git-dir'}) {
+	@git_dirs = @$gd;
+} elsif (grep defined, @$opt{qw(show update prune)}) {
 } else {
-	@git_dirs = map { PublicInbox::Admin::resolve_git_dir($_) } @ARGV;
+	warn "No --git-dir= nor --project-list= + --project-root= specified\n";
+	die $help;
 }
+
+$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs;
 if (defined $cidx_dir) { # external index
 	die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/;
 	my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt);
diff --git a/t/cindex-join.t b/t/cindex-join.t
index ac90cd64..c2e85332 100644
--- a/t/cindex-join.t
+++ b/t/cindex-join.t
@@ -70,7 +70,7 @@ my $cidxdir = "$tmpdir/cidx";
 my $rdr = { 1 => \my $cout, 2 => \my $cerr };
 ok run_script([qw(-cindex -v --all --show=join_data),
 		'--join=aggressive,dt:..2022-12-01',
-		'-d', $cidxdir, values %code ],
+		'-d', $cidxdir, map { ('-g', $_) } values %code ],
 		$env, $rdr), 'initial join inboxes w/ coderepos';
 my $out = PublicInbox::Config->json->decode($cout);
 is($out->{join_data}->{dt}->[0], '19700101'.'000000',
@@ -79,4 +79,9 @@ is($out->{join_data}->{dt}->[0], '19700101'.'000000',
 ok run_script([qw(-cindex -v --all -u --join --show),
 		'-d', $cidxdir], $env, $rdr), 'incremental --join';
 
+ok run_script([qw(-cindex -v --no-scan --show),
+		'-d', $cidxdir], $env, $rdr), 'show';
+$out = PublicInbox::Config->json->decode($cout);
+is ref($out->{join_data}), 'HASH', 'got hash join data';
+is $cerr, '', 'no warnings or errors in stderr w/ --show';
 done_testing;
diff --git a/t/cindex.t b/t/cindex.t
index 29d88ca8..0193cf18 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -33,7 +33,7 @@ git gc -q
 EOM
 }; # /create_coderepo
 
-ok(run_script([qw(-cindex --dangerous -q), "$tmp/wt0"]), 'cindex internal');
+ok(run_script([qw(-cindex --dangerous -q -g), "$tmp/wt0"]), 'cindex internal');
 {
 	my $exists = -e "$tmp/wt0/.git/public-inbox-cindex/cidx.lock";
 	my @st = stat(_);
@@ -67,13 +67,14 @@ git gc -q
 EOM
 }; # /create_coderepo
 
-ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp, "$tmp/wt0"]),
+ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext",
+		'-g', $zp, '-g', "$tmp/wt0" ]),
 	'cindex external');
 ok(-e "$tmp/ext/cidx.lock", 'external dir created');
 ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
 
 ok(run_script([qw(-cindex -L medium --dangerous -q -d),
-	"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
+	"$tmp/med", '-g', $zp, '-g', "$tmp/wt0"]), 'cindex external medium');
 
 
 SKIP: {
@@ -228,7 +229,7 @@ SKIP: { # --prune
 
 File::Path::remove_tree("$tmp/ext");
 mkdir("$tmp/ext", 0707);
-ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp]),
+ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", '-g', $zp]),
 	'external on existing dir');
 {
 	my @st = stat("$tmp/ext/cidx.lock");
diff --git a/t/xap_helper.t b/t/xap_helper.t
index ee25b2dc..37679ae9 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -20,10 +20,10 @@ my $crepo = create_coderepo 'for-cindex', sub {
 	xsys_e([qw(git init -q --bare)]);
 	xsys_e([qw(git fast-import --quiet)], undef, { 0 => $fi_fh });
 	chdir($dh);
-	run_script([qw(-cindex --dangerous -L medium --no-fsync -q -j1), $d])
+	run_script([qw(-cindex --dangerous -L medium --no-fsync -q -j1), '-g', $d])
 		or xbail '-cindex internal';
 	run_script([qw(-cindex --dangerous -L medium --no-fsync -q -j3 -d),
-		"$d/cidx-ext", $d]) or xbail '-cindex "external"';
+		"$d/cidx-ext", '-g', $d]) or xbail '-cindex "external"';
 };
 $dh = $fi_fh = undef;
 

^ permalink raw reply related	[relevance 5%]

* [PATCH 00/14] IT'S ALIVE! www loads cindex join data
@ 2023-11-28 14:56  6% Eric Wong
  2023-11-28 14:56  5% ` [PATCH 10/14] cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT' Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2023-11-28 14:56 UTC (permalink / raw)
  To: meta

8/14 is the killer one which actually makes the cindex data
useful for WWW and powering solver.  Keep in mind, I've had
to cap solver at 3 coderepos as a temporary measure since
there's a lot of "weak" joins we should be weeding out.

More documentation coming, but cindex joins are very much
a fuzzy thing which will have to deal with false positives
and such.  So figuring out the scoring for sanity would
make sense...

Fortunately, --join=aggressive,reset only takes ~1 hour for me,
so probably 1/3 that on modern hardware.  Incremental
`-cindex --join' (no suboptions) usually takes <5 minutes if
done frequently.

New performance problem: solver could definitely be smarter
about dealing with common roots/groups.  For the longest time,
I've only had 1 coderepo per inbox; having hundreds is wacky.

Actual searching against the cindex isn't done, yet, but
that's kinda straightforward.

Eric Wong (14):
  test_common: create_*: detect changes all parameters
  t/cindex*: require SCM_RIGHTS for these tests
  codesearch: eliminate redundant substitutions
  solver: schedule cleanup after synchronous git->check
  xap_helper.h: move cindex endpoints to separate file
  xap_helper: implement mset endpoint for WWW, IMAP, etc...
  hval: use File::Spec to make relative paths for href
  www: load and use cindex join data
  git: speed up ->git_path for non-worktrees
  cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT'
  git: speed up Git->new by 5% or so
  admin: resolve_git_dir respects symlinks
  cindex: extra quit checks
  www: start working on a repo listing

 Documentation/public-inbox-cindex.pod |   2 +-
 MANIFEST                              |   3 +
 Makefile.PL                           |   8 +-
 lib/PublicInbox/Admin.pm              |  25 +-
 lib/PublicInbox/CodeSearch.pm         | 162 ++++++++++-
 lib/PublicInbox/CodeSearchIdx.pm      |  52 ++--
 lib/PublicInbox/Config.pm             |  39 ++-
 lib/PublicInbox/Git.pm                |  27 +-
 lib/PublicInbox/Hval.pm               |  12 +-
 lib/PublicInbox/RepoList.pm           |  39 +++
 lib/PublicInbox/Search.pm             |  42 +++
 lib/PublicInbox/SearchIdx.pm          |  10 +-
 lib/PublicInbox/SolverGit.pm          |   9 +-
 lib/PublicInbox/TestCommon.pm         |  35 ++-
 lib/PublicInbox/View.pm               |   7 +-
 lib/PublicInbox/WWW.pm                |   1 +
 lib/PublicInbox/WwwCoderepo.pm        |  44 ++-
 lib/PublicInbox/WwwStream.pm          |  11 +-
 lib/PublicInbox/WwwText.pm            |  19 +-
 lib/PublicInbox/XapHelper.pm          |  51 ++--
 lib/PublicInbox/XapHelperCxx.pm       |  14 +-
 lib/PublicInbox/xap_helper.h          | 379 +++++++-------------------
 lib/PublicInbox/xh_cidx.h             | 244 +++++++++++++++++
 lib/PublicInbox/xh_mset.h             |  96 +++++++
 script/public-inbox-cindex            |  38 ++-
 t/admin.t                             |  12 +
 t/cindex-join.t                       |   9 +-
 t/cindex.t                            |  91 ++++++-
 t/xap_helper.t                        |  53 +++-
 xt/solver.t                           |   3 +-
 30 files changed, 1111 insertions(+), 426 deletions(-)
 create mode 100644 lib/PublicInbox/RepoList.pm
 create mode 100644 lib/PublicInbox/xh_cidx.h
 create mode 100644 lib/PublicInbox/xh_mset.h

^ permalink raw reply	[relevance 6%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-28 14:56  6% [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
2023-11-28 14:56  5% ` [PATCH 10/14] cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT' Eric Wong
2023-11-29 12:02  7% [PATCH 1/2] doc: -cindex: correct and unify -g GIT_DIR usage string and man page Štěpán Němec

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).