user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 01/14] test_common: create_*: detect changes all parameters
  2023-11-28 14:56  5% [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
@ 2023-11-28 14:56  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-28 14:56 UTC (permalink / raw)
  To: meta

Data::Dumper+B::Deparse seems fast enough to generate cache keys
with, so this makes updating and developing tests easier (as
opposed to forcing the developer to change the identifier).  The
main downside is we'll have to deal with cache expiration, but
"make clean" seems overly aggressive already (it keeps blowing
away the clones made by t/cindex-join.t :<)
---
 lib/PublicInbox/TestCommon.pm | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 361a2356..8e7eb950 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -793,6 +793,19 @@ our %COMMIT_ENV = (
 	GIT_COMMITTER_EMAIL => 'c@example.com',
 );
 
+# for memoizing based on coderefs and various create_* params
+sub my_sum {
+	require PublicInbox::SHA;
+	require Data::Dumper;
+	my $d = Data::Dumper->new(\@_);
+	$d->$_(1) for qw(Deparse Sortkeys Terse);
+	my @l = split /\n/s, $d->Dump;
+	@l = grep !/\$\^H\{.+?[A-Z]+\(0x[0-9a-f]+\)/, @l; # autodie addresses
+	my @addr = grep /[A-Za-z]+\(0x[0-9a-f]+\)/, @l;
+	xbail 'undumpable addresses: ', \@addr if @addr;
+	substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
+}
+
 sub create_coderepo ($$;@) {
 	my $ident = shift;
 	my $cb = pop;
@@ -801,15 +814,12 @@ sub create_coderepo ($$;@) {
 	require PublicInbox::Import;
 	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
 	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
-	my $dir = "t/data-gen/$base.$ident-$db";
-	my $new = !-d $dir;
-	if ($new && !CORE::mkdir($dir)) {
-		my $err = $!;
-		-d $dir or xbail "mkdir($dir): $err";
-	}
+	my $tmpdir = delete $opt{tmpdir};
+	my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+	require File::Path;
+	my $new = File::Path::make_path($dir);
 	my $lk = PublicInbox::Lock->new("$dir/creat.lock");
 	my $scope = $lk->lock_for_scope;
-	my $tmpdir = delete $opt{tmpdir};
 	if (!-f "$dir/creat.stamp") {
 		opendir(my $dfh, '.');
 		chdir($dir);
@@ -832,12 +842,10 @@ sub create_inbox ($;@) {
 	require PublicInbox::Import;
 	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
 	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
-	my $dir = "t/data-gen/$base.$ident-$db";
-	my $new = !-d $dir;
-	if ($new && !mkdir($dir)) {
-		my $err = $!;
-		-d $dir or xbail "mkdir($dir): $err";
-	}
+	my $tmpdir = delete $opt{tmpdir};
+	my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+	require File::Path;
+	my $new = File::Path::make_path($dir);
 	my $lk = PublicInbox::Lock->new("$dir/creat.lock");
 	$opt{inboxdir} = File::Spec->rel2abs($dir);
 	$opt{name} //= $ident;
@@ -846,7 +854,6 @@ sub create_inbox ($;@) {
 	$pre_cb->($dir) if $pre_cb && $new;
 	$opt{-no_fsync} = 1;
 	my $no_gc = delete $opt{-no_gc};
-	my $tmpdir = delete $opt{tmpdir};
 	my $addr = $opt{address} // [];
 	$opt{-primary_address} //= $addr->[0] // "$ident\@example.com";
 	my $parallel = delete($opt{importer_parallel}) // 0;

^ permalink raw reply related	[relevance 7%]

* [PATCH 00/14] IT'S ALIVE! www loads cindex join data
@ 2023-11-28 14:56  5% Eric Wong
  2023-11-28 14:56  7% ` [PATCH 01/14] test_common: create_*: detect changes all parameters Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-28 14:56 UTC (permalink / raw)
  To: meta

8/14 is the killer one which actually makes the cindex data
useful for WWW and powering solver.  Keep in mind, I've had
to cap solver at 3 coderepos as a temporary measure since
there are a lot of "weak" joins we should be weeding out.

More documentation is coming, but cindex joins are very much
a fuzzy thing which will have to deal with false positives
and such.  So figuring out a sane scoring scheme would
make sense...

Fortunately, --join=aggressive,reset only takes ~1 hour for me,
so probably 1/3 of that on modern hardware.  Incremental
`-cindex --join' (no suboptions) usually takes <5 minutes if
done frequently.

New performance problem: solver could definitely be smarter
about dealing with common roots/groups.  For the longest time,
I've only had 1 coderepo per inbox; having hundreds is wacky.

Actual searching against the cindex isn't done yet, but
that's kinda straightforward.

Eric Wong (14):
  test_common: create_*: detect changes all parameters
  t/cindex*: require SCM_RIGHTS for these tests
  codesearch: eliminate redundant substitutions
  solver: schedule cleanup after synchronous git->check
  xap_helper.h: move cindex endpoints to separate file
  xap_helper: implement mset endpoint for WWW, IMAP, etc...
  hval: use File::Spec to make relative paths for href
  www: load and use cindex join data
  git: speed up ->git_path for non-worktrees
  cindex: require `-g GIT_DIR' or `-r PROJECT_ROOT'
  git: speed up Git->new by 5% or so
  admin: resolve_git_dir respects symlinks
  cindex: extra quit checks
  www: start working on a repo listing

 Documentation/public-inbox-cindex.pod |   2 +-
 MANIFEST                              |   3 +
 Makefile.PL                           |   8 +-
 lib/PublicInbox/Admin.pm              |  25 +-
 lib/PublicInbox/CodeSearch.pm         | 162 ++++++++++-
 lib/PublicInbox/CodeSearchIdx.pm      |  52 ++--
 lib/PublicInbox/Config.pm             |  39 ++-
 lib/PublicInbox/Git.pm                |  27 +-
 lib/PublicInbox/Hval.pm               |  12 +-
 lib/PublicInbox/RepoList.pm           |  39 +++
 lib/PublicInbox/Search.pm             |  42 +++
 lib/PublicInbox/SearchIdx.pm          |  10 +-
 lib/PublicInbox/SolverGit.pm          |   9 +-
 lib/PublicInbox/TestCommon.pm         |  35 ++-
 lib/PublicInbox/View.pm               |   7 +-
 lib/PublicInbox/WWW.pm                |   1 +
 lib/PublicInbox/WwwCoderepo.pm        |  44 ++-
 lib/PublicInbox/WwwStream.pm          |  11 +-
 lib/PublicInbox/WwwText.pm            |  19 +-
 lib/PublicInbox/XapHelper.pm          |  51 ++--
 lib/PublicInbox/XapHelperCxx.pm       |  14 +-
 lib/PublicInbox/xap_helper.h          | 379 +++++++-------------------
 lib/PublicInbox/xh_cidx.h             | 244 +++++++++++++++++
 lib/PublicInbox/xh_mset.h             |  96 +++++++
 script/public-inbox-cindex            |  38 ++-
 t/admin.t                             |  12 +
 t/cindex-join.t                       |   9 +-
 t/cindex.t                            |  91 ++++++-
 t/xap_helper.t                        |  53 +++-
 xt/solver.t                           |   3 +-
 30 files changed, 1111 insertions(+), 426 deletions(-)
 create mode 100644 lib/PublicInbox/RepoList.pm
 create mode 100644 lib/PublicInbox/xh_cidx.h
 create mode 100644 lib/PublicInbox/xh_mset.h

^ permalink raw reply	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-28 14:56  5% [PATCH 00/14] IT'S ALIVE! www loads cindex join data Eric Wong
2023-11-28 14:56  7% ` [PATCH 01/14] test_common: create_*: detect changes all parameters Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).