user/dev discussion of public-inbox itself
 help / color / mirror / Atom feed
* [PATCH] index: v2: parallel by default
@ 2020-06-07 20:02 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2020-06-07 20:02 UTC (permalink / raw)
  To: meta

InboxWritable should only set $v2w->{parallel} when the $parallel
flag is explicitly defined (as 0 or 1); when it is undefined, the
value V2Writable already chose is left alone.  We want indexing a
new inbox to utilize SMP, just like --reindex.

-index once again allows -j0/--jobs=0 to force single-process
use, and the tests now pass -j0 to ensure that mode keeps working
and to maintain performance on small systems.

Fixes: 61a2fff5b34a3e32 ("admin: move index_inbox over")
---
 lib/PublicInbox/InboxWritable.pm |  2 +-
 script/public-inbox-index        |  2 +-
 t/cgi.t                          |  2 +-
 t/convert-compact.t              |  4 ++--
 t/indexlevels-mirror.t           | 14 +++++++-------
 t/mda_filter_rubylang.t          |  2 +-
 t/multi-mid.t                    |  2 +-
 t/nntpd.t                        |  2 +-
 t/replace.t                      |  4 +++-
 t/v2mirror.t                     | 12 ++++++------
 t/v2reindex.t                    |  2 +-
 t/xcpdb-reshard.t                |  2 +-
 12 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 3558403bca6..c54be046f95 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -57,7 +57,7 @@ sub importer {
 		die "v2 not supported: $@\n" if $@;
 		my $opt = $self->{-creat_opt};
 		my $v2w = PublicInbox::V2Writable->new($self, $opt);
-		$v2w->{parallel} = $parallel;
+		$v2w->{parallel} = $parallel if defined $parallel;
 		$v2w;
 	} elsif ($v == 1) {
 		my @arg = (undef, undef, undef, $self);
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 0018668e6bd..6217fb86c4e 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -18,7 +18,7 @@ my $opt = { quiet => -1, compact => 0, maxsize => undef };
 GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune
 		indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
 	or die "bad command-line args\n$usage";
-die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0;
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
 
 if ($opt->{compact}) {
 	require PublicInbox::Xapcmd;
diff --git a/t/cgi.t b/t/cgi.t
index d1f971504ef..366d6594bb0 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -39,7 +39,7 @@ use_ok 'PublicInbox::InboxWritable';
 use_ok 'PublicInbox::Config';
 my $cfg = PublicInbox::Config->new($pi_config);
 my $ibx = $cfg->lookup_name('test');
-my $im = PublicInbox::InboxWritable->new($ibx)->importer;
+my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
 
 {
 	local $ENV{HOME} = $home;
diff --git a/t/convert-compact.t b/t/convert-compact.t
index 80efc19c798..26a8fca025f 100644
--- a/t/convert-compact.t
+++ b/t/convert-compact.t
@@ -116,12 +116,12 @@ is(scalar @$msgs, 1, 'only one message in history');
 
 $ibx = undef;
 $err = '';
-$cmd = [ qw(-index --reindex -c), "$tmpdir/v2" ];
+$cmd = [ qw(-index -j0 --reindex -c), "$tmpdir/v2" ];
 ok(run_script($cmd, undef, $rdr), '--reindex -c');
 like($err, qr/xapian-compact/, 'xapian-compact ran (-c)');
 
 $rdr->{2} = \(my $err2 = '');
-$cmd = [ qw(-index --reindex -cc), "$tmpdir/v2" ];
+$cmd = [ qw(-index -j0 --reindex -cc), "$tmpdir/v2" ];
 ok(run_script($cmd, undef, $rdr), '--reindex -c -c');
 like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)');
 ok(($err2 =~ tr/\n/\n/) > ($err =~ tr/\n/\n/), '-compacted twice');
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 704f7e1174e..44313e40118 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -33,13 +33,13 @@ sub import_index_incremental {
 		-primary_address => 'test@example.com',
 		indexlevel => $level,
 	});
-	my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer;
+	my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0);
 	$mime->header_set('Message-ID', '<m@1>');
 	ok($im->add($mime), 'first message added');
 	$im->done;
 
 	# index master (required for v1)
-	ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]),
+	ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
 		'index master OK');
 	my $ro_master = PublicInbox::Inbox->new({
 		inboxdir => $ibx->{inboxdir},
@@ -68,7 +68,7 @@ sub import_index_incremental {
 	ok(run_script(\@cmd), "v$v init OK");
 
 	# index mirror
-	ok(run_script(['-index', $mirror]), "v$v index mirror OK");
+	ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror OK");
 
 	# read-only access
 	my $ro_mirror = PublicInbox::Inbox->new({
@@ -86,14 +86,14 @@ sub import_index_incremental {
 
 	# mirror updates
 	is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
-	ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
+	ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
 	($nr, $msgs) = $ro_mirror->recent;
 	is($nr, 2, '2nd message seen in mirror');
 	is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
 		['m@1','m@2'], 'got both messages in mirror');
 
 	# incremental index master (required for v1)
-	ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]),
+	ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
 		'index master OK');
 	($nr, $msgs) = $ro_master->recent;
 	is($nr, 2, '2nd message seen in master');
@@ -123,7 +123,7 @@ sub import_index_incremental {
 
 	# sync the mirror
 	is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
-	ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
+	ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
 	($nr, $msgs) = $ro_mirror->recent;
 	is($nr, 1, '2nd message gone from mirror');
 	is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
@@ -148,7 +148,7 @@ sub import_index_incremental {
 	}
 	$im->done;
 	is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
-	ok(run_script(['-index', '--reindex', $mirror]),
+	ok(run_script([qw(-index -j0 --reindex), $mirror]),
 		"v$v index --reindex mirror OK");
 	@ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)};
 	@rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
index 483fcb85549..5b6bf28b862 100644
--- a/t/mda_filter_rubylang.t
+++ b/t/mda_filter_rubylang.t
@@ -25,7 +25,7 @@ for my $v (qw(V1 V2)) {
 	my $cmd = [ '-init', "-$v", $v, $inboxdir,
 		"http://example.com/$v", $addr ];
 	ok(run_script($cmd), 'public-inbox-init');
-	ok(run_script(['-index', $inboxdir]), 'public-inbox-index');
+	ok(run_script([qw(-index -j0), $inboxdir]), 'public-inbox-index');
 	is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
 	is(xsys(@cfg, "$cfgpfx.altid",
 		'serial:alerts:file=msgmap.sqlite3'), 0);
diff --git a/t/multi-mid.t b/t/multi-mid.t
index 91c8597e2e6..41d556b9ceb 100644
--- a/t/multi-mid.t
+++ b/t/multi-mid.t
@@ -70,7 +70,7 @@ for my $order ([$bad, $good], [$good, $bad]) {
 	$cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone",
 		'http://example.com/v2c', 'v2c@example.com' ];
 	ok(run_script($cmd, $env), 'init clone');
-	$cmd = [ '-index', "$tmpdir/v2-clone" ];
+	$cmd = [ qw(-index -j0), "$tmpdir/v2-clone" ];
 	sleep($delay) if $delay;
 	ok(run_script($cmd, $env), 'index the clone');
 	$ibx->cleanup;
diff --git a/t/nntpd.t b/t/nntpd.t
index 69f72ce1216..eee67ea65bb 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -53,7 +53,7 @@ $ibx = PublicInbox::Inbox->new($ibx);
 	my $len;
 
 	$ibx = PublicInbox::InboxWritable->new($ibx);
-	my $im = $ibx->importer;
+	my $im = $ibx->importer(0);
 
 	# ensure successful message delivery
 	{
diff --git a/t/replace.t b/t/replace.t
index cef4e7aa6cb..c4dcb89dec1 100644
--- a/t/replace.t
+++ b/t/replace.t
@@ -33,7 +33,7 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000
 
 Top secret info about my house in Malibu...
 EOF
-	my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer;
+	my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0);
 	# fake a bunch of epochs
 	$im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes};
 
@@ -145,10 +145,12 @@ EOF
 		is($smsg->{subject}, 'redacted', 'after subject');
 		is($smsg->{mid}, 'replace@example.com', 'before MID');
 	}
+	# $git->cleanup; # needed if $im->{parallel};
 	@warn = ();
 	is($im->replace($orig, $repl), undef, 'no-op replace returns undef');
 	is($im->purge($orig), undef, 'no-op purge returns undef');
 	is_deeply(\@warn, [], 'no warnings on noop');
+	# $im->done; # needed if $im->{parallel}
 }
 
 sub pad_msgs {
diff --git a/t/v2mirror.t b/t/v2mirror.t
index d588808d645..fc03c3d7c3d 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -84,7 +84,7 @@ foreach my $i (0..$epoch_max) {
 	'alt@example.com');
 ok(run_script(\@cmd), 'initialized public-inbox -V2');
 
-ok(run_script(['-index', "$tmpdir/m"]), 'indexed');
+ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'indexed');
 
 my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' };
 $mibx = PublicInbox::Inbox->new($mibx);
@@ -111,7 +111,7 @@ $fetch_each_epoch->();
 
 my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
 is(scalar($mset->items), 0, 'new message not found in mirror, yet');
-ok(run_script(["-index", "$tmpdir/m"]), 'index updated');
+ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated');
 is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax');
 $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
 is(scalar($mset->items), 1, 'found message in mirror');
@@ -142,7 +142,7 @@ $fetch_each_epoch->();
 	$ibx->cleanup;
 	PublicInbox::InboxWritable::cleanup($mibx);
 	$v2w->done;
-	my $cmd = [ '-index', '--prune', "$tmpdir/m" ];
+	my $cmd = [ qw(-index --prune -j0), "$tmpdir/m" ];
 	my ($out, $err) = ('', '');
 	my $opt = { 1 => \$out, 2 => \$err };
 	ok(run_script($cmd, undef, $opt), '-index --prune');
@@ -178,7 +178,7 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror');
 	$fetch_each_epoch->();
 	PublicInbox::InboxWritable::cleanup($mibx);
 
-	my $cmd = [ "-index", "$tmpdir/m" ];
+	my $cmd = [ qw(-index -j0), "$tmpdir/m" ];
 	my ($out, $err) = ('', '');
 	my $opt = { 1 => \$out, 2 => \$err };
 	ok(run_script($cmd, undef, $opt), 'index ran');
@@ -196,7 +196,7 @@ if ('max size') {
 	$ibx->cleanup;
 	$fetch_each_epoch->();
 	PublicInbox::InboxWritable::cleanup($mibx);
-	my $cmd = ['-index', "$tmpdir/m", "--max-size=$max" ];
+	my $cmd = [qw(-index -j0), "$tmpdir/m", "--max-size=$max" ];
 	my $opt = { 2 => \(my $err) };
 	ok(run_script($cmd, undef, $opt), 'indexed with --max-size');
 	like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
@@ -211,7 +211,7 @@ if ('max size') {
 EOF
 		close $fh or die;
 	}
-	$cmd = ['-index', "$tmpdir/m", "--reindex" ];
+	$cmd = [ qw(-index -j0 --reindex), "$tmpdir/m" ];
 	ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file');
 	like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
 	$mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
diff --git a/t/v2reindex.t b/t/v2reindex.t
index b99106d0fe7..77deffb4ba9 100644
--- a/t/v2reindex.t
+++ b/t/v2reindex.t
@@ -38,7 +38,7 @@ my ($mark1, $mark2, $mark3, $mark4);
 	my %config = %$ibx_config;
 	my $ibx = PublicInbox::Inbox->new(\%config);
 	my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1});
-	my $im0 = $im->importer();
+	my $im0 = $im->importer(0);
 	foreach my $i (1..10) {
 		$mime->header_set('Message-Id', "<$i\@example.com>");
 		ok($im->add($mime), "message $i added");
diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t
index 70012cc6f49..1835fa62140 100644
--- a/t/xcpdb-reshard.t
+++ b/t/xcpdb-reshard.t
@@ -30,7 +30,7 @@ my $ibx = PublicInbox::Inbox->new({
 my @xcpdb = qw(-xcpdb -q);
 my $nproc = 8;
 my $ndoc = 13;
-my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer(1);
+my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer;
 for my $i (1..$ndoc) {
 	$mime->header_set('Message-ID', "<m$i\@example.com>");
 	ok($im->add($mime), "message $i added");

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-06-07 20:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-07 20:02 [PATCH] index: v2: parallel by default Eric Wong

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ http://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git