From 8722adec7acac538c1cb92de6d53002a4e4e33b8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 7 Jun 2020 20:02:15 +0000 Subject: index: v2: parallel by default InboxWritable should only set $v2w->{parallel} if the $parallel flag is defined to 0 or 1. We want indexing a new inbox to utilize SMP, just like --reindex. -index once again allows -j0/--jobs=0 to force single-process use, and we'll be ensuring that works in tests to maintain performance on small systems. Fixes: 61a2fff5b34a3e32 ("admin: move index_inbox over") --- lib/PublicInbox/InboxWritable.pm | 2 +- script/public-inbox-index | 2 +- t/cgi.t | 2 +- t/convert-compact.t | 4 ++-- t/indexlevels-mirror.t | 14 +++++++------- t/mda_filter_rubylang.t | 2 +- t/multi-mid.t | 2 +- t/nntpd.t | 2 +- t/replace.t | 4 +++- t/v2mirror.t | 12 ++++++------ t/v2reindex.t | 2 +- t/xcpdb-reshard.t | 2 +- 12 files changed, 26 insertions(+), 24 deletions(-) diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 3558403b..c54be046 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -57,7 +57,7 @@ sub importer { die "v2 not supported: $@\n" if $@; my $opt = $self->{-creat_opt}; my $v2w = PublicInbox::V2Writable->new($self, $opt); - $v2w->{parallel} = $parallel; + $v2w->{parallel} = $parallel if defined $parallel; $v2w; } elsif ($v == 1) { my @arg = (undef, undef, undef, $self); diff --git a/script/public-inbox-index b/script/public-inbox-index index 0018668e..6217fb86 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -18,7 +18,7 @@ my $opt = { quiet => -1, compact => 0, maxsize => undef }; GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s)) or die "bad command-line args\n$usage"; -die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{compact}) { require 
PublicInbox::Xapcmd; diff --git a/t/cgi.t b/t/cgi.t index d1f97150..366d6594 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -39,7 +39,7 @@ use_ok 'PublicInbox::InboxWritable'; use_ok 'PublicInbox::Config'; my $cfg = PublicInbox::Config->new($pi_config); my $ibx = $cfg->lookup_name('test'); -my $im = PublicInbox::InboxWritable->new($ibx)->importer; +my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); { local $ENV{HOME} = $home; diff --git a/t/convert-compact.t b/t/convert-compact.t index 80efc19c..26a8fca0 100644 --- a/t/convert-compact.t +++ b/t/convert-compact.t @@ -116,12 +116,12 @@ is(scalar @$msgs, 1, 'only one message in history'); $ibx = undef; $err = ''; -$cmd = [ qw(-index --reindex -c), "$tmpdir/v2" ]; +$cmd = [ qw(-index -j0 --reindex -c), "$tmpdir/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c'); like($err, qr/xapian-compact/, 'xapian-compact ran (-c)'); $rdr->{2} = \(my $err2 = ''); -$cmd = [ qw(-index --reindex -cc), "$tmpdir/v2" ]; +$cmd = [ qw(-index -j0 --reindex -cc), "$tmpdir/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c -c'); like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)'); ok(($err2 =~ tr/\n/\n/) > ($err =~ tr/\n/\n/), '-compacted twice'); diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 704f7e11..44313e40 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -33,13 +33,13 @@ sub import_index_incremental { -primary_address => 'test@example.com', indexlevel => $level, }); - my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer; + my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0); $mime->header_set('Message-ID', '<m@1>'); ok($im->add($mime), 'first message added'); $im->done; # index master (required for v1) - ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]), + ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); my $ro_master = PublicInbox::Inbox->new({ inboxdir => $ibx->{inboxdir}, @@ -68,7 +68,7 @@ sub
import_index_incremental { ok(run_script(\@cmd), "v$v init OK"); # index mirror - ok(run_script(['-index', $mirror]), "v$v index mirror OK"); + ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror OK"); # read-only access my $ro_mirror = PublicInbox::Inbox->new({ @@ -86,14 +86,14 @@ sub import_index_incremental { # mirror updates is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); - ok(run_script(['-index', $mirror]), "v$v index mirror again OK"); + ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); ($nr, $msgs) = $ro_mirror->recent; is($nr, 2, '2nd message seen in mirror'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) - ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]), + ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); ($nr, $msgs) = $ro_master->recent; is($nr, 2, '2nd message seen in master'); @@ -123,7 +123,7 @@ sub import_index_incremental { # sync the mirror is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); - ok(run_script(['-index', $mirror]), "v$v index mirror again OK"); + ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); ($nr, $msgs) = $ro_mirror->recent; is($nr, 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], @@ -148,7 +148,7 @@ sub import_index_incremental { } $im->done; is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); - ok(run_script(['-index', '--reindex', $mirror]), + ok(run_script([qw(-index -j0 --reindex), $mirror]), "v$v index --reindex mirror OK"); @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t index 483fcb85..5b6bf28b 100644 --- a/t/mda_filter_rubylang.t +++ b/t/mda_filter_rubylang.t @@ -25,7 +25,7 @@ for my $v (qw(V1 V2)) 
{ my $cmd = [ '-init', "-$v", $v, $inboxdir, "http://example.com/$v", $addr ]; ok(run_script($cmd), 'public-inbox-init'); - ok(run_script(['-index', $inboxdir]), 'public-inbox-index'); + ok(run_script([qw(-index -j0), $inboxdir]), 'public-inbox-index'); is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); is(xsys(@cfg, "$cfgpfx.altid", 'serial:alerts:file=msgmap.sqlite3'), 0); diff --git a/t/multi-mid.t b/t/multi-mid.t index 91c8597e..41d556b9 100644 --- a/t/multi-mid.t +++ b/t/multi-mid.t @@ -70,7 +70,7 @@ for my $order ([$bad, $good], [$good, $bad]) { $cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone", 'http://example.com/v2c', 'v2c@example.com' ]; ok(run_script($cmd, $env), 'init clone'); - $cmd = [ '-index', "$tmpdir/v2-clone" ]; + $cmd = [ qw(-index -j0), "$tmpdir/v2-clone" ]; sleep($delay) if $delay; ok(run_script($cmd, $env), 'index the clone'); $ibx->cleanup; diff --git a/t/nntpd.t b/t/nntpd.t index 69f72ce1..eee67ea6 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -53,7 +53,7 @@ $ibx = PublicInbox::Inbox->new($ibx); my $len; $ibx = PublicInbox::InboxWritable->new($ibx); - my $im = $ibx->importer; + my $im = $ibx->importer(0); # ensure successful message delivery { diff --git a/t/replace.t b/t/replace.t index cef4e7aa..c4dcb89d 100644 --- a/t/replace.t +++ b/t/replace.t @@ -33,7 +33,7 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000 Top secret info about my house in Malibu... 
EOF - my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer; + my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0); # fake a bunch of epochs $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes}; @@ -145,10 +145,12 @@ EOF is($smsg->{subject}, 'redacted', 'after subject'); is($smsg->{mid}, 'replace@example.com', 'before MID'); } + # $git->cleanup; # needed if $im->{parallel}; @warn = (); is($im->replace($orig, $repl), undef, 'no-op replace returns undef'); is($im->purge($orig), undef, 'no-op purge returns undef'); is_deeply(\@warn, [], 'no warnings on noop'); + # $im->done; # needed if $im->{parallel} } sub pad_msgs { diff --git a/t/v2mirror.t b/t/v2mirror.t index d588808d..fc03c3d7 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -84,7 +84,7 @@ foreach my $i (0..$epoch_max) { 'alt@example.com'); ok(run_script(\@cmd), 'initialized public-inbox -V2'); -ok(run_script(['-index', "$tmpdir/m"]), 'indexed'); +ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'indexed'); my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' }; $mibx = PublicInbox::Inbox->new($mibx); @@ -111,7 +111,7 @@ $fetch_each_epoch->(); my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); is(scalar($mset->items), 0, 'new message not found in mirror, yet'); -ok(run_script(["-index", "$tmpdir/m"]), 'index updated'); +ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); is(scalar($mset->items), 1, 'found message in mirror'); @@ -142,7 +142,7 @@ $fetch_each_epoch->(); $ibx->cleanup; PublicInbox::InboxWritable::cleanup($mibx); $v2w->done; - my $cmd = [ '-index', '--prune', "$tmpdir/m" ]; + my $cmd = [ qw(-index --prune -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), '-index --prune'); @@ -178,7 +178,7 @@ 
is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); - my $cmd = [ "-index", "$tmpdir/m" ]; + my $cmd = [ qw(-index -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), 'index ran'); @@ -196,7 +196,7 @@ if ('max size') { $ibx->cleanup; $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); - my $cmd = ['-index', "$tmpdir/m", "--max-size=$max" ]; + my $cmd = [qw(-index -j0), "$tmpdir/m", "--max-size=$max" ]; my $opt = { 2 => \(my $err) }; ok(run_script($cmd, undef, $opt), 'indexed with --max-size'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); @@ -211,7 +211,7 @@ if ('max size') { EOF close $fh or die; } - $cmd = ['-index', "$tmpdir/m", "--reindex" ]; + $cmd = [ qw(-index -j0 --reindex), "$tmpdir/m" ]; ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1}); diff --git a/t/v2reindex.t b/t/v2reindex.t index b99106d0..77deffb4 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -38,7 +38,7 @@ my ($mark1, $mark2, $mark3, $mark4); my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1}); - my $im0 = $im->importer(); + my $im0 = $im->importer(0); foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t index 70012cc6..1835fa62 100644 --- a/t/xcpdb-reshard.t +++ b/t/xcpdb-reshard.t @@ -30,7 +30,7 @@ my $ibx = PublicInbox::Inbox->new({ my @xcpdb = qw(-xcpdb -q); my $nproc = 8; my $ndoc = 13; -my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer(1); +my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer; 
for my $i (1..$ndoc) { $mime->header_set('Message-ID', "<m$i\@example.com>"); ok($im->add($mime), "message $i added"); -- cgit v1.2.3-24-ge0c7