* [PATCH] index: v2: parallel by default
@ 2020-06-07 20:02 4% Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-06-07 20:02 UTC (permalink / raw)
To: meta
InboxWritable should only set $v2w->{parallel} if the $parallel
flag is explicitly defined (as 0 or 1). We want indexing a new inbox to
utilize SMP, just like --reindex.
-index once again allows -j0/--jobs=0 to force single-process
use, and we'll be ensuring that works in tests to maintain
performance on small systems.
Fixes: 61a2fff5b34a3e32 ("admin: move index_inbox over")
---
lib/PublicInbox/InboxWritable.pm | 2 +-
script/public-inbox-index | 2 +-
t/cgi.t | 2 +-
t/convert-compact.t | 4 ++--
t/indexlevels-mirror.t | 14 +++++++-------
t/mda_filter_rubylang.t | 2 +-
t/multi-mid.t | 2 +-
t/nntpd.t | 2 +-
t/replace.t | 4 +++-
t/v2mirror.t | 12 ++++++------
t/v2reindex.t | 2 +-
t/xcpdb-reshard.t | 2 +-
12 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 3558403bca6..c54be046f95 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -57,7 +57,7 @@ sub importer {
die "v2 not supported: $@\n" if $@;
my $opt = $self->{-creat_opt};
my $v2w = PublicInbox::V2Writable->new($self, $opt);
- $v2w->{parallel} = $parallel;
+ $v2w->{parallel} = $parallel if defined $parallel;
$v2w;
} elsif ($v == 1) {
my @arg = (undef, undef, undef, $self);
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 0018668e6bd..6217fb86c4e 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -18,7 +18,7 @@ my $opt = { quiet => -1, compact => 0, maxsize => undef };
GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune
indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
or die "bad command-line args\n$usage";
-die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0;
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
if ($opt->{compact}) {
require PublicInbox::Xapcmd;
diff --git a/t/cgi.t b/t/cgi.t
index d1f971504ef..366d6594bb0 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -39,7 +39,7 @@ use_ok 'PublicInbox::InboxWritable';
use_ok 'PublicInbox::Config';
my $cfg = PublicInbox::Config->new($pi_config);
my $ibx = $cfg->lookup_name('test');
-my $im = PublicInbox::InboxWritable->new($ibx)->importer;
+my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
{
local $ENV{HOME} = $home;
diff --git a/t/convert-compact.t b/t/convert-compact.t
index 80efc19c798..26a8fca025f 100644
--- a/t/convert-compact.t
+++ b/t/convert-compact.t
@@ -116,12 +116,12 @@ is(scalar @$msgs, 1, 'only one message in history');
$ibx = undef;
$err = '';
-$cmd = [ qw(-index --reindex -c), "$tmpdir/v2" ];
+$cmd = [ qw(-index -j0 --reindex -c), "$tmpdir/v2" ];
ok(run_script($cmd, undef, $rdr), '--reindex -c');
like($err, qr/xapian-compact/, 'xapian-compact ran (-c)');
$rdr->{2} = \(my $err2 = '');
-$cmd = [ qw(-index --reindex -cc), "$tmpdir/v2" ];
+$cmd = [ qw(-index -j0 --reindex -cc), "$tmpdir/v2" ];
ok(run_script($cmd, undef, $rdr), '--reindex -c -c');
like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)');
ok(($err2 =~ tr/\n/\n/) > ($err =~ tr/\n/\n/), '-compacted twice');
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 704f7e1174e..44313e40118 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -33,13 +33,13 @@ sub import_index_incremental {
-primary_address => 'test@example.com',
indexlevel => $level,
});
- my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer;
+ my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0);
$mime->header_set('Message-ID', '<m@1>');
ok($im->add($mime), 'first message added');
$im->done;
# index master (required for v1)
- ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]),
+ ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
'index master OK');
my $ro_master = PublicInbox::Inbox->new({
inboxdir => $ibx->{inboxdir},
@@ -68,7 +68,7 @@ sub import_index_incremental {
ok(run_script(\@cmd), "v$v init OK");
# index mirror
- ok(run_script(['-index', $mirror]), "v$v index mirror OK");
+ ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror OK");
# read-only access
my $ro_mirror = PublicInbox::Inbox->new({
@@ -86,14 +86,14 @@ sub import_index_incremental {
# mirror updates
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
- ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
+ ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
($nr, $msgs) = $ro_mirror->recent;
is($nr, 2, '2nd message seen in mirror');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
['m@1','m@2'], 'got both messages in mirror');
# incremental index master (required for v1)
- ok(run_script(['-index', $ibx->{inboxdir}, "-L$level"]),
+ ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
'index master OK');
($nr, $msgs) = $ro_master->recent;
is($nr, 2, '2nd message seen in master');
@@ -123,7 +123,7 @@ sub import_index_incremental {
# sync the mirror
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
- ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
+ ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
($nr, $msgs) = $ro_mirror->recent;
is($nr, 1, '2nd message gone from mirror');
is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
@@ -148,7 +148,7 @@ sub import_index_incremental {
}
$im->done;
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
- ok(run_script(['-index', '--reindex', $mirror]),
+ ok(run_script([qw(-index -j0 --reindex), $mirror]),
"v$v index --reindex mirror OK");
@ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)};
@rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)};
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
index 483fcb85549..5b6bf28b862 100644
--- a/t/mda_filter_rubylang.t
+++ b/t/mda_filter_rubylang.t
@@ -25,7 +25,7 @@ for my $v (qw(V1 V2)) {
my $cmd = [ '-init', "-$v", $v, $inboxdir,
"http://example.com/$v", $addr ];
ok(run_script($cmd), 'public-inbox-init');
- ok(run_script(['-index', $inboxdir]), 'public-inbox-index');
+ ok(run_script([qw(-index -j0), $inboxdir]), 'public-inbox-index');
is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
is(xsys(@cfg, "$cfgpfx.altid",
'serial:alerts:file=msgmap.sqlite3'), 0);
diff --git a/t/multi-mid.t b/t/multi-mid.t
index 91c8597e2e6..41d556b9ceb 100644
--- a/t/multi-mid.t
+++ b/t/multi-mid.t
@@ -70,7 +70,7 @@ for my $order ([$bad, $good], [$good, $bad]) {
$cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone",
'http://example.com/v2c', 'v2c@example.com' ];
ok(run_script($cmd, $env), 'init clone');
- $cmd = [ '-index', "$tmpdir/v2-clone" ];
+ $cmd = [ qw(-index -j0), "$tmpdir/v2-clone" ];
sleep($delay) if $delay;
ok(run_script($cmd, $env), 'index the clone');
$ibx->cleanup;
diff --git a/t/nntpd.t b/t/nntpd.t
index 69f72ce1216..eee67ea65bb 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -53,7 +53,7 @@ $ibx = PublicInbox::Inbox->new($ibx);
my $len;
$ibx = PublicInbox::InboxWritable->new($ibx);
- my $im = $ibx->importer;
+ my $im = $ibx->importer(0);
# ensure successful message delivery
{
diff --git a/t/replace.t b/t/replace.t
index cef4e7aa6cb..c4dcb89dec1 100644
--- a/t/replace.t
+++ b/t/replace.t
@@ -33,7 +33,7 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000
Top secret info about my house in Malibu...
EOF
- my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer;
+ my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0);
# fake a bunch of epochs
$im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes};
@@ -145,10 +145,12 @@ EOF
is($smsg->{subject}, 'redacted', 'after subject');
is($smsg->{mid}, 'replace@example.com', 'before MID');
}
+ # $git->cleanup; # needed if $im->{parallel};
@warn = ();
is($im->replace($orig, $repl), undef, 'no-op replace returns undef');
is($im->purge($orig), undef, 'no-op purge returns undef');
is_deeply(\@warn, [], 'no warnings on noop');
+ # $im->done; # needed if $im->{parallel}
}
sub pad_msgs {
diff --git a/t/v2mirror.t b/t/v2mirror.t
index d588808d645..fc03c3d7c3d 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -84,7 +84,7 @@ foreach my $i (0..$epoch_max) {
'alt@example.com');
ok(run_script(\@cmd), 'initialized public-inbox -V2');
-ok(run_script(['-index', "$tmpdir/m"]), 'indexed');
+ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'indexed');
my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' };
$mibx = PublicInbox::Inbox->new($mibx);
@@ -111,7 +111,7 @@ $fetch_each_epoch->();
my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
is(scalar($mset->items), 0, 'new message not found in mirror, yet');
-ok(run_script(["-index", "$tmpdir/m"]), 'index updated');
+ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated');
is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax');
$mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1});
is(scalar($mset->items), 1, 'found message in mirror');
@@ -142,7 +142,7 @@ $fetch_each_epoch->();
$ibx->cleanup;
PublicInbox::InboxWritable::cleanup($mibx);
$v2w->done;
- my $cmd = [ '-index', '--prune', "$tmpdir/m" ];
+ my $cmd = [ qw(-index --prune -j0), "$tmpdir/m" ];
my ($out, $err) = ('', '');
my $opt = { 1 => \$out, 2 => \$err };
ok(run_script($cmd, undef, $opt), '-index --prune');
@@ -178,7 +178,7 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror');
$fetch_each_epoch->();
PublicInbox::InboxWritable::cleanup($mibx);
- my $cmd = [ "-index", "$tmpdir/m" ];
+ my $cmd = [ qw(-index -j0), "$tmpdir/m" ];
my ($out, $err) = ('', '');
my $opt = { 1 => \$out, 2 => \$err };
ok(run_script($cmd, undef, $opt), 'index ran');
@@ -196,7 +196,7 @@ if ('max size') {
$ibx->cleanup;
$fetch_each_epoch->();
PublicInbox::InboxWritable::cleanup($mibx);
- my $cmd = ['-index', "$tmpdir/m", "--max-size=$max" ];
+ my $cmd = [qw(-index -j0), "$tmpdir/m", "--max-size=$max" ];
my $opt = { 2 => \(my $err) };
ok(run_script($cmd, undef, $opt), 'indexed with --max-size');
like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
@@ -211,7 +211,7 @@ if ('max size') {
EOF
close $fh or die;
}
- $cmd = ['-index', "$tmpdir/m", "--reindex" ];
+ $cmd = [ qw(-index -j0 --reindex), "$tmpdir/m" ];
ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file');
like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
$mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
diff --git a/t/v2reindex.t b/t/v2reindex.t
index b99106d0fe7..77deffb4ba9 100644
--- a/t/v2reindex.t
+++ b/t/v2reindex.t
@@ -38,7 +38,7 @@ my ($mark1, $mark2, $mark3, $mark4);
my %config = %$ibx_config;
my $ibx = PublicInbox::Inbox->new(\%config);
my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1});
- my $im0 = $im->importer();
+ my $im0 = $im->importer(0);
foreach my $i (1..10) {
$mime->header_set('Message-Id', "<$i\@example.com>");
ok($im->add($mime), "message $i added");
diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t
index 70012cc6f49..1835fa62140 100644
--- a/t/xcpdb-reshard.t
+++ b/t/xcpdb-reshard.t
@@ -30,7 +30,7 @@ my $ibx = PublicInbox::Inbox->new({
my @xcpdb = qw(-xcpdb -q);
my $nproc = 8;
my $ndoc = 13;
-my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer(1);
+my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer;
for my $i (1..$ndoc) {
$mime->header_set('Message-ID', "<m$i\@example.com>");
ok($im->add($mime), "message $i added");
^ permalink raw reply related [relevance 4%]
* news.public-inbox.org misbehaving?
@ 2020-06-09 22:13 7% Kyle Meyer
0 siblings, 0 replies; 2+ results
From: Kyle Meyer @ 2020-06-09 22:13 UTC (permalink / raw)
To: meta
Checking inbox.comp.mail.public-inbox.meta today, I ran into some odd
behavior in my reader (Gnus). While debugging that, I tried:
$ w3m -m nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
The tail of that buffer was:
4132 (06/06) Dmitry Alexandro Re: [Patch] Update 24-hour times to use two digits for the hour
4133 (06/07) Eric Wong [PATCH] index: v2: parallel by default
That didn't match up with what I saw at https://public-inbox.org/meta/:
IMAP server notes, maybe JMAP?
2020-06-09 11:34 UTC - mbox.gz / Atom
[PATCH] index: v2: parallel by default
2020-06-07 20:02 UTC - mbox.gz / Atom
inbox.comp.version-control.git also seems off. After invoking w3m, I
landed on an empty page:
Newsgroup: inbox.comp.version-control.git 399364-399413
[399314-399363]
Following "[399314-399363]" displayed articles. The last two were
399326 (06/08) Denton Liu Re: [PATCH] Recommend "git gc --prune=now" instead of "git prune"
399327 (06/08) Simon Ser Re: [PATCH v2] grep: add configuration variables for --heading
Again, that doesn't match what I see at https://public-inbox.org/git/,
which has a good number of messages from today (6/9). And the empty
landing page/counts make me think that public-inbox-nntpd knows about
the articles that aren't being displayed.
Any ideas what might be going on?
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-06-07 20:02 4% [PATCH] index: v2: parallel by default Eric Wong
2020-06-09 22:13 7% news.public-inbox.org misbehaving? Kyle Meyer
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).