* [PATCH/RFC 0/2] *index: sequential commits
@ 2021-07-27 10:44 7% Eric Wong
2021-07-27 10:44 6% ` [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-07-27 10:44 UTC (permalink / raw)
To: meta
To avoid Xapian slowdowns with gigantic DBs, more shards seem
necessary. But that can mean excessive parallelism, especially
in writes. So introduce a middle road between fully parallel
and fully serialized behavior by only serializing commits.
2/2 is lightly tested (and will probably take a bit...)
Eric Wong (2):
treewide: s/sequential_shard/sequential-shard/g
index+extindex: support indexSequentialCommit
Documentation/public-inbox-index.pod | 22 ++++++++++++++++++++++
lib/PublicInbox/Admin.pm | 4 ++--
lib/PublicInbox/Config.pm | 2 +-
lib/PublicInbox/ExtSearchIdx.pm | 1 +
lib/PublicInbox/LEI.pm | 4 ++--
lib/PublicInbox/V2Writable.pm | 23 ++++++++++++++++-------
script/public-inbox-convert | 2 +-
script/public-inbox-extindex | 1 +
script/public-inbox-index | 13 ++++++++-----
script/public-inbox-xcpdb | 4 ++--
t/v2mirror.t | 8 ++++----
11 files changed, 60 insertions(+), 24 deletions(-)
^ permalink raw reply [relevance 7%]
* [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g
2021-07-27 10:44 7% [PATCH/RFC 0/2] *index: sequential commits Eric Wong
@ 2021-07-27 10:44 6% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-07-27 10:44 UTC (permalink / raw)
To: meta
The underscore variant was never documented, and maintaining
the difference between the command-line option name and the
internal hash key is not worth it.
---
lib/PublicInbox/Admin.pm | 4 ++--
lib/PublicInbox/LEI.pm | 4 ++--
lib/PublicInbox/V2Writable.pm | 4 ++--
script/public-inbox-convert | 2 +-
script/public-inbox-index | 6 +++---
script/public-inbox-xcpdb | 4 ++--
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index b21fb241..eb38dd8f 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -327,9 +327,9 @@ sub index_prepare ($$) {
$opt->{batch_size} and
$env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' };
- for my $k (qw(sequential_shard)) {
+ for my $k (qw(sequential-shard)) {
my $git_key = "publicInbox.index".ucfirst($k);
- $git_key =~ s/_([a-z])/\U$1/g;
+ $git_key =~ s/-([a-z])/\U$1/g;
defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next;
defined(my $v = $cfg->git_bool($s))
or die "`$git_key=$s' not boolean\n";
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 191a0790..0e6e9266 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -134,7 +134,7 @@ sub ale {
sub index_opt {
# TODO: drop underscore variants everywhere, they're undocumented
qw(fsync|sync! jobs|j=i indexlevel|L=s compact
- max_size|max-size=s sequential_shard|sequential-shard
+ max_size|max-size=s sequential-shard
batch_size|batch-size=s skip-docdata)
}
@@ -371,7 +371,7 @@ my %OPTDESC = (
'do not index messages larger than SIZE (default: infinity)' ],
'batch_size|batch-size=s' => [ 'SIZE',
'flush changes to OS after given number of bytes (default: 1m)' ],
-'sequential_shard|sequential-shard' =>
+'sequential-shard' =>
'index Xapian shards sequentially for slow storage',
'skip-docdata' =>
'drop compatibility w/ public-inbox <1.6 to save ~1.5% space',
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 4c1accbb..025487d2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1279,7 +1279,7 @@ sub index_todo ($$$) {
sub xapian_only {
my ($self, $opt, $sync, $art_beg) = @_;
- my $seq = $opt->{sequential_shard};
+ my $seq = $opt->{'sequential-shard'};
$art_beg //= 0;
local $self->{parallel} = 0 if $seq;
$self->idx_init($opt); # acquire lock
@@ -1329,7 +1329,7 @@ sub index_sync {
}
my $pr = $opt->{-progress};
- my $seq = $opt->{sequential_shard};
+ my $seq = $opt->{'sequential-shard'};
my $art_beg; # the NNTP article number we start xapian_only at
my $idxlevel = $self->{ibx}->{indexlevel};
local $self->{ibx}->{indexlevel} = 'basic' if $seq;
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 3c627b79..fec6b624 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -38,7 +38,7 @@ GetOptions($opt, qw(jobs|j=i index! help|h),
qw(verbose|v+ rethread compact|c+ fsync|sync!
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
- sequential_shard|sequential-shard|seq-shard
+ sequential-shard|seq-shard
)) or die $help;
if ($opt->{help}) { print $help; exit 0 };
my $old_dir = shift(@ARGV) // '';
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 33169bd0..0034d44c 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -40,7 +40,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
fsync|sync! xapian_only|xapian-only
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
- sequential_shard|seq-shard|sequential-shard
+ sequential-shard|seq-shard
no-update-extindex update-extindex|E=s@
fast-noop|F skip-docdata all help|h))
or die $help;
@@ -129,12 +129,12 @@ for my $ibx (@ibxs) {
defined(my $v = $cfg->git_bool($s)) or die <<EOL;
publicInbox.$ibx->{name}.indexSequentialShard not boolean
EOL
- $ibx_opt = { %$opt, sequential_shard => $v };
+ $ibx_opt = { %$opt, 'sequential-shard' => $v };
}
my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
last if $ibx_opt->{quit};
if (my $copt = $opt->{compact_opt}) {
- local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
+ local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'};
PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
last if $ibx_opt->{quit};
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 3c99fde8..768dc2ba 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -30,7 +30,7 @@ my $opt = { quiet => -1, compact => 0, fsync => 1 };
GetOptions($opt, qw(
fsync|sync! compact|c reshard|R=i
max_size|max-size=s batch_size|batch-size=s
- sequential_shard|seq-shard|sequential-shard
+ sequential-shard|seq-shard
jobs|j=i quiet|q verbose|v
blocksize|b=s no-full|n fuller|F
all help|h)) or die $help;
@@ -49,7 +49,7 @@ my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg);
# know sizes, only doccounts
$opt->{-idx_env} = $idx_env;
-if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) {
+if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) {
warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n";
$opt->{jobs} = 0;
}
^ permalink raw reply related [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-07-27 10:44 7% [PATCH/RFC 0/2] *index: sequential commits Eric Wong
2021-07-27 10:44 6% ` [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).