user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH/RFC 0/2] *index: sequential commits
@ 2021-07-27 10:44  7% Eric Wong
  2021-07-27 10:44  6% ` [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-07-27 10:44 UTC (permalink / raw)
  To: meta

To avoid Xapian slowdowns with gigantic DBs, more shards seem
necessary.  But that can mean excessive parallelism, especially
in writes.  So introduce a middle road between fully parallel
and fully serialized behavior by only serializing commits.

2/2 is lightly tested (and will probably take a bit...)

Eric Wong (2):
  treewide: s/sequential_shard/sequential-shard/g
  index+extindex: support indexSequentialCommit

 Documentation/public-inbox-index.pod | 22 ++++++++++++++++++++++
 lib/PublicInbox/Admin.pm             |  4 ++--
 lib/PublicInbox/Config.pm            |  2 +-
 lib/PublicInbox/ExtSearchIdx.pm      |  1 +
 lib/PublicInbox/LEI.pm               |  4 ++--
 lib/PublicInbox/V2Writable.pm        | 23 ++++++++++++++++-------
 script/public-inbox-convert          |  2 +-
 script/public-inbox-extindex         |  1 +
 script/public-inbox-index            | 13 ++++++++-----
 script/public-inbox-xcpdb            |  4 ++--
 t/v2mirror.t                         |  8 ++++----
 11 files changed, 60 insertions(+), 24 deletions(-)

^ permalink raw reply	[relevance 7%]

* [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g
  2021-07-27 10:44  7% [PATCH/RFC 0/2] *index: sequential commits Eric Wong
@ 2021-07-27 10:44  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-07-27 10:44 UTC (permalink / raw)
  To: meta

The underscore variant was never documented, and maintaining
the difference between the command-line switch and the internal
hash key is not worth it.
---
 lib/PublicInbox/Admin.pm      | 4 ++--
 lib/PublicInbox/LEI.pm        | 4 ++--
 lib/PublicInbox/V2Writable.pm | 4 ++--
 script/public-inbox-convert   | 2 +-
 script/public-inbox-index     | 6 +++---
 script/public-inbox-xcpdb     | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index b21fb241..eb38dd8f 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -327,9 +327,9 @@ sub index_prepare ($$) {
 	$opt->{batch_size} and
 		$env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' };
 
-	for my $k (qw(sequential_shard)) {
+	for my $k (qw(sequential-shard)) {
 		my $git_key = "publicInbox.index".ucfirst($k);
-		$git_key =~ s/_([a-z])/\U$1/g;
+		$git_key =~ s/-([a-z])/\U$1/g;
 		defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next;
 		defined(my $v = $cfg->git_bool($s))
 					or die "`$git_key=$s' not boolean\n";
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 191a0790..0e6e9266 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -134,7 +134,7 @@ sub ale {
 sub index_opt {
 	# TODO: drop underscore variants everywhere, they're undocumented
 	qw(fsync|sync! jobs|j=i indexlevel|L=s compact
-	max_size|max-size=s sequential_shard|sequential-shard
+	max_size|max-size=s sequential-shard
 	batch_size|batch-size=s skip-docdata)
 }
 
@@ -371,7 +371,7 @@ my %OPTDESC = (
 	'do not index messages larger than SIZE (default: infinity)' ],
 'batch_size|batch-size=s' => [ 'SIZE',
 	'flush changes to OS after given number of bytes (default: 1m)' ],
-'sequential_shard|sequential-shard' =>
+'sequential-shard' =>
 	'index Xapian shards sequentially for slow storage',
 'skip-docdata' =>
 	'drop compatibility w/ public-inbox <1.6 to save ~1.5% space',
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 4c1accbb..025487d2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1279,7 +1279,7 @@ sub index_todo ($$$) {
 
 sub xapian_only {
 	my ($self, $opt, $sync, $art_beg) = @_;
-	my $seq = $opt->{sequential_shard};
+	my $seq = $opt->{'sequential-shard'};
 	$art_beg //= 0;
 	local $self->{parallel} = 0 if $seq;
 	$self->idx_init($opt); # acquire lock
@@ -1329,7 +1329,7 @@ sub index_sync {
 	}
 
 	my $pr = $opt->{-progress};
-	my $seq = $opt->{sequential_shard};
+	my $seq = $opt->{'sequential-shard'};
 	my $art_beg; # the NNTP article number we start xapian_only at
 	my $idxlevel = $self->{ibx}->{indexlevel};
 	local $self->{ibx}->{indexlevel} = 'basic' if $seq;
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 3c627b79..fec6b624 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -38,7 +38,7 @@ GetOptions($opt, qw(jobs|j=i index! help|h),
 		qw(verbose|v+ rethread compact|c+ fsync|sync!
 		indexlevel|index-level|L=s max_size|max-size=s
 		batch_size|batch-size=s
-		sequential_shard|sequential-shard|seq-shard
+		sequential-shard|seq-shard
 		)) or die $help;
 if ($opt->{help}) { print $help; exit 0 };
 my $old_dir = shift(@ARGV) // '';
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 33169bd0..0034d44c 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -40,7 +40,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
 		fsync|sync! xapian_only|xapian-only
 		indexlevel|index-level|L=s max_size|max-size=s
 		batch_size|batch-size=s
-		sequential_shard|seq-shard|sequential-shard
+		sequential-shard|seq-shard
 		no-update-extindex update-extindex|E=s@
 		fast-noop|F skip-docdata all help|h))
 	or die $help;
@@ -129,12 +129,12 @@ for my $ibx (@ibxs) {
 		defined(my $v = $cfg->git_bool($s)) or die <<EOL;
 publicInbox.$ibx->{name}.indexSequentialShard not boolean
 EOL
-		$ibx_opt = { %$opt, sequential_shard => $v };
+		$ibx_opt = { %$opt, 'sequential-shard' => $v };
 	}
 	my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
 	last if $ibx_opt->{quit};
 	if (my $copt = $opt->{compact_opt}) {
-		local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
+		local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'};
 		PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
 	}
 	last if $ibx_opt->{quit};
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 3c99fde8..768dc2ba 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -30,7 +30,7 @@ my $opt = { quiet => -1, compact => 0, fsync => 1 };
 GetOptions($opt, qw(
 	fsync|sync! compact|c reshard|R=i
 	max_size|max-size=s batch_size|batch-size=s
-	sequential_shard|seq-shard|sequential-shard
+	sequential-shard|seq-shard
 	jobs|j=i quiet|q verbose|v
 	blocksize|b=s no-full|n fuller|F
 	all help|h)) or die $help;
@@ -49,7 +49,7 @@ my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg);
 # know sizes, only doccounts
 $opt->{-idx_env} = $idx_env;
 
-if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) {
+if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) {
 	warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n";
 	$opt->{jobs} = 0;
 }

^ permalink raw reply related	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-07-27 10:44  7% [PATCH/RFC 0/2] *index: sequential commits Eric Wong
2021-07-27 10:44  6% ` [PATCH 1/2] treewide: s/sequential_shard/sequential-shard/g Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).