From c43813b9138398ed2de06c3616a5932725090ae3 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 17 May 2020 19:37:21 +0000
Subject: index: add --batch-size=SIZE option

On powerful systems, having this option is preferable to
XAPIAN_FLUSH_THRESHOLD due to lock granularity and contention
with other processes (-learn, -mda, -watch).

Setting XAPIAN_FLUSH_THRESHOLD can cause -learn, -mda, and
-watch to get stuck until an epoch is completely processed.
---
 lib/PublicInbox/SearchIdx.pm  | 12 +++++-------
 lib/PublicInbox/V2Writable.pm |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 5f5ae895..b4088933 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -22,11 +22,9 @@ use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 my $X = \%PublicInbox::Search::X;
 my ($DB_CREATE_OR_OPEN, $DB_OPEN);
-use constant {
-	BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
-			0x7fffffff : 1_000_000,
-	DEBUG => !!$ENV{DEBUG},
-};
+our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
+			0x7fffffff : 1_000_000;
+use constant DEBUG => !!$ENV{DEBUG};
 
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 
@@ -585,7 +583,7 @@ sub batch_adjust ($$$$$) {
 	my ($max, $bytes, $batch_cb, $latest, $nr) = @_;
 	$$max -= $bytes;
 	if ($$max <= 0) {
-		$$max = BATCH_BYTES;
+		$$max = $BATCH_BYTES;
 		$batch_cb->($nr, $latest);
 	}
 }
@@ -610,7 +608,7 @@ sub read_log {
 	my $git = $self->{git};
 	my $latest;
 	my $bytes;
-	my $max = BATCH_BYTES;
+	my $max = $BATCH_BYTES;
 	local $/ = "\n";
 	my %D;
 	my $line;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index bf5a0df9..c732b98a 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -159,7 +159,7 @@ sub do_idx ($$$$) {
 	my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
 	$idx->index_raw($msgref, $mime, $smsg);
 	my $n = $self->{transact_bytes} += $smsg->{bytes};
-	$n >= (PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
+	$n >= ($PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
 }
 
 sub _add {
--
cgit v1.2.3-24-ge0c7