From 9225145d3e98d83889065f6d4dc4913226e5f121 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Mar 2023 23:07:16 +0000 Subject: ipc: move nproc_shards from v2writable We'll be using nproc_shards for indexing non-Inbox stuff. --- lib/PublicInbox/IPC.pm | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/IPC.pm') diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index 548a72eb..730f2cf6 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -19,7 +19,7 @@ use PublicInbox::WQWorker; use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM); my $MY_MAX_ARG_STRLEN = 4096 * 33; # extra 4K for serialization my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough? -our @EXPORT_OK = qw(ipc_freeze ipc_thaw); +our @EXPORT_OK = qw(ipc_freeze ipc_thaw nproc_shards); my ($enc, $dec); # ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+ # and eliminate method call overhead @@ -454,4 +454,28 @@ sub detect_nproc () { undef } +# SATA storage lags behind what CPUs are capable of, so relying on +# nproc(1) can be misleading and having extra Xapian shards is a +# waste of FDs and space. It can also lead to excessive IO latency +# and slow things down. Users on NVME or other fast storage can +# use the NPROC env or switches in our script/public-inbox-* programs +# to increase Xapian shards +our $NPROC_MAX_DEFAULT = 4; + +sub nproc_shards ($) { + my ($creat_opt) = @_; + my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; + $n //= $ENV{NPROC}; + if (!$n) { + # assume 2 cores if not detectable or zero + state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2; + $n = $NPROC_DETECTED; + $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; + } + + # subtract for the main process and git-fast-import + $n -= 1; + $n < 1 ? 1 : $n; +} + 1; -- cgit v1.2.3-24-ge0c7 From d00087bcdf3a4c2411ecdf75b4f7ee583db530fb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Mar 2023 23:07:26 +0000 Subject: ds: @post_loop_do replaces SetPostLoopCallback This allows us to avoid repeatedly using memory-intensive anonymous subs in CodeSearchIdx where the callback is assigned frequently. Anonymous subs are known to leak memory in old Perls (e.g. 5.16.3 in enterprise distros) and still expensive in newer Perls. So favor the (\&subroutine, @args) form which allows us to eliminate anonymous subs going forward. Only CodeSearchIdx takes advantage of the new API at the moment, since it's the biggest repeat user of post-loop callback changes. Getting rid of the subroutine and relying on a global `our' variable also has two advantages: 1) Perl warnings can detect typos at compile-time, whereas the (now gone) method could only detect errors at run-time. 2) `our' variable assignment can be `local'-ized to a scope --- lib/PublicInbox/IPC.pm | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/IPC.pm') diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index 730f2cf6..da534aa7 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -245,11 +245,16 @@ sub recv_and_run { $n; } +sub sock_defined { + my (undef, $wqw) = @_; + defined($wqw->{sock}); +} + sub wq_worker_loop ($$) { my ($self, $bcast2) = @_; my $wqw = PublicInbox::WQWorker->new($self, $self->{-wq_s2}); PublicInbox::WQWorker->new($self, $bcast2) if $bcast2; - PublicInbox::DS->SetPostLoopCallback(sub { $wqw->{sock} }); + local @PublicInbox::DS::post_loop_do = (\&sock_defined, $wqw); PublicInbox::DS::event_loop(); PublicInbox::DS->Reset; } -- cgit v1.2.3-24-ge0c7