about summary refs log tree commit homepage
path: root/lib/PublicInbox/IPC.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-03-26 09:35:43 +0000
committer Eric Wong <e@80x24.org> 2023-03-26 09:35:43 +0000
commit 511b7ca599f1c151f6c50f5ba848be60d5857e22 (patch)
tree a7aa219b08af891c3d86d7be250c92ed597de261 /lib/PublicInbox/IPC.pm
parent 2dd3cec8783700f061a0c9b69e329918a4f5cccd (diff)
parent 2e28cc7edb58b04404a836dffc07d47b1a38ee17 (diff)
download public-inbox-511b7ca599f1c151f6c50f5ba848be60d5857e22.tar.gz
* cindex: (29 commits)
  cindex: --prune checkpoints to avoid OOM
  cindex: ignore SIGPIPE
  cindex: respect existing permissions
  cindex: squelch incompatible options
  cindex: implement reindex
  cindex: add support for --prune
  cindex: filter out non-existent git directories
  spawn: show failing directory for chdir failures
  cindex: improve granularity of quit checks
  cindex: attempt to give oldest commits lowest docids
  cindex: truncate or drop body for over-sized commits
  cindex: check for checkpoint before giant messages
  cindex: implement --max-size=SIZE
  sigfd: pass signal name rather than number to callback
  cindex: handle graceful shutdown by default
  cindex: drop `unchanged' progress message
  cindex: show shard number in progress message
  cindex: implement --exclude= like -clone
  ds: @post_loop_do replaces SetPostLoopCallback
  cindex: use DS and workqueues for parallelism
  ...
Diffstat (limited to 'lib/PublicInbox/IPC.pm')
-rw-r--r-- lib/PublicInbox/IPC.pm 33
1 files changed, 31 insertions, 2 deletions
diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 7fa656d0..1f0e87ee 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -19,7 +19,7 @@ use PublicInbox::WQWorker;
 use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM);
 my $MY_MAX_ARG_STRLEN = 4096 * 33; # extra 4K for serialization
 my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough?
-our @EXPORT_OK = qw(ipc_freeze ipc_thaw);
+our @EXPORT_OK = qw(ipc_freeze ipc_thaw nproc_shards);
 my ($enc, $dec);
 # ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+
 # and eliminate method call overhead
@@ -263,11 +263,16 @@ sub recv_and_run {
         $n;
 }
 
+# Callback installed in @PublicInbox::DS::post_loop_do by wq_worker_loop.
+# Ignores its first argument and reports whether the {sock} field of the
+# second argument is defined.
+sub sock_defined {
+        my $worker = $_[1]; # work on a copy, as the list assignment did
+        defined $worker->{sock};
+}
+
+# Runs the event loop for a workqueue worker: creates a
+# PublicInbox::WQWorker for $self->{-wq_s2} (and another one for $bcast2
+# when it is true), runs PublicInbox::DS::event_loop, then calls
+# PublicInbox::DS->Reset once the loop returns.
 sub wq_worker_loop ($$) {
         my ($self, $bcast2) = @_;
         my $wqw = PublicInbox::WQWorker->new($self, $self->{-wq_s2});
         PublicInbox::WQWorker->new($self, $bcast2) if $bcast2;
-        PublicInbox::DS->SetPostLoopCallback(sub { $wqw->{sock} });
+        # `local' restores the previous @post_loop_do when this sub returns.
+        # Unlike the closure it replaces, sock_defined tests whether
+        # $wqw->{sock} is defined rather than whether it is true.
+        # NOTE(review): presumably the loop keeps running while the callback
+        # returns true -- confirm against PublicInbox::DS::event_loop.
+        local @PublicInbox::DS::post_loop_do = (\&sock_defined, $wqw);
         PublicInbox::DS::event_loop();
         PublicInbox::DS->Reset;
 }
@@ -475,4 +480,28 @@ sub detect_nproc () {
         undef
 }
 
+# SATA storage lags behind what CPUs are capable of, so relying on
+# nproc(1) can be misleading and having extra Xapian shards is a
+# waste of FDs and space.  It can also lead to excessive IO latency
+# and slow things down.  Users on NVMe or other fast storage can
+# use the NPROC env or switches in our script/public-inbox-* programs
+# to increase Xapian shards.
+our $NPROC_MAX_DEFAULT = 4;
+
+# Returns the number of shards to use, always >= 1.
+#
+# $creat_opt may be a HASH ref carrying {nproc}; any other value is ignored.
+# The CPU count is chosen as follows:
+#   1. $creat_opt->{nproc} if defined
+#   2. otherwise $ENV{NPROC}
+#   3. if the value so far is false (undef, 0 or empty): detect_nproc(),
+#      or 2 when detection fails, capped at $NPROC_MAX_DEFAULT
+# An explicit nproc/NPROC is never capped.  One is subtracted from the
+# chosen count before it is returned.
+sub nproc_shards ($) {
+        my ($creat_opt) = @_;
+        # Always execute the `my': with `my $n = ... if COND', a false COND
+        # skips the declaration at runtime and $n keeps whatever value the
+        # previous call left in it, so repeated calls with a non-HASH
+        # argument returned a smaller result each time.
+        my $n = ref($creat_opt) eq 'HASH' ? $creat_opt->{nproc} : undef;
+        $n //= $ENV{NPROC};
+        if (!$n) {
+                # assume 2 cores if not detectable or zero
+                state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2;
+                $n = $NPROC_DETECTED;
+                $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
+        }
+
+        # subtract for the main process and git-fast-import
+        $n -= 1;
+        $n < 1 ? 1 : $n;
+}
+
 1;