From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 645041F62B for ; Thu, 25 Apr 2024 21:31:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1714080707; bh=cHdve8wvU27Uivoduoi0uL9d1ymCb3lXqivInpIeDx0=; h=From:To:Subject:Date:In-Reply-To:References:From; b=m3ZAnkXCuyzGau0c1iX//UKKTj58GNC2nxUESSgF10W/JbP1yu6FVxBiHX7KbPHXe a6mO89nIH7yULXrZB8iR80XNpGtqeCph3LQW7y7mjEZGf7IP9GjnQIWC3eD9QJ4YoE Px+BbbvR+hTkkxG0i3RLSW5zb8T0hGeIWujhfJFE= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/5] daemon: share and allow configuring Xapian helpers Date: Thu, 25 Apr 2024 21:31:46 +0000 Message-ID: <20240425213146.1166555-6-e@80x24.org> In-Reply-To: <20240425213146.1166555-1-e@80x24.org> References: <20240425213146.1166555-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Xapian helper processes are disabled by default once again. However, they can be enabled via the new `-X INTEGER' parameter. One big positive is the Xapian helpers being spawned by the top-level daemon means they can be shared freely across all workers for improved load balancing and memory reduction. 
--- Documentation/public-inbox-daemon.pod | 38 +++++++++++++++++++++++++-- Makefile.PL | 6 +++++ lib/PublicInbox/Daemon.pm | 24 +++++++++++++++-- lib/PublicInbox/Search.pm | 8 +++--- lib/PublicInbox/TestCommon.pm | 9 ++++++- lib/PublicInbox/XapClient.pm | 7 ++--- 6 files changed, 80 insertions(+), 12 deletions(-) diff --git a/Documentation/public-inbox-daemon.pod b/Documentation/public-inbox-daemon.pod index 6f1e3b53..092be667 100644 --- a/Documentation/public-inbox-daemon.pod +++ b/Documentation/public-inbox-daemon.pod @@ -79,9 +79,9 @@ C may also be specified on a per-listener basis. Default: /dev/null with C<--daemonize>, inherited otherwise -=item -W +=item -W INTEGER -=item --worker-processes +=item --worker-processes INTEGER Set the number of worker processes. @@ -96,6 +96,40 @@ the master on crashes. Default: 1 +=item -X INTEGER + +=item --xapian-helpers INTEGER + +Enables the use of Xapian helper processes to handle expensive, +non-deterministic Xapian search queries asynchronously without +blocking simple requests. + +With positive values, there is an additional manager process +that can be signaled to control the number of Xapian helper workers. + +* C<-X0> one worker, no manager process +* C<-X1> one worker, one manager process +... +* C<-X8> eight workers, one manager process + +As with the public-facing public-inbox-* daemons, sending C<SIGTTIN> +or C<SIGTTOU> to the Xapian helper manager process will increment or +decrement the number of workers. + +Both Xapian helper workers and managers automatically respawn if they +crash or are explicitly killed, even with C<-X0>. + +A C++ compiler, L<pkg-config(1)>, and Xapian development files (e.g. +C<libxapian-dev> or C<xapian-core-devel>) are required to gain access to +some expensive queries and significant memory savings. + +Xapian helper workers are shared by all C<--worker-processes> of the +Perl daemon for additional memory savings. + +New in public-inbox 2.0.0. 
+ +Default: undefined, search queries are handled synchronously + =item --cert /path/to/cert The default TLS certificate for HTTPS, IMAPS, NNTPS, POP3S and/or STARTTLS diff --git a/Makefile.PL b/Makefile.PL index 2b2e6b18..27fe02ff 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -255,6 +255,12 @@ check-run : check-man # GNU and *BSD both allow it. check-run_T_ARGS = -j\$(N) +check-xh0 : + \$(MAKE) check-run TEST_DAEMON_XH='-X0' + +check-xh1 : + \$(MAKE) check-run TEST_DAEMON_XH='-X1' + check-debris check-run : pure_all \$(EATMYDATA) \$(PROVE) -bvw xt/\$@.t :: \$(\$\@_T_ARGS) -@\$(check_manifest) diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index ec76d6b8..e08102e9 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -22,9 +22,11 @@ use PublicInbox::GitAsyncCat; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::OnDestroy; +use PublicInbox::Search; +use PublicInbox::XapClient; our $SO_ACCEPTFILTER = 0x1000; my @CMD; -my ($set_user, $oldset); +my ($set_user, $oldset, $xh_workers); my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); my ($nworker, @listeners, %WORKERS, %logs); my %tls_opt; # scheme://sockname => args for IO::Socket::SSL::SSL_Context->new @@ -170,6 +172,7 @@ options: --cert=FILE default SSL/TLS certificate --key=FILE default SSL/TLS certificate key -W WORKERS number of worker processes to spawn (default: 1) + -X XWORKERS number of Xapian helper processes (default: undefined) See public-inbox-daemon(8) and $prog(1) man pages for more. 
EOF @@ -185,6 +188,7 @@ EOF 'multi-accept=i' => \$PublicInbox::Listener::MULTI_ACCEPT, 'cert=s' => \$default_cert, 'key=s' => \$default_key, + 'X|xapian-helpers=i' => \$xh_workers, 'help|h' => \(my $show_help), ); GetOptions(%opt) or die $help; @@ -687,6 +691,14 @@ sub worker_loop { PublicInbox::DS::event_loop(\%WORKER_SIG, $oldset); } +sub respawn_xh { # awaitpid cb + my ($pid) = @_; + return unless @listeners; + warn "W: xap_helper PID:$pid died: \$?=$?, respawning...\n"; + $PublicInbox::Search::XHC = + PublicInbox::XapClient::start_helper('-j', $xh_workers); +} + sub run { my ($default_listen) = @_; $nworker = 1; @@ -699,7 +711,15 @@ sub run { local $PublicInbox::Git::async_warn = 1; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); local %WORKER_SIG = %WORKER_SIG; - local %POST_ACCEPT; + local $PublicInbox::XapClient::tries = 0; + + local $PublicInbox::Search::XHC = PublicInbox::XapClient::start_helper( + '-j', $xh_workers) if defined($xh_workers); + if ($PublicInbox::Search::XHC) { + require PublicInbox::XhcMset; + awaitpid($PublicInbox::Search::XHC->{io}->attached_pid, + \&respawn_xh); + } daemon_loop(); # $unlink_on_leave runs diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b7732ae5..4adef366 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -11,7 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); use POSIX qw(strftime); use Carp (); -our $XHC; +our $XHC = 0; # defined but false # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) @@ -57,7 +57,7 @@ use constant { }; use PublicInbox::Smsg; -use PublicInbox::Over; +eval { require PublicInbox::Over }; our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Xapian' or 'Search::Xapian' @@ -428,9 +428,9 @@ sub mset { do_enquire($self, $qry, $opt, TS); } -sub xhc_start_maybe () { +sub xhc_start_maybe 
(@) { require PublicInbox::XapClient; - my $xhc = PublicInbox::XapClient::start_helper(); + my $xhc = PublicInbox::XapClient::start_helper(@_); require PublicInbox::XhcMset if $xhc; $xhc; } diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index a7ec9b5b..b8b7b827 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -17,6 +17,7 @@ my $lei_loud = $ENV{TEST_LEI_ERR_LOUD}; our $tail_cmd = $ENV{TAIL}; our ($lei_opt, $lei_out, $lei_err); use autodie qw(chdir close fcntl mkdir open opendir seek unlink); +$ENV{XDG_CACHE_HOME} //= "$ENV{HOME}/.cache"; # reuse C++ xap_helper builds $_ = File::Spec->rel2abs($_) for (grep(!m!^/!, @INC)); @@ -565,6 +566,9 @@ sub start_script { my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2; my $sub = $run_mode == 0 ? undef : key2sub($key); my $tail; + my $xh = $ENV{TEST_DAEMON_XH}; + $xh && $key =~ /-(?:imapd|netd|httpd|pop3d|nntpd)\z/ and + push @argv, split(/\s+/, $xh); if ($tail_cmd) { my @paths; for (@argv) { @@ -720,7 +724,10 @@ SKIP: { require PublicInbox::Spawn; require PublicInbox::Config; require File::Path; - + eval { # use XDG_CACHE_HOME, first: + require PublicInbox::XapHelperCxx; + PublicInbox::XapHelperCxx::build(); + }; local %ENV = %ENV; delete $ENV{XDG_DATA_HOME}; delete $ENV{XDG_CONFIG_HOME}; diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm index f0270091..24b3f45e 100644 --- a/lib/PublicInbox/XapClient.pm +++ b/lib/PublicInbox/XapClient.pm @@ -12,6 +12,7 @@ use PublicInbox::Spawn qw(spawn); use Socket qw(AF_UNIX SOCK_SEQPACKET); use PublicInbox::IPC; use autodie qw(pipe socketpair); +our $tries = 50; sub mkreq { my ($self, $ios, @arg) = @_; @@ -19,13 +20,13 @@ sub mkreq { pipe($r, $ios->[0]) if !defined($ios->[0]); my @fds = map fileno($_), @$ios; my $buf = join("\0", @arg, ''); - $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0) // - die "send_cmd: $!"; + $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0, 
$tries) + // die "send_cmd: $!"; $n == length($buf) or die "send_cmd: $n != ".length($buf); $r; } -sub start_helper { +sub start_helper (@) { $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS my @argv = @_; socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);