diff options
89 files changed, 1100 insertions, 566 deletions
diff --git a/Documentation/RelNotes/v2.0.0.wip b/Documentation/RelNotes/v2.0.0.wip index 4d872fd7..794d7956 100644 --- a/Documentation/RelNotes/v2.0.0.wip +++ b/Documentation/RelNotes/v2.0.0.wip @@ -54,8 +54,9 @@ treewide * SHA-256 coderepos are fully supported (but not inboxes, yet) - * jemalloc (tested as an LD_PRELOAD) is recommended to reduce fragmentation - in long-running daemon processes serving unpredictable traffic + * for daemons serving public traffic, MALLOC_MMAP_THRESHOLD_=131072 is + recommended to reduce fragmentation in glibc malloc, while jemalloc + (tested as an LD_PRELOAD) is another option. PublicInbox::WWW diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod index 4476a806..79156750 100644 --- a/Documentation/lei-q.pod +++ b/Documentation/lei-q.pod @@ -129,6 +129,15 @@ lei/store unless an MUA unflags it! (Behavior undecided) Caveat: C<-tt> only works on locally-indexed messages at the moment, and not on remote (HTTP(S)) endpoints. +=item --thread-id=MSGID + +=item -T MSGID + +Only search messages in the same thread as the given Message-ID. + +For HTTP(S) externals, this only works on instances running +public-inbox 2.0+ (UNRELEASED). + =item --jobs=QUERY_WORKERS[,WRITE_WORKERS] =item --jobs=,WRITE_WORKERS diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl index 68866f44..001ad310 100755 --- a/Documentation/mknews.perl +++ b/Documentation/mknews.perl @@ -47,6 +47,10 @@ if ($dst eq 'NEWS') { -upfx => "$base_url/", -hr => 1, zfh => $out, + env => { + HTTP_HOST => 'public-inbox.org', + 'psgi.url_scheme' => 'https', + }, }; if ($dst eq 'NEWS.html') { html_start($out, $ctx); diff --git a/Documentation/public-inbox-daemon.pod b/Documentation/public-inbox-daemon.pod index 6f1e3b53..092be667 100644 --- a/Documentation/public-inbox-daemon.pod +++ b/Documentation/public-inbox-daemon.pod @@ -79,9 +79,9 @@ C<err=> may also be specified on a per-listener basis. Default: /dev/null with C<--daemonize>, inherited otherwise -=item -W +=item -W INTEGER -=item --worker-processes +=item --worker-processes INTEGER Set the number of worker processes. @@ -96,6 +96,40 @@ the master on crashes. Default: 1 +=item -X INTEGER + +=item --xapian-helpers INTEGER + +Enables the use of Xapian helper processes to handle expensive, +non-deterministic Xapian search queries asynchronously without +blocking simple requests. + +With positive values, there is an additional manager process +that can be signaled to control the number of Xapian helper workers. + +* C<-X0> one worker, no manager process +* C<-X1> one worker, one manager process +... +* C<-X8> eight workers, one manager process + +As with the public-facing public-inbox-* daemons, sending C<SIGTTIN> +or C<SIGTTOU> to the Xapian helper manager process will increment or +decrement the number of workers. + +Both Xapian helper workers and managers automatically respawn if they +crash or are explicitly killed, even with C<-X0>. + +A C++ compiler, L<pkg-config(1)>, and Xapian development files (e.g. +C<libxapian-dev> or C<xapian*-core-dev*>) are required to gain access to +some expensive queries and significant memory savings. + +Xapian helper workers are shared by all C<--worker-processes> of the +Perl daemon for additional memory savings. + +New in public-inbox 2.0.0. + +Default: undefined, search queries are handled synchronously + =item --cert /path/to/cert The default TLS certificate for HTTPS, IMAPS, NNTPS, POP3S and/or STARTTLS diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod index b53e45ed..2db7d7e9 100644 --- a/Documentation/public-inbox-extindex.pod +++ b/Documentation/public-inbox-extindex.pod @@ -80,6 +80,19 @@ doubles the size of the already-large Xapian database. Used with C<--reindex>, it will only look for new and stale entries and not touch already-indexed messages. +=item --no-multi-pack-index + +Disable writing a L<git-multi-pack-index(1)> file to save memory. +Normally, enabling multi-pack-index speeds up startup time of +subsequent L<git-cat-file(1)> processes by 3-4%, but generating +this file requires several GB of memory with large repos. + +Unlike the C<core.multiPackIndex> directive in git, it's still +possible to read existing multi-pack-index files if they are +created elsewhere. + +Available in public-inbox 2.0.0+ + =back =head1 FILES diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 14f157a5..f1a2180a 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -192,6 +192,13 @@ external indices are configured. Do not update the C<all> external index by default. This negates all uses of C<-E> / C<--update-extindex=> on the command-line. +=item --no-multi-pack-index + +Disables writing the multi-pack-index when using L</--update-extindex>. +See L<public-inbox-extindex(1)/--no-multi-pack-index> for details. + +Available in public-inbox 2.0.0+ + =item --since=DATESTRING =item --after=DATESTRING diff --git a/Documentation/public-inbox-tuning.pod b/Documentation/public-inbox-tuning.pod index 73246144..892ee0f2 100644 --- a/Documentation/public-inbox-tuning.pod +++ b/Documentation/public-inbox-tuning.pod @@ -165,8 +165,10 @@ capacity planning. Bursts of small object allocations late in process life contribute to fragmentation of the heap due to arenas (slabs) used internally by Perl. -jemalloc (tested as an LD_PRELOAD on GNU/Linux) appears to reduce -overall fragmentation compared to glibc malloc in long-lived processes. +glibc malloc users should use C<MALLOC_MMAP_THRESHOLD_=131072> to reduce +fragmentation from the sliding mmap window. jemalloc (tested as an +LD_PRELOAD on GNU/Linux) also reduces fragmentation compared to an +unconfigured glibc malloc in long-lived processes. =head2 Other OS tuning knobs @@ -58,7 +58,7 @@ Where "deb" indicates package names for Debian-derived distributions, "pkgin" for NetBSD, "apk" for Alpine Linux and "rpm" is for RPM-based distributions (only known to work on Fedora). -Numerous optional modules are likely to be useful as well: +Most users will likely also want the following: - DBD::SQLite deb: libdbd-sqlite3-perl pkg: p5-DBD-SQLite @@ -71,7 +71,8 @@ Numerous optional modules are likely to be useful as well: rpm: perl-Search-Xapian (required for lei; HTTP and IMAP search) -Every effort has been to make everything else optional: +Other modules might be useful as well, depending on your use case and +preferences: - Plack deb: libplack-perl pkg: p5-Plack @@ -382,6 +382,8 @@ lib/PublicInbox/XapClient.pm lib/PublicInbox/XapHelper.pm lib/PublicInbox/XapHelperCxx.pm lib/PublicInbox/Xapcmd.pm +lib/PublicInbox/XhcMset.pm +lib/PublicInbox/XhcMsetIterator.pm lib/PublicInbox/gcf2_libgit2.h lib/PublicInbox/xap_helper.h lib/PublicInbox/xh_cidx.h diff --git a/Makefile.PL b/Makefile.PL index 2b2e6b18..27fe02ff 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -255,6 +255,12 @@ check-run : check-man # GNU and *BSD both allow it. check-run_T_ARGS = -j\$(N) +check-xh0 : + \$(MAKE) check-run TEST_DAEMON_XH='-X0' + +check-xh1 : + \$(MAKE) check-run TEST_DAEMON_XH='-X1' + check-debris check-run : pure_all \$(EATMYDATA) \$(PROVE) -bvw xt/\$@.t :: \$(\$\@_T_ARGS) -@\$(check_manifest) diff --git a/examples/public-inbox-httpd@.service b/examples/public-inbox-httpd@.service index 11859198..ca68fc7e 100644 --- a/examples/public-inbox-httpd@.service +++ b/examples/public-inbox-httpd@.service @@ -19,6 +19,7 @@ After = public-inbox-httpd.socket Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ TZ=UTC \ +MALLOC_MMAP_THRESHOLD_=131072 \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 diff --git a/examples/public-inbox-imapd@.service b/examples/public-inbox-imapd@.service index 80104605..1aede65d 100644 --- a/examples/public-inbox-imapd@.service +++ b/examples/public-inbox-imapd@.service @@ -16,6 +16,8 @@ After = public-inbox-imapd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ +TZ=UTC \ +MALLOC_MMAP_THRESHOLD_=131072 \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 diff --git a/examples/public-inbox-netd@.service b/examples/public-inbox-netd@.service index 83d2e995..51f58fbb 100644 --- a/examples/public-inbox-netd@.service +++ b/examples/public-inbox-netd@.service @@ -12,11 +12,15 @@ Wants = public-inbox-netd.socket After = public-inbox-netd.socket [Service] -# An LD_PRELOAD for libjemalloc can be added here. It currently seems -# more resistant to fragmentation in long-lived daemons than glibc. + +# Setting MALLOC_MMAP_THRESHOLD_=131072 reduces fragmentation by +# disabling the sliding mmap window in glibc malloc. An LD_PRELOAD for +# libjemalloc may be added here, instead. jemalloc is more resistant to +# fragmentation in long-lived daemons than unconfigured glibc malloc. Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ TZ=UTC \ +MALLOC_MMAP_THRESHOLD_=131072 \ PERL_INLINE_DIRECTORY=/tmp/.netd-inline LimitNOFILE = 30000 diff --git a/examples/public-inbox-nntpd@.service b/examples/public-inbox-nntpd@.service index 24f9ca73..556cb76f 100644 --- a/examples/public-inbox-nntpd@.service +++ b/examples/public-inbox-nntpd@.service @@ -16,6 +16,8 @@ After = public-inbox-nntpd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ +TZ=UTC \ +MALLOC_MMAP_THRESHOLD_=131072 \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 diff --git a/lib/PublicInbox/CmdIPC4.pm b/lib/PublicInbox/CmdIPC4.pm index 2f102ec6..fc77bd03 100644 --- a/lib/PublicInbox/CmdIPC4.pm +++ b/lib/PublicInbox/CmdIPC4.pm @@ -11,8 +11,8 @@ use Socket qw(SOL_SOCKET SCM_RIGHTS); sub sendmsg_retry ($) { return 1 if $!{EINTR}; return unless ($!{ENOMEM} || $!{ENOBUFS} || $!{ETOOMANYREFS}); - return if ++$_[0] >= 50; - warn "# sleeping on sendmsg: $! (#$_[0])\n"; + return if --$_[0] < 0; + warn "# sleeping on sendmsg: $! ($_[0] tries left)\n"; select(undef, undef, undef, 0.1); 1; } @@ -22,15 +22,15 @@ require Socket::MsgHdr; # XS no warnings 'once'; # any number of FDs per-sendmsg(2) + buffer -*send_cmd4 = sub ($$$$) { # (sock, fds, buf, flags) = @_; - my ($sock, $fds, undef, $flags) = @_; +*send_cmd4 = sub ($$$$;$) { # (sock, fds, buf, flags) = @_; + my ($sock, $fds, undef, $flags, $tries) = @_; + $tries //= 50; my $mh = Socket::MsgHdr->new(buf => $_[2]); $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds)); my $s; - my $try = 0; do { $s = Socket::MsgHdr::sendmsg($sock, $mh, $flags); - } while (!defined($s) && sendmsg_retry($try)); + } while (!defined($s) && sendmsg_retry($tries)); $s; }; diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 570ff64f..6d777bf6 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -368,7 +368,7 @@ sub repo_stored { $did > 0 or die "BUG: $repo_ctx->{repo}->{git_dir}: docid=$did"; my ($c, $p) = PublicInbox::PktOp->pair; $c->{ops}->{shard_done} = [ $self, $repo_ctx, - PublicInbox::OnDestroy->new(\&next_repos, $repo_ctx, $drs)]; + on_destroy(\&next_repos, $repo_ctx, $drs)]; # shard_done fires when all shards are committed my @active = keys %{$repo_ctx->{active}}; $IDX_SHARDS[$_]->wq_io_do('shard_commit', [ $p->{op_p} ]) for @active; @@ -425,7 +425,7 @@ sub fp_start ($$) { open my $refs, '+>', undef; $git->{-repo}->{refs} = $refs; my ($c, $p) = PublicInbox::PktOp->pair; - my $next_on_err = PublicInbox::OnDestroy->new(\&index_next, $self); + my $next_on_err = on_destroy \&index_next, $self; $c->{ops}->{fp_done} = [ $self, $git, $next_on_err ]; $IDX_SHARDS[++$ANY_SHARD % scalar(@IDX_SHARDS)]->wq_io_do('fp_async', [ $p->{op_p}, $refs ], $git->{git_dir}) @@ -664,8 +664,7 @@ sub index_repo { my $repo_ctx = $REPO_CTX = { self => $self, repo => $repo }; delete $git->{-cidx_gits_fini}; # may fire gits_fini my $drs = delete $git->{-cidx_dump_roots_start}; - my $index_done = PublicInbox::OnDestroy->new(\&index_done, - $repo_ctx, $drs); + my $index_done = on_destroy \&index_done, $repo_ctx, $drs; my ($c, $p) = PublicInbox::PktOp->pair; $c->{ops}->{shard_done} = [ $self, $repo_ctx, $index_done ]; for my $n (0..$#shard_in) { @@ -690,7 +689,7 @@ sub ct_fini { # run_git cb sub prep_repo ($$) { my ($self, $git) = @_; return if $DO_QUIT; - my $index_repo = PublicInbox::OnDestroy->new(\&index_repo, $self, $git); + my $index_repo = on_destroy \&index_repo, $self, $git; my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}'; sysseek($refs, 0, SEEK_SET); open my $roots_fh, '+>', undef; @@ -787,7 +786,7 @@ sub scan_git_dirs ($) { my ($self) = @_; @$SCANQ = () unless $self->{-opt}->{scan}; $GITS_NR = @$SCANQ or return; - my $gits_fini = PublicInbox::OnDestroy->new(\&gits_fini); + my $gits_fini = on_destroy \&gits_fini; $_->{-cidx_gits_fini} = $gits_fini for @$SCANQ; if (my $drs = $TODO{dump_roots_start}) { $_->{-cidx_dump_roots_start} = $drs for @$SCANQ; @@ -859,7 +858,7 @@ sub prep_umask ($) { umask == $um or progress($self, 'using umask from ', $self->{cidx_dir}, ': ', sprintf('0%03o', $um)); - PublicInbox::OnDestroy->new(\&CORE::umask, umask($um)); + on_destroy \&CORE::umask, umask($um); } else { $self->{umask} = umask; # for SearchIdx->with_umask undef; @@ -1083,12 +1082,12 @@ EOM ($JOIN_DT[1]) = ($QRY_STR =~ /\.\.([0-9]{14})\z/); # YYYYmmddHHMMSS ($JOIN_DT[0]) = ($QRY_STR =~ /\Adt:([0-9]{14})/); # YYYYmmddHHMMSS $JOIN_DT[0] //= '19700101'.'000000'; # git uses unsigned times - $TODO{do_join} = PublicInbox::OnDestroy->new(\&do_join, $self); + $TODO{do_join} = on_destroy \&do_join, $self; $TODO{joining} = 1; # keep shards_active() happy - $TODO{dump_ibx_start} = PublicInbox::OnDestroy->new(\&dump_ibx_start, - $self, $TODO{do_join}); - $TODO{dump_roots_start} = PublicInbox::OnDestroy->new( - \&dump_roots_start, $self, $TODO{do_join}); + $TODO{dump_ibx_start} = on_destroy \&dump_ibx_start, + $self, $TODO{do_join}; + $TODO{dump_roots_start} = on_destroy \&dump_roots_start, + $self, $TODO{do_join}; progress($self, "will join in $QRY_STR date range..."); my $id = -1; @IBXQ = map { ++$id } @IBX; @@ -1110,8 +1109,7 @@ sub init_prune ($) { require_progs('prune', 'xapian-delve' => \@delve, sed => \@sed, comm => \@COMM, awk => \@AWK); for (0..$#IDX_SHARDS) { push @delve, "$self->{xpfx}/$_" } - my $run_prune = PublicInbox::OnDestroy->new(\&run_prune, $self, - $TODO{dump_roots_start}); + my $run_prune = on_destroy \&run_prune, $self, $TODO{dump_roots_start}; my ($sort_opt, $sed_opt, $delve_opt); pipe(local $sed_opt->{0}, local $delve_opt->{1}); pipe(local $sort_opt->{0}, local $sed_opt->{1}); @@ -1279,8 +1277,7 @@ sub cidx_run { # main entry point my $restore_umask = prep_umask($self); local $SIGSET = PublicInbox::DS::block_signals( POSIX::SIGTSTP, POSIX::SIGCONT); - my $restore = PublicInbox::OnDestroy->new($$, - \&PublicInbox::DS::sig_setmask, $SIGSET); + my $restore = on_destroy \&PublicInbox::DS::sig_setmask, $SIGSET; local $PRUNE_DONE = []; local $IDXQ = []; local $SCANQ = []; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index d6300610..49659a2e 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -13,6 +13,7 @@ use v5.10.1; use parent qw(Exporter); our @EXPORT_OK = qw(glob2re rel2abs_collapsed); use PublicInbox::Inbox; +use PublicInbox::Git qw(git_exe); use PublicInbox::Spawn qw(popen_rd run_qx); our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup our $DEDUPE; # set to {} to dedupe or clear cache @@ -188,7 +189,7 @@ sub git_config_dump { unshift(@opt_c, '-c', "include.path=$file") if defined($file); tmp_cmd_opt(\%env, $opt); } - my @cmd = ('git', @opt_c, qw(config -z -l --includes)); + my @cmd = (git_exe, @opt_c, qw(config -z -l --includes)); push(@cmd, '-f', $file) if !@opt_c && defined($file); my $fh = popen_rd(\@cmd, \%env, $opt); my $rv = config_fh_parse($fh, "\0", "\n"); @@ -608,7 +609,7 @@ sub config_cmd { my ($self, $env, $opt) = @_; my $f = $self->{-f} // default_file(); my @opt_c = @{$self->{-opt_c} // []}; - my @cmd = ('git', @opt_c, 'config'); + my @cmd = (git_exe, @opt_c, 'config'); @opt_c ? tmp_cmd_opt($env, $opt) : push(@cmd, '-f', $f); \@cmd; } diff --git a/lib/PublicInbox/ConfigIter.pm b/lib/PublicInbox/ConfigIter.pm index 14fcef83..f9e3451a 100644 --- a/lib/PublicInbox/ConfigIter.pm +++ b/lib/PublicInbox/ConfigIter.pm @@ -1,12 +1,11 @@ -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Intended for PublicInbox::DS::event_loop in read-only daemons # to avoid each_inbox() monopolizing the event loop when hundreds/thousands # of inboxes are in play. package PublicInbox::ConfigIter; -use strict; -use v5.10.1; +use v5.12; sub new { my ($class, $pi_cfg, $cb, @args) = @_; @@ -25,7 +24,7 @@ sub event_step { PublicInbox::DS::requeue($self) if defined($section); } -# for generic PSGI servers +# for generic PSGI servers, but also ManifestJsGz w/ ALL extindex sub each_section { my $self = shift; my ($pi_cfg, $i, $cb, @arg) = @$self; diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index 8bc8cfb7..a6fec954 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -32,9 +32,9 @@ use PublicInbox::Syscall qw(%SIGNUM EPOLLIN EPOLLOUT EPOLLONESHOT EPOLLEXCLUSIVE); use PublicInbox::Tmpfile; use PublicInbox::Select; +use PublicInbox::OnDestroy; use Errno qw(EAGAIN EINVAL ECHILD); use Carp qw(carp croak); -use autodie qw(fork); our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer); my $nextq; # queue for next_tick @@ -679,12 +679,13 @@ sub awaitpid { } } -sub do_fork () { +# for persistent child process +sub fork_persist () { my $seed = rand(0xffffffff); - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; if ($pid == 0) { srand($seed); - eval { Net::SSLeay::randomize() }; + eval { Net::SSLeay::randomize() }; # may not be loaded Reset(); } $pid; diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm index f84c196a..dc6621e4 100644 --- a/lib/PublicInbox/DSKQXS.pm +++ b/lib/PublicInbox/DSKQXS.pm @@ -15,6 +15,7 @@ use v5.12; use Symbol qw(gensym); use IO::KQueue; use Errno qw(EAGAIN); +use PublicInbox::OnDestroy; use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET); sub EV_DISPATCH () { 0x0080 } @@ -37,7 +38,8 @@ sub kq_flag ($$) { sub new { my ($class) = @_; - bless { kq => IO::KQueue->new, owner_pid => $$ }, $class; + my $fgen = $PublicInbox::OnDestroy::fork_gen; + bless { kq => IO::KQueue->new, fgen => $fgen }, $class; } # returns a new instance which behaves like signalfd on Linux. @@ -137,9 +139,8 @@ sub ep_wait { sub DESTROY { my ($self) = @_; my $kq = delete $self->{kq} or return; - if (delete($self->{owner_pid}) == $$) { + delete($self->{fgen}) == $PublicInbox::OnDestroy::fork_gen and POSIX::close($$kq); - } } 1; diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index e578f2e8..28458b19 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -21,9 +21,12 @@ use PublicInbox::Git; use PublicInbox::GitAsyncCat; use PublicInbox::Eml; use PublicInbox::Config; +use PublicInbox::OnDestroy; +use PublicInbox::Search; +use PublicInbox::XapClient; our $SO_ACCEPTFILTER = 0x1000; my @CMD; -my ($set_user, $oldset); +my ($set_user, $oldset, $xh_workers); my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); my ($nworker, @listeners, %WORKERS, %logs); my %tls_opt; # scheme://sockname => args for IO::Socket::SSL::SSL_Context->new @@ -169,6 +172,7 @@ options: --cert=FILE default SSL/TLS certificate --key=FILE default SSL/TLS certificate key -W WORKERS number of worker processes to spawn (default: 1) + -X XWORKERS number of Xapian helper processes (default: undefined) See public-inbox-daemon(8) and $prog(1) man pages for more. EOF @@ -184,6 +188,7 @@ EOF 'multi-accept=i' => \$PublicInbox::Listener::MULTI_ACCEPT, 'cert=s' => \$default_cert, 'key=s' => \$default_key, + 'X|xapian-helpers=i' => \$xh_workers, 'help|h' => \(my $show_help), ); GetOptions(%opt) or die $help; @@ -338,22 +343,20 @@ EOF }; if ($daemonize) { - my $pid = fork // die "fork: $!"; + my $pid = PublicInbox::OnDestroy::fork_tmp; exit if $pid; - open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; POSIX::setsid(); - $pid = fork // die "fork: $!"; + $pid = PublicInbox::OnDestroy::fork_tmp; exit if $pid; } return unless defined $pid_file; write_pid($pid_file); - # for ->DESTROY: - bless { pid => $$, pid_file => \$pid_file }, __PACKAGE__; + on_destroy \&unlink_pid_file_safe_ish, \$pid_file; } sub has_busy_clients { # post_loop_do CB @@ -385,10 +388,30 @@ sub worker_quit { # $_[0] = signal name or number (unused) @PublicInbox::DS::post_loop_do = (\&has_busy_clients, { -w => 0 }) } +sub spawn_xh () { + $xh_workers // return; + require PublicInbox::XhcMset; + local $) = $gid if defined $gid; + local $( = $gid if defined $gid; + local $> = $uid if defined $uid; + local $< = $uid if defined $uid; + $PublicInbox::Search::XHC = eval { + local $ENV{STDERR_PATH} = $stderr; + local $ENV{STDOUT_PATH} = $stdout; + PublicInbox::XapClient::start_helper('-j', $xh_workers) + }; + warn "E: $@" if $@; + awaitpid($PublicInbox::Search::XHC->{io}->attached_pid, \&respawn_xh) + if $PublicInbox::Search::XHC; +} + sub reopen_logs { + my ($sig) = @_; $logs{$stdout} //= \*STDOUT if defined $stdout; $logs{$stderr} //= \*STDERR if defined $stderr; while (my ($p, $fh) = each %logs) { open_log_path($fh, $p) } + ($sig && defined($xh_workers) && $PublicInbox::Search::XHC) and + kill('USR1', $PublicInbox::Search::XHC->{io}->attached_pid); } sub sockname ($) { @@ -476,13 +499,13 @@ sub upgrade { # $_[0] = signal name or number (unused) warn "BUG: .oldbin suffix exists: $pid_file\n"; return; } - unlink_pid_file_safe_ish($$, $pid_file); + unlink_pid_file_safe_ish(\$pid_file); $pid_file .= '.oldbin'; write_pid($pid_file); } - my $pid = fork; + my $pid = eval { PublicInbox::OnDestroy::fork_tmp }; if (!defined($pid)) { - warn "fork failed: $!\n"; + warn "fork failed: $! $@\n"; } elsif ($pid == 0) { $ENV{LISTEN_FDS} = scalar @listeners; $ENV{LISTEN_PID} = $$; @@ -509,23 +532,20 @@ sub upgrade_aborted { my $file = $pid_file; $file =~ s/\.oldbin\z// or die "BUG: no '.oldbin' suffix in $file"; - unlink_pid_file_safe_ish($$, $pid_file); + unlink_pid_file_safe_ish(\$pid_file); $pid_file = $file; eval { write_pid($pid_file) }; warn $@, "\n" if $@; } -sub unlink_pid_file_safe_ish ($$) { - my ($unlink_pid, $file) = @_; - return unless defined $unlink_pid && $unlink_pid == $$; +sub unlink_pid_file_safe_ish ($) { + my ($fref) = @_; - open my $fh, '<', $file or return; + open my $fh, '<', $$fref or return; local $/ = "\n"; defined(my $read_pid = <$fh>) or return; chomp $read_pid; - if ($read_pid == $unlink_pid) { - Net::Server::Daemonize::unlink_pid_file($file); - } + Net::Server::Daemonize::unlink_pid_file($$fref) if $read_pid == $$; } sub master_quit ($) { @@ -545,9 +565,10 @@ sub reap_worker { # awaitpid CB sub start_worker ($) { my ($nr) = @_; return unless @listeners; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { undef %WORKERS; + undef $xh_workers; local $PublicInbox::DS::Poller; # allow epoll/kqueue $set_user->() if $set_user; PublicInbox::EOFpipe->new($parent_pipe, \&worker_quit); @@ -575,8 +596,9 @@ sub master_loop { pipe($parent_pipe, my $p1) or die "failed to create parent-pipe: $!"; my $set_workers = $nworker; # for SIGWINCH reopen_logs(); + spawn_xh; my $msig = { - USR1 => sub { reopen_logs(); kill_workers($_[0]); }, + USR1 => sub { reopen_logs($_[0]); kill_workers($_[0]); }, USR2 => \&upgrade, QUIT => \&master_quit, INT => \&master_quit, @@ -675,6 +697,7 @@ sub daemon_loop () { sub worker_loop { $uid = $gid = undef; reopen_logs(); + spawn_xh; # only for -W0 @listeners = map {; my $l = sockname($_); my $tls_cb = $POST_ACCEPT{$l}; @@ -691,22 +714,30 @@ sub worker_loop { PublicInbox::DS::event_loop(\%WORKER_SIG, $oldset); } +sub respawn_xh { # awaitpid cb + my ($pid) = @_; + return unless @listeners; + warn "W: xap_helper PID:$pid died: \$?=$?, respawning...\n"; + spawn_xh; +} + sub run { my ($default_listen) = @_; $nworker = 1; local (%XNETD, %POST_ACCEPT); daemon_prepare($default_listen); - my $for_destroy = daemonize(); + my $unlink_on_leave = daemonize(); # localize GCF2C for tests: local $PublicInbox::GitAsyncCat::GCF2C; local $PublicInbox::Git::async_warn = 1; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); local %WORKER_SIG = %WORKER_SIG; - local %POST_ACCEPT; + local $PublicInbox::XapClient::tries = 0; + local $PublicInbox::Search::XHC if defined($xh_workers); daemon_loop(); - # ->DESTROY runs when $for_destroy goes out-of-scope + # $unlink_on_leave runs } sub write_pid ($) { @@ -715,8 +746,4 @@ sub write_pid ($) { do_chown($path); } -sub DESTROY { - unlink_pid_file_safe_ish($_[0]->{pid}, ${$_[0]->{pid_file}}); -} - 1; diff --git a/lib/PublicInbox/EOFpipe.pm b/lib/PublicInbox/EOFpipe.pm index 3474874f..77b699a2 100644 --- a/lib/PublicInbox/EOFpipe.pm +++ b/lib/PublicInbox/EOFpipe.pm @@ -7,8 +7,8 @@ use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT $F_SETPIPE_SZ); sub new { - my (undef, $rd, $cb) = @_; - my $self = bless { cb => $cb }, __PACKAGE__; + my (undef, $rd, @cb_args) = @_; + my $self = bless { cb_args => \@cb_args }, __PACKAGE__; # 4096: page size fcntl($rd, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT); @@ -17,7 +17,8 @@ sub new { sub event_step { my ($self) = @_; if ($self->do_read(my $buf, 1) == 0) { # auto-closed - $self->{cb}->(); + my ($cb, @args) = @{delete $self->{cb_args}}; + $cb->(@args); } } diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index ebbffffc..883dbea3 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -1287,11 +1287,11 @@ sub idx_init { # similar to V2Writable ($has_new || $prune_nr || $new ne '') and $self->{mg}->write_alternates($mode, $alt, $new); my $restore = $self->with_umask; - if ($git_midx) { - my @cmd = ('multi-pack-index'); - push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1; + if ($git_midx && ($opt->{'multi-pack-index'} // 1)) { + my $cmd = $self->git->cmd('multi-pack-index'); + push @$cmd, '--no-progress' if ($opt->{quiet}//0) > 1; my $lk = $self->lock_for_scope; - system('git', "--git-dir=$ALL", @cmd, 'write'); + system(@$cmd, 'write'); # ignore errors, fairly new command, may not exist } $self->parallel_init($self->{indexlevel}); @@ -1424,5 +1424,6 @@ no warnings 'once'; *idx_shard = \&PublicInbox::V2Writable::idx_shard; *reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint; *checkpoint = \&PublicInbox::V2Writable::checkpoint; +*barrier = \&PublicInbox::V2Writable::barrier; 1; diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index b0f1437c..814d6e8e 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -12,6 +12,7 @@ use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; use PublicInbox::SHA qw(sha_all); use File::Temp (); +use PublicInbox::Git qw(git_exe); sub new { bless {}, __PACKAGE__ } @@ -19,7 +20,7 @@ sub remote_url ($$) { my ($lei, $dir) = @_; my $rn = $lei->{opt}->{'try-remote'} // [ 'origin', '_grokmirror' ]; for my $r (@$rn) { - my $cmd = [ qw(git config), "remote.$r.url" ]; + my $cmd = [ git_exe, 'config', "remote.$r.url" ]; my $url = run_qx($cmd, undef, { -C => $dir, 2 => $lei->{2} }); next if $?; $url =~ s!/*\n!!s; @@ -92,7 +93,7 @@ sub do_manifest ($$$) { sub get_fingerprint2 { my ($git_dir) = @_; - my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); + my $rd = popen_rd([git_exe, 'show-ref'], undef, { -C => $git_dir }); sha_all(256, $rd)->digest; # ignore show-ref errors } @@ -132,8 +133,8 @@ sub do_fetch { # main entry point warn "W: $edir missing remote.*.url\n"; my $o = { -C => $edir }; $o->{1} = $o->{2} = $lei->{2}; - run_wait([qw(git config -l)], undef, $o) and - $lei->child_error($?); + run_wait([git_exe, qw(config -l)], undef, $o) + and $lei->child_error($?); } } @epochs = grep { !$skip->{$_} } @epochs if $skip; @@ -188,7 +189,7 @@ EOM my $opt = {}; # for spawn if (-d $d) { $fp2->[0] = get_fingerprint2($d) if $fp2; - $cmd = [ @$torsocks, 'git', "--git-dir=$d", + $cmd = [ @$torsocks, git_exe, "--git-dir=$d", PublicInbox::LeiMirror::fetch_args($lei, $opt)]; } else { my $e_uri = $ibx_uri->clone; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index af12f141..a9a821ad 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -10,6 +10,7 @@ package PublicInbox::Git; use strict; use v5.10.1; use parent qw(Exporter PublicInbox::DS); +use PublicInbox::DS qw(now); use autodie qw(socketpair read); use POSIX (); use Socket qw(AF_UNIX SOCK_STREAM); @@ -25,7 +26,7 @@ use PublicInbox::SHA qw(sha_all); our %HEXLEN2SHA = (40 => 1, 64 => 256); our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64); our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN - $ck_unlinked_packs); + $ck_unlinked_packs git_exe); our $in_cleanup; our $async_warn; # true in read-only daemons @@ -54,7 +55,11 @@ my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC; my $EXE_ST = ''; # pack('dd', st_dev, st_ino); # no `q' in some 32-bit builds my ($GIT_EXE, $GIT_VER); -sub check_git_exe () { +sub git_exe () { + my $now = now; + state $next_check = $now - 10; + return $GIT_EXE if $now < $next_check; + $next_check = $now + 10; $GIT_EXE = which('git') // die "git not found in $ENV{PATH}"; my @st = stat(_) or die "stat($GIT_EXE): $!"; # can't do HiRes w/ _ my $st = pack('dd', $st[0], $st[1]); @@ -69,8 +74,8 @@ sub check_git_exe () { $GIT_EXE; } -sub git_version { - check_git_exe(); +sub git_version () { + git_exe; $GIT_VER; } @@ -174,7 +179,7 @@ sub _sock_cmd { # git 2.31.0+ supports -c core.abbrev=no, don't bother with # core.abbrev=64 since not many releases had SHA-256 prior to 2.31 - my $abbr = $GIT_VER lt v2.31.0 ? 40 : 'no'; + my $abbr = git_version lt v2.31.0 ? 40 : 'no'; my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr", 'cat-file', "--$batch"); if ($err_c) { @@ -210,14 +215,14 @@ sub cat_async_retry ($$) { sub gcf_inflight ($) { my ($self) = @_; # FIXME: the first {sock} check can succeed but Perl can complain - # about calling ->owner_pid on an undefined value. Not sure why or - # how this happens but t/imapd.t can complain about it, sometimes. + # about an undefined value. Not sure why or how this happens but + # t/imapd.t can complain about it, sometimes. if ($self->{sock}) { - if (eval { $self->{sock}->owner_pid == $$ }) { + if (eval { $self->{sock}->can_reap }) { return $self->{inflight}; } elsif ($@) { no warnings 'uninitialized'; - warn "E: $self sock=$self->{sock}: owner_pid failed: ". + warn "E: $self sock=$self->{sock}: can_reap failed: ". "$@ (continuing...)"; } delete @$self{qw(sock inflight)}; @@ -287,8 +292,7 @@ sub cat_async_wait ($) { sub batch_prepare ($) { my ($self) = @_; - check_git_exe(); - if ($GIT_VER ge BATCH_CMD_VER) { + if (git_version ge BATCH_CMD_VER) { $self->{-bc} = 1; _sock_cmd($self, 'batch-command', 1); } else { @@ -344,8 +348,7 @@ sub ck { sub check_async_begin ($) { my ($self) = @_; cleanup($self) if alternates_changed($self); - check_git_exe(); - if ($GIT_VER ge BATCH_CMD_VER) { + if (git_version ge BATCH_CMD_VER) { $self->{-bc} = 1; _sock_cmd($self, 'batch-command', 1); } else { @@ -421,15 +424,15 @@ sub async_err ($$$$$) { sub cmd { my $self = shift; - [ $GIT_EXE // check_git_exe(), "--git-dir=$self->{git_dir}", @_ ] + [ git_exe(), "--git-dir=$self->{git_dir}", @_ ] } # $git->popen(qw(show f00)); # or # $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... }); sub popen { my ($self, $cmd) = splice(@_, 0, 2); - $cmd = [ 'git', "--git-dir=$self->{git_dir}", - ref($cmd) ? @$cmd : ($cmd, grep { defined && !ref } @_) ]; + $cmd = $self->cmd(ref($cmd) ? @$cmd : + ($cmd, grep { defined && !ref } @_)); popen_rd($cmd, grep { !defined || ref } @_); # env and opt } @@ -577,9 +580,8 @@ sub cloneurl { # templates/this--description in git.git sub manifest_entry { my ($self, $epoch, $default_desc) = @_; - check_git_exe(); my $gd = $self->{git_dir}; - my @git = ($GIT_EXE, "--git-dir=$gd"); + my @git = (git_exe, "--git-dir=$gd"); my $sr = popen_rd([@git, 'show-ref']); my $own = popen_rd([@git, qw(config gitweb.owner)]); my $mod = popen_rd([@git, @MODIFIED_DATE]); diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm index 5654f3b0..8640f112 100644 --- a/lib/PublicInbox/IO.pm +++ b/lib/PublicInbox/IO.pm @@ -10,12 +10,13 @@ our @EXPORT_OK = qw(poll_in read_all try_cat write_file); use Carp qw(croak); use IO::Poll qw(POLLIN); use Errno qw(EINTR EAGAIN); +use PublicInbox::OnDestroy; # don't autodie in top-level for Perl 5.16.3 (and maybe newer versions) # we have our own ->close, so we scope autodie into each sub sub waitcb { # awaitpid callback my ($pid, $errref, $cb, @args) = @_; - $$errref = $? if $errref; # sets .cerr for _close + $$errref = $?; # sets .cerr for _close $cb->($pid, @args) if $cb; # may clobber $? } @@ -23,8 +24,9 @@ sub attach_pid { my ($io, $pid, @cb_arg) = @_; bless $io, __PACKAGE__; # we share $err (and not $self) with awaitpid to avoid a ref cycle - ${*$io}{pi_io_reap} = [ $$, $pid, \(my $err) ]; - awaitpid($pid, \&waitcb, \$err, @cb_arg); + my $e = \(my $err); + ${*$io}{pi_io_reap} = [ $PublicInbox::OnDestroy::fork_gen, $pid, $e ]; + awaitpid($pid, \&waitcb, $e, @cb_arg); $io; } @@ -33,9 +35,9 @@ sub attached_pid { ${${*$io}{pi_io_reap} // []}[1]; } -sub owner_pid { +sub can_reap { my ($io) = @_; - ${${*$io}{pi_io_reap} // [-1]}[0]; + ${${*$io}{pi_io_reap} // [-1]}[0] == $PublicInbox::OnDestroy::fork_gen; } # caller cares about error result if they call close explicitly @@ -44,7 +46,7 @@ sub close { my ($io) = @_; my $ret = $io->SUPER::close; my $reap = delete ${*$io}{pi_io_reap}; - return $ret unless $reap && $reap->[0] == $$; + return $ret if ($reap->[0] // -1) != $PublicInbox::OnDestroy::fork_gen; if (defined ${$reap->[2]}) { # reap_pids already reaped asynchronously $? = ${$reap->[2]}; } else { # wait synchronously @@ -56,9 +58,9 @@ sub close { sub DESTROY { my ($io) = @_; my $reap = delete ${*$io}{pi_io_reap}; - if ($reap && $reap->[0] == $$) { + if (($reap->[0] // -1) == $PublicInbox::OnDestroy::fork_gen) { $io->SUPER::close; - awaitpid($reap->[1]); + ${$reap->[2]} // awaitpid($reap->[1]); } $io->SUPER::DESTROY; } diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index a5cae6f2..ed6d27fd 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -10,7 +10,7 @@ package PublicInbox::IPC; use v5.12; use parent qw(Exporter); -use autodie qw(close fork pipe read socketpair sysread); +use autodie qw(close pipe read socketpair sysread); use Carp qw(croak); use PublicInbox::DS qw(awaitpid); use PublicInbox::Spawn; @@ -93,6 +93,8 @@ sub ipc_worker_loop ($$$) { } } +sub exit_exception { exit(!!$@) } + # starts a worker if Sereal or Storable is installed sub ipc_worker_spawn { my ($self, $ident, $oldset, $fields, @cb_args) = @_; @@ -102,7 +104,7 @@ sub ipc_worker_spawn { pipe(my $r_res, my $w_res); my $sigset = $oldset // PublicInbox::DS::block_signals(); $self->ipc_atfork_prepare; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { delete @$self{qw(-wq_s1 -wq_s2 -wq_workers -wq_ppid)}; $w_req = $r_res = undef; @@ -110,7 +112,7 @@ sub ipc_worker_spawn { $SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT)); local $0 = $ident; # ensure we properly exit even if warn() dies: - my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) }); + my $end = on_destroy \&exit_exception; eval { $fields //= {}; local @$self{keys %$fields} = values(%$fields); @@ -330,7 +332,7 @@ sub _wq_worker_start { my ($self, $oldset, $fields, $one, @cb_args) = @_; my ($bcast1, $bcast2); $one or socketpair($bcast1, $bcast2, AF_UNIX, SOCK_SEQPACKET, 0); - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { undef $bcast1; delete @$self{qw(-wq_s1 -wq_ppid)}; @@ -340,7 +342,7 @@ sub _wq_worker_start { local $0 = $one ? $self->{-wq_ident} : "$self->{-wq_ident} $self->{-wq_worker_nr}"; # ensure we properly exit even if warn() dies: - my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) }); + my $end = on_destroy \&exit_exception; eval { $fields //= {}; local @$self{keys %$fields} = values(%$fields); diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index ed34d548..fefc282a 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -73,8 +73,8 @@ sub gfi_start { die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; $self->{-tree} = { map { $_ => 1 } split(/\0/, $t) }; } - my $gfi = [ 'git', "--git-dir=$git->{git_dir}", qw(fast-import - --quiet --done --date-format=raw) ]; + my $gfi = $git->cmd(qw(fast-import + --quiet --done --date-format=raw)); my $pid = spawn($gfi, undef, { 0 => $s2, 1 => $s2 }); $self->{nchg} = 0; $self->{io} = PublicInbox::IO::attach_pid($io, $pid); @@ -161,7 +161,7 @@ sub _update_git_info ($$) { # for compatibility with existing ssoma installations # we can probably remove this entirely by 2020 my $git_dir = $self->{git}->{git_dir}; - my @cmd = ('git', "--git-dir=$git_dir"); + my @cmd = @{$self->{git}->cmd}; my $index = "$git_dir/ssoma.index"; if (-e $index && !$ENV{FAST}) { my $env = { GIT_INDEX_FILE => $index }; @@ -631,7 +631,7 @@ sub replace_oids { chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace; $self->{nchg} = 0; # prevent _update_git_info until update-ref: $self->done; - my @git = ('git', "--git-dir=$git->{git_dir}"); + my @git = @{$git->cmd}; run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace; diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm index 62112171..20808d6d 100644 --- a/lib/PublicInbox/Isearch.pm +++ b/lib/PublicInbox/Isearch.pm @@ -26,34 +26,44 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1 sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) } -sub mset { - my ($self, $str, $opt) = @_; +sub eidx_mset_prep ($$) { + my ($self, $opt) = @_; my %opt = $opt ? %$opt : (); $opt{eidx_key} = $self->{eidx_key}; - if (my $uid_range = $opt{uid_range}) { - my ($beg, $end) = @$uid_range; - my $ibx_id = $self->{-ibx_id} //= _ibx_id($self); - my $dbh = $self->{es}->over->dbh; - my $sth = $dbh->prepare_cached(<<'', undef, 1); + my $uid_range = $opt{uid_range} or return \%opt; + my ($beg, $end) = @$uid_range; + my $ibx_id = $self->{-ibx_id} //= _ibx_id($self); + my $dbh = $self->{es}->over->dbh; + my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ? - $sth->execute($ibx_id, $beg, $end); - my @r = ($sth->fetchrow_array); + $sth->execute($ibx_id, $beg, $end); + my @r = ($sth->fetchrow_array); - $sth = $dbh->prepare_cached(<<'', undef, 1); + $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ? - $sth->execute($ibx_id, $beg, $end); - $r[1] = $sth->fetchrow_array; - if (defined($r[1]) && defined($r[0])) { - $opt{limit} = $r[1] - $r[0] + 1; - } else { - $r[1] //= $self->{es}->xdb->get_lastdocid; - $r[0] //= 0; - } - $opt{uid_range} = \@r; # these are fed to Xapian and SQLite + $sth->execute($ibx_id, $beg, $end); + $r[1] = $sth->fetchrow_array; + if (defined($r[1]) && defined($r[0])) { + $opt{limit} = $r[1] - $r[0] + 1; + } else { + $r[1] //= $self->{es}->xdb->get_lastdocid; + $r[0] //= 0; } - $self->{es}->mset($str, \%opt); + $opt{uid_range} = \@r; # these are fed to Xapian and SQLite + \%opt; +} + +sub mset { + my ($self, $str, $opt) = @_; + $self->{es}->mset($str, eidx_mset_prep $self, $opt); +} + +sub async_mset { + my ($self, $str, $opt, $cb, @args) = @_; + $opt = eidx_mset_prep $self, $opt; + $self->{es}->async_mset($str, $opt, $cb, @args); } sub mset_to_artnums { diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 81f940fe..e9a0de6c 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -9,7 +9,7 @@ package PublicInbox::LEI; use v5.12; use parent qw(PublicInbox::DS PublicInbox::LeiExternal PublicInbox::LeiQuery); -use autodie qw(bind chdir fork open pipe socket socketpair syswrite unlink); +use autodie qw(bind chdir open pipe socket socketpair syswrite unlink); use Getopt::Long (); use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un); use Errno qw(EPIPE EAGAIN ECONNREFUSED ENOENT ECONNRESET); @@ -24,6 +24,7 @@ use PublicInbox::Lock; use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::OnDestroy; use PublicInbox::IPC; use Time::HiRes qw(stat); # ctime comparisons for config cache use File::Path (); @@ -176,6 +177,7 @@ our %CMD = ( # sorted in order of importance/use: 'stdin|', # /|\z/ must be first for lone dash @lxs_opt, @net_opt, qw(save! output|mfolder|o=s format|f=s dedupe|d=s threads|t+ + thread-id|T=s sort|s=s reverse|r offset=i pretty jobs|j=s globoff|g augment|a import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+ shared color! mail-sync!), @c_opt, opt_dash('limit|n=i', '[0-9]+') ], @@ -631,9 +633,8 @@ sub _delete_pkt_op { # OnDestroy callback to prevent leaks on die sub pkt_op_pair { my ($self) = @_; - require PublicInbox::OnDestroy; require PublicInbox::PktOp; - my $end = PublicInbox::OnDestroy->new($$, \&_delete_pkt_op, $self); + my $end = on_destroy \&_delete_pkt_op, $self; @$self{qw(pkt_op_c pkt_op_p)} = PublicInbox::PktOp->pair; $end; } @@ -727,8 +728,6 @@ sub optparse ($$$) { require PublicInbox::LeiInput; my @err = PublicInbox::LeiInput::vmd_mod_extract($self, $argv); return $self->fail(join("\n", @err)) if @err; - } else { - warn "proto $proto\n" if $cmd =~ /(add-watch|tag|index)/; } my $i = 0; @@ -1357,7 +1356,7 @@ sub lazy_start { STDIN->autoflush(1); dump_and_clear_log(); POSIX::setsid() > 0 or die "setsid: $!"; - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; return if $pid; $0 = "lei-daemon $path"; local (%PATH2CFG, $MDIR2CFGPATH); @@ -1444,7 +1443,7 @@ sub wq_eof { # EOF callback for main daemon my ($lei, $wq_fld) = @_; local $current_lei = $lei; my $wq = delete $lei->{$wq_fld // 'wq1'}; - $lei->sto_done_request($wq); + $lei->sto_barrier_request($wq); $wq // $lei->fail; # already failed } @@ -1549,7 +1548,7 @@ sub lms { (-f $f || $creat) ? PublicInbox::LeiMailSync->new($f) : undef; } -sub sto_done_request { +sub sto_barrier_request { my ($lei, $wq) = @_; return unless $lei->{sto} && $lei->{sto}->{-wq_s1}; local $current_lei = $lei; @@ -1559,7 +1558,7 @@ sub sto_done_request { my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock}; my $errfh = $lei->{2} // *STDERR{GLOB}; my @io = $s ? ($errfh, $s) : ($errfh); - eval { $lei->{sto}->wq_io_do('done', \@io) }; + eval { $lei->{sto}->wq_io_do('barrier', \@io, 1) }; } warn($@) if $@; } diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm index 528de22c..ce03f5b4 100644 --- a/lib/PublicInbox/LeiALE.pm +++ b/lib/PublicInbox/LeiALE.pm @@ -11,6 +11,7 @@ use parent qw(PublicInbox::LeiSearch PublicInbox::Lock); use PublicInbox::Git; use autodie qw(close open rename seek truncate); use PublicInbox::Import; +use PublicInbox::OnDestroy; use PublicInbox::LeiXSearch; use Fcntl qw(SEEK_SET); @@ -41,11 +42,11 @@ sub over {} # undef for xoids_for sub overs_all { # for xoids_for (called only in lei workers?) my ($self) = @_; - my $pid = $$; - if (($self->{owner_pid} // $pid) != $pid) { + my $fgen = $PublicInbox::OnDestroy::fork_gen ; + if (($self->{fgen} // $fgen) != $fgen) { delete($_->{over}) for @{$self->{ibxish}}; } - $self->{owner_pid} = $pid; + $self->{fgen} = $fgen; grep(defined, map { $_->over } @{$self->{ibxish}}); } diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm index 127cc81e..7b2ea434 100644 --- a/lib/PublicInbox/LeiBlob.pm +++ b/lib/PublicInbox/LeiBlob.pm @@ -36,14 +36,13 @@ sub solver_user_cb { # called by solver when done ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf); $lei->qerr($$log_buf); my ($git, $oid, $type, $size, $di) = @$res; - my $gd = $git->{git_dir}; # don't try to support all the git-show(1) options for non-blob, # this is just a convenience: - $type ne 'blob' and - warn "# $oid is a $type of $size bytes in:\n#\t$gd\n"; - - my $cmd = [ 'git', "--git-dir=$gd", 'show', $oid ]; + $type ne 'blob' and warn <<EOM; +# $oid is a $type of $size bytes in:\n#\t$git->{git_dir} +EOM + my $cmd = $git->cmd('show', $oid); my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} }; run_wait($cmd, $lei->{env}, $rdr) and $lei->child_error($?); } @@ -119,14 +118,17 @@ sub lei_blob { } else { open $rdr->{2}, '>', '/dev/null' or die "open: $!"; } - my $cmd = [ 'git', '--git-dir='.$lei->ale->git->{git_dir}, - 'cat-file', 'blob', $blob ]; + my $cmd = $lei->ale->git->cmd('cat-file', 'blob', $blob); + my $cerr; if (defined $lei->{-attach_idx}) { my $buf = run_qx($cmd, $lei->{env}, $rdr); return extract_attach($lei, $blob, \$buf) unless $?; + $cerr = $?; + } else { + $rdr->{1} = $lei->{1}; # write directly to client + $cerr = run_wait($cmd, $lei->{env}, $rdr) or return; } - $rdr->{1} = $lei->{1}; - my $cerr = run_wait($cmd, $lei->{env}, $rdr) or return; + # fall back to unimported ('lei index') and inflight blobs my $lms = $lei->lms; my $bref = ($lms ? $lms->local_blob($blob, 1) : undef) // do { my $sto = $lei->{sto} // $lei->_lei_store; diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index d003d983..c388f7dc 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -499,7 +499,7 @@ sub process_inputs { } # always commit first, even on error partial work is acceptable for # lei <import|tag|convert> - $self->{lei}->sto_done_request; + $self->{lei}->sto_barrier_request; $self->{lei}->fail($err) if $err; } diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 5353ae61..e7c265bd 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -24,6 +24,7 @@ use POSIX qw(strftime); use PublicInbox::Admin qw(fmt_localtime); use autodie qw(chdir chmod close open pipe readlink seek symlink sysopen sysseek truncate unlink); +use PublicInbox::Git qw(git_exe); our $LIVE; # pid => callback our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]] @@ -105,7 +106,7 @@ E: confused by scraping <$uri>, got ambiguous results: sub clone_cmd { my ($lei, $opt) = @_; - my @cmd = qw(git); + my @cmd = (git_exe); $opt->{$_} = $lei->{$_} for (0..2); # we support "-c $key=$val" for arbitrary git config options # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 @@ -291,9 +292,9 @@ sub upr { # feed `git update-ref --stdin -z' verbosely sub start_update_ref { my ($fgrp) = @_; pipe(my $r, my $w); - my $cmd = [ 'git', "--git-dir=$fgrp->{cur_dst}", + my $cmd = [ git_exe, "--git-dir=$fgrp->{cur_dst}", qw(update-ref --stdin -z) ]; - my $pack = PublicInbox::OnDestroy->new($$, \&satellite_done, $fgrp); + my $pack = on_destroy \&satellite_done, $fgrp; start_cmd($fgrp, $cmd, { 0 => $r, 2 => $fgrp->{lei}->{2} }, $pack); close $r; $fgrp->{dry_run} ? undef : $w; @@ -353,7 +354,7 @@ sub satellite_done { sub pack_refs { my ($self, $git_dir) = @_; - my $cmd = [ 'git', "--git-dir=$git_dir", qw(pack-refs --all --prune) ]; + my $cmd = [git_exe, "--git-dir=$git_dir", qw(pack-refs --all --prune)]; start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }); } @@ -373,18 +374,16 @@ sub fgrpv_done { for my $fgrp (@$fgrpv) { my $rn = $fgrp->{-remote}; my %opt = ( 2 => $fgrp->{lei}->{2} ); - - my $update_ref = PublicInbox::OnDestroy->new($$, - \&fgrp_update, $fgrp); - - my $src = [ 'git', "--git-dir=$fgrp->{-osdir}", 'for-each-ref', + my $update_ref = on_destroy \&fgrp_update, $fgrp; + my $src = [ git_exe, "--git-dir=$fgrp->{-osdir}", + 'for-each-ref', "--format=refs/%(refname:lstrip=3)%00%(objectname)", "refs/remotes/$rn/" ]; open(my $sfh, '+>', undef); $fgrp->{srcfh} = $sfh; start_cmd($fgrp, $src, { %opt, 1 => $sfh }, $update_ref); - my $dst = [ 'git', "--git-dir=$fgrp->{cur_dst}", 'for-each-ref', - '--format=%(refname)%00%(objectname)' ]; + my $dst = [ git_exe, "--git-dir=$fgrp->{cur_dst}", + 'for-each-ref', '--format=%(refname)%00%(objectname)' ]; open(my $dfh, '+>', undef); $fgrp->{dstfh} = $dfh; start_cmd($fgrp, $dst, { %opt, 1 => $dfh }, $update_ref); @@ -402,7 +401,7 @@ sub fgrp_fetch_all { # system argv limits: my $grp = 'fgrptmp'; - my @git = (@{$self->{-torsocks}}, 'git'); + my @git = (@{$self->{-torsocks}}, git_exe); my $j = $self->{lei}->{opt}->{jobs}; my $opt = {}; my @fetch = do { @@ -416,7 +415,7 @@ sub fgrp_fetch_all { my ($old, $new) = @$fgrp_old_new; @$old = sort { $b->{-sort} <=> $a->{-sort} } @$old; # $new is ordered by {references} - my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ]; + my $cmd = [ git_exe, "--git-dir=$osdir", qw(config -f), $f ]; # clobber settings from previous run atomically for ("remotes.$grp", 'fetch.hideRefs') { @@ -467,7 +466,7 @@ sub fgrp_fetch_all { } $cmd = [ @git, "--git-dir=$osdir", @fetch, $grp ]; push @$old, @$new; - my $end = PublicInbox::OnDestroy->new($$, \&fgrpv_done, $old); + my $end = on_destroy \&fgrpv_done, $old; start_cmd($self, $cmd, $opt, $end); } } @@ -544,7 +543,7 @@ sub cmp_fp_do { return if $cur_ent->{fingerprint} eq $new; } my $dst = $self->{cur_dst} // $self->{dst}; - my $cmd = ['git', "--git-dir=$dst", 'show-ref']; + my $cmd = [git_exe, "--git-dir=$dst", 'show-ref']; my $opt = { 2 => $self->{lei}->{2} }; open($opt->{1}, '+>', undef); $self->{-show_ref} = $opt->{1}; @@ -558,7 +557,7 @@ sub resume_fetch { my ($self, $uri, $fini) = @_; return if !keep_going($self); my $dst = $self->{cur_dst} // $self->{dst}; - my @git = ('git', "--git-dir=$dst"); + my @git = (git_exe, "--git-dir=$dst"); my $opt = { 2 => $self->{lei}->{2} }; my $rn = 'random'.int(rand(1 << 30)); for ("url=$uri", "fetch=+refs/*:refs/*", 'mirror=true') { @@ -567,7 +566,7 @@ sub resume_fetch { my $cmd = [ @{$self->{-torsocks}}, @git, fetch_args($self->{lei}, $opt), $rn ]; push @$cmd, '-P' if $self->{lei}->{prune}; # --prune-tags implied - my $run_puh = PublicInbox::OnDestroy->new($$, \&run_puh, $self, $fini); + my $run_puh = on_destroy \&run_puh, $self, $fini; ++$self->{chg}->{nr_chg}; start_cmd($self, $cmd, $opt, $run_puh); } @@ -599,7 +598,7 @@ sub clone_v1 { return; } } - my $fini = PublicInbox::OnDestroy->new($$, \&v1_done, $self); + my $fini = on_destroy \&v1_done, $self; if (my $fgrp = forkgroup_prep($self, $uri)) { $fgrp->{-fini} = $fini; if ($resume) { @@ -621,8 +620,8 @@ sub clone_v1 { } } ++$self->{chg}->{nr_chg}; - start_cmd($self, $cmd, $opt, PublicInbox::OnDestroy->new($$, - \&run_puh, $self, $fini)); + start_cmd($self, $cmd, $opt, + on_destroy(\&run_puh, $self, $fini)); } if (!$self->{-is_epoch} && $lei->{opt}->{'inbox-config'} =~ /\A(?:always|v1)\z/s && @@ -737,7 +736,7 @@ sub atomic_write ($$$) { sub run_next_puh { my ($self) = @_; my $puh = shift @{$self->{-puh_todo}} // return delete($self->{-fini}); - my $fini = PublicInbox::OnDestroy->new($$, \&run_next_puh, $self); + my $fini = on_destroy \&run_next_puh, $self; my $cmd = [ @$puh, ($self->{cur_dst} // $self->{dst}) ]; my $opt = +{ map { $_ => $self->{lei}->{$_} } (0..2) }; start_cmd($self, $cmd, undef, $opt, $fini); @@ -758,18 +757,18 @@ sub update_ent { my $cur = $self->{-local_manifest}->{$key}->{fingerprint} // "\0"; my $dst = $self->{cur_dst} // $self->{dst}; if (defined($new) && $new ne $cur) { - my $cmd = ['git', "--git-dir=$dst", 'show-ref']; + my $cmd = [git_exe, "--git-dir=$dst", 'show-ref']; my $opt = { 2 => $self->{lei}->{2} }; open($opt->{1}, '+>', undef); $self->{-show_ref_up} = $opt->{1}; - my $done = PublicInbox::OnDestroy->new($$, \&up_fp_done, $self); + my $done = on_destroy \&up_fp_done, $self; start_cmd($self, $cmd, $opt, $done); } $new = $self->{-ent}->{head}; $cur = $self->{-local_manifest}->{$key}->{head} // "\0"; if (defined($new) && $new ne $cur) { # n.b. grokmirror writes raw contents to $dst/HEAD w/o locking - my $cmd = [ 'git', "--git-dir=$dst" ]; + my $cmd = [ git_exe, "--git-dir=$dst" ]; if ($new =~ s/\Aref: //) { push @$cmd, qw(symbolic-ref HEAD), $new; } elsif ($new =~ /\A[a-f0-9]{40,}\z/) { @@ -814,7 +813,8 @@ sub update_ent { $cur = $self->{-local_manifest}->{$key}->{owner} // "\0"; return if $cur eq $new; utf8::encode($new); # to octets - my $cmd = [ qw(git config -f), "$dst/config", 'gitweb.owner', $new ]; + my $cmd = [ git_exe, qw(config -f), "$dst/config", + 'gitweb.owner', $new ]; start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }); } @@ -856,7 +856,7 @@ sub v2_done { # called via OnDestroy my $dst = $self->{cur_dst} // $self->{dst}; require PublicInbox::Lock; my $lk = PublicInbox::Lock->new("$dst/inbox.lock"); - my $lck = $lk->lock_for_scope($$); + my $lck = $lk->lock_for_scope; _write_inbox_config($self); require PublicInbox::MultiGit; my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git'); @@ -883,7 +883,7 @@ sub clone_v2_prep ($$;$) { my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs); my $task = $m ? bless { %$self }, __PACKAGE__ : $self; my (@skip, $desc); - my $fini = PublicInbox::OnDestroy->new($$, \&v2_done, $task); + my $fini = on_destroy \&v2_done, $task; for my $nr (sort { $a <=> $b } keys %$v2_epochs) { my ($uri, $key) = @{$v2_epochs->{$nr}}; my $src = $uri->as_string; @@ -1018,7 +1018,7 @@ sub clone_all { my ($self, $m) = @_; my $todo = $TODO; $TODO = \'BUG on further use'; - my $end = PublicInbox::OnDestroy->new($$, \&fgrp_fetch_all, $self); + my $end = on_destroy \&fgrp_fetch_all, $self; { my $nodep = delete $todo->{''}; diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm index 610adb78..68faa016 100644 --- a/lib/PublicInbox/LeiP2q.pm +++ b/lib/PublicInbox/LeiP2q.pm @@ -189,7 +189,7 @@ sub lei_p2q { # the "lei patch-to-query" entry point sub ipc_atfork_child { my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child($self); - PublicInbox::OnDestroy->new($$, \&emit_query, $self); + on_destroy \&emit_query, $self; } no warnings 'once'; diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm index 35728330..66359dd4 100644 --- a/lib/PublicInbox/LeiRediff.pm +++ b/lib/PublicInbox/LeiRediff.pm @@ -119,17 +119,16 @@ EOM map { $_->git_path('objects')."\n" } @{$self->{gits}}; $rw = PublicInbox::Git->new($d); } - my $w = popen_wr(['git', "--git-dir=$rw->{git_dir}", - qw(fast-import --quiet --done --date-format=raw)], + my $w = popen_wr($rw->cmd(qw(fast-import + --quiet --done --date-format=raw)), $lei->{env}, { 2 => $lei->{2} }); print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!"; $w->close or die "close w fast-import: \$?=$? \$!=$!"; - my $cmd = [ 'diff' ]; + my $cmd = $rw->cmd('diff'); _lei_diff_prepare($lei, $cmd); - $lei->qerr("# git @$cmd"); + $lei->qerr("# git @$cmd[2..$#$cmd]"); push @$cmd, qw(A B); - unshift @$cmd, 'git', "--git-dir=$rw->{git_dir}"; run_wait($cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} }) and $lei->child_error($?); # for git diff --exit-code undef; diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm index a60a9a5e..dde23274 100644 --- a/lib/PublicInbox/LeiRefreshMailSync.pm +++ b/lib/PublicInbox/LeiRefreshMailSync.pm @@ -60,7 +60,7 @@ sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url $self->folder_missing($$uri); } } else { die "BUG: $input not supported" } - $self->{lei}->sto_done_request; + $self->{lei}->sto_barrier_request; } sub lei_refresh_mail_sync { diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm index 559fb8d5..d6fc40a4 100644 --- a/lib/PublicInbox/LeiRemote.pm +++ b/lib/PublicInbox/LeiRemote.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Make remote externals HTTP(S) inboxes behave like @@ -51,7 +51,7 @@ sub mset { $fh = IO::Uncompress::Gunzip->new($fh, MultiStream=>1, AutoClose=>1); eval { PublicInbox::MboxReader->mboxrd($fh, \&each_mboxrd_eml, $self) }; my $err = $@ ? ": $@" : ''; - my $wait = $self->{lei}->{sto}->wq_do('done'); + my $wait = $self->{lei}->{sto}->wq_do('barrier'); $lei->child_error($?, "@$cmd failed$err") if $err || $?; $self; # we are the mset (and $ibx, and $self) } @@ -67,9 +67,16 @@ sub base_url { "$_[0]->{uri}" } sub smsg_eml { my ($self, $smsg) = @_; - if (my $bref = $self->{lei}->ale->git->cat_file($smsg->{blob})) { - return PublicInbox::Eml->new($bref); - } + my $bref = $self->{lei}->ale->git->cat_file($smsg->{blob}) // do { + my $lms = $self->{lei}->lms; + ($lms ? $lms->local_blob($smsg->{blob}, 1) : undef) // do { + my $sto = $self->{lei}->{sto} // + $self->{lei}->_lei_store; + $sto && $sto->{-wq_s1} ? + $sto->wq_do('cat_blob', $smsg->{blob}) : undef; + } + }; + return PublicInbox::Eml->new($bref) if $bref; warn("E: $self->{uri} $smsg->{blob} gone <$smsg->{mid}>\n"); undef; } diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index a752174d..b2da2bc3 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -28,6 +28,7 @@ use PublicInbox::Spawn qw(spawn); use PublicInbox::MdirReader; use PublicInbox::LeiToMail; use PublicInbox::Compat qw(uniqstr); +use PublicInbox::OnDestroy; use File::Temp qw(tmpnam); use POSIX (); use IO::Handle (); # ->autoflush @@ -80,7 +81,7 @@ sub importer { delete $self->{im}; $im->done; undef $im; - $self->checkpoint; + $self->barrier; $max = $self->{priv_eidx}->{mg}->git_epochs + 1; } my (undef, $tl) = eidx_init($self); # acquire lock @@ -117,7 +118,7 @@ sub cat_blob { sub schedule_commit { my ($self, $sec) = @_; - add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&done, $self); + add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&barrier, $self); } # follows the stderr file @@ -135,7 +136,7 @@ sub eidx_init { my ($self) = @_; my $eidx = $self->{priv_eidx}; my $tl = wantarray && $self->{-err_wr} ? - PublicInbox::OnDestroy->new($$, \&_tail_err, $self) : + on_destroy(\&_tail_err, $self) : undef; $eidx->idx_init({-private => 1}); # acquires lock wantarray ? ($eidx, $tl) : $eidx; @@ -390,7 +391,7 @@ sub reindex_done { my ($self) = @_; my ($eidx, $tl) = eidx_init($self); $eidx->git->async_wait_all; - # ->done to be called via sto_done_request + # ->done to be called via sto_barrier_request } sub add_eml { @@ -570,13 +571,11 @@ sub set_xvmd { sto_export_kw($self, $smsg->{num}, $vmd); } -sub checkpoint { - my ($self, $wait) = @_; - if (my $im = $self->{im}) { - $wait ? $im->barrier : $im->checkpoint; - } - delete $self->{lms}; - $self->{priv_eidx}->checkpoint($wait); +sub check_done { + my ($self) = @_; + $self->git->_active ? + add_uniq_timer("$self-check_done", 5, \&check_done, $self) : + done($self); } sub xchg_stderr { @@ -593,23 +592,33 @@ sub xchg_stderr { undef; } -sub done { - my ($self) = @_; - my ($errfh, $lei_sock) = @$self{0, 1}; # via sto_done_request +sub _commit ($$) { + my ($self, $cmd) = @_; # cmd is 'done' or 'barrier' + my ($errfh, $lei_sock) = @$self{0, 1}; # via sto_barrier_request my @err; - if (my $im = delete($self->{im})) { - eval { $im->done }; - push(@err, "E: import done: $@\n") if $@; + if ($self->{im}) { + eval { $self->{im}->$cmd }; + push(@err, "E: import $cmd: $@\n") if $@; } delete $self->{lms}; - eval { $self->{priv_eidx}->done }; # V2Writable::done - push(@err, "E: priv_eidx done: $@\n") if $@; - print { $errfh // *STDERR{GLOB} } @err; + eval { $self->{priv_eidx}->$cmd }; + push(@err, "E: priv_eidx $cmd: $@\n") if $@; + print { $errfh // \*STDERR } @err; send($lei_sock, 'child_error 256', 0) if @err && $lei_sock; xchg_stderr($self); die @err if @err; + # $lei_sock goes out-of-scope and script/lei can terminate } +sub barrier { + my ($self) = @_; + _commit $self, 'barrier'; + add_uniq_timer("$self-check_done", 5, \&check_done, $self); + undef; +} + +sub done { _commit $_[0], 'done' } + sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm index 320b0355..da8caeb7 100644 --- a/lib/PublicInbox/LeiTag.pm +++ b/lib/PublicInbox/LeiTag.pm @@ -1,12 +1,12 @@ -# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # handles "lei tag" command package PublicInbox::LeiTag; -use strict; -use v5.10.1; +use v5.12; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::OnDestroy; sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; @@ -49,7 +49,7 @@ sub ipc_atfork_child { PublicInbox::LeiInput::input_only_atfork_child($self); $self->{lse} = $self->{lei}->{sto}->search; # this goes out-of-scope at worker process exit: - PublicInbox::OnDestroy->new($$, \¬e_unimported, $self); + on_destroy \¬e_unimported, $self; } # Workaround bash word-splitting s to ['kw', ':', 'keyword' ...] diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index dfae29e9..5481b5e4 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -14,7 +14,7 @@ use PublicInbox::Import; use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use PublicInbox::Syscall qw(rename_noreplace); -use autodie qw(open seek close); +use autodie qw(pipe open seek close); use Carp qw(croak); my %kw2char = ( # Maildir characters @@ -605,7 +605,7 @@ sub _pre_augment_mbox { $lei->{dedupe} && $lei->{dedupe}->can('reset_dedupe'); } if ($self->{zsfx} = PublicInbox::MboxReader::zsfx($dst)) { - pipe(my ($r, $w)) or die "pipe: $!"; + pipe(my $r, my $w); $lei->{zpipe} = [ $r, $w ]; $lei->{ovv}->{lock_path} and die 'BUG: unexpected {ovv}->{lock_path}'; @@ -719,16 +719,32 @@ sub do_augment { # slow, runs in wq worker $m->($self, $lei); } +sub post_augment_call ($$$$) { + my ($self, $lei, $m, $post_augment_done) = @_; + eval { $m->($self, $lei) }; + $lei->{post_augment_err} = $@ if $@; # for post_augment_done +} + # fast (spawn compressor or mkdir), runs in same process as pre_augment sub post_augment { - my ($self, $lei, @args) = @_; + my ($self, $lei, $post_augment_done) = @_; $self->{-au_noted}++ and $lei->qerr("# writing to $self->{dst} ..."); - my $wait = $lei->{opt}->{'import-before'} ? - $lei->{sto}->wq_do('checkpoint', 1) : 0; # _post_augment_mbox my $m = $self->can("_post_augment_$self->{base_type}") or return; - $m->($self, $lei, @args); + + # --import-before is only for lei-(q|lcat), not lei-convert + $lei->{opt}->{'import-before'} or + return post_augment_call $self, $lei, $m, $post_augment_done; + + # we can't deal with post_augment until import-before commits: + require PublicInbox::EOFpipe; + my @io = @$lei{qw(2 sock)}; + pipe(my $r, $io[2]); + PublicInbox::EOFpipe->new($r, \&post_augment_call, + $self, $lei, $m, $post_augment_done); + $lei->{sto}->wq_io_do('barrier', \@io); + # _post_augment_* && post_augment_done run when barrier is complete } # called by every single l2m worker process diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index fc95d401..43dedd10 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -13,7 +13,7 @@ use File::Temp 0.19 (); # 0.19 for ->newdir use File::Spec (); use PublicInbox::Search qw(xap_terms); use PublicInbox::Spawn qw(popen_rd popen_wr which); -use PublicInbox::MID qw(mids); +use PublicInbox::MID qw(mids mid_escape); use PublicInbox::Smsg; use PublicInbox::Eml; use PublicInbox::LEI; @@ -22,6 +22,7 @@ use PublicInbox::ContentHash qw(git_sha); use POSIX qw(strftime); use autodie qw(close open read seek truncate); use PublicInbox::Syscall qw($F_SETPIPE_SZ); +use PublicInbox::OnDestroy; sub new { my ($class) = @_; @@ -160,6 +161,8 @@ sub query_one_mset { # for --threads and l2m w/o sort my $can_kw = !!$ibxish->can('msg_keywords'); my $threads = $lei->{opt}->{threads} // 0; my $fl = $threads > 1 ? 1 : undef; + my $mid = $lei->{opt}->{'thread-id'}; + $mo->{threadid} = $over->mid2tid($mid) if defined $mid; my $lss = $lei->{lss}; my $maxk = "external.$dir.maxuid"; # max of previous, so our min my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0; @@ -339,6 +342,12 @@ print STDERR $_; push @$curl, '-s', '-d', ''; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); $self->{import_sto} = $lei->{sto} if $lei->{opt}->{'import-remote'}; + if (defined(my $mid = $opt->{'thread-id'})) { + $mid = mid_escape($mid); + for my $uri (@$uris) { + $uri->path($uri->path.$mid.'/'); + } + } for my $uri (@$uris) { $lei->{-current_url} = $uri->as_string; my $start = time; @@ -355,7 +364,7 @@ print STDERR $_; $self, $lei, $each_smsg); }; my ($exc, $code) = ($@, $?); - $lei->sto_done_request if delete($self->{-sto_imported}); + $lei->sto_barrier_request if delete($self->{-sto_imported}); die "E: $exc" if $exc && !$code; my $nr = delete $lei->{-nr_remote_eml} // 0; if (!$code) { # don't update if no results, maybe MTA is down @@ -391,7 +400,7 @@ sub query_done { # EOF callback for main daemon delete $lei->{lxs}; ($lei->{opt}->{'mail-sync'} && !$lei->{sto}) and warn "BUG: {sto} missing with --mail-sync"; - $lei->sto_done_request; + $lei->sto_barrier_request; $lei->{ovv}->ovv_end($lei); if ($l2m) { # close() calls LeiToMail reap_compress $l2m->finish_output($lei); @@ -420,11 +429,9 @@ sub query_done { # EOF callback for main daemon $lei->dclose; } -sub do_post_augment { +sub post_augment_done { # via on_destroy in top-level lei-daemon my ($lei) = @_; - my $l2m = $lei->{l2m} or return; # client disconnected - eval { $l2m->post_augment($lei) }; - my $err = $@; + my $err = delete $lei->{post_augment_err}; if ($err) { if (my $lxs = delete $lei->{lxs}) { $lxs->wq_kill(-POSIX::SIGTERM()); @@ -439,6 +446,12 @@ sub do_post_augment { close(delete $lei->{au_done}); # trigger wait_startq if start_mua didn't } +sub do_post_augment { + my ($lei) = @_; + my $l2m = $lei->{l2m} or return; # client disconnected + $l2m->post_augment($lei, on_destroy(\&post_augment_done, $lei)); +} + sub incr_post_augment { # called whenever an l2m shard finishes augment my ($lei) = @_; my $l2m = $lei->{l2m} or return; # client disconnected @@ -459,7 +472,9 @@ sub concurrency { sub start_query ($$) { # always runs in main (lei-daemon) process my ($self, $lei) = @_; local $PublicInbox::LEI::current_lei = $lei; - if ($self->{opt_threads} || ($lei->{l2m} && !$self->{opt_sort})) { + if ($lei->{opt}->{threads} || + defined($lei->{opt}->{'thread-id'}) || + ($lei->{l2m} && !$lei->{opt}->{'sort'})) { for my $ibxish (locals($self)) { $self->wq_io_do('query_one_mset', [], $ibxish); } @@ -546,8 +561,6 @@ sub do_query { my $op_c = delete $lei->{pkt_op_c}; delete $lei->{pkt_op_p}; @$end = (); - $self->{opt_threads} = $lei->{opt}->{threads}; - $self->{opt_sort} = $lei->{opt}->{'sort'}; $self->{-do_lcat} = !!(delete $lei->{lcat_todo}); if ($l2m) { $l2m->net_merge_all_done($lei) unless $lei->{auth}; diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm index ddaf3312..7162d80e 100644 --- a/lib/PublicInbox/Lock.pm +++ b/lib/PublicInbox/Lock.pm @@ -41,9 +41,9 @@ sub lock_release { # caller must use return value sub lock_for_scope { - my ($self, @single_pid) = @_; + my ($self) = @_; lock_acquire($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(@single_pid, \&lock_release, $self); + on_destroy \&lock_release, $self; } sub lock_acquire_fast { @@ -58,9 +58,9 @@ sub lock_release_fast { # caller must use return value sub lock_for_scope_fast { - my ($self, @single_pid) = @_; + my ($self) = @_; lock_acquire_fast($self) or return; # lock_path not set - PublicInbox::OnDestroy->new(@single_pid, \&lock_release_fast, $self); + on_destroy \&lock_release_fast, $self; } 1; diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm index 3e7bbd5c..16e505a2 100644 --- a/lib/PublicInbox/MHreader.pm +++ b/lib/PublicInbox/MHreader.pm @@ -52,7 +52,7 @@ EOM sub mh_each_file { my ($self, $efcb, @arg) = @_; opendir(my $dh, my $dir = $self->{dir}); - my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh}); + my $restore = on_destroy \&chdir, $self->{cwdfh}; chdir($dh); my $sort = $self->{sort}; if (defined $sort && "@$sort" ne 'none') { @@ -96,7 +96,7 @@ sub mh_each_eml { sub mh_read_one { my ($self, $n, $ucb, @arg) = @_; - my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh}); + my $restore = on_destroy \&chdir, $self->{cwdfh}; chdir(my $dir = $self->{dir}); _file2eml($dir, $n, $self, $ucb, @arg); } diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 1f739baa..be5d5f2a 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -82,8 +82,9 @@ sub response { $ctx->can('list_match_i'), $re, $ctx); sub { $ctx->{-wcb} = $_[0]; # HTTP server callback - $ctx->{env}->{'pi-httpd.async'} ? - $iter->event_step : $iter->each_section; + ($ctx->{www}->{pi_cfg}->ALL || + !$ctx->{env}->{'pi-httpd.async'}) ? + $iter->each_section : $iter->event_step; } } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 52f88ae3..17893a09 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -4,7 +4,7 @@ # Streaming interface for mboxrd HTTP responses # See PublicInbox::GzipFilter for details. package PublicInbox::Mbox; -use strict; +use v5.12; use parent 'PublicInbox::GzipFilter'; use PublicInbox::MID qw/mid_escape/; use PublicInbox::Hval qw/to_filename/; @@ -31,8 +31,8 @@ sub async_next { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; # client aborted eval { - my $smsg = $ctx->{smsg} or return $ctx->close; - $ctx->smsg_blob($smsg); + my $smsg = $ctx->{smsg} // return $ctx->close; + $ctx->smsg_blob($smsg) if $smsg; }; warn "E: $@" if $@; } @@ -159,6 +159,7 @@ sub all_ids_cb { } $ctx->{ids} = $ids = $over->ids_after(\($ctx->{prev})); } while (@$ids); + undef; } sub mbox_all_ids { @@ -175,56 +176,79 @@ sub mbox_all_ids { PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all'); } -sub results_cb { - my ($ctx) = @_; - my $over = $ctx->{ibx}->over or return $ctx->gone('over'); - while (1) { - while (defined(my $num = shift(@{$ctx->{ids}}))) { - my $smsg = $over->get_art($num) or next; - return $smsg; - } - # refill result set, deprioritize since there's many results - my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); - my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); - my $size = $mset->size or return; - $ctx->{qopts}->{offset} += $size; - $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); - $ctx->{-low_prio} = 1; +my $refill_ids_cb = sub { # async_mset cb + my ($ctx, $http, $mset, $err) = @_; + $http = undef unless $ctx->{-really_async}; + if ($err) { + warn "E: $err"; + $ctx->close if $http; # our async httpd + return; } -} - -sub results_thread_cb { - my ($ctx) = @_; + # refill result set, deprioritize since there's many results + my $size = $mset->size or do { + $ctx->close if $http; + $ctx->{-mbox_done} = 1; + return; + }; + $ctx->{qopts}->{offset} += $size; + $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts}); + $ctx->{-low_prio} = 1; # true + return if !$http; + eval { + my $smsg = results_cb($ctx) // return $ctx->close; + return if !$smsg; # '' wait for async_mset + $ctx->smsg_blob($ctx->{smsg} = $smsg); + }; + warn "E: $@" if $@; +}; +sub results_cb { # async_next or MboxGz->getline cb + my ($ctx, $http) = @_; my $over = $ctx->{ibx}->over or return $ctx->gone('over'); while (1) { - while (defined(my $num = shift(@{$ctx->{xids}}))) { + my $ids = $ctx->{xids} // $ctx->{ids}; + while (defined(my $num = shift(@$ids))) { my $smsg = $over->get_art($num) or next; return $smsg; } - - # refills ctx->{xids} - next if $over->expand_thread($ctx); - - # refill result set, deprioritize since there's many results - my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); - my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); - my $size = $mset->size or return; - $ctx->{qopts}->{offset} += $size; - $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); - $ctx->{-low_prio} = 1; + next if $ctx->{xids} && $over->expand_thread($ctx); + return '' if $ctx->{srch}->async_mset(@$ctx{qw(query qopts)}, + $refill_ids_cb, $ctx, $http); + return if $ctx->{-mbox_done}; } +} +sub mbox_qry_cb { # async_mset cb + my ($ctx, $q, $mset, $err) = @_; + my $wcb = delete $ctx->{wcb}; + if ($err) { + warn "E: $err"; + return $wcb->([500, [qw(Content-Type text/plain)], + [ "Internal server error\n" ]]) + } + $ctx->{qopts}->{offset} = $mset->size or + return $wcb->([404, [qw(Content-Type text/plain)], + ["No results found\n"]]); + $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts}); + my $fn; + if ($q->{t} && $ctx->{srch}->has_threadid) { + $ctx->{xids} = []; # triggers over->expand_thread + $fn = "results-thread-$ctx->{query}"; + } else { + $fn = "results-$ctx->{query}"; + } + require PublicInbox::MboxGz; + my $res = PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn); + ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res); } sub mbox_all { my ($ctx, $q) = @_; - my $q_string = $q->{'q'}; - return mbox_all_ids($ctx) if $q_string !~ /\S/; - my $srch = $ctx->{ibx}->isrch or + my $qstr = $q->{'q'}; + return mbox_all_ids($ctx) if $qstr !~ /\S/; + my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or return PublicInbox::WWW::need($ctx, 'Search'); - - my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC + my $opt = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC # {threadid} limits results to a given thread # {threads} collapses results from messages in the same thread, @@ -234,25 +258,16 @@ sub mbox_all { $ctx->{ibx}->{isrch}->{es}->over : $ctx->{ibx}->over) or return PublicInbox::WWW::need($ctx, 'Overview'); - $qopts->{threadid} = $over->mid2tid($ctx->{mid}); - } - $qopts->{threads} = 1 if $q->{t}; - $srch->query_approxidate($ctx->{ibx}->git, $q_string); - my $mset = $srch->mset($q_string, $qopts); - $qopts->{offset} = $mset->size or - return [404, [qw(Content-Type text/plain)], - ["No results found\n"]]; - $ctx->{query} = $q_string; - $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts); - require PublicInbox::MboxGz; - my $fn; - if ($q->{t} && $srch->has_threadid) { - $fn = 'results-thread-'.$q_string; - PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn); - } else { - $fn = 'results-'.$q_string; - PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn); + $opt->{threadid} = $over->mid2tid($ctx->{mid}); } + $opt->{threads} = 1 if $q->{t}; + $srch->query_approxidate($ctx->{ibx}->git, $qstr); + $ctx->{query} = $qstr; + sub { # called by PSGI server + $ctx->{wcb} = $_[0]; # PSGI server supplied write cb + $srch->async_mset($qstr, $opt, \&mbox_qry_cb, $ctx, $q) and + $ctx->{-really_async} = 1; + }; } 1; diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index 533d2ff1..90e69c09 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -1,7 +1,7 @@ # Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> package PublicInbox::MboxGz; -use strict; +use v5.12; use parent 'PublicInbox::GzipFilter'; use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; @@ -13,8 +13,8 @@ sub async_next ($) { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; eval { - $ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close; - $ctx->smsg_blob($ctx->{smsg}); + my $smsg = $ctx->{cb}->($ctx, $http) // return $ctx->close; + $smsg and $ctx->smsg_blob($ctx->{smsg} = $smsg); }; warn "E: $@" if $@; } diff --git a/lib/PublicInbox/MboxLock.pm b/lib/PublicInbox/MboxLock.pm index 9d7d4a32..5e373873 100644 --- a/lib/PublicInbox/MboxLock.pm +++ b/lib/PublicInbox/MboxLock.pm @@ -4,7 +4,7 @@ # Various mbox locking methods package PublicInbox::MboxLock; use v5.12; -use PublicInbox::OnDestroy; +use PublicInbox::OnDestroy (); use Fcntl qw(:flock F_SETLK F_SETLKW F_RDLCK F_WRLCK O_CREAT O_EXCL O_WRONLY SEEK_SET); use Carp qw(croak); @@ -122,10 +122,10 @@ sub acq { sub DESTROY { my ($self) = @_; my $f = $self->{".lock$$"} or return; - my $x; + my $od; if (my $dh = delete $self->{dh}) { opendir my $c, '.'; - $x = PublicInbox::OnDestroy->new(\&chdir, $c); + $od = PublicInbox::OnDestroy::all \&chdir, $c; chdir($dh); } CORE::unlink($f) or die "unlink($f): $! (lock stolen?)"; diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm index d9a6cd24..4301edff 100644 --- a/lib/PublicInbox/OnDestroy.pm +++ b/lib/PublicInbox/OnDestroy.pm @@ -3,22 +3,29 @@ package PublicInbox::OnDestroy; use v5.12; +use parent qw(Exporter); +use autodie qw(fork); +our @EXPORT = qw(on_destroy); +our $fork_gen = 0; -sub new { - shift; # ($class, $cb, @args) - bless [ @_ ], __PACKAGE__; +# either parent or child is expected to exit or exec shortly after this: +sub fork_tmp () { + my $pid = fork; + ++$fork_gen if $pid == 0; + $pid; } +# all children +sub all (@) { bless [ undef, @_ ], __PACKAGE__ } + +# same process +sub on_destroy (@) { bless [ $fork_gen, @_ ], __PACKAGE__ } + sub cancel { @{$_[0]} = () } sub DESTROY { - my ($cb, @args) = @{$_[0]}; - if (!ref($cb) && $cb) { - my $pid = $cb; - return if $pid != $$; - $cb = shift @args; - } - $cb->(@args) if $cb; + my ($fgen, $cb, @args) = @{$_[0]}; + $cb->(@args) if ($cb && ($fgen // $fork_gen) == $fork_gen); } 1; diff --git a/lib/PublicInbox/RepoAtom.pm b/lib/PublicInbox/RepoAtom.pm index ab0f2fcc..eb0ed3c7 100644 --- a/lib/PublicInbox/RepoAtom.pm +++ b/lib/PublicInbox/RepoAtom.pm @@ -94,11 +94,10 @@ xmlns="http://www.w3.org/1999/xhtml"><pre style="white-space:pre-wrap"> sub srv_tags_atom { my ($ctx) = @_; my $max = 50; # TODO configurable - my @cmd = ('git', "--git-dir=$ctx->{git}->{git_dir}", - qw(for-each-ref --sort=-creatordate), "--count=$max", - '--perl', $EACH_REF_FMT, 'refs/tags'); + my $cmd = $ctx->{git}->cmd(qw(for-each-ref --sort=-creatordate), + "--count=$max", '--perl', $EACH_REF_FMT, 'refs/tags'); $ctx->{-feed_title} = "$ctx->{git}->{nick} tags"; - my $qsp = PublicInbox::Qspawn->new(\@cmd); + my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{-is_tag} = 1; $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx); } @@ -107,20 +106,19 @@ sub srv_atom { my ($ctx, $path) = @_; return if index($path, '//') >= 0 || index($path, '/') == 0; my $max = 50; # TODO configurable - my @cmd = ('git', "--git-dir=$ctx->{git}->{git_dir}", - qw(log --no-notes --no-color --no-abbrev), - $ATOM_FMT, "-$max"); + my $cmd = $ctx->{git}->cmd(qw(log --no-notes --no-color --no-abbrev), + $ATOM_FMT, "-$max"); my $tip = $ctx->{qp}->{h}; # same as cgit $ctx->{-feed_title} = $ctx->{git}->{nick}; $ctx->{-feed_title} .= " $path" if $path ne ''; if (defined($tip)) { - push @cmd, $tip; + push @$cmd, $tip; $ctx->{-feed_title} .= ", $tip"; } # else: let git decide based on HEAD if $tip isn't defined - push @cmd, '--'; - push @cmd, $path if $path ne ''; - my $qsp = PublicInbox::Qspawn->new(\@cmd, undef, + push @$cmd, '--'; + push @$cmd, $path if $path ne ''; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, { quiet => 1, 2 => $ctx->{lh} }); $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx); } diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm index 4c372569..bff97bc8 100644 --- a/lib/PublicInbox/RepoSnapshot.pm +++ b/lib/PublicInbox/RepoSnapshot.pm @@ -50,15 +50,13 @@ sub ver_check { # git->check_async callback delete($ctx->{env}->{'qspawn.wcb'})->(r(404)); } else { # found, done: $ctx->{etag} = $oid; - my @cfg; + my $cmd = $ctx->{git}->cmd; if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) { - @cfg = ('-c', "tar.$ctx->{snap_fmt}.command=$cmd"); + push @$cmd, '-c', "tar.$ctx->{snap_fmt}.command=$cmd"; } - my $qsp = PublicInbox::Qspawn->new(['git', @cfg, - "--git-dir=$ctx->{git}->{git_dir}", 'archive', - "--prefix=$ctx->{snap_pfx}/", - "--format=$ctx->{snap_fmt}", $treeish], undef, - { quiet => 1 }); + push @$cmd, 'archive', "--prefix=$ctx->{snap_pfx}/", + "--format=$ctx->{snap_fmt}", $treeish; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, { quiet => 1 }); $qsp->psgi_yield($ctx->{env}, undef, \&archive_hdr, $ctx); } } diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm index 5c73531a..4c85f9a8 100644 --- a/lib/PublicInbox/RepoTree.pm +++ b/lib/PublicInbox/RepoTree.pm @@ -51,8 +51,8 @@ sub find_missing { $res->[0] = 404; return delete($ctx->{-wcb})->($res); } - my $cmd = ['git', "--git-dir=$ctx->{git}->{git_dir}", - qw(log --no-color -1), '--pretty=%H %h %s (%as)' ]; + my $cmd = $ctx->{git}->cmd(qw(log --no-color -1), + '--pretty=%H %h %s (%as)'); push @$cmd, $ctx->{qp}->{h} if defined($ctx->{qp}->{h}); push @$cmd, '--'; push @$cmd, $ctx->{-path}; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 678c8c5d..e5c5d6ab 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -11,6 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); use POSIX qw(strftime); use Carp (); +our $XHC = 0; # defined but false # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) @@ -56,7 +57,7 @@ use constant { }; use PublicInbox::Smsg; -use PublicInbox::Over; +eval { require PublicInbox::Over }; our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Xapian' or 'Search::Xapian' @@ -85,14 +86,13 @@ our @XH_SPEC = ( 'k=i', # sort column (like sort(1)) 'm=i', # maximum number of results 'o=i', # offset - 'p', # show percent 'r', # 1=relevance then column 't', # collapse threads 'A=s@', # prefixes - 'D', # emit docdata 'K=i', # timeout kill after i seconds 'O=s', # eidx_key 'T=i', # threadid + 'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX" ); sub load_xapian () { @@ -429,6 +429,68 @@ sub mset { do_enquire($self, $qry, $opt, TS); } +sub xhc_start_maybe (@) { + require PublicInbox::XapClient; + my $xhc = PublicInbox::XapClient::start_helper(@_); + require PublicInbox::XhcMset if $xhc; + $xhc; +} + +sub xh_opt ($$) { + my ($self, $opt) = @_; + my $lim = $opt->{limit} || 50; + my @ret; + push @ret, '-o', $opt->{offset} if $opt->{offset}; + push @ret, '-m', $lim; + my $rel = $opt->{relevance} // 0; + if ($rel == -2) { # ORDER BY docid/UID (highest first) + push @ret, '-k', '-1'; + } elsif ($rel == -1) { # ORDER BY docid/UID (lowest first) + push @ret, '-k', '-1'; + push @ret, '-a'; + } elsif ($rel == 0) { + push @ret, '-k', $opt->{sort_col} // TS; + push @ret, '-a' if $opt->{asc}; + } else { # rel > 0 + push @ret, '-r'; + push @ret, '-k', $opt->{sort_col} // TS; + push @ret, '-a' if $opt->{asc}; + } + push @ret, '-t' if $opt->{threads}; + push @ret, '-T', $opt->{threadid} if defined $opt->{threadid}; + push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key}; + my $apfx = $self->{-alt_pfx} //= do { + my @tmp; + for (grep /\Aserial:/, @{$self->{altid} // []}) { + my (undef, $pfx) = split /:/, $_; + push @tmp, '-Q', "$pfx=X\U$pfx"; + } + # TODO: arbitrary header indexing goes here + \@tmp; + }; + (@ret, @$apfx); +} + +# returns a true value if actually handled asynchronously, +# and a falsy value if handled synchronously +sub async_mset { + my ($self, $qry_str, $opt, $cb, @args) = @_; + if ($XHC) { # unconditionally retrieving pct + rank for now + xdb($self); # populate {nshards} + my @margs = ($self->xh_args, xh_opt($self, $opt)); + my $ret = eval { + my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str); + PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args); + }; + $cb->(@args, undef, $@) if $@; + $ret; + } else { # synchronous + my $mset = $self->mset($qry_str, $opt); + $cb->(@args, $mset); + undef; + } +} + sub do_enquire { # shared with CodeSearch my ($self, $qry, $opt, $col) = @_; my $enq = $X{Enquire}->new(xdb($self)); @@ -578,7 +640,7 @@ EOM $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} } } - # TODO: altid support + # altid support is handled in xh_opt and srch_init_extra in XH for my $name (sort keys %prob_prefix) { for (split(/ /, $prob_prefix{$name})) { $ret .= qq{\tqp->add_prefix("$name", "$_");\n} diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1cbf6d23..4fd493d9 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -119,7 +119,7 @@ sub load_xapian_writable () { my $ver = eval 'v'.join('.', eval($xap.'::major_version()'), eval($xap.'::minor_version()'), eval($xap.'::revision()')); - if ($ver ge 1.4) { # new flags in Xapian 1.4 + if ($ver ge v1.4) { # new flags in Xapian 1.4 $DB_NO_SYNC = 0x4; $DB_DANGEROUS = 0x10; } @@ -1003,8 +1003,7 @@ sub prepare_stack ($$) { sub is_ancestor ($$$) { my ($git, $cur, $tip) = @_; return 0 unless $git->check($cur); - my $cmd = [ 'git', "--git-dir=$git->{git_dir}", - qw(merge-base --is-ancestor), $cur, $tip ]; + my $cmd = $git->cmd(qw(merge-base --is-ancestor), $cur, $tip); run_wait($cmd) == 0; } diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 1630eb4a..ea261bda 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -45,7 +45,7 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop $v2w->{current_info} = "[$self->{shard}]"; # for $SIG{__WARN__} $self->begin_txn_lazy; # caller (ipc_worker_spawn) must capture this: - PublicInbox::OnDestroy->new($$, \&_worker_done, $self); + on_destroy \&_worker_done, $self; } sub index_eml { diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 2d3e942c..9919e25c 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -30,10 +30,9 @@ sub mbox_results { sub sres_top_html { my ($ctx) = @_; - my $srch = $ctx->{ibx}->isrch or + my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or return PublicInbox::WWW::need($ctx, 'Search'); my $q = PublicInbox::SearchQuery->new($ctx->{qp}); - my $x = $q->{x}; my $o = $q->{o} // 0; my $asc; if ($o < 0) { @@ -41,48 +40,57 @@ sub sres_top_html { $o = -($o + 1); # so [-1] is the last element, like Perl lists } - my $code = 200; # double the limit for expanded views: - my $opts = { + my $opt = { limit => $q->{l}, offset => $o, relevance => $q->{r}, threads => $q->{t}, asc => $asc, }; - my ($mset, $total, $err, $html); -retry: - eval { - my $query = $q->{'q'}; - $srch->query_approxidate($ctx->{ibx}->git, $query); - $mset = $srch->mset($query, $opts); - $total = $mset->get_matches_estimated; - }; - $err = $@; + my $qs = $q->{'q'}; + $srch->query_approxidate($ctx->{ibx}->git, $qs); + sub { + $ctx->{wcb} = $_[0]; # PSGI server supplied write cb + $srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q); + } +} + +sub sres_html_cb { # async_mset cb + my ($ctx, $opt, $q, $mset, $err) = @_; + my $code = 200; + my $total = $mset ? $mset->get_matches_estimated : undef; ctx_prepare($q, $ctx); + my ($res, $html); if ($err) { $code = 400; $html = '<pre>'.err_txt($ctx, $err).'</pre><hr>'; } elsif ($total == 0) { - if (defined($ctx->{-uxs_retried})) { - # undo retry damage: + if (defined($ctx->{-uxs_retried})) { # undo retry damage: $q->{'q'} = $ctx->{-uxs_retried}; - } elsif (index($q->{'q'}, '%') >= 0) { + } elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped $ctx->{-uxs_retried} = $q->{'q'}; - $q->{'q'} = uri_unescape($q->{'q'}); - goto retry; + my $qs = $q->{'q'} = uri_unescape($q->{'q'}); + $ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs); + return $ctx->{srch}->async_mset($qs, $opt, + \&sres_html_cb, $ctx, $opt, $q); } $code = 404; $html = "<pre>\n[No results found]</pre><hr>"; + } elsif ($q->{x} eq 'A') { + $res = adump($mset, $q, $ctx); } else { - return adump($_[0], $mset, $q, $ctx) if $x eq 'A'; - $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx); - return mset_thread($ctx, $mset, $q) if $x eq 't'; - mset_summary($ctx, $mset, $q); # appends to {-html_tip} - $html = ''; + if ($q->{x} eq 't') { + $res = mset_thread($ctx, $mset, $q); + } else { + mset_summary($ctx, $mset, $q); # appends to {-html_tip} + $html = ''; + } } - html_oneshot($ctx, $code, $html); + $res //= html_oneshot($ctx, $code, $html); + my $wcb = delete $ctx->{wcb}; + ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res); } # display non-nested search results similar to what users expect from @@ -357,7 +365,7 @@ sub ctx_prepare { } sub adump { - my ($cb, $mset, $q, $ctx) = @_; + my ($mset, $q, $ctx) = @_; $ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset); $ctx->{search_query} = $q; # used by WwwAtomStream::atom_header PublicInbox::WwwAtomStream->response($ctx, \&adump_i); diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index e36659ce..e9e81e88 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -176,15 +176,15 @@ out: return (int)pid; } -static int sendmsg_retry(unsigned *tries) +static int sendmsg_retry(int *tries) { const struct timespec req = { 0, 100000000 }; /* 100ms */ int err = errno; switch (err) { case EINTR: PERL_ASYNC_CHECK(); return 1; case ENOBUFS: case ENOMEM: case ETOOMANYREFS: - if (++*tries >= 50) return 0; - fprintf(stderr, "# sleeping on sendmsg: %s (#%u)\n", + if (--*tries < 0) return 0; + fprintf(stderr, "# sleeping on sendmsg: %s (%d tries left)\n", strerror(err), *tries); nanosleep(&req, NULL); PERL_ASYNC_CHECK(); @@ -201,7 +201,7 @@ union my_cmsg { char pad[sizeof(struct cmsghdr) + 16 + SEND_FD_SPACE]; }; -SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) +SV *send_cmd4_(PerlIO *s, SV *svfds, SV *data, int flags, int tries) { struct msghdr msg = { 0 }; union my_cmsg cmsg = { 0 }; @@ -211,7 +211,6 @@ SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) AV *fds = (AV *)SvRV(svfds); I32 i, nfds = av_len(fds) + 1; int *fdp; - unsigned tries = 0; if (SvOK(data)) { iov.iov_base = SvPV(data, dlen); @@ -332,6 +331,9 @@ EOM if (defined $all_libc) { # set for Gcf2 $ENV{PERL_INLINE_DIRECTORY} = $inline_dir; %RLIMITS = rlimit_map(); + *send_cmd4 = sub ($$$$;$) { + send_cmd4_($_[0], $_[1], $_[2], $_[3], 50); + } } else { require PublicInbox::SpawnPP; *pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm index f89d37d4..9ad4d0a1 100644 --- a/lib/PublicInbox/SpawnPP.pm +++ b/lib/PublicInbox/SpawnPP.pm @@ -7,7 +7,8 @@ package PublicInbox::SpawnPP; use v5.12; use POSIX qw(dup2 _exit setpgid :signal_h); -use autodie qw(chdir close fork pipe); +use autodie qw(chdir close pipe); +use PublicInbox::OnDestroy; # this is loaded by PublicInbox::Spawn, so we can't use/require it, here # Pure Perl implementation for folks that do not use Inline::C @@ -22,7 +23,7 @@ sub pi_fork_exec ($$$$$$$) { } sigprocmask(SIG_SETMASK, $set, $old) or die "SIG_SETMASK(set): $!"; pipe(my $r, my $w); - my $pid = fork; + my $pid = PublicInbox::OnDestroy::fork_tmp; if ($pid == 0) { close $r; $SIG{__DIE__} = sub { diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index 829cfa3c..4cbe9623 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -317,6 +317,10 @@ BEGIN { ) } $PACK{CMSG_ALIGN_size} = SIZEOF_size_t; + $PACK{SIZEOF_cmsghdr} //= 0; + $PACK{TMPL_cmsg_len} //= undef; + $PACK{CMSG_DATA_off} //= undef; + $PACK{TMPL_msghdr} //= undef; } # SFD_CLOEXEC is arch-dependent, so IN_CLOEXEC may be, too @@ -463,8 +467,8 @@ if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) { no warnings 'once'; require PublicInbox::CmdIPC4; -*send_cmd4 = sub ($$$$) { - my ($sock, $fds, undef, $flags) = @_; +*send_cmd4 = sub ($$$$;$) { + my ($sock, $fds, undef, $flags, $tries) = @_; my $iov = pack('P'.TMPL_size_t, $_[2] // NUL, length($_[2] // NUL) || 1); my $fd_space = scalar(@$fds) * SIZEOF_int; @@ -483,10 +487,10 @@ require PublicInbox::CmdIPC4; $msg_controllen, 0); # msg_flags my $s; - my $try = 0; + $tries //= 50; do { $s = syscall($SYS_sendmsg, fileno($sock), $mh, $flags); - } while ($s < 0 && PublicInbox::CmdIPC4::sendmsg_retry($try)); + } while ($s < 0 && PublicInbox::CmdIPC4::sendmsg_retry($tries)); $s >= 0 ? $s : undef; }; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 5f159683..3a67ab54 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -17,9 +17,10 @@ my $lei_loud = $ENV{TEST_LEI_ERR_LOUD}; our $tail_cmd = $ENV{TAIL}; our ($lei_opt, $lei_out, $lei_err); use autodie qw(chdir close fcntl mkdir open opendir seek unlink); +$ENV{XDG_CACHE_HOME} //= "$ENV{HOME}/.cache"; # reuse C++ xap_helper builds $_ = File::Spec->rel2abs($_) for (grep(!m!^/!, @INC)); - +our $CURRENT_DAEMON; BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick @@ -167,7 +168,7 @@ sub require_git_http_backend (;$) { my ($nr) = @_; state $ok = do { require PublicInbox::Git; - my $git = PublicInbox::Git::check_git_exe() or plan + my $git = PublicInbox::Git::git_exe() or plan skip_all => 'nothing in public-inbox works w/o git'; my $rdr = { 1 => \my $out, 2 => \my $err }; xsys([$git, qw(http-backend)], undef, $rdr); @@ -565,6 +566,9 @@ sub start_script { my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2; my $sub = $run_mode == 0 ? undef : key2sub($key); my $tail; + my @xh = split(/\s+/, $ENV{TEST_DAEMON_XH} // ''); + @xh = () if $key !~ /-(?:imapd|netd|httpd|pop3d|nntpd)\z/; + push @argv, @xh; if ($tail_cmd) { my @paths; for (@argv) { @@ -588,9 +592,9 @@ sub start_script { require PublicInbox::DS; my $oset = PublicInbox::DS::block_signals(); require PublicInbox::OnDestroy; - my $tmp_mask = PublicInbox::OnDestroy->new( + my $tmp_mask = PublicInbox::OnDestroy::all( \&PublicInbox::DS::sig_setmask, $oset); - my $pid = PublicInbox::DS::do_fork(); + my $pid = PublicInbox::DS::fork_persist(); if ($pid == 0) { close($_) for (@{delete($opt->{-CLOFORK}) // []}); # pretend to be systemd (cf. sd_listen_fds(3)) @@ -612,7 +616,7 @@ sub start_script { $ENV{LISTEN_FDS} = $fds; } if ($opt->{-C}) { chdir($opt->{-C}) } - $0 = join(' ', @$cmd); + $0 = join(' ', @$cmd, @xh); local @SIG{keys %SIG} = map { undef } values %SIG; local $SIG{FPE} = 'IGNORE'; # Perl default undef $tmp_mask; @@ -720,7 +724,10 @@ SKIP: { require PublicInbox::Spawn; require PublicInbox::Config; require File::Path; - + eval { # use XDG_CACHE_HOME, first: + require PublicInbox::XapHelperCxx; + PublicInbox::XapHelperCxx::check_build(); + }; local %ENV = %ENV; delete $ENV{XDG_DATA_HOME}; delete $ENV{XDG_CONFIG_HOME}; @@ -945,6 +952,7 @@ sub test_httpd ($$;$$) { local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; my $ua = LWP::UserAgent->new; $ua->max_redirect(0); + local $CURRENT_DAEMON = $td; Plack::Test::ExternalServer::test_psgi(client => $client, ua => $ua); $cb->() if $cb; diff --git a/lib/PublicInbox/Umask.pm b/lib/PublicInbox/Umask.pm index 00772ce5..2c859e65 100644 --- a/lib/PublicInbox/Umask.pm +++ b/lib/PublicInbox/Umask.pm @@ -58,7 +58,7 @@ sub _umask_for { sub with_umask { my ($self, $cb, @arg) = @_; my $old = umask($self->{umask} //= umask_prepare($self)); - my $restore = PublicInbox::OnDestroy->new($$, \&CORE::umask, $old); + my $restore = on_destroy \&CORE::umask, $old; $cb ? $cb->(@arg) : $restore; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index fb259396..15a73158 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -507,13 +507,7 @@ sub set_last_commits ($) { # this is NOT for ExtSearchIdx sub checkpoint ($;$) { my ($self, $wait) = @_; - if (my $im = $self->{im}) { - if ($wait) { - $im->barrier; - } else { - $im->checkpoint; - } - } + $self->{im}->barrier if $self->{im}; my $shards = $self->{idx_shards}; if ($shards) { my $dbh = $self->{mm}->{dbh} if $self->{mm}; @@ -1077,8 +1071,8 @@ sub unindex_todo ($$$) { return if $before == $after; # ensure any blob can not longer be accessed via dumb HTTP - run_die(['git', "--git-dir=$unit->{git}->{git_dir}", - qw(-c gc.reflogExpire=now gc --prune=all --quiet)]); + run_die($unit->{git}->cmd(qw(-c gc.reflogExpire=now gc + --prune=all --quiet))); } sub sync_ranges ($$) { diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 790b9a2c..83a83698 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -25,6 +25,7 @@ use PublicInbox::Tmpfile; use PublicInbox::ViewDiff qw(flush_diff uri_escape_path); use PublicInbox::View; use PublicInbox::Eml; +use PublicInbox::OnDestroy; use Text::Wrap qw(wrap); use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe); use POSIX qw(strftime); @@ -105,7 +106,7 @@ sub stream_large_blob ($$) { my ($ctx, $res) = @_; $ctx->{-res} = $res; my ($git, $oid, $type, $size, $di) = @$res; - my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid]; + my $cmd = $git->cmd('cat-file', $type, $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb}; $qsp->psgi_yield($ctx->{env}, undef, \&stream_blob_parse_hdr, $ctx); @@ -367,10 +368,9 @@ sub stream_patch_parse_hdr { # {parse_hdr} for Qspawn sub show_patch ($$) { my ($ctx, $res) = @_; my ($git, $oid) = @$res; - my @cmd = ('git', "--git-dir=$git->{git_dir}", - qw(format-patch -1 --stdout -C), + my $cmd = $git->cmd(qw(format-patch -1 --stdout -C), "--signature=git format-patch -1 --stdout -C $oid", $oid); - my $qsp = PublicInbox::Qspawn->new(\@cmd); + my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb}; $ctx->{patch_oid} = $oid; $qsp->psgi_yield($ctx->{env}, undef, \&stream_patch_parse_hdr, $ctx); @@ -388,7 +388,7 @@ sub show_commit ($$) { qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid), undef, { 1 => $ctx->{patch_fh} }); $qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = ''); - my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx); + my $cmt_fin = on_destroy \&cmt_fin, $ctx; $ctx->{git} = $git; $ctx->{oid} = $oid; $qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin); @@ -399,8 +399,8 @@ sub show_other ($$) { # just in case... my ($git, $oid, $type, $size) = @$res; $size > $MAX_SIZE and return html_page($ctx, 200, ascii_html($type)." $oid is too big to show\n". dbg_log($ctx)); - my $cmd = ['git', "--git-dir=$git->{git_dir}", - qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ]; + my $cmd = $git->cmd(qw(show --encoding=UTF-8 + --no-color --no-abbrev), $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); $qsp->psgi_qx($ctx->{env}, undef, \&show_other_result, $ctx); @@ -486,8 +486,7 @@ sub show_tree ($$) { # also used by RepoTree my ($git, $oid, undef, $size) = @$res; $size > $MAX_SIZE and return html_page($ctx, 200, "tree $oid is too big to show\n". dbg_log($ctx)); - my $cmd = [ 'git', "--git-dir=$git->{git_dir}", - qw(ls-tree -z -l --no-abbrev), $oid ]; + my $cmd = $git->cmd(qw(ls-tree -z -l --no-abbrev), $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{tree_oid} = $oid; $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); @@ -624,7 +623,7 @@ sub start_solver ($) { my $v = $ctx->{qp}->{$from} // next; $ctx->{hints}->{$to} = $v if $v ne ''; } - $ctx->{-next_solver} = PublicInbox::OnDestroy->new($$, \&next_solver); + $ctx->{-next_solver} = on_destroy \&next_solver; ++$solver_nr; $ctx->{-tmp} = File::Temp->newdir("solver.$ctx->{oid_b}-XXXX", TMPDIR => 1); diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1ec574ea..eb90d353 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -445,7 +445,7 @@ sub imap_idle_reap { # awaitpid callback sub imap_idle_fork { my ($self, $uri, $intvl) = @_; return if $self->{quit}; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { watch_atfork_child($self); watch_imap_idle_1($self, $uri, $intvl); @@ -506,7 +506,7 @@ sub poll_fetch_fork { # DS::add_timer callback my @imap = grep { # push() always returns > 0 $_->scheme =~ m!\Aimaps?!i ? 1 : (push(@nntp, $_) < 0) } @$uris; - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { watch_atfork_child($self); watch_imap_fetch_all($self, \@imap) if @imap; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index 61aa7862..a5e2dc4a 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -253,7 +253,7 @@ sub summary ($$) { push(@log, $tip) if defined $tip; # limit scope for MockHTTP test (t/solver_git.t) - my $END = PublicInbox::OnDestroy->new($$, \&summary_END, $ctx); + my $END = on_destroy \&summary_END, $ctx; for (['log', \@log], [ 'heads', [@EACH_REF, "--count=$nb", 'refs/heads'] ], [ 'tags', [@EACH_REF, "--count=$nt", 'refs/tags'] ]) { diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm index 4dcbbe5d..24b3f45e 100644 --- a/lib/PublicInbox/XapClient.pm +++ b/lib/PublicInbox/XapClient.pm @@ -11,7 +11,8 @@ use v5.12; use PublicInbox::Spawn qw(spawn); use Socket qw(AF_UNIX SOCK_SEQPACKET); use PublicInbox::IPC; -use autodie qw(fork pipe socketpair); +use autodie qw(pipe socketpair); +our $tries = 50; sub mkreq { my ($self, $ios, @arg) = @_; @@ -19,13 +20,14 @@ sub mkreq { pipe($r, $ios->[0]) if !defined($ios->[0]); my @fds = map fileno($_), @$ios; my $buf = join("\0", @arg, ''); - $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0) // - die "send_cmd: $!"; + $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0, $tries) + // die "send_cmd: $!"; $n == length($buf) or die "send_cmd: $n != ".length($buf); $r; } -sub start_helper { +sub start_helper (@) { + $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS my @argv = @_; socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0); my $cls = 'PublicInbox::XapHelperCxx'; diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index ed11a2f8..c9957f64 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -27,6 +27,8 @@ sub cmd_test_inspect { ($req->{srch}->has_threadid ? 1 : 0) } +sub cmd_test_sleep { select(undef, undef, undef, 0.01) while 1 } + sub iter_retry_check ($) { if (ref($@) =~ /\bDatabaseModifiedError\b/) { $_[0]->{srch}->reopen; @@ -147,17 +149,8 @@ sub cmd_dump_roots { sub mset_iter ($$) { my ($req, $it) = @_; - eval { - my $buf = $it->get_docid; - $buf .= "\0".$it->get_percent if $req->{p}; - my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef; - for my $p (@{$req->{A}}) { - $buf .= "\0".$p.$_ for xap_terms($p, $doc); - } - $buf .= "\0".$doc->get_data if $req->{D}; - say { $req->{0} } $buf; - }; - $@ ? iter_retry_check($req) : 0; + say { $req->{0} } $it->get_docid, "\0", + $it->get_percent, "\0", $it->get_rank; } sub cmd_mset { # to be used by WWW + IMAP @@ -170,7 +163,8 @@ sub cmd_mset { # to be used by WWW + IMAP $opt->{eidx_key} = $req->{O} if defined $req->{O}; $opt->{threadid} = $req->{T} if defined $req->{T}; my $mset = $req->{srch}->mset($qry_str, $opt); - say { $req->{0} } 'mset.size=', $mset->size; + say { $req->{0} } 'mset.size=', $mset->size, + ' .get_matches_estimated=', $mset->get_matches_estimated; for my $it ($mset->items) { for (my $t = 10; $t > 0; --$t) { $t = mset_iter($req, $it) // $t; @@ -178,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP } } +sub srch_init_extra ($) { + my ($req) = @_; + my $qp = $req->{srch}->{qp}; + for (@{$req->{Q}}) { + my ($upfx, $m, $xpfx) = split /([:=])/; + $xpfx // die "E: bad -Q $_"; + $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix'; + $qp->$m($upfx, $xpfx); + } + $req->{srch}->{qp_extra_done} = 1; +} + sub dispatch { my ($req, $cmd, @argv) = @_; my $fn = $req->can("cmd_$cmd") or return; @@ -185,8 +191,9 @@ sub dispatch { or return; my $dirs = delete $req->{d} or die 'no -d args'; my $key = join("\0", @$dirs); + my $new; $req->{srch} = $SRCH{$key} //= do { - my $new = { qp_flags => $PublicInbox::Search::QP_FLAGS }; + $new = { qp_flags => $PublicInbox::Search::QP_FLAGS }; my $first = shift @$dirs; my $slow_phrase = -f "$first/iamchert"; $new->{xdb} = $X->{Database}->new($first); @@ -201,13 +208,20 @@ sub dispatch { $new->{qp} = $new->qparse_new; $new; }; + $req->{srch}->{xdb}->reopen unless $new; + $req->{Q} && !$req->{srch}->{qp_extra_done} and + srch_init_extra $req; + my $timeo = $req->{K}; + alarm($timeo) if $timeo; $fn->($req, @argv); + alarm(0) if $timeo; } sub recv_loop { local $SIG{__WARN__} = sub { print $stderr @_ }; my $rbuf; local $SIG{TERM} = sub { undef $in }; + local $SIG{USR1} = \&reopen_logs; while (defined($in)) { PublicInbox::DS::sig_setmask($workerset); my @fds = eval { # we undef $in in SIG{TERM} @@ -219,7 +233,7 @@ sub recv_loop { } scalar(@fds) or exit(66); # EX_NOINPUT die "recvmsg: $!" if !defined($fds[0]); - PublicInbox::DS::block_signals(); + PublicInbox::DS::block_signals(POSIX::SIGALRM); my $req = bless {}, __PACKAGE__; my $i = 0; open($req->{$i++}, '+<&=', $_) for @fds; @@ -244,7 +258,7 @@ sub reap_worker { # awaitpid CB sub start_worker ($) { my ($nr) = @_; - my $pid = eval { PublicInbox::DS::do_fork } // return(warn($@)); + my $pid = eval { PublicInbox::DS::fork_persist } // return(warn($@)); if ($pid == 0) { undef %WORKERS; $SIG{TTIN} = $SIG{TTOU} = 'IGNORE'; @@ -271,6 +285,18 @@ sub do_sigttou { } } +sub reopen_logs { + my $p = $ENV{STDOUT_PATH}; + defined($p) && open(STDOUT, '>>', $p) and STDOUT->autoflush(1); + $p = $ENV{STDERR_PATH}; + defined($p) && open(STDERR, '>>', $p) and STDERR->autoflush(1); +} + +sub parent_reopen_logs { + reopen_logs(); + kill('USR1', values %WORKERS); +} + sub xh_alive { $in || scalar(keys %WORKERS) } sub start (@) { @@ -284,7 +310,7 @@ sub start (@) { die 'bad args'; local $workerset = POSIX::SigSet->new; $workerset->fillset or die "fillset: $!"; - for (@PublicInbox::DS::UNBLOCKABLE) { + for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) { $workerset->delset($_) or die "delset($_): $!"; } @@ -303,6 +329,7 @@ sub start (@) { }, TTOU => \&do_sigttou, CHLD => \&PublicInbox::DS::enqueue_reap, + USR1 => \&parent_reopen_logs, }; PublicInbox::DS::block_signals(); start_workers(); diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm index eafe61a8..74852ad1 100644 --- a/lib/PublicInbox/XapHelperCxx.pm +++ b/lib/PublicInbox/XapHelperCxx.pm @@ -16,8 +16,15 @@ use autodie; my $cxx = which($ENV{CXX} // 'c++') // which('clang') // die 'no C++ compiler'; my $dir = substr("$cxx-$Config{archname}", 1); # drop leading '/' $dir =~ tr!/!-!; -my $idir = ($ENV{XDG_CACHE_HOME} // - (($ENV{HOME} // die('HOME unset')).'/.cache')).'/public-inbox/jaot'; +my $idir; +if ((defined($ENV{XDG_CACHE_HOME}) && -d $ENV{XDG_CACHE_HOME}) || + (defined($ENV{HOME}) && -d $ENV{HOME})) { + $idir = ($ENV{XDG_CACHE_HOME} // + (($ENV{HOME} // die('HOME unset')).'/.cache') + ).'/public-inbox/jaot'; +} +$idir //= $ENV{PERL_INLINE_DIRECTORY} // + die 'HOME and PERL_INLINE_DIRECTORY unset'; substr($dir, 0, 0) = "$idir/"; my $bin = "$dir/xap_helper"; my ($srcpfx) = (__FILE__ =~ m!\A(.+/)[^/]+\z!); @@ -58,7 +65,11 @@ sub needs_rebuild () { sub build () { if (!-d $dir) { require File::Path; - File::Path::make_path($dir); + eval { File::Path::make_path($dir) }; + if (!-d $dir && defined($ENV{PERL_INLINE_DIRECTORY})) { + $dir = $ENV{PERL_INLINE_DIRECTORY}; + File::Path::make_path($dir); + } } require PublicInbox::CodeSearch; require PublicInbox::Lock; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 69f0af43..9a148ae4 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -103,7 +103,7 @@ sub commit_changes ($$$$) { sub cb_spawn { my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact() - my $pid = PublicInbox::DS::do_fork; + my $pid = PublicInbox::DS::fork_persist; return $pid if $pid > 0; $SIG{__DIE__} = sub { warn @_; _exit(1) }; # don't jump up stack $cb->($args, $opt); diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm new file mode 100644 index 00000000..ac25eece --- /dev/null +++ b/lib/PublicInbox/XhcMset.pm @@ -0,0 +1,51 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# mocks Xapian::Mset and allows slow queries from blocking the event loop +package PublicInbox::XhcMset; +use v5.12; +use parent qw(PublicInbox::DS); +use PublicInbox::XhcMsetIterator; +use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); + +sub event_step { + my ($self) = @_; + my ($cb, @args) = @{delete $self->{cb_args} // return}; + my $rd = $self->{sock}; + eval { + my $hdr = <$rd> // die "E: reading mset header: $!"; + for (split /\s+/, $hdr) { # read mset.size + estimated_matches + my ($k, $v) = split /=/, $_, 2; + $k =~ s/\A[^\.]*\.//; # s/(mset)?\./ + $self->{$k} = $v; + } + my $size = $self->{size} // die "E: bad xhc header: `$hdr'"; + my @it = map { PublicInbox::XhcMsetIterator::make($_) } <$rd>; + $self->{items} = \@it; + scalar(@it) == $size or die + 'E: got ',scalar(@it),", expected mset.size=$size"; + }; + my $err = $@; + $self->close; + eval { $cb->(@args, $self, $err) }; + warn "E: $@\n" if $@; +} + +sub maybe_new { + my (undef, $rd, $srch, @cb_args) = @_; + my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__; + if ($PublicInbox::DS::in_loop) { # async + $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT); + } else { # synchronous + $self->{sock} = $rd; + event_step($self); + undef; + } +} + +eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size + get_matches_estimated))); + +sub items { @{$_[0]->{items}} } + +1; diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm new file mode 100644 index 00000000..dcfc61e4 --- /dev/null +++ b/lib/PublicInbox/XhcMsetIterator.pm @@ -0,0 +1,20 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# mocks Xapian::MsetIterator, there's many of these allocated at once +package PublicInbox::XhcMsetIterator; +use v5.12; + +sub make ($) { + chomp($_[0]); + my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank + # we don't store $xdb in self[4] since we avoid $it->get_document + # in favor of $xdb->get_document($it->get_docid) + bless \@self, __PACKAGE__; +} + +sub get_docid { $_[0]->[0] } +sub get_percent { $_[0]->[1] } +sub get_rank { $_[0]->[2] } + +1; diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h index 3456910b..a30a8768 100644 --- a/lib/PublicInbox/xap_helper.h +++ b/lib/PublicInbox/xap_helper.h @@ -27,6 +27,7 @@ #include <sys/types.h> #include <sys/uio.h> #include <sys/wait.h> +#include <poll.h> #include <assert.h> #include <err.h> // BSD, glibc, and musl all have this @@ -95,6 +96,8 @@ static pid_t *worker_pids; // nr => pid #define WORKER_MAX USHRT_MAX static unsigned long nworker, nworker_hwm; static int pipefds[2]; +static const char *stdout_path, *stderr_path; // for SIGUSR1 +static sig_atomic_t worker_needs_reopen; // PublicInbox::Search and PublicInbox::CodeSearch generate these: static void mail_nrp_init(void); @@ -111,6 +114,7 @@ enum exc_iter { struct srch { int paths_len; // int for comparisons unsigned qp_flags; + bool qp_extra_done; Xapian::Database *db; Xapian::QueryParser *qp; char paths[]; // $shard_path0\0$shard_path1\0... @@ -123,6 +127,7 @@ typedef bool (*cmd)(struct req *); struct req { // argv and pfxv point into global rbuf char *argv[MY_ARG_MAX]; char *pfxv[MY_ARG_MAX]; // -A <prefix> + char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX> size_t *lenv; // -A <prefix>LENGTH struct srch *srch; char *Pgit_dir; @@ -136,13 +141,12 @@ struct req { // argv and pfxv point into global rbuf long sort_col; // value column, negative means BoolWeight int argc; int pfxc; + int qpfxc; FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional) bool has_input; // fp[0] is bidirectional bool collapse_threads; bool code_search; bool relevance; // sort by relevance before column - bool emit_percent; - bool emit_docdata; bool asc; // ascending sort }; @@ -226,6 +230,13 @@ static Xapian::MSet mail_mset(struct req *req, const char *qry_str) qry = Xapian::Query(Xapian::Query::OP_FILTER, qry, Xapian::Query(req->Oeidx_key)); } + // TODO: uid_range + if (req->threadid != ULLONG_MAX) { + std::string tid = Xapian::sortable_serialise(req->threadid); + qry = Xapian::Query(Xapian::Query::OP_FILTER, qry, + Xapian::Query(Xapian::Query::OP_VALUE_RANGE, THREADID, + tid, tid)); + } Xapian::Enquire enq = prep_enquire(req); enq.set_query(qry); // THREADID is a CPP macro defined on CLI (see) XapHelperCxx.pm @@ -406,6 +417,11 @@ static bool cmd_test_inspect(struct req *req) return false; } +static bool cmd_test_sleep(struct req *req) +{ + for (;;) poll(NULL, 0, 10); + return false; +} #include "xh_mset.h" // read-only (WWW, IMAP, lei) stuff #include "xh_cidx.h" // CodeSearchIdx.pm stuff @@ -420,6 +436,7 @@ static const struct cmd_entry { CMD(dump_ibx), // many inboxes CMD(dump_roots), // per-cidx shard CMD(test_inspect), // least common commands last + CMD(test_sleep), // least common commands last }; #define MY_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) @@ -570,6 +587,31 @@ static bool srch_init(struct req *req) return true; } +// setup query parser for altid and arbitrary headers +static void srch_init_extra(struct req *req) +{ + const char *XPFX; + for (int i = 0; i < req->qpfxc; i++) { + size_t len = strlen(req->qpfxv[i]); + char *c = (char *)memchr(req->qpfxv[i], '=', len); + + if (c) { // it's boolean "gmane=XGMANE" + XPFX = c + 1; + *c = 0; + req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX); + continue; + } + // maybe it's a non-boolean prefix "blob:XBLOBID" + c = (char *)memchr(req->qpfxv[i], ':', len); + if (!c) + errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]); + XPFX = c + 1; + *c = 0; + req->srch->qp->add_prefix(req->qpfxv[i], XPFX); + } + req->srch->qp_extra_done = true; +} + static void free_srch(void *p) // tdestroy { struct srch *srch = (struct srch *)p; @@ -633,7 +675,6 @@ static void dispatch(struct req *req) if (*end || req->off == ULLONG_MAX) ABORT("-o %s", optarg); break; - case 'p': req->emit_percent = true; break; case 'r': req->relevance = true; break; case 't': req->collapse_threads = true; break; case 'A': @@ -641,7 +682,6 @@ static void dispatch(struct req *req) if (MY_ARG_MAX == req->pfxc) ABORT("too many -A"); break; - case 'D': req->emit_docdata = true; break; case 'K': req->timeout_sec = strtoul(optarg, &end, 10); if (*end || req->timeout_sec == ULONG_MAX) @@ -653,12 +693,17 @@ static void dispatch(struct req *req) if (*end || req->threadid == ULLONG_MAX) ABORT("-T %s", optarg); break; + case 'Q': + req->qpfxv[req->qpfxc++] = optarg; + if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q"); + break; default: ABORT("bad switch `-%c'", c); } } ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch kbuf.srch->db = NULL; kbuf.srch->qp = NULL; + kbuf.srch->qp_extra_done = false; kbuf.srch->paths_len = size - offsetof(struct srch, paths); if (kbuf.srch->paths_len <= 0) ABORT("no -d args"); @@ -667,6 +712,7 @@ static void dispatch(struct req *req) req->srch = *s; if (req->srch != kbuf.srch) { // reuse existing free_srch(kbuf.srch); + req->srch->db->reopen(); } else if (!srch_init(req)) { assert(kbuf.srch == *((struct srch **)tfind( kbuf.srch, &srch_tree, srch_cmp))); @@ -675,6 +721,11 @@ static void dispatch(struct req *req) free_srch(kbuf.srch); goto cmd_err; // srch_init already warned } + if (req->qpfxc && !req->srch->qp_extra_done) + srch_init_extra(req); + if (req->timeout_sec) + alarm(req->timeout_sec > UINT_MAX ? + UINT_MAX : (unsigned)req->timeout_sec); try { if (!req->fn(req)) warnx("`%s' failed", req->argv[0]); @@ -683,6 +734,8 @@ static void dispatch(struct req *req) } catch (...) { warn("unhandled exception"); } + if (req->timeout_sec) + alarm(0); cmd_err: return; // just be silent on errors, for now } @@ -723,9 +776,12 @@ static void stderr_restore(FILE *tmp_err) clearerr(stderr); } -static void sigw(int sig) // SIGTERM handler for worker +static void sigw(int sig) // SIGTERM+SIGUSR1 handler for worker { - sock_fd = -1; // break out of recv_loop + switch (sig) { + case SIGUSR1: worker_needs_reopen = 1; break; + default: sock_fd = -1; // break out of recv_loop + } } #define CLEANUP_REQ __attribute__((__cleanup__(req_cleanup))) @@ -735,6 +791,18 @@ static void req_cleanup(void *ptr) free(req->lenv); } +static void reopen_logs(void) +{ + if (stdout_path && *stdout_path && !freopen(stdout_path, "a", stdout)) + err(EXIT_FAILURE, "freopen %s", stdout_path); + if (stderr_path && *stderr_path) { + if (!freopen(stderr_path, "a", stderr)) + err(EXIT_FAILURE, "freopen %s", stderr_path); + if (my_setlinebuf(stderr)) + err(EXIT_FAILURE, "setlinebuf(stderr)"); + } +} + static void recv_loop(void) // worker process loop { static char rbuf[4096 * 33]; // per-process @@ -742,6 +810,7 @@ static void recv_loop(void) // worker process loop sa.sa_handler = sigw; CHECK(int, 0, sigaction(SIGTERM, &sa, NULL)); + CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL)); while (sock_fd == 0) { size_t len = sizeof(rbuf); @@ -758,6 +827,10 @@ static void recv_loop(void) // worker process loop stderr_restore(req.fp[1]); ERR_CLOSE(req.fp[1], 0); } + if (worker_needs_reopen) { + worker_needs_reopen = 0; + reopen_logs(); + } } } @@ -810,6 +883,16 @@ static void cleanup_all(void) #endif } +static void parent_reopen_logs(void) +{ + reopen_logs(); + for (unsigned long nr = nworker; nr < nworker_hwm; nr++) { + pid_t pid = worker_pids[nr]; + if (pid != 0 && kill(pid, SIGUSR1)) + warn("BUG?: kill(%d, SIGUSR1)", (int)pid); + } +} + static void sigp(int sig) // parent signal handler { static const char eagain[] = "signals coming in too fast"; @@ -822,6 +905,7 @@ static void sigp(int sig) // parent signal handler case SIGCHLD: c = '.'; break; case SIGTTOU: c = '-'; break; case SIGTTIN: c = '+'; break; + case SIGUSR1: c = '#'; break; default: write(STDERR_FILENO, bad_sig, sizeof(bad_sig) - 1); _exit(EXIT_FAILURE); @@ -928,6 +1012,8 @@ int main(int argc, char *argv[]) { int c; socklen_t slen = (socklen_t)sizeof(c); + stdout_path = getenv("STDOUT_PATH"); + stderr_path = getenv("STDERR_PATH"); if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen)) err(EXIT_FAILURE, "getsockopt"); @@ -945,12 +1031,6 @@ int main(int argc, char *argv[]) } nworker = 1; -#ifdef _SC_NPROCESSORS_ONLN - long j = sysconf(_SC_NPROCESSORS_ONLN); - if (j > 0) - nworker = j > WORKER_MAX ? WORKER_MAX : j; -#endif // _SC_NPROCESSORS_ONLN - // make warn/warnx/err multi-process friendly: if (my_setlinebuf(stderr)) err(EXIT_FAILURE, "setlinebuf(stderr)"); @@ -992,6 +1072,8 @@ int main(int argc, char *argv[]) DELSET(SIGXCPU); DELSET(SIGXFSZ); #undef DELSET + CHECK(int, 0, sigdelset(&workerset, SIGUSR1)); + CHECK(int, 0, sigdelset(&fullset, SIGALRM)); if (nworker == 0) { // no SIGTERM handling w/o workers recv_loop(); @@ -1012,10 +1094,12 @@ int main(int argc, char *argv[]) CHECK(int, 0, sigdelset(&pset, SIGCHLD)); CHECK(int, 0, sigdelset(&pset, SIGTTIN)); CHECK(int, 0, sigdelset(&pset, SIGTTOU)); + CHECK(int, 0, sigdelset(&pset, SIGUSR1)); struct sigaction sa = {}; sa.sa_handler = sigp; + CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL)); CHECK(int, 0, sigaction(SIGTTIN, &sa, NULL)); CHECK(int, 0, sigaction(SIGTTOU, &sa, NULL)); sa.sa_flags = SA_NOCLDSTOP; @@ -1040,6 +1124,7 @@ int main(int argc, char *argv[]) case '.': break; // do_sigchld already called case '-': do_sigttou(); break; case '+': do_sigttin(); break; + case '#': parent_reopen_logs(); break; default: errx(EXIT_FAILURE, "BUG: c=%c", sbuf[i]); } } diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h index 4e97a284..db2692c9 100644 --- a/lib/PublicInbox/xh_mset.h +++ b/lib/PublicInbox/xh_mset.h @@ -3,49 +3,6 @@ // This file is only intended to be included by xap_helper.h // it implements pieces used by WWW, IMAP and lei -static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc) -{ - Xapian::TermIterator cur = doc->termlist_begin(); - Xapian::TermIterator end = doc->termlist_end(); - size_t pfx_len = strlen(pfx); - - for (cur.skip_to(pfx); cur != end; cur++) { - std::string tn = *cur; - if (!starts_with(&tn, pfx, pfx_len)) break; - fputc(0, fp); - fwrite(tn.data(), tn.size(), 1, fp); - } -} - -static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off, - Xapian::MSetIterator *i) -{ - try { - fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid - if (req->emit_percent) - fprintf(fp, "%c%d", 0, i->get_percent()); - if (req->pfxc || req->emit_docdata) { - Xapian::Document doc = i->get_document(); - for (int p = 0; p < req->pfxc; p++) - emit_doc_term(fp, req->pfxv[p], &doc); - if (req->emit_docdata) { - std::string d = doc.get_data(); - fputc(0, fp); - fwrite(d.data(), d.size(), 1, fp); - } - } - fputc('\n', fp); - } catch (const Xapian::DatabaseModifiedError & e) { - req->srch->db->reopen(); - if (fseeko(fp, off, SEEK_SET) < 0) EABORT("fseeko"); - return ITER_RETRY; - } catch (const Xapian::DocNotFoundError & e) { // oh well... - warnx("doc not found: %s", e.get_description().c_str()); - if (fseeko(fp, off, SEEK_SET) < 0) EABORT("fseeko"); - } - return ITER_OK; -} - #ifndef WBUF_FLUSH_THRESHOLD # define WBUF_FLUSH_THRESHOLD (BUFSIZ - 1000) #endif @@ -63,7 +20,9 @@ static bool cmd_mset(struct req *req) Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) : mail_mset(req, qry_str); fbuf_init(&wbuf); - fprintf(wbuf.fp, "mset.size=%llu\n", (unsigned long long)mset.size()); + fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n", + (unsigned long long)mset.size(), + (unsigned long long)mset.get_matches_estimated()); int fd = fileno(req->fp[0]); for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) { off_t off = ftello(wbuf.fp); @@ -82,12 +41,10 @@ static bool cmd_mset(struct req *req) if (fseeko(wbuf.fp, 0, SEEK_SET)) EABORT("fseeko"); off = 0; } - for (int t = 10; t > 0; --t) - switch (mset_iter(req, wbuf.fp, off, &i)) { - case ITER_OK: t = 0; break; // leave inner loop - case ITER_RETRY: break; // continue for-loop - case ITER_ABORT: return false; // error - } + fprintf(wbuf.fp, "%llu" "%c" "%d" "%c" "%llu\n", + (unsigned long long)(*i), // get_docid + 0, i.get_percent(), + 0, (unsigned long long)i.get_rank()); } off_t off = ftello(wbuf.fp); if (off < 0) EABORT("ftello"); diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index bee824b1..2e5a5d2c 100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -32,7 +32,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s dedupe:s@ gc commit-interval=i watch scan! dry-run|n - all C=s@ help|h)) + multi-pack-index! all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; diff --git a/script/public-inbox-index b/script/public-inbox-index index 74232ebf..a13e44bf 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -44,6 +44,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune batch_size|batch-size=s since|after=s until|before=s sequential-shard|seq-shard + multi-pack-index! no-update-extindex update-extindex|E=s@ fast-noop|F skip-docdata all C=s@ help|h)) or die $help; diff --git a/script/public-inbox-init b/script/public-inbox-init index 8915cf31..cf6443f7 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -122,7 +122,7 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { exit(255); }; require PublicInbox::OnDestroy; -my $auto_unlink = PublicInbox::OnDestroy->new($$, sub { unlink $lockfile }); +my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile }); my $perm = 0644 & ~umask; my %seen; if (-e $pi_config) { @@ -6,7 +6,7 @@ use PublicInbox::TestCommon; use Cwd qw(getcwd); use List::Util qw(sum); use autodie qw(close mkdir open rename); -require_mods(qw(json Xapian +SCM_RIGHTS)); +require_mods(qw(json Xapian +SCM_RIGHTS DBD::SQLite)); use_ok 'PublicInbox::CodeSearchIdx'; use PublicInbox::Import; my ($tmp, $for_destroy) = tmpdir(); @@ -147,26 +147,28 @@ if ('multi-repo search') { my $test_xhc = sub { my ($xhc) = @_; + my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); my $impl = $xhc->{impl}; my ($r, @l); - $r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL'); + $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL'); chomp(@l = <$r>); - is(shift(@l), 'mset.size=2', "got expected header $impl"); + like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl"; my %docid2data; my @got = sort map { - my @f = split /\0/; - is scalar(@f), 2, 'got 2 entries'; - $docid2data{$f[0]} = $f[1]; - $f[1]; + my ($docid, $pct, $rank, @extra) = split /\0/; + ok $pct >= 0 && $pct <= 100, 'pct in range'; + ok $rank >= 0 && $rank <= 100000, 'rank ok'; + is scalar(@extra), 0, 'no extra fields'; + $docid2data{$docid} = + $csrch->xdb->get_document($docid)->get_data; } @l; is_deeply(\@got, $exp, "expected doc_data $impl"); $r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL'); chomp(@l = <$r>); - is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl"); + like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl"; is_deeply(\@l, [], "no extra lines $impl"); - my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); while (my ($did, $expect) = each %docid2data) { is_deeply($csrch->xdb->get_document($did)->get_data, $expect, "docid=$did data matches"); @@ -179,14 +181,15 @@ SKIP: { require_mods('+SCM_RIGHTS', 1); require PublicInbox::XapClient; my $xhc = PublicInbox::XapClient::start_helper('-j0'); - $test_xhc->($xhc); + my $csrch = PublicInbox::CodeSearch->new("$tmp/ext"); + $test_xhc->($xhc, $csrch); skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX}; $xhc->{impl} =~ /Cxx/ or skip 'C++ compiler or xapian development libs missing', 1; skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY}; local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test $xhc = PublicInbox::XapClient::start_helper('-j0'); - $test_xhc->($xhc); + $test_xhc->($xhc, $csrch); } if ('--update') { diff --git a/t/extsearch.t b/t/extsearch.t index 090f6db5..797aa8f5 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -559,6 +559,15 @@ EOM for (@xdb) { ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard') } + my $mpi = "$d/ALL.git/objects/pack/multi-pack-index"; + SKIP: { + skip 'git too old for for multi-pack-index', 2 if !-f $mpi; + unlink glob("$d/ALL.git/objects/pack/*"); + ok run_script([qw(-extindex --all -L medium -j3 + --no-multi-pack-index), $d]), + 'test --no-multi-pack-index'; + ok !-f $mpi, '--no-multi-pack-index respected'; + } } test_lei(sub { diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t index ff1b4535..d7840dd0 100644 --- a/t/imap_searchqp.t +++ b/t/imap_searchqp.t @@ -3,6 +3,8 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; +use autodie qw(open seek read); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::TestCommon; require_mods(qw(-imapd)); @@ -29,12 +31,15 @@ is($q->{xap}, 'f:"b"', 'charset handled'); $q = $parse->(qq{CHARSET WTF-8 From b}); like($q, qr/\ANO \[/, 'bad charset rejected'); -for my $x ('', ' (try #2)') { - open my $fh, '>:scalar', \(my $buf = '') or die; - local *STDERR = $fh; +{ + open my $tmperr, '+>', undef; + open my $olderr, '>&', \*STDERR; + open STDERR, '>&', $tmperr; $q = $parse->(qq{CHARSET}); - last if is($buf, '', "nothing spewed to STDERR on bad query$x"); - diag 'FIXME: above fails mysteriously sometimes, so we try again...'; + open STDERR, '>&', $olderr; + seek $tmperr, 0, SEEK_SET; + read($tmperr, my $buf, -s $tmperr); + is($buf, '', 'nothing spewed to STDERR on bad query'); } like($q, qr/\ABAD /, 'bad charset rejected'); diff --git a/t/lei-sigpipe.t b/t/lei-sigpipe.t index 72bc6c7d..b9fd88a6 100644 --- a/t/lei-sigpipe.t +++ b/t/lei-sigpipe.t @@ -26,9 +26,7 @@ SKIP: { # https://public-inbox.org/meta/20220227080422.gyqowrxomzu6gyin@sourcephile.fr/ my $oldSIGPIPE = $SIG{PIPE}; $SIG{PIPE} = 'DEFAULT'; -my $cleanup = PublicInbox::OnDestroy->new($$, sub { - $SIG{PIPE} = $oldSIGPIPE; -}); +my $cleanup = on_destroy(sub { $SIG{PIPE} = $oldSIGPIPE }); test_lei(sub { my $f = "$ENV{HOME}/big.eml"; diff --git a/t/lei-store-fail.t b/t/lei-store-fail.t index c2f03148..1e83e383 100644 --- a/t/lei-store-fail.t +++ b/t/lei-store-fail.t @@ -39,7 +39,7 @@ EOM lei_ok qw(q m:testmessage@example.com); is($lei_out, "[null]\n", 'delayed commit is unindexed'); - # make immediate ->sto_done_request fail from mboxrd import: + # make immediate ->sto_barrier_request fail from mboxrd import: remove_tree("$ENV{HOME}/.local/share/lei/store"); # subsequent lei commands are undefined behavior, # but we need to make sure the current lei command fails: diff --git a/t/mbox_lock.t b/t/mbox_lock.t index c2fee0d4..1fc828aa 100644 --- a/t/mbox_lock.t +++ b/t/mbox_lock.t @@ -2,6 +2,7 @@ # Copyright (C) 2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use PublicInbox::TestCommon; +use autodie qw(chdir); use POSIX qw(_exit); use PublicInbox::DS qw(now); use Errno qw(EAGAIN); @@ -18,11 +19,11 @@ ok(!-f "$f.lock", 'no dotlock with none'); undef $mbl; { opendir my $cur, '.' or BAIL_OUT $!; - my $od = PublicInbox::OnDestroy->new(sub { chdir $cur }); - chdir $tmpdir or BAIL_OUT; + my $od = on_destroy \&chdir, $cur; + chdir $tmpdir; my $abs = "$tmpdir/rel.lock"; my $rel = PublicInbox::MboxLock->acq('rel', 1, ['dotlock']); - chdir '/' or BAIL_OUT; + chdir '/'; ok(-f $abs, 'lock with abs path created'); undef $rel; ok(!-f $abs, 'lock gone despite being in the wrong dir'); diff --git a/t/mh_reader.t b/t/mh_reader.t index c81df32e..95a7be4a 100644 --- a/t/mh_reader.t +++ b/t/mh_reader.t @@ -5,7 +5,6 @@ use PublicInbox::TestCommon; require_ok 'PublicInbox::MHreader'; use PublicInbox::IO qw(write_file); use PublicInbox::Lock; -use PublicInbox::OnDestroy; use PublicInbox::Eml; use File::Path qw(remove_tree); use autodie; diff --git a/t/on_destroy.t b/t/on_destroy.t index e7945100..e8fdf35e 100644 --- a/t/on_destroy.t +++ b/t/on_destroy.t @@ -1,37 +1,44 @@ #!perl -w use v5.12; use Test::More; -require_ok 'PublicInbox::OnDestroy'; +use PublicInbox::OnDestroy; +use POSIX qw(_exit); my @x; -my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' }); +my $od = on_destroy sub { push @x, 'hi' }; is_deeply(\@x, [], 'not called, yet'); undef $od; is_deeply(\@x, [ 'hi' ], 'no args works'); -$od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 'bye'); +$od = on_destroy sub { $x[0] = $_[0] }, 'bye'; is_deeply(\@x, [ 'hi' ], 'nothing changed while alive'); undef $od; is_deeply(\@x, [ 'bye' ], 'arg passed'); -$od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); +$od = on_destroy sub { @x = @_ }, qw(x y); undef $od; is_deeply(\@x, [ 'x', 'y' ], '2 args passed'); open my $tmp, '+>>', undef or BAIL_OUT $!; $tmp->autoflush(1); -$od = PublicInbox::OnDestroy->new(1, sub { print $tmp "$$ DESTROY\n" }); -undef $od; +$od = on_destroy sub { print $tmp "$$ DESTROY\n" }; +my $pid = PublicInbox::OnDestroy::fork_tmp; +if ($pid == 0) { undef $od; _exit 0; }; +waitpid($pid, 0); +is $?, 0, 'test process exited'; is(-s $tmp, 0, '$tmp is empty on pid mismatch'); -$od = PublicInbox::OnDestroy->new($$, sub { $tmp = $$ }); +$od->cancel; +undef $od; +is(-s $tmp, 0, '$tmp is empty after ->cancel'); +$od = on_destroy sub { $tmp = $$ }; undef $od; is($tmp, $$, '$tmp set to $$ by callback'); -$od = PublicInbox::OnDestroy->new($$, sub { $tmp = 'foo' }); +$od = on_destroy sub { $tmp = 'foo' }; $od->cancel; $od = undef; isnt($tmp, 'foo', '->cancel'); if (my $nr = $ENV{TEST_LEAK_NR}) { for (0..$nr) { - $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); + $od = on_destroy sub { @x = @_ }, qw(x y); } } diff --git a/t/psgi_v2.t b/t/psgi_v2.t index 54faae9b..2b678fd8 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -9,6 +9,7 @@ require_git(2.6); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::MID qw(mids); +use autodie qw(kill rename); require_mods(qw(DBD::SQLite Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder HTTP::Date)); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); @@ -101,6 +102,19 @@ EOM } }; +my $test_lei_q_threadid = sub { + my ($u) = @_; + test_lei(sub { + lei_ok qw(q -f text --only), $u, qw(-T t@1 s:unrelated); + is $lei_out, '', 'no results on unrelated thread'; + lei_ok qw(q -f text --only), $u, qw(-T t@1 dt:19931002000300..); + my @m = ($lei_out =~ m!^Message-ID: <([^>]+)>\n!gms); + is_deeply \@m, ['t@3'], 'got expected result from -T MSGID'; + }); +}; + +$test_lei_q_threadid->($m2t->{inboxdir}); + my $cfgpath = "$ibx->{inboxdir}/pi_config"; { open my $fh, '>', $cfgpath or BAIL_OUT $!; @@ -374,8 +388,64 @@ my $client3 = sub { $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m")); is($res->code, 404, '404 on cross-thread search'); + + my $rmt = $ENV{PLACK_TEST_EXTERNALSERVER_URI}; + $rmt and $test_lei_q_threadid->("$rmt/m2t/"); }; test_psgi(sub { $www->call(@_) }, $client3); test_httpd($env, $client3, 4); +if ($^O eq 'linux' && -r "/proc/$$/stat") { + my $args; + my $search_xh_pid = sub { + my ($pid) = @_; + for my $f (glob('/proc/*/stat')) { + open my $fh, '<', $f or next; + my @s = split /\s+/, readline($fh) // next; + next if $s[3] ne $pid; # look for matching PPID + open $fh, '<', "/proc/$s[0]/cmdline" or next; + my $cmdline = readline($fh) // next; + if ($cmdline =~ /\0-MPublicInbox::XapHelper\0-e\0/ || + $cmdline =~ m!/xap_helper\0!) { + return $s[0]; + } + } + undef; + }; + my $usr1_test = sub { + my ($cb) = @_; + my $td = $PublicInbox::TestCommon::CURRENT_DAEMON; + my $pid = $td->{pid}; + my $res = $cb->(GET('/v2test/?q=m:a-mid@b')); + is $res->code, 200, '-httpd is running w/ search'; + + $search_xh_pid->($pid); + my $xh_pid = $search_xh_pid->($pid) or + BAIL_OUT "can't find XH pid with $args"; + my $xh_err = readlink "/proc/$xh_pid/fd/2"; + is $xh_err, "$env->{TMPDIR}/stderr.log", + "initial stderr expected ($args)"; + rename "$env->{TMPDIR}/stderr.log", + "$env->{TMPDIR}/stderr.old"; + $xh_err = readlink "/proc/$xh_pid/fd/2"; + is $xh_err, "$env->{TMPDIR}/stderr.old", + "stderr followed rename ($args)"; + kill 'USR1', $pid; + tick; + $res = $cb->(GET('/v2test/?q=m:a-mid@b')); + is $res->code, 200, '-httpd still running w/ search'; + my $new_xh_pid = $search_xh_pid->($pid) or + BAIL_OUT "can't find new XH pid with $args"; + is $new_xh_pid, $xh_pid, "XH pid unchanged ($args)"; + $xh_err = readlink "/proc/$new_xh_pid/fd/2"; + is $xh_err, "$env->{TMPDIR}/stderr.log", + "stderr updated ($args)"; + }; + for my $x ('-X0', '-X1', '-X0 -W1', '-X1 -W1') { + $args = $x; + local $ENV{TEST_DAEMON_XH} = $args; + test_httpd($env, $usr1_test); + } +} + done_testing; @@ -6,6 +6,7 @@ use Test::More; use PublicInbox::Spawn qw(which spawn popen_rd run_qx); require PublicInbox::Sigfd; require PublicInbox::DS; +use PublicInbox::OnDestroy; my $rlimit_map = PublicInbox::Spawn->can('rlimit_map'); { my $true = which('true'); @@ -171,7 +172,7 @@ EOF my @arg; my $fh = popen_rd(['cat'], undef, { 0 => $r }, sub { @arg = @_; warn "x=$$\n" }, 'hi'); - my $pid = fork // BAIL_OUT $!; + my $pid = PublicInbox::OnDestroy::fork_tmp; local $SIG{__WARN__} = sub { _exit(1) }; if ($pid == 0) { local $SIG{__DIE__} = sub { _exit(2) }; diff --git a/t/www_altid.t b/t/www_altid.t index de1e6ed6..7ad4a1d2 100644 --- a/t/www_altid.t +++ b/t/www_altid.t @@ -6,7 +6,7 @@ use PublicInbox::Config; use PublicInbox::Spawn qw(spawn); require_cmd('sqlite3'); require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape - Plack::Builder IO::Uncompress::Gunzip)); + Plack::Builder IO::Uncompress::Gunzip Xapian)); use_ok($_) for qw(Plack::Test HTTP::Request::Common); require_ok 'PublicInbox::Msgmap'; require_ok 'PublicInbox::AltId'; @@ -14,17 +14,13 @@ require_ok 'PublicInbox::WWW'; my ($tmpdir, $for_destroy) = tmpdir(); my $aid = 'xyz'; my $cfgpath; -my $ibx = create_inbox 'test', indexlevel => 'basic', sub { +my $spec = "serial:$aid:file=blah.sqlite3"; +my $ibx = create_inbox 'test-altid', indexlevel => 'medium', + altid => [ $spec ], sub { my ($im, $ibx) = @_; - $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; -From: a@example.com -Message-Id: <a@example.com> - -EOF - # $im->done; - my $spec = "serial:$aid:file=blah.sqlite3"; my $altid = PublicInbox::AltId->new($ibx, $spec, 1); $altid->mm_alt->mid_set(1, 'a@example.com'); + undef $altid; $cfgpath = "$ibx->{inboxdir}/cfg"; open my $fh, '>', $cfgpath or BAIL_OUT "open $cfgpath: $!"; print $fh <<EOF or BAIL_OUT $!; @@ -35,6 +31,11 @@ EOF url = http://example.com/test EOF close $fh or BAIL_OUT $!; + $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; +From: a@example.com +Message-Id: <a@example.com> + +EOF }; $cfgpath //= "$ibx->{inboxdir}/cfg"; my $cfg = PublicInbox::Config->new($cfgpath); @@ -56,6 +57,11 @@ my $client = sub { is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid'); $mm_cmp = undef; unlink $cmpfile or die; + + $res = $cb->(GET('/test/?q=xyz:1')); + is $res->code, 200, 'altid search hit'; + $res = $cb->(GET('/test/?q=xyz:10')); + is $res->code, 404, 'altid search miss'; }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { diff --git a/t/xap_helper.t b/t/xap_helper.t index 0f474608..78be8539 100644 --- a/t/xap_helper.t +++ b/t/xap_helper.t @@ -9,6 +9,7 @@ use Socket qw(AF_UNIX SOCK_SEQPACKET SOCK_STREAM); require PublicInbox::AutoReap; use PublicInbox::IPC; require PublicInbox::XapClient; +use PublicInbox::DS qw(now); use autodie; my ($tmp, $for_destroy) = tmpdir(); @@ -204,44 +205,35 @@ for my $n (@NO_CXX) { $err = do { local $/; <$err_r> }; is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})"; - $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args, + $r = $xhc->mkreq([], qw(mset), @ibx_shard_args, 'dfn:lib/PublicInbox/Search.pm'); chomp((my $hdr, @res) = readline($r)); - is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}"; + like $hdr, qr/\bmset\.size=1\b/, + "got expected header via mset ($xhc->{impl}"; is scalar(@res), 1, 'got one result'; @res = split /\0/, $res[0]; { my $doc = $v2->search->xdb->get_document($res[0]); + ok $doc, 'valid document retrieved'; my @q = PublicInbox::Search::xap_terms('Q', $doc); is_deeply \@q, [ $mid ], 'docid usable'; } ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100'; - is $res[2], 'XDFID'.$dfid, 'XDFID result matches'; - is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches'; - is scalar(@res), 4, 'only 4 columns in result'; + is scalar(@res), 3, 'only 3 columns in result'; - $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args, + $r = $xhc->mkreq([], qw(mset), @ibx_shard_args, 'dt:19700101'.'000000..'); chomp(($hdr, @res) = readline($r)); - is $hdr, 'mset.size=6', + like $hdr, qr/\bmset\.size=6\b/, "got expected header via multi-result mset ($xhc->{impl}"; is(scalar(@res), 6, 'got 6 rows'); for my $r (@res) { - my ($docid, $pct, @rest) = split /\0/, $r; + my ($docid, $pct, $rank, @rest) = split /\0/, $r; my $doc = $v2->search->xdb->get_document($docid); ok $pct > 0 && $pct <= 100, "pct > 0 && <= 100 #$docid ($xhc->{impl})"; - my %terms; - for (@rest) { - s/\A([A-Z]+)// or xbail 'no prefix=', \@rest; - push @{$terms{$1}}, $_; - } - while (my ($pfx, $vals) = each %terms) { - @$vals = sort @$vals; - my @q = PublicInbox::Search::xap_terms($pfx, $doc); - is_deeply $vals, \@q, - "#$docid $pfx as expected ($xhc->{impl})"; - } + like $rank, qr/\A\d+\z/, 'rank is a digit'; + is scalar(@rest), 0, 'no extra rows returned'; } my $nr; for my $i (7, 8, 39, 40) { @@ -276,6 +268,20 @@ for my $n (@NO_CXX) { my @oids = (join('', @res) =~ /^([a-f0-9]{7}) /gms); is $nr_out, scalar(@oids), "output count matches $xhc->{impl}" or diag explain(\@res, \@err); + + if ($ENV{TEST_XH_TIMEOUT}) { + diag 'testing timeouts...'; + for my $j (qw(0 1)) { + my $t0 = now; + $r = $xhc->mkreq(undef, qw(test_sleep -K 1 -d), + $ibx_idx[0]); + is readline($r), undef, 'got EOF'; + my $diff = now - $t0; + ok $diff < 3, "timeout didn't take too long -j$j"; + ok $diff >= 0.9, "timeout didn't fire prematurely -j$j"; + $xhc = PublicInbox::XapClient::start_helper('-j1'); + } + } } done_testing; diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t index f7796e8e..176502ba 100644 --- a/xt/net_writer-imap.t +++ b/xt/net_writer-imap.t @@ -82,7 +82,7 @@ my $mics = do { $nwr->imap_common_init; }; my $mic = (values %$mics)[0]; -my $cleanup = PublicInbox::OnDestroy->new($$, sub { +my $cleanup = on_destroy sub { if (defined($folder)) { my $mic = $nwr->mic_get($uri); $mic->delete($folder) or @@ -92,7 +92,7 @@ my $cleanup = PublicInbox::OnDestroy->new($$, sub { local $ENV{HOME} = $tmpdir; system(qw(git credential-cache exit)); } -}); +}; my $imap_append = $nwr->can('imap_append'); my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg'; $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml')); |