diff options
Diffstat (limited to 'lib/PublicInbox')
27 files changed, 333 insertions, 136 deletions
diff --git a/lib/PublicInbox/CmdIPC4.pm b/lib/PublicInbox/CmdIPC4.pm index 2f102ec6..fc77bd03 100644 --- a/lib/PublicInbox/CmdIPC4.pm +++ b/lib/PublicInbox/CmdIPC4.pm @@ -11,8 +11,8 @@ use Socket qw(SOL_SOCKET SCM_RIGHTS); sub sendmsg_retry ($) { return 1 if $!{EINTR}; return unless ($!{ENOMEM} || $!{ENOBUFS} || $!{ETOOMANYREFS}); - return if ++$_[0] >= 50; - warn "# sleeping on sendmsg: $! (#$_[0])\n"; + return if --$_[0] < 0; + warn "# sleeping on sendmsg: $! ($_[0] tries left)\n"; select(undef, undef, undef, 0.1); 1; } @@ -22,15 +22,15 @@ require Socket::MsgHdr; # XS no warnings 'once'; # any number of FDs per-sendmsg(2) + buffer -*send_cmd4 = sub ($$$$) { # (sock, fds, buf, flags) = @_; - my ($sock, $fds, undef, $flags) = @_; +*send_cmd4 = sub ($$$$;$) { # (sock, fds, buf, flags) = @_; + my ($sock, $fds, undef, $flags, $tries) = @_; + $tries //= 50; my $mh = Socket::MsgHdr->new(buf => $_[2]); $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds)); my $s; - my $try = 0; do { $s = Socket::MsgHdr::sendmsg($sock, $mh, $flags); - } while (!defined($s) && sendmsg_retry($try)); + } while (!defined($s) && sendmsg_retry($tries)); $s; }; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index d6300610..49659a2e 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -13,6 +13,7 @@ use v5.10.1; use parent qw(Exporter); our @EXPORT_OK = qw(glob2re rel2abs_collapsed); use PublicInbox::Inbox; +use PublicInbox::Git qw(git_exe); use PublicInbox::Spawn qw(popen_rd run_qx); our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup our $DEDUPE; # set to {} to dedupe or clear cache @@ -188,7 +189,7 @@ sub git_config_dump { unshift(@opt_c, '-c', "include.path=$file") if defined($file); tmp_cmd_opt(\%env, $opt); } - my @cmd = ('git', @opt_c, qw(config -z -l --includes)); + my @cmd = (git_exe, @opt_c, qw(config -z -l --includes)); push(@cmd, '-f', $file) if !@opt_c && defined($file); my $fh = popen_rd(\@cmd, \%env, $opt); my $rv = config_fh_parse($fh, "\0", "\n"); @@ -608,7 +609,7 @@ sub config_cmd { my ($self, $env, $opt) = @_; my $f = $self->{-f} // default_file(); my @opt_c = @{$self->{-opt_c} // []}; - my @cmd = ('git', @opt_c, 'config'); + my @cmd = (git_exe, @opt_c, 'config'); @opt_c ? tmp_cmd_opt($env, $opt) : push(@cmd, '-f', $f); \@cmd; } diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index ec76d6b8..28458b19 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -22,9 +22,11 @@ use PublicInbox::GitAsyncCat; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::OnDestroy; +use PublicInbox::Search; +use PublicInbox::XapClient; our $SO_ACCEPTFILTER = 0x1000; my @CMD; -my ($set_user, $oldset); +my ($set_user, $oldset, $xh_workers); my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); my ($nworker, @listeners, %WORKERS, %logs); my %tls_opt; # scheme://sockname => args for IO::Socket::SSL::SSL_Context->new @@ -170,6 +172,7 @@ options: --cert=FILE default SSL/TLS certificate --key=FILE default SSL/TLS certificate key -W WORKERS number of worker processes to spawn (default: 1) + -X XWORKERS number of Xapian helper processes (default: undefined) See public-inbox-daemon(8) and $prog(1) man pages for more. EOF @@ -185,6 +188,7 @@ EOF 'multi-accept=i' => \$PublicInbox::Listener::MULTI_ACCEPT, 'cert=s' => \$default_cert, 'key=s' => \$default_key, + 'X|xapian-helpers=i' => \$xh_workers, 'help|h' => \(my $show_help), ); GetOptions(%opt) or die $help; @@ -384,10 +388,30 @@ sub worker_quit { # $_[0] = signal name or number (unused) @PublicInbox::DS::post_loop_do = (\&has_busy_clients, { -w => 0 }) } +sub spawn_xh () { + $xh_workers // return; + require PublicInbox::XhcMset; + local $) = $gid if defined $gid; + local $( = $gid if defined $gid; + local $> = $uid if defined $uid; + local $< = $uid if defined $uid; + $PublicInbox::Search::XHC = eval { + local $ENV{STDERR_PATH} = $stderr; + local $ENV{STDOUT_PATH} = $stdout; + PublicInbox::XapClient::start_helper('-j', $xh_workers) + }; + warn "E: $@" if $@; + awaitpid($PublicInbox::Search::XHC->{io}->attached_pid, \&respawn_xh) + if $PublicInbox::Search::XHC; +} + sub reopen_logs { + my ($sig) = @_; $logs{$stdout} //= \*STDOUT if defined $stdout; $logs{$stderr} //= \*STDERR if defined $stderr; while (my ($p, $fh) = each %logs) { open_log_path($fh, $p) } + ($sig && defined($xh_workers) && $PublicInbox::Search::XHC) and + kill('USR1', $PublicInbox::Search::XHC->{io}->attached_pid); } sub sockname ($) { @@ -544,6 +568,7 @@ sub start_worker ($) { my $pid = PublicInbox::DS::fork_persist; if ($pid == 0) { undef %WORKERS; + undef $xh_workers; local $PublicInbox::DS::Poller; # allow epoll/kqueue $set_user->() if $set_user; PublicInbox::EOFpipe->new($parent_pipe, \&worker_quit); @@ -571,8 +596,9 @@ sub master_loop { pipe($parent_pipe, my $p1) or die "failed to create parent-pipe: $!"; my $set_workers = $nworker; # for SIGWINCH reopen_logs(); + spawn_xh; my $msig = { - USR1 => sub { reopen_logs(); kill_workers($_[0]); }, + USR1 => sub { reopen_logs($_[0]); kill_workers($_[0]); }, USR2 => \&upgrade, QUIT => \&master_quit, INT => \&master_quit, @@ -671,6 +697,7 @@ sub daemon_loop () { sub worker_loop { $uid = $gid = undef; reopen_logs(); + spawn_xh; # only for -W0 @listeners = map {; my $l = sockname($_); my $tls_cb = $POST_ACCEPT{$l}; @@ -687,6 +714,13 @@ sub worker_loop { PublicInbox::DS::event_loop(\%WORKER_SIG, $oldset); } +sub respawn_xh { # awaitpid cb + my ($pid) = @_; + return unless @listeners; + warn "W: xap_helper PID:$pid died: \$?=$?, respawning...\n"; + spawn_xh; +} + sub run { my ($default_listen) = @_; $nworker = 1; @@ -699,7 +733,8 @@ sub run { local $PublicInbox::Git::async_warn = 1; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); local %WORKER_SIG = %WORKER_SIG; - local %POST_ACCEPT; + local $PublicInbox::XapClient::tries = 0; + local $PublicInbox::Search::XHC if defined($xh_workers); daemon_loop(); # $unlink_on_leave runs diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 763a124c..883dbea3 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -1287,11 +1287,11 @@ sub idx_init { # similar to V2Writable ($has_new || $prune_nr || $new ne '') and $self->{mg}->write_alternates($mode, $alt, $new); my $restore = $self->with_umask; - if ($git_midx) { - my @cmd = ('multi-pack-index'); - push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1; + if ($git_midx && ($opt->{'multi-pack-index'} // 1)) { + my $cmd = $self->git->cmd('multi-pack-index'); + push @$cmd, '--no-progress' if ($opt->{quiet}//0) > 1; my $lk = $self->lock_for_scope; - system('git', "--git-dir=$ALL", @cmd, 'write'); + system(@$cmd, 'write'); # ignore errors, fairly new command, may not exist } $self->parallel_init($self->{indexlevel}); diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index b0f1437c..814d6e8e 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -12,6 +12,7 @@ use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; use PublicInbox::SHA qw(sha_all); use File::Temp (); +use PublicInbox::Git qw(git_exe); sub new { bless {}, __PACKAGE__ } @@ -19,7 +20,7 @@ sub remote_url ($$) { my ($lei, $dir) = @_; my $rn = $lei->{opt}->{'try-remote'} // [ 'origin', '_grokmirror' ]; for my $r (@$rn) { - my $cmd = [ qw(git config), "remote.$r.url" ]; + my $cmd = [ git_exe, 'config', "remote.$r.url" ]; my $url = run_qx($cmd, undef, { -C => $dir, 2 => $lei->{2} }); next if $?; $url =~ s!/*\n!!s; @@ -92,7 +93,7 @@ sub do_manifest ($$$) { sub get_fingerprint2 { my ($git_dir) = @_; - my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); + my $rd = popen_rd([git_exe, 'show-ref'], undef, { -C => $git_dir }); sha_all(256, $rd)->digest; # ignore show-ref errors } @@ -132,8 +133,8 @@ sub do_fetch { # main entry point warn "W: $edir missing remote.*.url\n"; my $o = { -C => $edir }; $o->{1} = $o->{2} = $lei->{2}; - run_wait([qw(git config -l)], undef, $o) and - $lei->child_error($?); + run_wait([git_exe, qw(config -l)], undef, $o) + and $lei->child_error($?); } } @epochs = grep { !$skip->{$_} } @epochs if $skip; @@ -188,7 +189,7 @@ EOM my $opt = {}; # for spawn if (-d $d) { $fp2->[0] = get_fingerprint2($d) if $fp2; - $cmd = [ @$torsocks, 'git', "--git-dir=$d", + $cmd = [ @$torsocks, git_exe, "--git-dir=$d", PublicInbox::LeiMirror::fetch_args($lei, $opt)]; } else { my $e_uri = $ibx_uri->clone; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index aea389e8..a9a821ad 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -10,6 +10,7 @@ package PublicInbox::Git; use strict; use v5.10.1; use parent qw(Exporter PublicInbox::DS); +use PublicInbox::DS qw(now); use autodie qw(socketpair read); use POSIX (); use Socket qw(AF_UNIX SOCK_STREAM); @@ -25,7 +26,7 @@ use PublicInbox::SHA qw(sha_all); our %HEXLEN2SHA = (40 => 1, 64 => 256); our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64); our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN - $ck_unlinked_packs); + $ck_unlinked_packs git_exe); our $in_cleanup; our $async_warn; # true in read-only daemons @@ -54,7 +55,11 @@ my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC; my $EXE_ST = ''; # pack('dd', st_dev, st_ino); # no `q' in some 32-bit builds my ($GIT_EXE, $GIT_VER); -sub check_git_exe () { +sub git_exe () { + my $now = now; + state $next_check = $now - 10; + return $GIT_EXE if $now < $next_check; + $next_check = $now + 10; $GIT_EXE = which('git') // die "git not found in $ENV{PATH}"; my @st = stat(_) or die "stat($GIT_EXE): $!"; # can't do HiRes w/ _ my $st = pack('dd', $st[0], $st[1]); @@ -69,8 +74,8 @@ sub check_git_exe () { $GIT_EXE; } -sub git_version { - check_git_exe(); +sub git_version () { + git_exe; $GIT_VER; } @@ -174,7 +179,7 @@ sub _sock_cmd { # git 2.31.0+ supports -c core.abbrev=no, don't bother with # core.abbrev=64 since not many releases had SHA-256 prior to 2.31 - my $abbr = $GIT_VER lt v2.31.0 ? 40 : 'no'; + my $abbr = git_version lt v2.31.0 ? 40 : 'no'; my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr", 'cat-file', "--$batch"); if ($err_c) { @@ -287,8 +292,7 @@ sub cat_async_wait ($) { sub batch_prepare ($) { my ($self) = @_; - check_git_exe(); - if ($GIT_VER ge BATCH_CMD_VER) { + if (git_version ge BATCH_CMD_VER) { $self->{-bc} = 1; _sock_cmd($self, 'batch-command', 1); } else { @@ -344,8 +348,7 @@ sub ck { sub check_async_begin ($) { my ($self) = @_; cleanup($self) if alternates_changed($self); - check_git_exe(); - if ($GIT_VER ge BATCH_CMD_VER) { + if (git_version ge BATCH_CMD_VER) { $self->{-bc} = 1; _sock_cmd($self, 'batch-command', 1); } else { @@ -421,15 +424,15 @@ sub async_err ($$$$$) { sub cmd { my $self = shift; - [ $GIT_EXE // check_git_exe(), "--git-dir=$self->{git_dir}", @_ ] + [ git_exe(), "--git-dir=$self->{git_dir}", @_ ] } # $git->popen(qw(show f00)); # or # $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... }); sub popen { my ($self, $cmd) = splice(@_, 0, 2); - $cmd = [ 'git', "--git-dir=$self->{git_dir}", - ref($cmd) ? @$cmd : ($cmd, grep { defined && !ref } @_) ]; + $cmd = $self->cmd(ref($cmd) ? @$cmd : + ($cmd, grep { defined && !ref } @_)); popen_rd($cmd, grep { !defined || ref } @_); # env and opt } @@ -577,9 +580,8 @@ sub cloneurl { # templates/this--description in git.git sub manifest_entry { my ($self, $epoch, $default_desc) = @_; - check_git_exe(); my $gd = $self->{git_dir}; - my @git = ($GIT_EXE, "--git-dir=$gd"); + my @git = (git_exe, "--git-dir=$gd"); my $sr = popen_rd([@git, 'show-ref']); my $own = popen_rd([@git, qw(config gitweb.owner)]); my $mod = popen_rd([@git, @MODIFIED_DATE]); diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index ed34d548..fefc282a 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -73,8 +73,8 @@ sub gfi_start { die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; $self->{-tree} = { map { $_ => 1 } split(/\0/, $t) }; } - my $gfi = [ 'git', "--git-dir=$git->{git_dir}", qw(fast-import - --quiet --done --date-format=raw) ]; + my $gfi = $git->cmd(qw(fast-import + --quiet --done --date-format=raw)); my $pid = spawn($gfi, undef, { 0 => $s2, 1 => $s2 }); $self->{nchg} = 0; $self->{io} = PublicInbox::IO::attach_pid($io, $pid); @@ -161,7 +161,7 @@ sub _update_git_info ($$) { # for compatibility with existing ssoma installations # we can probably remove this entirely by 2020 my $git_dir = $self->{git}->{git_dir}; - my @cmd = ('git', "--git-dir=$git_dir"); + my @cmd = @{$self->{git}->cmd}; my $index = "$git_dir/ssoma.index"; if (-e $index && !$ENV{FAST}) { my $env = { GIT_INDEX_FILE => $index }; @@ -631,7 +631,7 @@ sub replace_oids { chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace; $self->{nchg} = 0; # prevent _update_git_info until update-ref: $self->done; - my @git = ('git', "--git-dir=$git->{git_dir}"); + my @git = @{$git->cmd}; run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace; diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm index 00697097..7b2ea434 100644 --- a/lib/PublicInbox/LeiBlob.pm +++ b/lib/PublicInbox/LeiBlob.pm @@ -36,14 +36,13 @@ sub solver_user_cb { # called by solver when done ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf); $lei->qerr($$log_buf); my ($git, $oid, $type, $size, $di) = @$res; - my $gd = $git->{git_dir}; # don't try to support all the git-show(1) options for non-blob, # this is just a convenience: - $type ne 'blob' and - warn "# $oid is a $type of $size bytes in:\n#\t$gd\n"; - - my $cmd = [ 'git', "--git-dir=$gd", 'show', $oid ]; + $type ne 'blob' and warn <<EOM; +# $oid is a $type of $size bytes in:\n#\t$git->{git_dir} +EOM + my $cmd = $git->cmd('show', $oid); my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} }; run_wait($cmd, $lei->{env}, $rdr) and $lei->child_error($?); } diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 08e61e4b..e7c265bd 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -24,6 +24,7 @@ use POSIX qw(strftime); use PublicInbox::Admin qw(fmt_localtime); use autodie qw(chdir chmod close open pipe readlink seek symlink sysopen sysseek truncate unlink); +use PublicInbox::Git qw(git_exe); our $LIVE; # pid => callback our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]] @@ -105,7 +106,7 @@ E: confused by scraping <$uri>, got ambiguous results: sub clone_cmd { my ($lei, $opt) = @_; - my @cmd = qw(git); + my @cmd = (git_exe); $opt->{$_} = $lei->{$_} for (0..2); # we support "-c $key=$val" for arbitrary git config options # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 @@ -291,7 +292,7 @@ sub upr { # feed `git update-ref --stdin -z' verbosely sub start_update_ref { my ($fgrp) = @_; pipe(my $r, my $w); - my $cmd = [ 'git', "--git-dir=$fgrp->{cur_dst}", + my $cmd = [ git_exe, "--git-dir=$fgrp->{cur_dst}", qw(update-ref --stdin -z) ]; my $pack = on_destroy \&satellite_done, $fgrp; start_cmd($fgrp, $cmd, { 0 => $r, 2 => $fgrp->{lei}->{2} }, $pack); @@ -353,7 +354,7 @@ sub satellite_done { sub pack_refs { my ($self, $git_dir) = @_; - my $cmd = [ 'git', "--git-dir=$git_dir", qw(pack-refs --all --prune) ]; + my $cmd = [git_exe, "--git-dir=$git_dir", qw(pack-refs --all --prune)]; start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }); } @@ -374,14 +375,15 @@ sub fgrpv_done { my $rn = $fgrp->{-remote}; my %opt = ( 2 => $fgrp->{lei}->{2} ); my $update_ref = on_destroy \&fgrp_update, $fgrp; - my $src = [ 'git', "--git-dir=$fgrp->{-osdir}", 'for-each-ref', + my $src = [ git_exe, "--git-dir=$fgrp->{-osdir}", + 'for-each-ref', "--format=refs/%(refname:lstrip=3)%00%(objectname)", "refs/remotes/$rn/" ]; open(my $sfh, '+>', undef); $fgrp->{srcfh} = $sfh; start_cmd($fgrp, $src, { %opt, 1 => $sfh }, $update_ref); - my $dst = [ 'git', "--git-dir=$fgrp->{cur_dst}", 'for-each-ref', - '--format=%(refname)%00%(objectname)' ]; + my $dst = [ git_exe, "--git-dir=$fgrp->{cur_dst}", + 'for-each-ref', '--format=%(refname)%00%(objectname)' ]; open(my $dfh, '+>', undef); $fgrp->{dstfh} = $dfh; start_cmd($fgrp, $dst, { %opt, 1 => $dfh }, $update_ref); @@ -399,7 +401,7 @@ sub fgrp_fetch_all { # system argv limits: my $grp = 'fgrptmp'; - my @git = (@{$self->{-torsocks}}, 'git'); + my @git = (@{$self->{-torsocks}}, git_exe); my $j = $self->{lei}->{opt}->{jobs}; my $opt = {}; my @fetch = do { @@ -413,7 +415,7 @@ sub fgrp_fetch_all { my ($old, $new) = @$fgrp_old_new; @$old = sort { $b->{-sort} <=> $a->{-sort} } @$old; # $new is ordered by {references} - my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ]; + my $cmd = [ git_exe, "--git-dir=$osdir", qw(config -f), $f ]; # clobber settings from previous run atomically for ("remotes.$grp", 'fetch.hideRefs') { @@ -541,7 +543,7 @@ sub cmp_fp_do { return if $cur_ent->{fingerprint} eq $new; } my $dst = $self->{cur_dst} // $self->{dst}; - my $cmd = ['git', "--git-dir=$dst", 'show-ref']; + my $cmd = [git_exe, "--git-dir=$dst", 'show-ref']; my $opt = { 2 => $self->{lei}->{2} }; open($opt->{1}, '+>', undef); $self->{-show_ref} = $opt->{1}; @@ -555,7 +557,7 @@ sub resume_fetch { my ($self, $uri, $fini) = @_; return if !keep_going($self); my $dst = $self->{cur_dst} // $self->{dst}; - my @git = ('git', "--git-dir=$dst"); + my @git = (git_exe, "--git-dir=$dst"); my $opt = { 2 => $self->{lei}->{2} }; my $rn = 'random'.int(rand(1 << 30)); for ("url=$uri", "fetch=+refs/*:refs/*", 'mirror=true') { @@ -755,7 +757,7 @@ sub update_ent { my $cur = $self->{-local_manifest}->{$key}->{fingerprint} // "\0"; my $dst = $self->{cur_dst} // $self->{dst}; if (defined($new) && $new ne $cur) { - my $cmd = ['git', "--git-dir=$dst", 'show-ref']; + my $cmd = [git_exe, "--git-dir=$dst", 'show-ref']; my $opt = { 2 => $self->{lei}->{2} }; open($opt->{1}, '+>', undef); $self->{-show_ref_up} = $opt->{1}; @@ -766,7 +768,7 @@ sub update_ent { $cur = $self->{-local_manifest}->{$key}->{head} // "\0"; if (defined($new) && $new ne $cur) { # n.b. grokmirror writes raw contents to $dst/HEAD w/o locking - my $cmd = [ 'git', "--git-dir=$dst" ]; + my $cmd = [ git_exe, "--git-dir=$dst" ]; if ($new =~ s/\Aref: //) { push @$cmd, qw(symbolic-ref HEAD), $new; } elsif ($new =~ /\A[a-f0-9]{40,}\z/) { @@ -811,7 +813,8 @@ sub update_ent { $cur = $self->{-local_manifest}->{$key}->{owner} // "\0"; return if $cur eq $new; utf8::encode($new); # to octets - my $cmd = [ qw(git config -f), "$dst/config", 'gitweb.owner', $new ]; + my $cmd = [ git_exe, qw(config -f), "$dst/config", + 'gitweb.owner', $new ]; start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }); } diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm index 35728330..66359dd4 100644 --- a/lib/PublicInbox/LeiRediff.pm +++ b/lib/PublicInbox/LeiRediff.pm @@ -119,17 +119,16 @@ EOM map { $_->git_path('objects')."\n" } @{$self->{gits}}; $rw = PublicInbox::Git->new($d); } - my $w = popen_wr(['git', "--git-dir=$rw->{git_dir}", - qw(fast-import --quiet --done --date-format=raw)], + my $w = popen_wr($rw->cmd(qw(fast-import + --quiet --done --date-format=raw)), $lei->{env}, { 2 => $lei->{2} }); print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!"; $w->close or die "close w fast-import: \$?=$? \$!=$!"; - my $cmd = [ 'diff' ]; + my $cmd = $rw->cmd('diff'); _lei_diff_prepare($lei, $cmd); - $lei->qerr("# git @$cmd"); + $lei->qerr("# git @$cmd[2..$#$cmd]"); push @$cmd, qw(A B); - unshift @$cmd, 'git', "--git-dir=$rw->{git_dir}"; run_wait($cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} }) and $lei->child_error($?); # for git diff --exit-code undef; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 82fba5c6..17893a09 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -4,7 +4,7 @@ # Streaming interface for mboxrd HTTP responses # See PublicInbox::GzipFilter for details. package PublicInbox::Mbox; -use strict; +use v5.12; use parent 'PublicInbox::GzipFilter'; use PublicInbox::MID qw/mid_escape/; use PublicInbox::Hval qw/to_filename/; diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index 864d701e..90e69c09 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -1,7 +1,7 @@ # Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> package PublicInbox::MboxGz; -use strict; +use v5.12; use parent 'PublicInbox::GzipFilter'; use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; diff --git a/lib/PublicInbox/RepoAtom.pm b/lib/PublicInbox/RepoAtom.pm index ab0f2fcc..eb0ed3c7 100644 --- a/lib/PublicInbox/RepoAtom.pm +++ b/lib/PublicInbox/RepoAtom.pm @@ -94,11 +94,10 @@ xmlns="http://www.w3.org/1999/xhtml"><pre style="white-space:pre-wrap"> sub srv_tags_atom { my ($ctx) = @_; my $max = 50; # TODO configurable - my @cmd = ('git', "--git-dir=$ctx->{git}->{git_dir}", - qw(for-each-ref --sort=-creatordate), "--count=$max", - '--perl', $EACH_REF_FMT, 'refs/tags'); + my $cmd = $ctx->{git}->cmd(qw(for-each-ref --sort=-creatordate), + "--count=$max", '--perl', $EACH_REF_FMT, 'refs/tags'); $ctx->{-feed_title} = "$ctx->{git}->{nick} tags"; - my $qsp = PublicInbox::Qspawn->new(\@cmd); + my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{-is_tag} = 1; $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx); } @@ -107,20 +106,19 @@ sub srv_atom { my ($ctx, $path) = @_; return if index($path, '//') >= 0 || index($path, '/') == 0; my $max = 50; # TODO configurable - my @cmd = ('git', "--git-dir=$ctx->{git}->{git_dir}", - qw(log --no-notes --no-color --no-abbrev), - $ATOM_FMT, "-$max"); + my $cmd = $ctx->{git}->cmd(qw(log --no-notes --no-color --no-abbrev), + $ATOM_FMT, "-$max"); my $tip = $ctx->{qp}->{h}; # same as cgit $ctx->{-feed_title} = $ctx->{git}->{nick}; $ctx->{-feed_title} .= " $path" if $path ne ''; if (defined($tip)) { - push @cmd, $tip; + push @$cmd, $tip; $ctx->{-feed_title} .= ", $tip"; } # else: let git decide based on HEAD if $tip isn't defined - push @cmd, '--'; - push @cmd, $path if $path ne ''; - my $qsp = PublicInbox::Qspawn->new(\@cmd, undef, + push @$cmd, '--'; + push @$cmd, $path if $path ne ''; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, { quiet => 1, 2 => $ctx->{lh} }); $qsp->psgi_yield($ctx->{env}, undef, \&atom_ok, $ctx); } diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm index 4c372569..bff97bc8 100644 --- a/lib/PublicInbox/RepoSnapshot.pm +++ b/lib/PublicInbox/RepoSnapshot.pm @@ -50,15 +50,13 @@ sub ver_check { # git->check_async callback delete($ctx->{env}->{'qspawn.wcb'})->(r(404)); } else { # found, done: $ctx->{etag} = $oid; - my @cfg; + my $cmd = $ctx->{git}->cmd; if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) { - @cfg = ('-c', "tar.$ctx->{snap_fmt}.command=$cmd"); + push @$cmd, '-c', "tar.$ctx->{snap_fmt}.command=$cmd"; } - my $qsp = PublicInbox::Qspawn->new(['git', @cfg, - "--git-dir=$ctx->{git}->{git_dir}", 'archive', - "--prefix=$ctx->{snap_pfx}/", - "--format=$ctx->{snap_fmt}", $treeish], undef, - { quiet => 1 }); + push @$cmd, 'archive', "--prefix=$ctx->{snap_pfx}/", + "--format=$ctx->{snap_fmt}", $treeish; + my $qsp = PublicInbox::Qspawn->new($cmd, undef, { quiet => 1 }); $qsp->psgi_yield($ctx->{env}, undef, \&archive_hdr, $ctx); } } diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm index 5c73531a..4c85f9a8 100644 --- a/lib/PublicInbox/RepoTree.pm +++ b/lib/PublicInbox/RepoTree.pm @@ -51,8 +51,8 @@ sub find_missing { $res->[0] = 404; return delete($ctx->{-wcb})->($res); } - my $cmd = ['git', "--git-dir=$ctx->{git}->{git_dir}", - qw(log --no-color -1), '--pretty=%H %h %s (%as)' ]; + my $cmd = $ctx->{git}->cmd(qw(log --no-color -1), + '--pretty=%H %h %s (%as)'); push @$cmd, $ctx->{qp}->{h} if defined($ctx->{qp}->{h}); push @$cmd, '--'; push @$cmd, $ctx->{-path}; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 60d12dbf..e5c5d6ab 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -11,7 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); use POSIX qw(strftime); use Carp (); -our $XHC; +our $XHC = 0; # defined but false # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) @@ -57,7 +57,7 @@ use constant { }; use PublicInbox::Smsg; -use PublicInbox::Over; +eval { require PublicInbox::Over }; our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Xapian' or 'Search::Xapian' @@ -92,6 +92,7 @@ our @XH_SPEC = ( 'K=i', # timeout kill after i seconds 'O=s', # eidx_key 'T=i', # threadid + 'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX" ); sub load_xapian () { @@ -428,15 +429,15 @@ sub mset { do_enquire($self, $qry, $opt, TS); } -sub xhc_start_maybe () { +sub xhc_start_maybe (@) { require PublicInbox::XapClient; - my $xhc = PublicInbox::XapClient::start_helper(); + my $xhc = PublicInbox::XapClient::start_helper(@_); require PublicInbox::XhcMset if $xhc; $xhc; } -sub xh_opt ($) { - my ($opt) = @_; +sub xh_opt ($$) { + my ($self, $opt) = @_; my $lim = $opt->{limit} || 50; my @ret; push @ret, '-o', $opt->{offset} if $opt->{offset}; @@ -458,19 +459,31 @@ sub xh_opt ($) { push @ret, '-t' if $opt->{threads}; push @ret, '-T', $opt->{threadid} if defined $opt->{threadid}; push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key}; - @ret; + my $apfx = $self->{-alt_pfx} //= do { + my @tmp; + for (grep /\Aserial:/, @{$self->{altid} // []}) { + my (undef, $pfx) = split /:/, $_; + push @tmp, '-Q', "$pfx=X\U$pfx"; + } + # TODO: arbitrary header indexing goes here + \@tmp; + }; + (@ret, @$apfx); } # returns a true value if actually handled asynchronously, # and a falsy value if handled synchronously sub async_mset { my ($self, $qry_str, $opt, $cb, @args) = @_; - $XHC //= xhc_start_maybe; if ($XHC) { # unconditionally retrieving pct + rank for now xdb($self); # populate {nshards} - my @margs = ($self->xh_args, xh_opt($opt)); - my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str); - PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args); + my @margs = ($self->xh_args, xh_opt($self, $opt)); + my $ret = eval { + my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str); + PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args); + }; + $cb->(@args, undef, $@) if $@; + $ret; } else { # synchronous my $mset = $self->mset($qry_str, $opt); $cb->(@args, $mset); @@ -627,7 +640,7 @@ EOM $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} } } - # TODO: altid support + # altid support is handled in xh_opt and srch_init_extra in XH for my $name (sort keys %prob_prefix) { for (split(/ /, $prob_prefix{$name})) { $ret .= qq{\tqp->add_prefix("$name", "$_");\n} diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1cbf6d23..4fd493d9 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -119,7 +119,7 @@ sub load_xapian_writable () { my $ver = eval 'v'.join('.', eval($xap.'::major_version()'), eval($xap.'::minor_version()'), eval($xap.'::revision()')); - if ($ver ge 1.4) { # new flags in Xapian 1.4 + if ($ver ge v1.4) { # new flags in Xapian 1.4 $DB_NO_SYNC = 0x4; $DB_DANGEROUS = 0x10; } @@ -1003,8 +1003,7 @@ sub prepare_stack ($$) { sub is_ancestor ($$$) { my ($git, $cur, $tip) = @_; return 0 unless $git->check($cur); - my $cmd = [ 'git', "--git-dir=$git->{git_dir}", - qw(merge-base --is-ancestor), $cur, $tip ]; + my $cmd = $git->cmd(qw(merge-base --is-ancestor), $cur, $tip); run_wait($cmd) == 0; } diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index 296e7d17..b5f6b96e 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -13,7 +13,7 @@ use v5.10.1; use File::Temp 0.19 (); # 0.19 for ->newdir use autodie qw(mkdir); use Fcntl qw(SEEK_SET); -use PublicInbox::Git qw(git_unquote git_quote); +use PublicInbox::Git qw(git_unquote git_quote git_exe); use PublicInbox::IO qw(write_file); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; @@ -136,6 +136,12 @@ sub extract_diff ($$) { if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") { $s =~ s/\r\n/\n/sg; } + + # Quiet "Complex regular subexpression recursion limit" warning. + # Not much we can do about it, but it's no longer relevant to + # Perl 5.3x (the warning was removed in 5.37.1, and actual + # recursino sometime before then). + no warnings 'regexp'; $s =~ m!( # $1 start header lines we save for debugging: # everything before ^index is optional, but we don't @@ -287,7 +293,7 @@ sub prepare_index ($) { dbg($self, 'preparing index'); my $rdr = { 0 => $in }; - my $cmd = [ qw(git update-index -z --index-info) ]; + my $cmd = [ git_exe, qw(update-index -z --index-info) ]; my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr); $path_a = git_quote($path_a); $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a"; @@ -467,7 +473,7 @@ sub apply_result ($$) { # qx_cb skip_identical($self, $patches, $di->{oid_b}); } - my @cmd = qw(git ls-files -s -z); + my @cmd = (git_exe, qw(ls-files -s -z)); my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}); $self->{-cur_di} = $di; qsp_qx $self, $qsp, \&ls_files_result; @@ -478,7 +484,7 @@ sub do_git_apply ($) { my $patches = $self->{patches}; # we need --ignore-whitespace because some patches are CRLF - my @cmd = (qw(git apply --cached --ignore-whitespace + my @cmd = (git_exe, qw(apply --cached --ignore-whitespace --unidiff-zero --whitespace=warn --verbose)); my $len = length(join(' ', @cmd)); my $di; # keep track of the last one for "git ls-files" diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index e36659ce..e9e81e88 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -176,15 +176,15 @@ out: return (int)pid; } -static int sendmsg_retry(unsigned *tries) +static int sendmsg_retry(int *tries) { const struct timespec req = { 0, 100000000 }; /* 100ms */ int err = errno; switch (err) { case EINTR: PERL_ASYNC_CHECK(); return 1; case ENOBUFS: case ENOMEM: case ETOOMANYREFS: - if (++*tries >= 50) return 0; - fprintf(stderr, "# sleeping on sendmsg: %s (#%u)\n", + if (--*tries < 0) return 0; + fprintf(stderr, "# sleeping on sendmsg: %s (%d tries left)\n", strerror(err), *tries); nanosleep(&req, NULL); PERL_ASYNC_CHECK(); @@ -201,7 +201,7 @@ union my_cmsg { char pad[sizeof(struct cmsghdr) + 16 + SEND_FD_SPACE]; }; -SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) +SV *send_cmd4_(PerlIO *s, SV *svfds, SV *data, int flags, int tries) { struct msghdr msg = { 0 }; union my_cmsg cmsg = { 0 }; @@ -211,7 +211,6 @@ SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) AV *fds = (AV *)SvRV(svfds); I32 i, nfds = av_len(fds) + 1; int *fdp; - unsigned tries = 0; if (SvOK(data)) { iov.iov_base = SvPV(data, dlen); @@ -332,6 +331,9 @@ EOM if (defined $all_libc) { # set for Gcf2 $ENV{PERL_INLINE_DIRECTORY} = $inline_dir; %RLIMITS = rlimit_map(); + *send_cmd4 = sub ($$$$;$) { + send_cmd4_($_[0], $_[1], $_[2], $_[3], 50); + } } else { require PublicInbox::SpawnPP; *pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index 99af5bf5..4cbe9623 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -467,8 +467,8 @@ if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) { no warnings 'once'; require PublicInbox::CmdIPC4; -*send_cmd4 = sub ($$$$) { - my ($sock, $fds, undef, $flags) = @_; +*send_cmd4 = sub ($$$$;$) { + my ($sock, $fds, undef, $flags, $tries) = @_; my $iov = pack('P'.TMPL_size_t, $_[2] // NUL, length($_[2] // NUL) || 1); my $fd_space = scalar(@$fds) * SIZEOF_int; @@ -487,10 +487,10 @@ require PublicInbox::CmdIPC4; $msg_controllen, 0); # msg_flags my $s; - my $try = 0; + $tries //= 50; do { $s = syscall($SYS_sendmsg, fileno($sock), $mh, $flags); - } while ($s < 0 && PublicInbox::CmdIPC4::sendmsg_retry($try)); + } while ($s < 0 && PublicInbox::CmdIPC4::sendmsg_retry($tries)); $s >= 0 ? $s : undef; }; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index a7ec9b5b..3a67ab54 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -17,9 +17,10 @@ my $lei_loud = $ENV{TEST_LEI_ERR_LOUD}; our $tail_cmd = $ENV{TAIL}; our ($lei_opt, $lei_out, $lei_err); use autodie qw(chdir close fcntl mkdir open opendir seek unlink); +$ENV{XDG_CACHE_HOME} //= "$ENV{HOME}/.cache"; # reuse C++ xap_helper builds $_ = File::Spec->rel2abs($_) for (grep(!m!^/!, @INC)); - +our $CURRENT_DAEMON; BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick @@ -167,7 +168,7 @@ sub require_git_http_backend (;$) { my ($nr) = @_; state $ok = do { require PublicInbox::Git; - my $git = PublicInbox::Git::check_git_exe() or plan + my $git = PublicInbox::Git::git_exe() or plan skip_all => 'nothing in public-inbox works w/o git'; my $rdr = { 1 => \my $out, 2 => \my $err }; xsys([$git, qw(http-backend)], undef, $rdr); @@ -565,6 +566,9 @@ sub start_script { my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2; my $sub = $run_mode == 0 ? undef : key2sub($key); my $tail; + my @xh = split(/\s+/, $ENV{TEST_DAEMON_XH} // ''); + @xh = () if $key !~ /-(?:imapd|netd|httpd|pop3d|nntpd)\z/; + push @argv, @xh; if ($tail_cmd) { my @paths; for (@argv) { @@ -612,7 +616,7 @@ sub start_script { $ENV{LISTEN_FDS} = $fds; } if ($opt->{-C}) { chdir($opt->{-C}) } - $0 = join(' ', @$cmd); + $0 = join(' ', @$cmd, @xh); local @SIG{keys %SIG} = map { undef } values %SIG; local $SIG{FPE} = 'IGNORE'; # Perl default undef $tmp_mask; @@ -720,7 +724,10 @@ SKIP: { require PublicInbox::Spawn; require PublicInbox::Config; require File::Path; - + eval { # use XDG_CACHE_HOME, first: + require PublicInbox::XapHelperCxx; + PublicInbox::XapHelperCxx::check_build(); + }; local %ENV = %ENV; delete $ENV{XDG_DATA_HOME}; delete $ENV{XDG_CONFIG_HOME}; @@ -945,6 +952,7 @@ sub test_httpd ($$;$$) { local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; my $ua = LWP::UserAgent->new; $ua->max_redirect(0); + local $CURRENT_DAEMON = $td; Plack::Test::ExternalServer::test_psgi(client => $client, ua => $ua); $cb->() if $cb; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 43f37f60..15a73158 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -1071,8 +1071,8 @@ sub unindex_todo ($$$) { return if $before == $after; # ensure any blob can not longer be accessed via dumb HTTP - run_die(['git', "--git-dir=$unit->{git}->{git_dir}", - qw(-c gc.reflogExpire=now gc --prune=all --quiet)]); + run_die($unit->{git}->cmd(qw(-c gc.reflogExpire=now gc + --prune=all --quiet))); } sub sync_ranges ($$) { diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index f47c2703..83a83698 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -106,7 +106,7 @@ sub stream_large_blob ($$) { my ($ctx, $res) = @_; $ctx->{-res} = $res; my ($git, $oid, $type, $size, $di) = @$res; - my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid]; + my $cmd = $git->cmd('cat-file', $type, $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb}; $qsp->psgi_yield($ctx->{env}, undef, \&stream_blob_parse_hdr, $ctx); @@ -368,10 +368,9 @@ sub stream_patch_parse_hdr { # {parse_hdr} for Qspawn sub show_patch ($$) { my ($ctx, $res) = @_; my ($git, $oid) = @$res; - my @cmd = ('git', "--git-dir=$git->{git_dir}", - qw(format-patch -1 --stdout -C), + my $cmd = $git->cmd(qw(format-patch -1 --stdout -C), "--signature=git format-patch -1 --stdout -C $oid", $oid); - my $qsp = PublicInbox::Qspawn->new(\@cmd); + my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb}; $ctx->{patch_oid} = $oid; $qsp->psgi_yield($ctx->{env}, undef, \&stream_patch_parse_hdr, $ctx); @@ -400,8 +399,8 @@ sub show_other ($$) { # just in case... my ($git, $oid, $type, $size) = @$res; $size > $MAX_SIZE and return html_page($ctx, 200, ascii_html($type)." $oid is too big to show\n". dbg_log($ctx)); - my $cmd = ['git', "--git-dir=$git->{git_dir}", - qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ]; + my $cmd = $git->cmd(qw(show --encoding=UTF-8 + --no-color --no-abbrev), $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); $qsp->psgi_qx($ctx->{env}, undef, \&show_other_result, $ctx); @@ -487,8 +486,7 @@ sub show_tree ($$) { # also used by RepoTree my ($git, $oid, undef, $size) = @$res; $size > $MAX_SIZE and return html_page($ctx, 200, "tree $oid is too big to show\n". dbg_log($ctx)); - my $cmd = [ 'git', "--git-dir=$git->{git_dir}", - qw(ls-tree -z -l --no-abbrev), $oid ]; + my $cmd = $git->cmd(qw(ls-tree -z -l --no-abbrev), $oid); my $qsp = PublicInbox::Qspawn->new($cmd); $ctx->{tree_oid} = $oid; $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm index f0270091..24b3f45e 100644 --- a/lib/PublicInbox/XapClient.pm +++ b/lib/PublicInbox/XapClient.pm @@ -12,6 +12,7 @@ use PublicInbox::Spawn qw(spawn); use Socket qw(AF_UNIX SOCK_SEQPACKET); use PublicInbox::IPC; use autodie qw(pipe socketpair); +our $tries = 50; sub mkreq { my ($self, $ios, @arg) = @_; @@ -19,13 +20,13 @@ sub mkreq { pipe($r, $ios->[0]) if !defined($ios->[0]); my @fds = map fileno($_), @$ios; my $buf = join("\0", @arg, ''); - $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0) // - die "send_cmd: $!"; + $n = $PublicInbox::IPC::send_cmd->($self->{io}, \@fds, $buf, 0, $tries) + // die "send_cmd: $!"; $n == length($buf) or die "send_cmd: $n != ".length($buf); $r; } -sub start_helper { +sub start_helper (@) { $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS my @argv = @_; socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0); diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index c55a72ce..c9957f64 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -27,6 +27,8 @@ sub cmd_test_inspect { ($req->{srch}->has_threadid ? 1 : 0) } +sub cmd_test_sleep { select(undef, undef, undef, 0.01) while 1 } + sub iter_retry_check ($) { if (ref($@) =~ /\bDatabaseModifiedError\b/) { $_[0]->{srch}->reopen; @@ -170,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP } } +sub srch_init_extra ($) { + my ($req) = @_; + my $qp = $req->{srch}->{qp}; + for (@{$req->{Q}}) { + my ($upfx, $m, $xpfx) = split /([:=])/; + $xpfx // die "E: bad -Q $_"; + $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix'; + $qp->$m($upfx, $xpfx); + } + $req->{srch}->{qp_extra_done} = 1; +} + sub dispatch { my ($req, $cmd, @argv) = @_; my $fn = $req->can("cmd_$cmd") or return; @@ -177,8 +191,9 @@ sub dispatch { or return; my $dirs = delete $req->{d} or die 'no -d args'; my $key = join("\0", @$dirs); + my $new; $req->{srch} = $SRCH{$key} //= do { - my $new = { qp_flags => $PublicInbox::Search::QP_FLAGS }; + $new = { qp_flags => $PublicInbox::Search::QP_FLAGS }; my $first = shift @$dirs; my $slow_phrase = -f "$first/iamchert"; $new->{xdb} = $X->{Database}->new($first); @@ -193,13 +208,20 @@ sub dispatch { $new->{qp} = $new->qparse_new; $new; }; + $req->{srch}->{xdb}->reopen unless $new; + $req->{Q} && !$req->{srch}->{qp_extra_done} and + srch_init_extra $req; + my $timeo = $req->{K}; + alarm($timeo) if $timeo; $fn->($req, @argv); + alarm(0) if $timeo; } sub recv_loop { local $SIG{__WARN__} = sub { print $stderr @_ }; my $rbuf; local $SIG{TERM} = sub { undef $in }; + local $SIG{USR1} = \&reopen_logs; while (defined($in)) { PublicInbox::DS::sig_setmask($workerset); my @fds = eval { # we undef $in in SIG{TERM} @@ -211,7 +233,7 @@ sub recv_loop { } scalar(@fds) or exit(66); # EX_NOINPUT die "recvmsg: $!" if !defined($fds[0]); - PublicInbox::DS::block_signals(); + PublicInbox::DS::block_signals(POSIX::SIGALRM); my $req = bless {}, __PACKAGE__; my $i = 0; open($req->{$i++}, '+<&=', $_) for @fds; @@ -263,6 +285,18 @@ sub do_sigttou { } } +sub reopen_logs { + my $p = $ENV{STDOUT_PATH}; + defined($p) && open(STDOUT, '>>', $p) and STDOUT->autoflush(1); + $p = $ENV{STDERR_PATH}; + defined($p) && open(STDERR, '>>', $p) and STDERR->autoflush(1); +} + +sub parent_reopen_logs { + reopen_logs(); + kill('USR1', values %WORKERS); +} + sub xh_alive { $in || scalar(keys %WORKERS) } sub start (@) { @@ -276,7 +310,7 @@ sub start (@) { die 'bad args'; local $workerset = POSIX::SigSet->new; $workerset->fillset or die "fillset: $!"; - for (@PublicInbox::DS::UNBLOCKABLE) { + for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) { $workerset->delset($_) or die "delset($_): $!"; } @@ -295,6 +329,7 @@ sub start (@) { }, TTOU => \&do_sigttou, CHLD => \&PublicInbox::DS::enqueue_reap, + USR1 => \&parent_reopen_logs, }; PublicInbox::DS::block_signals(); start_workers(); diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm index eafe61a8..74852ad1 100644 --- a/lib/PublicInbox/XapHelperCxx.pm +++ b/lib/PublicInbox/XapHelperCxx.pm @@ -16,8 +16,15 @@ use autodie; my $cxx = which($ENV{CXX} // 'c++') // which('clang') // die 'no C++ compiler'; my $dir = substr("$cxx-$Config{archname}", 1); # drop leading '/' $dir =~ tr!/!-!; -my $idir = ($ENV{XDG_CACHE_HOME} // - (($ENV{HOME} // die('HOME unset')).'/.cache')).'/public-inbox/jaot'; +my $idir; +if ((defined($ENV{XDG_CACHE_HOME}) && -d $ENV{XDG_CACHE_HOME}) || + (defined($ENV{HOME}) && -d $ENV{HOME})) { + $idir = ($ENV{XDG_CACHE_HOME} // + (($ENV{HOME} // die('HOME unset')).'/.cache') + ).'/public-inbox/jaot'; +} +$idir //= $ENV{PERL_INLINE_DIRECTORY} // + die 'HOME and PERL_INLINE_DIRECTORY unset'; substr($dir, 0, 0) = "$idir/"; my $bin = "$dir/xap_helper"; my ($srcpfx) = (__FILE__ =~ m!\A(.+/)[^/]+\z!); @@ -58,7 +65,11 @@ sub needs_rebuild () { sub build () { if (!-d $dir) { require File::Path; - File::Path::make_path($dir); + eval { File::Path::make_path($dir) }; + if (!-d $dir && defined($ENV{PERL_INLINE_DIRECTORY})) { + $dir = $ENV{PERL_INLINE_DIRECTORY}; + File::Path::make_path($dir); + } } require PublicInbox::CodeSearch; require PublicInbox::Lock; diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h index 5a89544a..a30a8768 100644 --- a/lib/PublicInbox/xap_helper.h +++ b/lib/PublicInbox/xap_helper.h @@ -27,6 +27,7 @@ #include <sys/types.h> #include <sys/uio.h> #include <sys/wait.h> +#include <poll.h> #include <assert.h> #include <err.h> // BSD, glibc, and musl all have this @@ -95,6 +96,8 @@ static pid_t *worker_pids; // nr => pid #define WORKER_MAX USHRT_MAX static unsigned long nworker, nworker_hwm; static int pipefds[2]; +static const char *stdout_path, *stderr_path; // for SIGUSR1 +static sig_atomic_t worker_needs_reopen; // PublicInbox::Search and PublicInbox::CodeSearch generate these: static void mail_nrp_init(void); @@ -111,6 +114,7 @@ enum exc_iter { struct srch { int paths_len; // int for comparisons unsigned qp_flags; + bool qp_extra_done; Xapian::Database *db; Xapian::QueryParser *qp; char paths[]; // $shard_path0\0$shard_path1\0... @@ -123,6 +127,7 @@ typedef bool (*cmd)(struct req *); struct req { // argv and pfxv point into global rbuf char *argv[MY_ARG_MAX]; char *pfxv[MY_ARG_MAX]; // -A <prefix> + char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX> size_t *lenv; // -A <prefix>LENGTH struct srch *srch; char *Pgit_dir; @@ -136,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf long sort_col; // value column, negative means BoolWeight int argc; int pfxc; + int qpfxc; FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional) bool has_input; // fp[0] is bidirectional bool collapse_threads; @@ -411,6 +417,11 @@ static bool cmd_test_inspect(struct req *req) return false; } +static bool cmd_test_sleep(struct req *req) +{ + for (;;) poll(NULL, 0, 10); + return false; +} #include "xh_mset.h" // read-only (WWW, IMAP, lei) stuff #include "xh_cidx.h" // CodeSearchIdx.pm stuff @@ -425,6 +436,7 @@ static const struct cmd_entry { CMD(dump_ibx), // many inboxes CMD(dump_roots), // per-cidx shard CMD(test_inspect), // least common commands last + CMD(test_sleep), // least common commands last }; #define MY_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) @@ -575,6 +587,31 @@ static bool srch_init(struct req *req) return true; } +// setup query parser for altid and arbitrary headers +static void srch_init_extra(struct req *req) +{ + const char *XPFX; + for (int i = 0; i < req->qpfxc; i++) { + size_t len = strlen(req->qpfxv[i]); + char *c = (char *)memchr(req->qpfxv[i], '=', len); + + if (c) { // it's boolean "gmane=XGMANE" + XPFX = c + 1; + *c = 0; + req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX); + continue; + } + // maybe it's a non-boolean prefix "blob:XBLOBID" + c = (char *)memchr(req->qpfxv[i], ':', len); + if (!c) + errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]); + XPFX = c + 1; + *c = 0; + req->srch->qp->add_prefix(req->qpfxv[i], XPFX); + } + req->srch->qp_extra_done = true; +} + static void free_srch(void *p) // tdestroy { struct srch *srch = (struct srch *)p; @@ -656,12 +693,17 @@ static void dispatch(struct req *req) if (*end || req->threadid == ULLONG_MAX) ABORT("-T %s", optarg); break; + case 'Q': + req->qpfxv[req->qpfxc++] = optarg; + if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q"); + break; default: ABORT("bad switch `-%c'", c); } } ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch kbuf.srch->db = NULL; kbuf.srch->qp = NULL; + kbuf.srch->qp_extra_done = false; kbuf.srch->paths_len = size - offsetof(struct srch, paths); if (kbuf.srch->paths_len <= 0) ABORT("no -d args"); @@ -670,6 +712,7 @@ static void dispatch(struct req *req) req->srch = *s; if (req->srch != kbuf.srch) { // reuse existing free_srch(kbuf.srch); + req->srch->db->reopen(); } else if (!srch_init(req)) { assert(kbuf.srch == *((struct srch **)tfind( kbuf.srch, &srch_tree, srch_cmp))); @@ -678,6 +721,11 @@ static void dispatch(struct req *req) free_srch(kbuf.srch); goto cmd_err; // srch_init already warned } + if (req->qpfxc && !req->srch->qp_extra_done) + srch_init_extra(req); + if (req->timeout_sec) + alarm(req->timeout_sec > UINT_MAX ? + UINT_MAX : (unsigned)req->timeout_sec); try { if (!req->fn(req)) warnx("`%s' failed", req->argv[0]); @@ -686,6 +734,8 @@ static void dispatch(struct req *req) } catch (...) { warn("unhandled exception"); } + if (req->timeout_sec) + alarm(0); cmd_err: return; // just be silent on errors, for now } @@ -726,9 +776,12 @@ static void stderr_restore(FILE *tmp_err) clearerr(stderr); } -static void sigw(int sig) // SIGTERM handler for worker +static void sigw(int sig) // SIGTERM+SIGUSR1 handler for worker { - sock_fd = -1; // break out of recv_loop + switch (sig) { + case SIGUSR1: worker_needs_reopen = 1; break; + default: sock_fd = -1; // break out of recv_loop + } } #define CLEANUP_REQ __attribute__((__cleanup__(req_cleanup))) @@ -738,6 +791,18 @@ static void req_cleanup(void *ptr) free(req->lenv); } +static void reopen_logs(void) +{ + if (stdout_path && *stdout_path && !freopen(stdout_path, "a", stdout)) + err(EXIT_FAILURE, "freopen %s", stdout_path); + if (stderr_path && *stderr_path) { + if (!freopen(stderr_path, "a", stderr)) + err(EXIT_FAILURE, "freopen %s", stderr_path); + if (my_setlinebuf(stderr)) + err(EXIT_FAILURE, "setlinebuf(stderr)"); + } +} + static void recv_loop(void) // worker process loop { static char rbuf[4096 * 33]; // per-process @@ -745,6 +810,7 @@ static void recv_loop(void) // worker process loop sa.sa_handler = sigw; CHECK(int, 0, sigaction(SIGTERM, &sa, NULL)); + CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL)); while (sock_fd == 0) { size_t len = sizeof(rbuf); @@ -761,6 +827,10 @@ static void recv_loop(void) // worker process loop stderr_restore(req.fp[1]); ERR_CLOSE(req.fp[1], 0); } + if (worker_needs_reopen) { + worker_needs_reopen = 0; + reopen_logs(); + } } } @@ -813,6 +883,16 @@ static void cleanup_all(void) #endif } +static void parent_reopen_logs(void) +{ + reopen_logs(); + for (unsigned long nr = nworker; nr < nworker_hwm; nr++) { + pid_t pid = worker_pids[nr]; + if (pid != 0 && kill(pid, SIGUSR1)) + warn("BUG?: kill(%d, SIGUSR1)", (int)pid); + } +} + static void sigp(int sig) // parent signal handler { static const char eagain[] = "signals coming in too fast"; @@ -825,6 +905,7 @@ static void sigp(int sig) // parent signal handler case SIGCHLD: c = '.'; break; case SIGTTOU: c = '-'; break; case SIGTTIN: c = '+'; break; + case SIGUSR1: c = '#'; break; default: write(STDERR_FILENO, bad_sig, sizeof(bad_sig) - 1); _exit(EXIT_FAILURE); @@ -931,6 +1012,8 @@ int main(int argc, char *argv[]) { int c; socklen_t slen = (socklen_t)sizeof(c); + stdout_path = getenv("STDOUT_PATH"); + stderr_path = getenv("STDERR_PATH"); if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen)) err(EXIT_FAILURE, "getsockopt"); @@ -989,6 +1072,8 @@ int main(int argc, char *argv[]) DELSET(SIGXCPU); DELSET(SIGXFSZ); #undef DELSET + CHECK(int, 0, sigdelset(&workerset, SIGUSR1)); + CHECK(int, 0, sigdelset(&fullset, SIGALRM)); if (nworker == 0) { // no SIGTERM handling w/o workers recv_loop(); @@ -1009,10 +1094,12 @@ int main(int argc, char *argv[]) CHECK(int, 0, sigdelset(&pset, SIGCHLD)); CHECK(int, 0, sigdelset(&pset, SIGTTIN)); CHECK(int, 0, sigdelset(&pset, SIGTTOU)); + CHECK(int, 0, sigdelset(&pset, SIGUSR1)); struct sigaction sa = {}; sa.sa_handler = sigp; + CHECK(int, 0, sigaction(SIGUSR1, &sa, NULL)); CHECK(int, 0, sigaction(SIGTTIN, &sa, NULL)); CHECK(int, 0, sigaction(SIGTTOU, &sa, NULL)); sa.sa_flags = SA_NOCLDSTOP; @@ -1037,6 +1124,7 @@ int main(int argc, char *argv[]) case '.': break; // do_sigchld already called case '-': do_sigttou(); break; case '+': do_sigttin(); break; + case '#': parent_reopen_logs(); break; default: errx(EXIT_FAILURE, "BUG: c=%c", sbuf[i]); } } |