From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id CAC581F461 for ; Mon, 11 Mar 2024 19:40:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1710186012; bh=LDpmKjAopkwzODm6Lv/52QyWPec7IHeKjNjapakSAeE=; h=From:To:Subject:Date:In-Reply-To:References:From; b=gCDCpxz7PZOgP+lsrQksXZ5/3P3ZUYGPVMpy27ksz9UW/bw/9Be7tSrx36DS/rDNK cPh2rE8gAEDT6Xxyg5+0RRMFVVLGw95Woo3ulLOFHKCgWmW0s/MsQbe/tn5hDwL/EX BoeWtDyAk5gBHmrzHnH7t/o5+X+BYK7Pf6WfECFo= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/4] www: use a dedicated limiter for blob solver Date: Mon, 11 Mar 2024 19:40:09 +0000 Message-ID: <20240311194012.1266143-2-e@80x24.org> In-Reply-To: <20240311194012.1266143-1-e@80x24.org> References: <20240311194012.1266143-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Wrap the entire solver command chain with a dedicated limiter. The normal limiter is designed for longer-lived commands or ones which serve a single HTTP request (e.g. git-http-backend or cgit) and not effective for short memory + CPU intensive commands used for solver. Each overall solver request is both memory + CPU intensive: it spawns several short-lived git processes(*) in addition to a longer-lived `git cat-file --batch' process. Thus running parallel solvers from a single -netd/-httpd worker (which have their own parallelization) results in excessive parallelism that is both memory and CPU-bound (not network-bound) and cascade into slowdowns for handling simpler memory/CPU-bound requests. Parallel solvers were also responsible for the increased lifetime and frequency of zombies since the event loop was too saturated to reap them. We'll also return 503 on excessive solver queueing, since these require an FD for the client HTTP(S) socket to be held onto. (*) git (update-index|apply|ls-files) are all run by solver and short-lived --- lib/PublicInbox/SolverGit.pm | 15 ++++++----- lib/PublicInbox/ViewVCS.pm | 48 +++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index 4e79f750..296e7d17 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -256,6 +256,12 @@ sub update_index_result ($$) { next_step($self); # onto do_git_apply } +sub qsp_qx ($$$) { + my ($self, $qsp, $cb) = @_; + $qsp->{qsp_err} = \($self->{-qsp_err} = ''); + $qsp->psgi_qx($self->{psgi_env}, $self->{limiter}, $cb, $self); +} + sub prepare_index ($) { my ($self) = @_; my $patches = $self->{patches}; @@ -284,9 +290,8 @@ sub prepare_index ($) { my $cmd = [ qw(git update-index -z --index-info) ]; my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr); $path_a = git_quote($path_a); - $qsp->{qsp_err} = \($self->{-qsp_err} = ''); $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a"; - $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self); + qsp_qx $self, $qsp, \&update_index_result; } # pure Perl "git init" @@ -465,8 +470,7 @@ sub apply_result ($$) { # qx_cb my @cmd = qw(git ls-files -s -z); my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}); $self->{-cur_di} = $di; - $qsp->{qsp_err} = \($self->{-qsp_err} = ''); - $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self); + qsp_qx $self, $qsp, \&ls_files_result; } sub do_git_apply ($) { @@ -495,8 +499,7 @@ sub do_git_apply ($) { my $opt = { 2 => 1, -C => _tmp($self)->dirname, quiet => 1 }; my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt); $self->{-cur_di} = $di; - $qsp->{qsp_err} = \($self->{-qsp_err} = ''); - $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self); + qsp_qx $self, $qsp, \&apply_result; } sub di_url ($$) { diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 61329db6..790b9a2c 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -49,6 +49,10 @@ my %GIT_MODE = ( '160000' => 'g', # commit (gitlink) ); +# TODO: not fork safe, but we don't fork w/o exec in PublicInbox::WWW +my (@solver_q, $solver_lim); +my $solver_nr = 0; + sub html_page ($$;@) { my ($ctx, $code) = @_[0, 1]; my $wcb = delete $ctx->{-wcb}; @@ -614,26 +618,52 @@ sub show_blob { # git->cat_async callback ''.dbg_log($ctx), @def); } -# GET /$INBOX/$GIT_OBJECT_ID/s/ -# GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME -sub show ($$;$) { - my ($ctx, $oid_b, $fn) = @_; - my $hints = $ctx->{hints} = {}; +sub start_solver ($) { + my ($ctx) = @_; while (my ($from, $to) = each %QP_MAP) { my $v = $ctx->{qp}->{$from} // next; - $hints->{$to} = $v if $v ne ''; + $ctx->{hints}->{$to} = $v if $v ne ''; } - $ctx->{fn} = $fn; - $ctx->{-tmp} = File::Temp->newdir("solver.$oid_b-XXXX", TMPDIR => 1); + $ctx->{-next_solver} = PublicInbox::OnDestroy->new($$, \&next_solver); + ++$solver_nr; + $ctx->{-tmp} = File::Temp->newdir("solver.$ctx->{oid_b}-XXXX", + TMPDIR => 1); $ctx->{lh} or open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log"; my $solver = PublicInbox::SolverGit->new($ctx->{ibx}, \&solve_result, $ctx); + $solver->{limiter} = $solver_lim; $solver->{gits} //= [ $ctx->{git} ]; $solver->{tmp} = $ctx->{-tmp}; # share tmpdir # PSGI server will call this immediately and give us a callback (-wcb) + $solver->solve(@$ctx{qw(env lh oid_b hints)}); +} + +# run the next solver job when done and DESTROY-ed +sub next_solver { + --$solver_nr; + # XXX FIXME: client may've disconnected if it waited a long while + start_solver(shift(@solver_q) // return); +} + +sub may_start_solver ($) { + my ($ctx) = @_; + $solver_lim //= $ctx->{www}->{pi_cfg}->limiter('codeblob'); + if ($solver_nr >= $solver_lim->{max}) { + @solver_q > 128 ? html_page($ctx, 503, 'too busy') + : push(@solver_q, $ctx); + } else { + start_solver($ctx); + } +} + +# GET /$INBOX/$GIT_OBJECT_ID/s/ +# GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME +sub show ($$;$) { + my ($ctx, $oid_b, $fn) = @_; + @$ctx{qw(oid_b fn)} = ($oid_b, $fn); sub { $ctx->{-wcb} = $_[0]; # HTTP write callback - $solver->solve($ctx->{env}, $ctx->{lh}, $oid_b, $hints); + may_start_solver $ctx; }; }