From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 12/11] solver: async blob retrieval for diff extraction Date: Thu, 10 Sep 2020 01:51:53 +0000 Message-ID: <20200910015153.GA8922@dcvr> (raw) In-Reply-To: <20200909062618.5940-1-e@80x24.org> Like the rest of the WWW code, public-inbox-httpd now uses git_async_cat to retrieve blobs without blocking the event loop. This improves fairness when git blobs are on slow storage and allows us to take better advantage of SMP systems. --- lib/PublicInbox/SolverGit.pm | 85 +++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index c54d6d54..ae3997ca 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -16,6 +16,8 @@ use PublicInbox::Git qw(git_unquote git_quote); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; use PublicInbox::Tmpfile; +use PublicInbox::GitAsyncCat; +use PublicInbox::Eml; use URI::Escape qw(uri_escape_utf8); # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to @@ -540,6 +542,47 @@ sub try_harder { die "E: $@" if $@; } +sub extract_diffs_done { + my ($self, $want) = @_; + + delete $want->{try_smsgs}; + delete $want->{cur_ibx}; + + my $diffs = delete $self->{tmp_diffs}; + if (scalar @$diffs) { + unshift @{$self->{patches}}, @$diffs; + dbg($self, "found $want->{oid_b} in " . join(" ||\n\t", + map { di_url($self, $_) } @$diffs)); + + # good, we can find a path to the oid we $want, now + # lets see if we need to apply more patches: + my $di = $diffs->[0]; + my $src = $di->{oid_a}; + + unless ($src =~ /\A0+\z/) { + # we have to solve it using another oid, fine: + my $job = { oid_b => $src, path_b => $di->{path_a} }; + push @{$self->{todo}}, $job; + } + goto \&next_step; # onto the next todo item + } + goto \&try_harder; +} + +sub extract_diff_async { + my ($bref, $oid, $type, $size, $x) = @_; + my ($self, $want, $smsg) = @$x; + if (defined($oid)) { + $smsg->{blob} eq $oid or + ERR($self, "BUG: $smsg->{blob} != $oid"); + PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1); + } + + scalar(@{$want->{try_smsgs}}) ? + retry_current($self, $want) : + extract_diffs_done($self, $want); +} + sub resolve_patch ($$) { my ($self, $want) = @_; @@ -550,39 +593,19 @@ sub resolve_patch ($$) { if (my $msgs = $want->{try_smsgs}) { my $smsg = shift @$msgs; - if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { - $eml->each_part(\&extract_diff, - [ $self, $want, $smsg ], 1); - } - - # try the remaining smsgs later - goto \&retry_current if scalar @$msgs; - - delete $want->{try_smsgs}; - delete $want->{cur_ibx}; - - my $diffs = delete $self->{tmp_diffs}; - if (scalar @$diffs) { - unshift @{$self->{patches}}, @$diffs; - dbg($self, "found $cur_want in " . join(" ||\n\t", - map { di_url($self, $_) } @$diffs)); - - # good, we can find a path to the oid we $want, now - # lets see if we need to apply more patches: - my $di = $diffs->[0]; - my $src = $di->{oid_a}; - - unless ($src =~ /\A0+\z/) { - # we have to solve it using another oid, fine: - my $job = { - oid_b => $src, - path_b => $di->{path_a}, - }; - push @{$self->{todo}}, $job; + if ($self->{psgi_env}->{'pi-httpd.async'}) { + return git_async_cat($want->{cur_ibx}->git, + $smsg->{blob}, + \&extract_diff_async, + [$self, $want, $smsg]); + } else { + if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { + $eml->each_part(\&extract_diff, + [ $self, $want, $smsg ], 1); } - goto \&next_step; # onto the next todo item } - goto \&try_harder; + + goto(scalar @$msgs ? \&retry_current : \&extract_diffs_done); } # see if we can find the blob in an existing git repo:
prev parent reply other threads:[~2020-09-10 1:51 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-09-09 6:26 [PATCH 00/11] httpd: further reduce event loop monopolization Eric Wong 2020-09-09 6:26 ` [PATCH 01/11] xt/solver: test with public-inbox-httpd, too Eric Wong 2020-09-09 6:26 ` [PATCH 02/11] solver: drop warnings, modernize use v5.10.1, use SEEK_SET Eric Wong 2020-09-09 6:26 ` [PATCH 03/11] use "\&" where possible when referring to subroutines Eric Wong 2020-09-09 6:26 ` [PATCH 04/11] www: manifest.js.gz generation no longer hogs event loop Eric Wong 2020-09-09 6:26 ` [PATCH 05/11] config: flatten each_inbox and iterate_start args Eric Wong 2020-09-09 6:26 ` [PATCH 06/11] config: split out iterator into separate object Eric Wong 2020-09-09 6:26 ` [PATCH 07/11] t/cgi.t: show stderr on failures Eric Wong 2020-09-09 6:26 ` [PATCH 08/11] extmsg: prevent cross-inbox matches from hogging event loop Eric Wong 2020-09-09 6:26 ` [PATCH 09/11] wwwlisting: avoid " Eric Wong 2020-09-09 6:26 ` [PATCH 10/11] solver: check one git coderepo and inbox at a time Eric Wong 2020-09-09 6:26 ` [PATCH 11/11] solver: break apart inbox blob retrieval Eric Wong 2020-09-10 1:51 ` Eric Wong [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style List information: http://public-inbox.org/README * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20200910015153.GA8922@dcvr \ --to=e@80x24.org \ --cc=meta@public-inbox.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
user/dev discussion of public-inbox itself This inbox may be cloned and mirrored by anyone: git clone --mirror http://public-inbox.org/meta git clone --mirror http://czquwvybam4bgbro.onion/meta git clone --mirror http://hjrcffqmbrq6wope.onion/meta git clone --mirror http://ou63pmih66umazou.onion/meta # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V1 meta meta/ http://public-inbox.org/meta \ meta@public-inbox.org public-inbox-index meta Example config snippet for mirrors. Newsgroups are available over NNTP: nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta nntp://news.gmane.io/gmane.mail.public-inbox.general note: .onion URLs require Tor: https://www.torproject.org/ code repositories for the project(s) associated with this inbox: https://80x24.org/public-inbox.git AGPL code for this site: git clone https://public-inbox.org/public-inbox.git