From c86657e655936d59cab8a88ef1fc1e986c5fb3f0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Jul 2020 23:27:30 +0000 Subject: mboxgz: do asynchronous git blob retrievals This lets the -httpd worker process make better use of time instead of waiting for git-cat-file to respond. With 4 jobs in the new test case against a clone of , a speedup of 10-12% is shown. Even a single job shows a 2-5% improvement on an SSD. --- lib/PublicInbox/MboxGz.pm | 69 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 9 deletions(-) (limited to 'lib/PublicInbox/MboxGz.pm') diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index 535ef96c..8c9010af 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -6,6 +6,9 @@ use parent 'PublicInbox::GzipFilter'; use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Mbox; +use PublicInbox::GitAsyncCat; +*msg_hdr = \&PublicInbox::Mbox::msg_hdr; +*msg_body = \&PublicInbox::Mbox::msg_body; sub new { my ($class, $ctx, $cb) = @_; @@ -17,33 +20,81 @@ sub new { }, $class; } +# this is public-inbox-httpd-specific +sub mboxgz_blob_cb { # git->cat_async callback + my ($bref, $oid, $type, $size, $self) = @_; + my $http = $self->{ctx}->{env}->{'psgix.io'} or return; # client abort + my $smsg = delete $self->{smsg} or die 'BUG: no smsg'; + if (!defined($oid)) { + # it's possible to have TOCTOU if an admin runs + # public-inbox-(edit|purge), just move onto the next message + return $http->next_step(\&async_next); + } else { + $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid"; + } + $self->zmore(msg_hdr($self->{ctx}, + PublicInbox::Eml->new($bref)->header_obj, + $smsg->{mid})); + + # PublicInbox::HTTP::{Chunked,Identity}::write + $self->{http_out}->write($self->translate(msg_body($$bref))); + + $http->next_step(\&async_next); +} + +# this is public-inbox-httpd-specific +sub async_step ($) { + my ($self) = @_; + if (my $smsg = $self->{smsg} = $self->{cb}->($self->{ctx})) { + git_async_cat($self->{ctx}->{-inbox}->git, $smsg->{blob}, + \&mboxgz_blob_cb, $self); + } elsif (my $out = delete $self->{http_out}) { + $out->write($self->zflush); + $out->close; + } +} + +# called by PublicInbox::DS::write +sub async_next { + my ($http) = @_; # PublicInbox::HTTP + async_step($http->{forward}); +} + +# called by PublicInbox::HTTP::close, or any other PSGI server +sub close { !!delete($_[0]->{http_out}) } + sub response { my ($class, $ctx, $cb, $fn) = @_; - my $body = $class->new($ctx, $cb); + my $self = $class->new($ctx, $cb); # http://www.iana.org/assignments/media-types/application/gzip $fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject'; my $h = [ qw(Content-Type application/gzip), 'Content-Disposition', "inline; filename=$fn.mbox.gz" ]; - [ 200, $h, $body ]; + if ($ctx->{env}->{'pi-httpd.async'}) { + sub { + my ($wcb) = @_; # -httpd provided write callback + $self->{http_out} = $wcb->([200, $h]); + $self->{ctx}->{env}->{'psgix.io'}->{forward} = $self; + async_step($self); # start stepping + }; + } else { # generic PSGI + [ 200, $h, $self ]; + } } -# called by Plack::Util::foreach or similar +# called by Plack::Util::foreach or similar (generic PSGI) sub getline { my ($self) = @_; my $ctx = $self->{ctx} or return; while (my $smsg = $self->{cb}->($ctx)) { my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; my $h = PublicInbox::Eml->new($mref)->header_obj; - $self->zmore( - PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}) - ); - return $self->translate(PublicInbox::Mbox::msg_body($$mref)); + $self->zmore(msg_hdr($ctx, $h, $smsg->{mid})); + return $self->translate(msg_body($$mref)); } # signal that we're done and can return undef next call: delete $self->{ctx}; $self->zflush; } -sub close {} # noop - 1; -- cgit v1.2.3-24-ge0c7