about summary refs log tree commit homepage
path: root/lib/PublicInbox/MboxGz.pm
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-07-05 23:27:30 +0000
committerEric Wong <e@yhbt.net>2020-07-06 20:01:15 +0000
commitc86657e655936d59cab8a88ef1fc1e986c5fb3f0 (patch)
tree3fb85a047354a50827cc033f8fdc67da4fabf023 /lib/PublicInbox/MboxGz.pm
parenta3f00c4a7851b98b81a2fcb31d5ed131908e22de (diff)
downloadpublic-inbox-c86657e655936d59cab8a88ef1fc1e986c5fb3f0.tar.gz
This lets the -httpd worker process make better use of time
instead of waiting for git-cat-file to respond.  With 4 jobs in
the new test case against a clone of
<https://public-inbox.org/meta/>, a speedup of 10-12% is shown.
Even a single job shows a 2-5% improvement on an SSD.
Diffstat (limited to 'lib/PublicInbox/MboxGz.pm')
-rw-r--r--lib/PublicInbox/MboxGz.pm69
1 files changed, 60 insertions, 9 deletions
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 535ef96c..8c9010af 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -6,6 +6,9 @@ use parent 'PublicInbox::GzipFilter';
 use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Mbox;
+use PublicInbox::GitAsyncCat;
+*msg_hdr = \&PublicInbox::Mbox::msg_hdr;
+*msg_body = \&PublicInbox::Mbox::msg_body;
 
 sub new {
         my ($class, $ctx, $cb) = @_;
@@ -17,33 +20,81 @@ sub new {
         }, $class;
 }
 
+# this is public-inbox-httpd-specific
+sub mboxgz_blob_cb { # git->cat_async callback
+        my ($bref, $oid, $type, $size, $self) = @_;
+        my $http = $self->{ctx}->{env}->{'psgix.io'} or return; # client abort
+        my $smsg = delete $self->{smsg} or die 'BUG: no smsg';
+        if (!defined($oid)) {
+                # it's possible to have TOCTOU if an admin runs
+                # public-inbox-(edit|purge), just move onto the next message
+                return $http->next_step(\&async_next);
+        } else {
+                $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
+        }
+        $self->zmore(msg_hdr($self->{ctx},
+                                PublicInbox::Eml->new($bref)->header_obj,
+                                $smsg->{mid}));
+
+        # PublicInbox::HTTP::{Chunked,Identity}::write
+        $self->{http_out}->write($self->translate(msg_body($$bref)));
+
+        $http->next_step(\&async_next);
+}
+
+# this is public-inbox-httpd-specific
+sub async_step ($) {
+        my ($self) = @_;
+        if (my $smsg = $self->{smsg} = $self->{cb}->($self->{ctx})) {
+                git_async_cat($self->{ctx}->{-inbox}->git, $smsg->{blob},
+                                \&mboxgz_blob_cb, $self);
+        } elsif (my $out = delete $self->{http_out}) {
+                $out->write($self->zflush);
+                $out->close;
+        }
+}
+
+# called by PublicInbox::DS::write
+sub async_next {
+        my ($http) = @_; # PublicInbox::HTTP
+        async_step($http->{forward});
+}
+
+# called by PublicInbox::HTTP::close, or any other PSGI server
+sub close { !!delete($_[0]->{http_out}) }
+
 sub response {
         my ($class, $ctx, $cb, $fn) = @_;
-        my $body = $class->new($ctx, $cb);
+        my $self = $class->new($ctx, $cb);
         # http://www.iana.org/assignments/media-types/application/gzip
         $fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject';
         my $h = [ qw(Content-Type application/gzip),
                 'Content-Disposition', "inline; filename=$fn.mbox.gz" ];
-        [ 200, $h, $body ];
+        if ($ctx->{env}->{'pi-httpd.async'}) {
+                sub {
+                        my ($wcb) = @_; # -httpd provided write callback
+                        $self->{http_out} = $wcb->([200, $h]);
+                        $self->{ctx}->{env}->{'psgix.io'}->{forward} = $self;
+                        async_step($self); # start stepping
+                };
+        } else { # generic PSGI
+                [ 200, $h, $self ];
+        }
 }
 
-# called by Plack::Util::foreach or similar
+# called by Plack::Util::foreach or similar (generic PSGI)
 sub getline {
         my ($self) = @_;
         my $ctx = $self->{ctx} or return;
         while (my $smsg = $self->{cb}->($ctx)) {
                 my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
                 my $h = PublicInbox::Eml->new($mref)->header_obj;
-                $self->zmore(
-                        PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid})
-                );
-                return $self->translate(PublicInbox::Mbox::msg_body($$mref));
+                $self->zmore(msg_hdr($ctx, $h, $smsg->{mid}));
+                return $self->translate(msg_body($$mref));
         }
         # signal that we're done and can return undef next call:
         delete $self->{ctx};
         $self->zflush;
 }
 
-sub close {} # noop
-
 1;