From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN: 
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 2BD9F1F8C8
	for ; Tue, 28 Sep 2021 07:53:49 +0000 (UTC)
From: Eric Wong 
To: meta@public-inbox.org
Subject: [PATCH] www+httpd: lower priority of large mbox downloads
Date: Tue, 28 Sep 2021 07:53:49 +0000
Message-Id: <20210928075349.5526-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: 

While each git blob request is treated fairly w.r.t. other git blob
requests, responses triggering thousands of git blob requests can
still noticeably increase latency for less-expensive responses.

Move large mbox results and the nasty all.mbox endpoint to a
low-priority queue which only fires once per event-loop iteration.
This reduces the response time of short HTTP responses while many
gigantic mboxes are being downloaded simultaneously, but still
maximizes use of available I/O when there are no inexpensive HTTP
responses happening.

This only affects PublicInbox::WWW users who use public-inbox-httpd,
not generic PSGI servers.
---
 lib/PublicInbox/GzipFilter.pm | 7 ++++++-
 lib/PublicInbox/Mbox.pm       | 7 +++++--
 lib/PublicInbox/WWW.pm        | 9 +++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 334d6581..c50c26c5 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -175,7 +175,12 @@ sub async_blob_cb { # git->cat_async callback
 	$smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
 	eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
 	bail($self, "E: async_eml: $@") if $@;
-	$http->next_step($self->can('async_next'));
+	if ($self->{-low_prio}) {
+		push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
+			PublicInbox::DS::requeue($self->{www});
+	} else {
+		$http->next_step($self->can('async_next'));
+	}
 }
 
 sub smsg_blob {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index f72af26b..cec76182 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -174,6 +174,7 @@ sub mbox_all_ids {
 		[404, [qw(Content-Type text/plain)], ["No results found\n"]];
 	$ctx->{ids} = $ids;
 	$ctx->{prev} = $prev;
+	$ctx->{-low_prio} = 1;
 	require PublicInbox::MboxGz;
 	PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
 }
@@ -192,12 +193,13 @@ sub results_cb {
 			my $smsg = $over->get_art($num) or next;
 			return $smsg;
 		}
-		# refill result set
+		# refill result set, deprioritize since there's many results
 		my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
 		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
+		$ctx->{-low_prio} = 1;
 	}
 }
@@ -214,12 +216,13 @@ sub results_thread_cb {
 		# refills ctx->{xids}
 		next if $over->expand_thread($ctx);
 
-		# refill result set
+		# refill result set, deprioritize since there's many results
 		my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
 		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
+		$ctx->{-low_prio} = 1;
 	}
 }
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 570e690e..a7c961f4 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -659,4 +659,13 @@ sub get_description {
 	};
 }
 
+sub event_step { # called via requeue
+	my ($self) = @_;
+	# gzf = PublicInbox::GzipFilter == $ctx
+	my $gzf = shift(@{$self->{-low_prio_q}}) // return;
+	PublicInbox::DS::requeue($self) if scalar(@{$self->{-low_prio_q}});
+	my $http = $gzf->{env}->{'psgix.io'}; # PublicInbox::HTTP
+	$http->next_step($gzf->can('async_next'));
+}
+
 1;
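
The once-per-event-loop deprioritization described in the commit message
can be illustrated outside of public-inbox.  Below is a minimal,
standalone Perl sketch (hypothetical names; not code from this patch and
not the PublicInbox::DS API): each loop pass drains the high-priority
queue completely but pops at most one low-priority job, so cheap
responses never wait behind a long backlog of expensive mbox work, while
spare capacity still goes to the expensive queue.

#!/usr/bin/env perl
# Sketch only, NOT part of the patch: fairness between a cheap queue
# and an expensive queue, one expensive job per loop pass.
use strict;
use warnings;

my (@high_prio, @low_prio);

# hypothetical jobs: closures standing in for HTTP response steps
push @high_prio, map { my $n = $_; sub { print "short response $n\n" } } 1..3;
push @low_prio,  map { my $n = $_; sub { print "mbox chunk $n\n" } } 1..2;

while (@high_prio || @low_prio) { # one pass ~= one event-loop iteration
	while (my $job = shift @high_prio) {
		$job->(); # cheap responses are always drained first
	}
	if (my $job = shift @low_prio) {
		$job->(); # expensive work still makes progress every pass
	}
}

Running the sketch prints the three short responses before any mbox
chunk, then emits one chunk per pass, which is the latency/throughput
trade-off the patch aims for.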