user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 22/43] wwwstream: reduce object graph depth
  2020-07-05 23:27  7% [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
@ 2020-07-05 23:27  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-07-05 23:27 UTC (permalink / raw)
  To: meta

Like with WwwAtomStream and MboxGz, we can bless the existing
$ctx object directly to avoid allocating a new hashref.  We'll
also switch from "->" to "::" to reduce stack utilization.
---
 Documentation/mknews.perl     |  2 +-
 lib/PublicInbox/Feed.pm       |  2 +-
 lib/PublicInbox/SearchView.pm |  2 +-
 lib/PublicInbox/View.pm       |  8 ++---
 lib/PublicInbox/WwwStream.pm  | 59 +++++++++++++++--------------------
 5 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl
index 2d22d147e..1bd704e68 100755
--- a/Documentation/mknews.perl
+++ b/Documentation/mknews.perl
@@ -112,7 +112,7 @@ sub html_start {
 	my ($out, $ctx) = @_;
 	require PublicInbox::WwwStream;
 	$ctx->{www} = My::MockObject->new(style => '');
-	my $www_stream = PublicInbox::WwwStream->new($ctx);
+	my $www_stream = PublicInbox::WwwStream::init($ctx);
 	print $out $www_stream->_html_top, '<pre>' or die;
 }
 
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 4c1056b46..f25dd267e 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -70,7 +70,7 @@ sub new_html {
 	$ctx->{-html_tip} = '<pre>';
 	$ctx->{-upfx} = '';
 	$ctx->{-hr} = 1;
-	PublicInbox::WwwStream->response($ctx, 200, \&new_html_i);
+	PublicInbox::WwwStream::response($ctx, 200, \&new_html_i);
 }
 
 # private subs
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index d53a533e5..71c3ae707 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -82,7 +82,7 @@ retry:
 			$cb = mset_summary($ctx, $mset, $q);
 		}
 	}
-	PublicInbox::WwwStream->response($ctx, $code, $cb);
+	PublicInbox::WwwStream::response($ctx, $code, $cb);
 }
 
 # display non-nested search results similar to what users expect from
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0bc2b06e4..4d6f44e0b 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -66,7 +66,7 @@ sub msg_page {
 	$ctx->{mhref} = $next ? '../'.mid_href($smsg->{mid}).'/' : '';
 	multipart_text_as_html($mime, $ctx);
 	$ctx->{-html_tip} = (${delete $ctx->{obuf}} .= '</pre><hr>');
-	PublicInbox::WwwStream->response($ctx, 200, \&msg_page_i);
+	PublicInbox::WwwStream::response($ctx, 200, \&msg_page_i);
 }
 
 sub msg_page_more { # cold
@@ -413,7 +413,7 @@ sub stream_thread ($$) {
 	$ctx->{-title_html} = ascii_html($smsg->{subject});
 	$ctx->{-html_tip} = thread_eml_entry($ctx, $level, $smsg, $eml);
 	$ctx->{-queue} = \@q;
-	PublicInbox::WwwStream->response($ctx, 200, \&stream_thread_i);
+	PublicInbox::WwwStream::response($ctx, 200, \&stream_thread_i);
 }
 
 # /$INBOX/$MESSAGE_ID/t/
@@ -459,7 +459,7 @@ sub thread_html {
 	$ctx->{-title_html} = ascii_html($smsg->{subject});
 	$ctx->{-html_tip} = '<pre>'.eml_entry($ctx, $smsg, $eml, scalar @$msgs);
 	$ctx->{msgs} = $msgs;
-	PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
+	PublicInbox::WwwStream::response($ctx, 200, \&thread_html_i);
 }
 
 sub thread_html_i { # PublicInbox::WwwStream::getline callback
@@ -1213,7 +1213,7 @@ sub index_topics {
 	if (@$msgs) {
 		walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic);
 	}
-	PublicInbox::WwwStream->response($ctx, dump_topics($ctx), \&index_nav);
+	PublicInbox::WwwStream::response($ctx, dump_topics($ctx), \&index_nav);
 }
 
 sub thread_adj_level {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 8623440b8..c80440d14 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -27,28 +27,24 @@ sub base_url ($) {
 	$base_url;
 }
 
-sub new {
-	my ($class, $ctx, $cb) = @_;
-
-	bless {
-		nr => 0,
-		cb => $cb,
-		ctx => $ctx,
-		base_url => base_url($ctx),
-	}, $class;
+sub init {
+	my ($ctx, $cb) = @_;
+	$ctx->{cb} = $cb;
+	$ctx->{base_url} = base_url($ctx);
+	$ctx->{nr} = 0;
+	bless $ctx, __PACKAGE__;
 }
 
 sub response {
-	my ($class, $ctx, $code, $cb) = @_;
+	my ($ctx, $code, $cb) = @_;
 	my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
-	my $self = $class->new($ctx, $cb);
-	$self->{gzf} = gzf_maybe($h, $ctx->{env});
-	[ $code, $h, $self ]
+	init($ctx, $cb);
+	$ctx->{gzf} = gzf_maybe($h, $ctx->{env});
+	[ $code, $h, $ctx ]
 }
 
 sub _html_top ($) {
-	my ($self) = @_;
-	my $ctx = $self->{ctx};
+	my ($ctx) = @_;
 	my $ibx = $ctx->{-inbox};
 	my $desc = ascii_html($ibx->description);
 	my $title = delete($ctx->{-title_html}) // $desc;
@@ -89,14 +85,13 @@ sub code_footer ($) {
 }
 
 sub _html_end {
-	my ($self) = @_;
+	my ($ctx) = @_;
 	my $urls = 'Archives are clonable:';
-	my $ctx = $self->{ctx};
 	my $ibx = $ctx->{-inbox};
 	my $desc = ascii_html($ibx->description);
 
 	my @urls;
-	my $http = $self->{base_url};
+	my $http = $ctx->{base_url};
 	my $max = $ibx->max_git_epoch;
 	my $dir = (split(m!/!, $http))[-1];
 	my %seen = ($http => 1);
@@ -163,41 +158,39 @@ EOF
 
 # callback for HTTP.pm (and any other PSGI servers)
 sub getline {
-	my ($self) = @_;
-	my $nr = $self->{nr}++;
+	my ($ctx) = @_;
+	my $nr = $ctx->{nr}++;
 
 	my $buf = do {
 		if ($nr == 0) {
-			_html_top($self);
-		} elsif (my $middle = $self->{cb}) {
-			$middle->($nr, $self->{ctx});
+			_html_top($ctx);
+		} elsif (my $middle = $ctx->{cb}) {
+			$middle->($nr, $ctx);
 		}
-	} // (delete($self->{cb}) ? _html_end($self) : undef);
+	} // (delete($ctx->{cb}) ? _html_end($ctx) : undef);
 
 	# gzf may be GzipFilter, `undef' or `0'
-	my $gzf = $self->{gzf} or return $buf;
+	my $gzf = $ctx->{gzf} or return $buf;
 
 	return $gzf->translate($buf) if defined $buf;
-	$self->{gzf} = 0; # next call to ->getline returns $buf (== undef)
+	$ctx->{gzf} = 0; # next call to ->getline returns $buf (== undef)
 	$gzf->translate(undef);
 }
 
 sub html_oneshot ($$;$) {
 	my ($ctx, $code, $sref) = @_;
-	my $self = bless {
-		ctx => $ctx,
-		base_url => base_url($ctx),
-	}, __PACKAGE__;
+	$ctx->{base_url} = base_url($ctx);
+	bless $ctx, __PACKAGE__;
 	my @x;
 	my $h = [ 'Content-Type' => 'text/html; charset=UTF-8',
 		'Content-Length' => undef ];
 	if (my $gzf = gzf_maybe($h, $ctx->{env})) {
-		$gzf->zmore(_html_top($self));
+		$gzf->zmore(_html_top($ctx));
 		$gzf->zmore($$sref) if $sref;
-		$x[0] = $gzf->zflush(_html_end($self));
+		$x[0] = $gzf->zflush(_html_end($ctx));
 		$h->[3] = length($x[0]);
 	} else {
-		@x = (_html_top($self), $sref ? $$sref : (), _html_end($self));
+		@x = (_html_top($ctx), $sref ? $$sref : (), _html_end($ctx));
 		$h->[3] += bytes::length($_) for @x;
 	}
 	[ $code, $h, \@x ]

^ permalink raw reply	[relevance 6%]

* [PATCH 00/43] www: async git cat-file w/ -httpd
@ 2020-07-05 23:27  7% Eric Wong
  2020-07-05 23:27  6% ` [PATCH 22/43] wwwstream: reduce object graph depth Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-07-05 23:27 UTC (permalink / raw)
  To: meta

This allows -httpd to make better use of time it spends waiting
on git-cat-file to respond.  It allows us to deal with
high-latency HDD storage without a client monopolizing the event
loop.  Even on a mid-range consumer-grade SSD, this seems to
give a 10+% speed improvement for HTTP responses requiring many
blobs, including all /T/, /t/, and /t.mbox.gz endpoints.

This only benefits indexed inboxes (both v1 and v2); I'm not
sure if anybody still uses unindexed v1 inboxes nowadays.

A new xt/httpd-async-stream.t maintainer test ensures checksums
for responses before and after this series match exactly as
before.

This builds off a branch I started several months ago (but never
published here) to integrate gzip responses into our codebase
and remove our optional dependency on Plack::Middleware::Deflater.

We already gzip a bunch of things independent of
Plack::Middleware::Deflater: manifest.js.gz, altid SQLite3 dumps
and all the *.mbox.gz endpoints; so being able to use gzip on
all of our responses without an extra dependency seemed logical.

Being able to consistently use our GzipFilter API to perform
buffering via ->zmore made it significantly easier to reason
about small response chunks for ghost messages interspersed with
large ones when streaming /$INBOX/$MSGID/t/ endpoints.

I'm not yet maximizing use of ->zmore for all buffering of HTTP
responses, yet; measurements need to happen, first.  That may
happen in the 1.7 time frame.  In particular, we would need to
ensure the Perl method dispatch and DSO overhead to Zlib.so and
libz.so of making many ->zmore calls doesn't cause performance
regressions compared to the current `.=' use and calling
->zmore/->translate fewer times.

Eric Wong (43):
  gzipfilter: minor cleanups
  wwwstream: oneshot: perform gzip without middleware
  www*stream: gzip ->getline responses
  wwwtext: gzip text/plain responses, as well
  wwwtext: switch to html_oneshot
  www: need: use WwwStream::html_oneshot
  wwwlisting: use GzipFilter for HTML
  gzipfilter: replace Compress::Raw::Deflate usages
  {gzip,noop}filter: ->zmore returns undef, always
  mbox: remove html_oneshot import
  wwwstatic: support gzipped response directory listings
  qspawn: learn to gzip streaming responses
  stop auto-loading Plack::Middleware::Deflater
  mboxgz: do asynchronous git blob retrievals
  mboxgz: reduce object hash depth
  mbox: async blob retrieval for "single message" raw mboxrd
  wwwatomstream: simplify feed_update callers
  wwwatomstream: use PublicInbox::Inbox->modified for feed_updated
  wwwatomstream: reuse $ctx as $self
  xt/httpd-async-stream: allow more options
  wwwatomstream: support asynchronous blob retrievals
  wwwstream: reduce object graph depth
  wwwstream: reduce blob retrieval paths for ->getline
  www: start making gzipfilter the parent response class
  remove unused/redundant zlib-related imports
  wwwstream: use parent.pm and no warnings
  wwwstream: subclass off GzipFilter
  view: wire up /$INBOX/$MESSAGE_ID/ permalink to async
  view: /$INBOX/$MSGID/t/ reads blobs asynchronously
  view: update /$INBOX/$MSGID/T/ to be async
  feed: generate_i: eliminate pointless loop
  feed: /$INBOX/new.html retrieves blobs asynchronously
  ssearchview: /$INBOX/?q=$QUERY&x=t uses async blobs
  view: eml_entry: reduce parameters
  view: /$INBOX/$MSGID/t/: avoid extra hash lookup in eml case
  wwwstream: eliminate ::response, use html_oneshot
  www: update internal docs
  view: simplify eml_entry callers further
  wwwtext: simplify gzf_maybe use
  wwwattach: support async blob retrievals
  gzipfilter: drop HTTP connection on bugs or data corruption
  daemon: warn on missing blobs
  gzipfilter: check http->{forward} for client disconnects

 Documentation/mknews.perl            |  20 +--
 Documentation/public-inbox-httpd.pod |   1 -
 Documentation/technical/ds.txt       |   4 +-
 INSTALL                              |   5 -
 MANIFEST                             |   2 +
 ci/deps.perl                         |   1 -
 examples/cgit.psgi                   |   8 -
 examples/newswww.psgi                |   8 -
 examples/public-inbox.psgi           |   9 --
 examples/unsubscribe.psgi            |   1 -
 lib/PublicInbox/CompressNoop.pm      |  22 +++
 lib/PublicInbox/Feed.pm              |  22 ++-
 lib/PublicInbox/GetlineBody.pm       |   4 +-
 lib/PublicInbox/GzipFilter.pm        | 168 +++++++++++++++++---
 lib/PublicInbox/HTTP.pm              |   7 +
 lib/PublicInbox/HTTPD.pm             |   5 +-
 lib/PublicInbox/IMAP.pm              |   1 +
 lib/PublicInbox/Mbox.pm              | 137 +++++++++--------
 lib/PublicInbox/MboxGz.pm            |  81 ++++------
 lib/PublicInbox/NNTP.pm              |   1 +
 lib/PublicInbox/Qspawn.pm            |   6 +-
 lib/PublicInbox/SearchView.pm        |  40 ++---
 lib/PublicInbox/View.pm              | 219 ++++++++++++++-------------
 lib/PublicInbox/WWW.pm               |   9 +-
 lib/PublicInbox/WwwAtomStream.pm     |  66 ++++----
 lib/PublicInbox/WwwAttach.pm         |  63 ++++++--
 lib/PublicInbox/WwwListing.pm        |  24 +--
 lib/PublicInbox/WwwStatic.pm         |  14 +-
 lib/PublicInbox/WwwStream.pm         | 110 ++++++++------
 lib/PublicInbox/WwwText.pm           |  26 ++--
 script/public-inbox-httpd            |   9 --
 script/public-inbox.cgi              |   7 -
 t/httpd-corner.psgi                  |   7 +
 t/httpd-corner.t                     |   9 +-
 t/plack.t                            |   4 +
 t/psgi_attach.t                      | 162 +++++++++++---------
 t/psgi_text.t                        |  33 +++-
 t/psgi_v2.t                          |  80 ++++++++--
 t/www_listing.t                      |   8 +-
 t/www_static.t                       |  11 +-
 xt/httpd-async-stream.t              | 104 +++++++++++++
 41 files changed, 964 insertions(+), 554 deletions(-)
 create mode 100644 lib/PublicInbox/CompressNoop.pm
 create mode 100644 xt/httpd-async-stream.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | sort options + mbox downloads above
-- links below jump to the message on this page --
2020-07-05 23:27  7% [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
2020-07-05 23:27  6% ` [PATCH 22/43] wwwstream: reduce object graph depth Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).