user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 03/43] www*stream: gzip ->getline responses
Date: Sun,  5 Jul 2020 23:27:19 +0000	[thread overview]
Message-ID: <20200705232759.3161-4-e@yhbt.net> (raw)
In-Reply-To: <20200705232759.3161-1-e@yhbt.net>

Our most common endpoints, the HTML and Atom feed responses
streamed via ->getline, deserve to be gzipped.
---
 lib/PublicInbox/GzipFilter.pm    | 21 +++++++++++++++-----
 lib/PublicInbox/WwwAtomStream.pm | 25 ++++++++++++++++-------
 lib/PublicInbox/WwwStream.pm     | 34 ++++++++++++++++++++------------
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 115660cb1..95fced053 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -6,8 +6,9 @@ package PublicInbox::GzipFilter;
 use strict;
 use parent qw(Exporter);
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
-our @EXPORT_OK = qw(gzip_maybe);
+our @EXPORT_OK = qw(gzip_maybe gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
+my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
 
 sub new { bless {}, shift }
 
@@ -18,18 +19,28 @@ sub attach {
 	$self
 }
 
-sub gzip_maybe ($) {
-	my ($env) = @_;
+sub gzip_maybe ($$) {
+	my ($res_hdr, $env) = @_;
 	return if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/;
 
+	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
+	return if $err != Z_OK;
+
 	# in case Plack::Middleware::Deflater is loaded:
 	$env->{'plack.skip-deflater'} = 1;
 
-	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-	$err == Z_OK ? $gz : undef;
+	push @$res_hdr, @GZIP_HDRS;
+	$gz;
+}
+
+sub gzf_maybe ($$) {
+	my ($res_hdr, $env) = @_;
+	my $gz = gzip_maybe($res_hdr, $env) or return 0;
+	bless { gz => $gz }, __PACKAGE__;
 }
 
 # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
+# Also used for ->getline callbacks
 sub translate ($$) {
 	my $self = $_[0];
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 9dc24e16e..c407e343f 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -14,6 +14,7 @@ use Digest::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MsgTime qw(msg_timestamp);
+use PublicInbox::GzipFilter qw(gzf_maybe);
 
 # called by PSGI server after getline:
 sub close {}
@@ -26,18 +27,28 @@ sub new {
 
 sub response {
 	my ($class, $ctx, $code, $cb) = @_;
-	[ $code, [ 'Content-Type', 'application/atom+xml' ],
-	  $class->new($ctx, $cb) ]
+	my $h = [ 'Content-Type' => 'application/atom+xml' ];
+	my $self = $class->new($ctx, $cb);
+	$self->{gzf} = gzf_maybe($h, $ctx->{env});
+	[ $code, $h, $self ]
 }
 
 # called once for each message by PSGI server
 sub getline {
 	my ($self) = @_;
-	if (my $middle = $self->{cb}) {
-		my $smsg = $middle->($self->{ctx});
-		return feed_entry($self, $smsg) if $smsg;
-	}
-	delete $self->{cb} ? '</feed>' : undef;
+	my $buf = do {
+		if (my $middle = $self->{cb}) {
+			my $smsg = $middle->($self->{ctx});
+			feed_entry($self, $smsg) if $smsg;
+		}
+	} // (delete($self->{cb}) ? '</feed>' : undef);
+
+	# gzf may be GzipFilter, `undef' or `0'
+	my $gzf = $self->{gzf} or return $buf;
+
+	return $gzf->translate($buf) if defined $buf;
+	$self->{gzf} = 0; # next call to ->getline returns $buf (== undef)
+	$gzf->translate(undef);
 }
 
 # private
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 79ed6871e..c964dbd41 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -14,7 +14,7 @@ our @EXPORT_OK = qw(html_oneshot);
 use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl);
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
-use PublicInbox::GzipFilter qw(gzip_maybe);
+use PublicInbox::GzipFilter qw(gzip_maybe gzf_maybe);
 our $TOR_URL = 'https://www.torproject.org/';
 our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
 
@@ -41,8 +41,10 @@ sub new {
 
 sub response {
 	my ($class, $ctx, $code, $cb) = @_;
-	[ $code, [ 'Content-Type', 'text/html; charset=UTF-8' ],
-	  $class->new($ctx, $cb) ]
+	my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+	my $self = $class->new($ctx, $cb);
+	$self->{gzf} = gzf_maybe($h, $ctx->{env});
+	[ $code, $h, $self ]
 }
 
 sub _html_top ($) {
@@ -165,13 +167,20 @@ sub getline {
 	my ($self) = @_;
 	my $nr = $self->{nr}++;
 
-	return _html_top($self) if $nr == 0;
+	my $buf = do {
+		if ($nr == 0) {
+			_html_top($self);
+		} elsif (my $middle = $self->{cb}) {
+			$middle->($nr, $self->{ctx});
+		}
+	} // (delete($self->{cb}) ? _html_end($self) : undef);
 
-	if (my $middle = $self->{cb}) {
-		$middle = $middle->($nr, $self->{ctx}) and return $middle;
-	}
+	# gzf may be GzipFilter, `undef' or `0'
+	my $gzf = $self->{gzf} or return $buf;
 
-	delete $self->{cb} ? _html_end($self) : undef;
+	return $gzf->translate($buf) if defined $buf;
+	$self->{gzf} = 0; # next call to ->getline returns $buf (== undef)
+	$gzf->translate(undef);
 }
 
 sub html_oneshot ($$;$) {
@@ -181,8 +190,8 @@ sub html_oneshot ($$;$) {
 		base_url => base_url($ctx),
 	}, __PACKAGE__;
 	my @x;
-	my @h = ('Content-Type' => 'text/html; charset=UTF-8');
-	if (my $gz = gzip_maybe($ctx->{env})) {
+	my $h = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
+	if (my $gz = gzip_maybe($h, $ctx->{env})) {
 		my $err = $gz->deflate(_html_top($self), $x[0]);
 		die "gzip->deflate: $err" if $err != Z_OK;
 		if ($sref) {
@@ -193,15 +202,14 @@ sub html_oneshot ($$;$) {
 		die "gzip->deflate: $err" if $err != Z_OK;
 		$err = $gz->flush($x[0], Z_FINISH);
 		die "gzip->flush: $err" if $err != Z_OK;
-		push @h, qw(Vary Accept-Encoding Content-Encoding gzip);
 	} else {
 		@x = (_html_top($self), $sref ? $$sref : (), _html_end($self));
 	}
 
 	my $len = 0;
 	$len += bytes::length($_) for @x;
-	push @h, 'Content-Length', $len;
-	[ $code, \@h, \@x ]
+	push @$h, 'Content-Length', $len;
+	[ $code, $h, \@x ]
 }
 
 1;

  parent reply	other threads:[~2020-07-05 23:28 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-05 23:27 [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
2020-07-05 23:27 ` [PATCH 01/43] gzipfilter: minor cleanups Eric Wong
2020-07-05 23:27 ` [PATCH 02/43] wwwstream: oneshot: perform gzip without middleware Eric Wong
2020-07-05 23:27 ` Eric Wong [this message]
2020-07-05 23:27 ` [PATCH 04/43] wwwtext: gzip text/plain responses, as well Eric Wong
2020-07-05 23:27 ` [PATCH 05/43] wwwtext: switch to html_oneshot Eric Wong
2020-07-05 23:27 ` [PATCH 06/43] www: need: use WwwStream::html_oneshot Eric Wong
2020-07-05 23:27 ` [PATCH 07/43] wwwlisting: use GzipFilter for HTML Eric Wong
2020-07-05 23:27 ` [PATCH 08/43] gzipfilter: replace Compress::Raw::Deflate usages Eric Wong
2020-07-05 23:27 ` [PATCH 09/43] {gzip,noop}filter: ->zmore returns undef, always Eric Wong
2020-07-05 23:27 ` [PATCH 10/43] mbox: remove html_oneshot import Eric Wong
2020-07-05 23:27 ` [PATCH 11/43] wwwstatic: support gzipped directory listings Eric Wong
2020-07-05 23:27 ` [PATCH 12/43] qspawn: learn to gzip streaming responses Eric Wong
2020-07-05 23:27 ` [PATCH 13/43] stop auto-loading Plack::Middleware::Deflater Eric Wong
2020-07-05 23:27 ` [PATCH 14/43] mboxgz: do asynchronous git blob retrievals Eric Wong
2020-07-05 23:27 ` [PATCH 15/43] mboxgz: reduce hash depth Eric Wong
2020-07-05 23:27 ` [PATCH 16/43] mbox: async blob fetch for "single message" raw mboxrd Eric Wong
2020-07-05 23:27 ` [PATCH 17/43] wwwatomstream: simplify feed_update callers Eric Wong
2020-07-05 23:27 ` [PATCH 18/43] wwwatomstream: use PublicInbox::Inbox->modified for feed_updated Eric Wong
2020-07-05 23:27 ` [PATCH 19/43] wwwatomstream: reuse $ctx as $self Eric Wong
2020-07-05 23:27 ` [PATCH 20/43] xt/httpd-async-stream: allow more options Eric Wong
2020-07-05 23:27 ` [PATCH 21/43] wwwatomstream: support async blob fetch Eric Wong
2020-07-05 23:27 ` [PATCH 22/43] wwwstream: reduce object graph depth Eric Wong
2020-07-05 23:27 ` [PATCH 23/43] wwwstream: reduce blob fetch paths for ->getline Eric Wong
2020-07-05 23:27 ` [PATCH 24/43] www: start making gzipfilter the parent response class Eric Wong
2020-07-05 23:27 ` [PATCH 25/43] remove unused/redundant zlib-related imports Eric Wong
2020-07-05 23:27 ` [PATCH 26/43] wwwstream: use parent.pm and no warnings Eric Wong
2020-07-05 23:27 ` [PATCH 27/43] wwwstream: subclass off GzipFilter Eric Wong
2020-07-05 23:27 ` [PATCH 28/43] view: make /$INBOX/$MSGID/ permalink async Eric Wong
2020-07-05 23:27 ` [PATCH 29/43] view: /$INBOX/$MSGID/t/ reads blobs asynchronously Eric Wong
2020-07-05 23:27 ` [PATCH 30/43] view: update /$INBOX/$MSGID/T/ to be async Eric Wong
2020-07-05 23:27 ` [PATCH 31/43] feed: generate_i: eliminate pointless loop Eric Wong
2020-07-05 23:27 ` [PATCH 32/43] feed: /$INBOX/new.html fetches blobs asynchronously Eric Wong
2020-07-05 23:27 ` [PATCH 33/43] ssearchview: /$INBOX/?q=$QUERY&x=t uses async blobs Eric Wong
2020-07-05 23:27 ` [PATCH 34/43] view: eml_entry: reduce parameters Eric Wong
2020-07-05 23:27 ` [PATCH 35/43] view: /$INBOX/$MSGID/t/: avoid extra hash lookup in eml case Eric Wong
2020-07-05 23:27 ` [PATCH 36/43] wwwstream: eliminate ::response, use html_oneshot Eric Wong
2020-07-05 23:27 ` [PATCH 37/43] www: update internal docs Eric Wong
2020-07-05 23:27 ` [PATCH 38/43] view: simplify eml_entry callers further Eric Wong
2020-07-05 23:27 ` [PATCH 39/43] wwwtext: simplify gzf_maybe use Eric Wong
2020-07-05 23:27 ` [PATCH 40/43] wwwattach: support async blob retrievals Eric Wong
2020-07-05 23:27 ` [PATCH 41/43] gzipfilter: drop HTTP connection on bugs or data corruption Eric Wong
2020-07-05 23:27 ` [PATCH 42/43] daemon: warn on missing blobs Eric Wong
2020-07-05 23:27 ` [PATCH 43/43] gzipfilter: check http->{forward} for client disconnects Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200705232759.3161-4-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 03/43] www*stream: gzip ->getline responses' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git