* [PATCH 6/6] wwwstatic: add directory listing + index.html support
2020-01-01 10:38 7% [PATCH 0/6] wwwstatic: support directory listings Eric Wong
@ 2020-01-01 10:38 3% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-01-01 10:38 UTC (permalink / raw)
To: meta
It's now possible to use WwwStatic as a standalone PSGI
app to serve static files and recreate the award-winning
web design of https://public-inbox.org/ :>
---
MANIFEST | 1 +
lib/PublicInbox/Cgit.pm | 6 +-
lib/PublicInbox/WWW.pm | 15 +--
lib/PublicInbox/WwwStatic.pm | 198 ++++++++++++++++++++++++++++++++++-
t/www_static.t | 96 +++++++++++++++++
5 files changed, 294 insertions(+), 22 deletions(-)
create mode 100644 t/www_static.t
diff --git a/MANIFEST b/MANIFEST
index f649bbef..16c92c36 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -290,6 +290,7 @@ t/watch_filter_rubylang.t
t/watch_maildir.t
t/watch_maildir_v2.t
t/www_listing.t
+t/www_static.t
t/xcpdb-reshard.t
xt/git-http-backend.t
xt/git_async_cmp.t
diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index c0b1a73b..c42f8847 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -16,7 +16,6 @@ use PublicInbox::Git;
use warnings;
use PublicInbox::Qspawn;
use PublicInbox::WwwStatic qw(r);
-use Plack::MIME;
sub locate_cgit ($) {
my ($pi_config) = @_;
@@ -114,9 +113,8 @@ sub call {
}
} elsif ($path_info =~ m!$self->{static}! &&
defined($cgit_data = $self->{cgit_data})) {
- my $f = $1;
- return PublicInbox::WwwStatic::response($env, [], $cgit_data.$f,
- Plack::MIME->mime_type($f));
+ my $f = $cgit_data.$1; # {static} only matches leading slash
+ return PublicInbox::WwwStatic::response($env, [], $f);
}
my $cgi_env = { PATH_INFO => $path_info };
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 99f9f1dc..efe7c8ca 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -22,7 +22,7 @@ use PublicInbox::MID qw(mid_escape);
require PublicInbox::Git;
use PublicInbox::GitHTTPBackend;
use PublicInbox::UserContent;
-use PublicInbox::WwwStatic qw(r);
+use PublicInbox::WwwStatic qw(r path_info_raw);
# TODO: consider a routing tree now that we have more endpoints:
our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -43,19 +43,6 @@ sub run {
PublicInbox::WWW->new->call($req->env);
}
-# PATH_INFO is decoded, and we want the undecoded original
-my %path_re_cache;
-sub path_info_raw ($) {
- my ($env) = @_;
- my $sn = $env->{SCRIPT_NAME};
- my $re = $path_re_cache{$sn} ||= do {
- $sn = '/'.$sn unless index($sn, '/') == 0;
- $sn =~ s!/\z!!;
- qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
- };
- $env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO};
-}
-
sub call {
my ($self, $env) = @_;
my $ctx = { env => $env, www => $self };
diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm
index ce4bfe9b..bc42236e 100644
--- a/lib/PublicInbox/WwwStatic.pm
+++ b/lib/PublicInbox/WwwStatic.pm
@@ -1,19 +1,48 @@
# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# This package can either be a PSGI response body for a static file
+# OR a standalone PSGI app which returns the above PSGI response body
+# (or an HTML directory listing).
+#
+# It encapsulates the "autoindex", "index", and "gzip_static"
+# functionality of nginx.
package PublicInbox::WwwStatic;
use strict;
use parent qw(Exporter);
+use bytes ();
use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
+use POSIX qw(strftime lround);
use HTTP::Date qw(time2str);
use HTTP::Status qw(status_message);
use Errno qw(EACCES ENOTDIR ENOENT);
-our @EXPORT_OK = qw(@NO_CACHE r);
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::Hval qw(ascii_html);
+use Plack::MIME;
+our @EXPORT_OK = qw(@NO_CACHE r path_info_raw);
our @NO_CACHE = ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT',
'Pragma', 'no-cache',
'Cache-Control', 'no-cache, max-age=0, must-revalidate');
+our $STYLE = <<'EOF';
+<style>
+@media screen {
+ *{background:#000;color:#ccc}
+ a{color:#69f;text-decoration:none}
+ a:visited{color:#96f}
+}
+@media screen AND (prefers-color-scheme:light) {
+ *{background:#fff;color:#333}
+ a{color:#00f;text-decoration:none}
+ a:visited{color:#808}
+}
+</style>
+EOF
+
+$STYLE =~ s/^\s*//gm;
+$STYLE =~ tr/\n//d;
+
sub r ($;$) {
my ($code, $msg) = @_;
$msg ||= status_message($code);
@@ -69,8 +98,28 @@ sub prepare_range {
($code, $len);
}
-sub response {
+# returns a PSGI arrayref response iff .gz and non-.gz mtimes match
+sub try_gzip_static ($$$$) {
my ($env, $h, $path, $type) = @_;
+ return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i;
+ my $mtime;
+ return unless -f $path && defined(($mtime = (stat(_))[9]));
+ my $gz = "$path.gz";
+ return unless -f $gz && (stat(_))[9] == $mtime;
+ my $res = response($env, $h, $gz, $type);
+ return if ($res->[0] > 300 || $res->[0] < 200);
+ push @{$res->[1]}, qw(Cache-Control no-transform Content-Encoding gzip);
+ $res;
+}
+
+sub response ($$$;$) {
+ my ($env, $h, $path, $type) = @_;
+ $type //= Plack::MIME->mime_type($path) // 'application/octet-stream';
+ if ($path !~ /\.gz\z/i) {
+ if (my $res = try_gzip_static($env, $h, $path, $type)) {
+ return $res;
+ }
+ }
my $in;
if ($env->{REQUEST_METHOD} eq 'HEAD') {
@@ -108,7 +157,7 @@ sub response {
[ $code, $h, $body ];
}
-# called by PSGI servers:
+# called by PSGI servers on each response chunk:
sub getline {
my ($self) = @_;
my $len = $self->{len} or return; # undef, tells server we're done
@@ -132,6 +181,147 @@ sub getline {
undef;
}
-sub close {} # noop, just let everything go out-of-scope
+sub close {} # noop, called by PSGI server, just let everything go out-of-scope
+
+# OO interface for use as a Plack app
+sub new {
+ my ($class, %opt) = @_;
+ my $index = $opt{'index'} // [ 'index.html' ];
+ $index = [ $index ] if defined($index) && ref($index) ne 'ARRAY';
+ $index = undef if scalar(@$index) == 0;
+ my $style = $opt{style};
+ if (defined $style) {
+ $style = \$style unless ref($style);
+ }
+ my $docroot = $opt{docroot};
+ die "`docroot' not set" unless defined($docroot) && $docroot ne '';
+ bless {
+ docroot => $docroot,
+ index => $index,
+ autoindex => $opt{autoindex},
+ style => $style // \$STYLE,
+ }, $class;
+}
+
+# PATH_INFO is decoded, and we want the undecoded original
+my %path_re_cache;
+sub path_info_raw ($) {
+ my ($env) = @_;
+ my $sn = $env->{SCRIPT_NAME};
+ my $re = $path_re_cache{$sn} ||= do {
+ $sn = '/'.$sn unless index($sn, '/') == 0;
+ $sn =~ s!/\z!!;
+ qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
+ };
+ $env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO};
+}
+
+sub redirect_slash ($) {
+ my ($env) = @_;
+ my $url = $env->{'psgi.url_scheme'} . '://';
+ my $host_port = $env->{HTTP_HOST} //
+ "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
+ $url .= $host_port . path_info_raw($env) . '/';
+ my $body = "Redirecting to $url\n";
+ [ 302, [ qw(Content-Type text/plain), 'Location', $url,
+ 'Content-Length', length($body) ], [ $body ] ]
+}
+
+sub human_size ($) {
+ my ($size) = @_;
+ my $suffix = '';
+ for my $s (qw(K M G T P)) {
+ last if $size < 1024;
+ $size /= 1024;
+ if ($size <= 1024) {
+ $suffix = $s;
+ last;
+ }
+ }
+ lround($size).$suffix;
+}
+
+# By default, this serves "index.html" if it exists in the given directory.
+# Otherwise, it generates a directory listing (autoindex), but only when
+# the `autoindex' option is true; if it is unset or 0, this returns 404.
+sub dir_response ($$$) {
+ my ($self, $env, $fs_path) = @_;
+ if (my $index = $self->{'index'}) { # serve index.html or similar
+ for my $html (@$index) {
+ my $p = $fs_path . $html;
+ my $res = response($env, [], $p);
+ return $res if $res->[0] != 404;
+ }
+ }
+ return r(404) unless $self->{autoindex};
+ opendir(my $dh, $fs_path) or do {
+ return r(404) if ($! == ENOENT || $! == ENOTDIR);
+ return r(403) if $! == EACCES;
+ return r(500);
+ };
+ my @entries = grep(!/\A\./, readdir($dh));
+ $dh = undef;
+ my (%dirs, %other, %want_gz);
+ my $path_info = $env->{PATH_INFO};
+ push @entries, '..' if $path_info ne '/';
+ for my $base (@entries) {
+ my $href = ascii_html(uri_escape_utf8($base));
+ my $name = ascii_html($base);
+ my @st = stat($fs_path . $base) or next; # unlikely
+ my ($gzipped, $uncompressed, $hsize);
+ my $entry = '';
+ my $mtime = $st[9];
+ if (-d _) {
+ $href .= '/';
+ $name .= '/';
+ $hsize = '-';
+ $dirs{"$base\0$mtime"} = \$entry;
+ } elsif (-f _) {
+ $other{"$base\0$mtime"} = \$entry;
+ if ($base !~ /\.gz\z/i) {
+ $want_gz{"$base.gz\0$mtime"} = undef;
+ }
+ $hsize = human_size($st[7]);
+ } else {
+ next;
+ }
+ # 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
+ $hsize = sprintf('% 8s', $hsize);
+ my $pad = 54 - length($name);
+ $pad = 1 if $pad <= 0;
+ $entry .= qq(<a\nhref="$href">$name</a>) . (' ' x $pad);
+ $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
+ $entry .= $mtime . $hsize;
+ }
+
+ # filter out '.gz' files as long as the mtime matches the
+ # uncompressed version
+ delete(@other{keys %want_gz});
+ @entries = ((map { ${$dirs{$_}} } sort keys %dirs),
+ (map { ${$other{$_}} } sort keys %other));
+
+ my $path_info_html = ascii_html($path_info);
+ my $body = "<html><head><title>Index of $path_info_html</title>" .
+ ${$self->{style}} .
+ "</head><body><pre>Index of $path_info_html</pre><hr><pre>\n";
+ $body .= join("\n", @entries) . "</pre><hr></body></html>\n";
+ [ 200, [ qw(Content-Type text/html
+ Content-Length), bytes::length($body) ], [ $body ] ]
+}
+
+sub call { # PSGI app endpoint
+ my ($self, $env) = @_;
+ return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/;
+ my $path_info = $env->{PATH_INFO};
+ return r(403) if index($path_info, "\0") >= 0;
+ my (@parts) = split(m!/+!, $path_info, -1);
+ return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne '';
+
+ my $fs_path = join('/', $self->{docroot}, @parts);
+ return dir_response($self, $env, $fs_path) if $parts[-1] eq '';
+
+ my $res = response($env, [], $fs_path);
+ $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res;
+}
1;
diff --git a/t/www_static.t b/t/www_static.t
new file mode 100644
index 00000000..5f2e3380
--- /dev/null
+++ b/t/www_static.t
@@ -0,0 +1,96 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::TestCommon;
+my ($tmpdir, $for_destroy) = tmpdir();
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
+require_mods(@mods);
+use_ok $_ foreach @mods;
+use_ok 'PublicInbox::WwwStatic';
+
+my $app = sub {
+ my $ws = PublicInbox::WwwStatic->new(docroot => $tmpdir, @_);
+ sub { $ws->call(shift) };
+};
+
+test_psgi($app->(), sub {
+ my $cb = shift;
+ my $res = $cb->(GET('/'));
+ is($res->code, 404, '404 on "/" by default');
+ open my $fh, '>', "$tmpdir/index.html" or die;
+ print $fh 'hi' or die;
+ close $fh or die;
+ $res = $cb->(GET('/'));
+ is($res->code, 200, '200 with index.html');
+ is($res->content, 'hi', 'default index.html returned');
+ $res = $cb->(HEAD('/'));
+ is($res->code, 200, '200 on HEAD /');
+ is($res->content, '', 'no content');
+ is($res->header('Content-Length'), '2', 'content-length set');
+ like($res->header('Content-Type'), qr!^text/html\b!,
+ 'content-type is html');
+});
+
+test_psgi($app->(autoindex => 1, index => []), sub {
+ my $cb = shift;
+ my $res = $cb->(GET('/'));
+ my $updir = 'href="../">../</a>';
+ is($res->code, 200, '200 with autoindex default');
+ my $ls = $res->content;
+ like($ls, qr/index\.html/, 'got listing with index.html');
+ ok(index($ls, $updir) < 0, 'no updir at /');
+ mkdir("$tmpdir/dir") or die;
+ rename("$tmpdir/index.html", "$tmpdir/dir/index.html") or die;
+
+ $res = $cb->(GET('/dir/'));
+ is($res->code, 200, '200 with autoindex for dir/');
+ $ls = $res->content;
+ ok(index($ls, $updir) > 0, 'updir at /dir/');
+
+ for my $up (qw(/../ .. /dir/.. /dir/../)) {
+ is($cb->(GET($up))->code, 403, "`$up' traversal rejected");
+ }
+
+ $res = $cb->(GET('/dir'));
+ is($res->code, 302, '302 w/o slash');
+ like($res->header('Location'), qr!://[^/]+/dir/\z!,
+ 'redirected w/ slash');
+
+ rename("$tmpdir/dir/index.html", "$tmpdir/dir/foo") or die;
+ link("$tmpdir/dir/foo", "$tmpdir/dir/foo.gz") or die;
+ $res = $cb->(GET('/dir/'));
+ unlike($res->content, qr/>foo\.gz</,
+ '.gz file hidden if mtime matches uncompressed');
+ like($res->content, qr/>foo</, 'uncompressed foo shown');
+
+ $res = $cb->(GET('/dir/foo/bar'));
+ is($res->code, 404, 'using file as dir fails');
+
+ unlink("$tmpdir/dir/foo") or die;
+ $res = $cb->(GET('/dir/'));
+ like($res->content, qr/>foo\.gz</,
+ '.gz shown when no uncompressed version exists');
+
+ open my $fh, '>', "$tmpdir/dir/foo" or die;
+ print $fh "uncompressed\n" or die;
+ close $fh or die;
+ utime(0, 0, "$tmpdir/dir/foo") or die;
+ $res = $cb->(GET('/dir/'));
+ my $html = $res->content;
+ like($html, qr/>foo</, 'uncompressed foo shown');
+ like($html, qr/>foo\.gz</, 'gzipped foo shown on mtime mismatch');
+
+ $res = $cb->(GET('/dir/foo'));
+ is($res->content, "uncompressed\n",
+ 'got uncompressed on mtime mismatch');
+
+ utime(0, 0, "$tmpdir/dir/foo.gz") or die;
+ my $get = GET('/dir/foo');
+ $get->header('Accept-Encoding' => 'gzip');
+ $res = $cb->($get);
+ is($res->content, "hi", 'got compressed on mtime match');
+});
+
+done_testing();
^ permalink raw reply related [relevance 3%]
* [PATCH 0/6] wwwstatic: support directory listings
@ 2020-01-01 10:38 7% Eric Wong
2020-01-01 10:38 3% ` [PATCH 6/6] wwwstatic: add directory listing + index.html support Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-01-01 10:38 UTC (permalink / raw)
To: meta
Now it'll be possible to replicate the timeless web design
of https://public-inbox.org/ with our own PSGI code!
I imagine per-inbox docroots might be useful for serving git
bundles, tarball releases, and maybe altid snapshots,
too.
Eric Wong (6):
wwwstatic: implement Last-Modified and If-Modified-Since
www: move more logic into path_info_raw
wwwstatic: move r(...) functions here
wwwstatic: do not open() files for HEAD requests
wwwstatic: avoid TOCTTOU for FIFO check
wwwstatic: add directory listing + index.html support
MANIFEST | 1 +
lib/PublicInbox/Cgit.pm | 9 +-
lib/PublicInbox/GitHTTPBackend.pm | 19 +--
lib/PublicInbox/WWW.pm | 23 +--
lib/PublicInbox/WwwHighlight.pm | 9 +-
lib/PublicInbox/WwwStatic.pm | 256 ++++++++++++++++++++++++++++--
t/www_static.t | 96 +++++++++++
xt/git-http-backend.t | 20 +++
8 files changed, 368 insertions(+), 65 deletions(-)
create mode 100644 t/www_static.t
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-01-01 10:38 7% [PATCH 0/6] wwwstatic: support directory listings Eric Wong
2020-01-01 10:38 3% ` [PATCH 6/6] wwwstatic: add directory listing + index.html support Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).