From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_BLACK shortcircuit=no autolearn=no autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E12DB1F4BD; Tue, 1 Oct 2019 07:13:42 +0000 (UTC) Date: Tue, 1 Oct 2019 07:13:42 +0000 From: Eric Wong To: edef Cc: meta@public-inbox.org, hi@alyssa.is Subject: [PATCH] www: fix absolute URLs when mounted under a subdir Message-ID: <20191001071342.GA9845@dcvr> References: <20190926030357.GA21009@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20190926030357.GA21009@dcvr> List-Id: Eric Wong wrote: > Also, I suspect the mbox Archived-At headers could be wrong > and need a similar change... Maybe Atom feeds, too. Yup, mboxrd code needed changing. Atom feeds already had full URLs (and tests), so I added some test cases to t/psgi_mount.t and fixed the remaining cases. Just pushed this out to master: ---------8<----------- Subject: [PATCH] www: fix absolute URLs when mounted under a subdir While we avoid generating absolute URLs in most cases, our "git clone" instructions and URL headers in mboxrd files contain full URLs. So do the same thing we do for WwwAtomStream and pre-generate the full URL before Plack::App::URLMap changes $env->{PATH_INFO} and $env->{SCRIPT_NAME} back to their original values. Reported-by: edef Link: https://public-inbox.org/meta/cover.0f97c47bb88db8b875be7497289d8fedd3b11991.1569296942.git-series.edef@edef.eu/ --- lib/PublicInbox/Mbox.pm | 5 ++++- lib/PublicInbox/WwwStream.pm | 13 +++++++++--- t/psgi_mount.t | 38 ++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 6d902e6c..67b671f5 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -60,10 +60,12 @@ sub getline { sub close {} # noop +# /$INBOX/$MESSAGE_ID/raw sub emit_raw { my ($ctx) = @_; my $mid = $ctx->{mid}; my $ibx = $ctx->{-inbox}; + $ctx->{base_url} = $ibx->base_url($ctx->{env}); my ($mref, $more, $id, $prev, $next); if (my $over = $ibx->over) { my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return; @@ -97,7 +99,7 @@ sub msg_hdr ($$;$) { $header_obj->header_set($d); } my $ibx = $ctx->{-inbox}; - my $base = $ibx->base_url($ctx->{env}); + my $base = $ctx->{base_url}; $mid = $ctx->{mid} unless defined $mid; $mid = mid_escape($mid); my @append = ( @@ -246,6 +248,7 @@ use PublicInbox::Hval qw/to_filename/; sub new { my ($class, $ctx, $cb) = @_; my $buf = ''; + $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env}); bless { buf => \$buf, gz => IO::Compress::Gzip->new(\$buf, Time => 0), diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 7399b0ad..f5338c39 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -19,7 +19,15 @@ sub close {} sub new { my ($class, $ctx, $cb) = @_; - bless { nr => 0, cb => $cb || *close, ctx => $ctx }, $class; + + my $base_url = $ctx->{-inbox}->base_url($ctx->{env}); + chop $base_url; # no trailing slash for clone + bless { + nr => 0, + cb => $cb || *close, + ctx => $ctx, + base_url => $base_url, + }, $class; } sub response { @@ -83,8 +91,7 @@ sub _html_end { my $desc = ascii_html($ibx->description); my (%seen, @urls); - my $http = $ibx->base_url($ctx->{env}); - chop $http; # no trailing slash for clone + my $http = $self->{base_url}; my $max = $ibx->max_git_epoch; my $dir = (split(m!/!, $http))[-1]; if (defined($max)) { # v2 diff --git a/t/psgi_mount.t b/t/psgi_mount.t index 05dbd736..8da2bc89 100644 --- a/t/psgi_mount.t +++ b/t/psgi_mount.t @@ -60,11 +60,24 @@ test_psgi($app, sub { unlike($res->content, qr!\b\Qhttp://[^/]+/test/\E!, 'No URLs which are not mount-aware'); - # redirects + $res = $cb->(GET('/a/test/new.html')); + like($res->content, qr!git clone --mirror http://[^/]+/a/test\b!, + 'clone URL in new.html is mount-aware'); + $res = $cb->(GET('/a/test/blah%40example.com/')); is($res->code, 200, 'OK with URLMap mount'); + like($res->content, qr!git clone --mirror http://[^/]+/a/test\b!, + 'clone URL in /$INBOX/$MESSAGE_ID/ is mount-aware'); + $res = $cb->(GET('/a/test/blah%40example.com/raw')); is($res->code, 200, 'OK with URLMap mount'); + like($res->content, qr!^List-Archive: !m, + 'List-Archive set in /raw mboxrd'); + like($res->content, + qr!^Archived-At: !m, + 'Archived-At set in /raw mboxrd'); + + # redirects $res = $cb->(GET('/a/test/m/blah%40example.com.html')); is($res->header('Location'), 'http://localhost/a/test/blah@example.com/', @@ -72,7 +85,28 @@ test_psgi($app, sub { $res = $cb->(GET('/test/blah%40example.com/')); is($res->code, 404, 'intentional 404 with URLMap mount'); - }); +SKIP: { + my @mods = qw(DBI DBD::SQLite Search::Xapian IO::Uncompress::Gunzip); + foreach my $mod (@mods) { + eval "require $mod" or skip "$mod not available: $@", 2; + } + my $ibx = $config->lookup_name('test'); + PublicInbox::SearchIdx->new($ibx, 1)->index_sync; + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET('/a/test/blah@example.com/t.mbox.gz')); + my $gz = $res->content; + my $raw; + IO::Uncompress::Gunzip::gunzip(\$gz => \$raw); + like($raw, qr!^List-Archive: !m, + 'List-Archive set in /t.mbox.gz mboxrd'); + like($raw, + qr!^Archived-At:\x20 + !mx, + 'Archived-At set in /t.mbox.gz mboxrd'); + }); +} + done_testing(); -- EW