From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 183C71FCC0 for ; Mon, 16 May 2016 07:56:08 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] www: fix for running under mount paths Date: Mon, 16 May 2016 07:56:03 +0000 Message-Id: <20160516075603.15700-4-e@80x24.org> In-Reply-To: <20160516075603.15700-1-e@80x24.org> References: <20160516075603.15700-1-e@80x24.org> List-Id: We try to avoid issues like these by using relative URLs in hrefs, but we can't avoid the problem with Location: for redirects and Atom feeds which are likely to be rehosted elsewhere. We also reorder some of the code to work around a weird issue on the psgi-plack mailing list: <20160516073750.GA11931@dcvr.yhbt.net> (Somewhere on https://groups.google.com/group/psgi-plack but it's probably not bookmarkable) --- lib/PublicInbox/Feed.pm | 36 +++++++++++----------- lib/PublicInbox/Inbox.pm | 20 +++++++++++++ lib/PublicInbox/WWW.pm | 15 ++++++---- t/inbox.t | 12 ++++++++ t/psgi_mount.t | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 23 deletions(-) create mode 100644 t/inbox.t create mode 100644 t/psgi_mount.t diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 0d3bc81..52fe0db 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -61,9 +61,9 @@ sub atom_header { sub emit_atom { my ($cb, $ctx) = @_; + my $feed_opts = get_feedopts($ctx); my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]); my $max = $ctx->{max} || MAX_PER_PAGE; - my $feed_opts = get_feedopts($ctx); my $x = atom_header($feed_opts); my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); each_recent_blob($ctx, sub { @@ -95,8 +95,8 @@ sub emit_atom_thread { my ($cb, $ctx) = @_; my $res = $ctx->{srch}->get_thread($ctx->{mid}); return _no_thread($cb) unless $res->{total}; - my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]); my $feed_opts = get_feedopts($ctx); + my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]); my $html_url = $feed_opts->{atomurl} = $ctx->{self_url}; $html_url =~ s!/t\.atom\z!/!; @@ -112,10 +112,10 @@ sub emit_atom_thread { sub emit_html_index { my ($res, $ctx) = @_; + my $feed_opts = get_feedopts($ctx); my $fh = $res->([200,['Content-Type'=>'text/html; charset=UTF-8']]); my $max = $ctx->{max} || MAX_PER_PAGE; - my $feed_opts = get_feedopts($ctx); my $title = ascii_html($feed_opts->{description} || ''); my ($footer, $param, $last); @@ -261,15 +261,15 @@ sub get_feedopts { my ($ctx) = @_; my $pi_config = $ctx->{pi_config}; my $inbox = $ctx->{inbox}; + my $obj = $ctx->{-inbox}; my $cgi = $ctx->{cgi}; - my %rv; - if (open my $fh, '<', "$ctx->{git_dir}/description") { - chomp($rv{description} = <$fh>); - } else { - $rv{description} = '($GIT_DIR/description missing)'; - } + my %rv = ( description => $obj ? $obj->description : 'FIXME' ); - if ($pi_config && defined $inbox && $inbox ne '') { + if ($obj) { + $rv{address} = $obj->{address}; + $rv{id_addr} = $obj->{-primary_address}; + } elsif ($pi_config && defined $inbox && $inbox ne '') { + # TODO: remove my $addr = $pi_config->get($inbox, 'address') || ""; $rv{address} = $addr; $addr = $addr->[0] if ref($addr); @@ -278,19 +278,19 @@ sub get_feedopts { $rv{id_addr} ||= 'public-inbox@example.com'; my $url_base; - if ($cgi) { - $url_base = $cgi->base->as_string . $inbox; + if ($obj) { + $url_base = $obj->base_url($cgi); # CGI may be undef if (my $mid = $ctx->{mid}) { # per-thread feed: - $rv{atomurl} = "$url_base/$mid/t.atom"; + $rv{atomurl} = "$url_base$mid/t.atom"; } else { - $rv{atomurl} = "$url_base/new.atom"; + $rv{atomurl} = $url_base."new.atom"; } } else { - $url_base = "http://example.com"; - $rv{atomurl} = "$url_base/new.atom"; + $url_base = 'http://example.com/'; + $rv{atomurl} = $url_base.'new.atom'; } - $rv{url} ||= "$url_base/"; - $rv{midurl} = "$url_base/"; + $rv{url} ||= $url_base; + $rv{midurl} = $url_base; \%rv; } diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 5d9fdb3..4bcab96 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -64,6 +64,7 @@ sub cloneurl { $self->{cloneurl} = \@url; } +# TODO: can we remove this? sub footer_html { my ($self) = @_; my $footer = $self->{footer}; @@ -73,4 +74,23 @@ sub footer_html { $self->{footer} = $footer; } +sub base_url { + my ($self, $prq) = @_; # Plack::Request + if (defined $prq) { + my $url = $prq->base->as_string; + $url .= '/' if $url !~ m!/\z!; # for mount in Plack::Builder + $url .= $self->{name} . '/'; + } else { + # either called from a non-PSGI environment (e.g. NNTP/POP3) + $self->{-base_url} ||= do { + my $url = $self->{url}; + # expand protocol-relative URLs to HTTPS if we're + # not inside a web server + $url = "https:$url" if $url =~ m!\A//!; + $url .= '/' if $url !~ m!/\z!; + $url; + }; + } +} + 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 94ab032..95288a7 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -39,7 +39,7 @@ sub run { sub call { my ($self, $env) = @_; my $cgi = Plack::Request->new($env); - my $ctx = { cgi => $cgi, pi_config => $self->{pi_config} }; + my $ctx = {cgi => $cgi, pi_config => $self->{pi_config}, www => $self}; my $path_info = $cgi->path_info; my $method = $cgi->method; @@ -124,7 +124,7 @@ sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } # returns undef if valid, array ref response if invalid sub invalid_inbox { - my ($self, $ctx, $inbox, $mid) = @_; + my ($self, $ctx, $inbox) = @_; my $obj = $ctx->{pi_config}->lookup_name($inbox); if (defined $obj) { $ctx->{git_dir} = $obj->{mainrepo}; @@ -144,7 +144,7 @@ sub invalid_inbox { # returns undef if valid, array ref response if invalid sub invalid_inbox_mid { my ($self, $ctx, $inbox, $mid) = @_; - my $ret = invalid_inbox($self, $ctx, $inbox, $mid); + my $ret = invalid_inbox($self, $ctx, $inbox); return $ret if $ret; $ctx->{mid} = $mid = uri_unescape($mid); @@ -382,9 +382,14 @@ sub legacy_redirects { sub r301 { my ($ctx, $inbox, $mid, $suffix) = @_; my $cgi = $ctx->{cgi}; - my $url; + my $obj = $ctx->{-inbox}; + unless ($obj) { + my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox); + return $r404 if $r404; + $obj = $ctx->{-inbox}; + } + my $url = $obj->base_url($cgi); my $qs = $cgi->env->{QUERY_STRING}; - $url = $cgi->base->as_string . $inbox . '/'; $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; diff --git a/t/inbox.t b/t/inbox.t new file mode 100644 index 0000000..45ba1df --- /dev/null +++ b/t/inbox.t @@ -0,0 +1,12 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use_ok 'PublicInbox::Inbox'; +my $x = PublicInbox::Inbox->new({url => '//example.com/test/'}); +is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative'); +$x = PublicInbox::Inbox->new({url => 'http://example.com/test'}); +is($x->base_url, 'http://example.com/test/', 'added trailing slash'); + +done_testing(); diff --git a/t/psgi_mount.t b/t/psgi_mount.t new file mode 100644 index 0000000..c1c1b0c --- /dev/null +++ b/t/psgi_mount.t @@ -0,0 +1,78 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +my $tmpdir = tempdir('psgi-path-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $maindir = "$tmpdir/main.git"; +my $addr = 'test-public@example.com'; +my $cfgpfx = "publicinbox.test"; +my @mods = qw(HTTP::Request::Common Plack::Request Plack::Test URI::Escape); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for plack.t" if $@; +} +use_ok $_ foreach @mods; +use PublicInbox::Import; +use PublicInbox::Git; +use PublicInbox::Config; +use PublicInbox::WWW; +use Plack::Builder; +use Plack::App::URLMap; +my $config = PublicInbox::Config->new({ + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $maindir, +}); +is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); +my $git = PublicInbox::Git->new($maindir); +my $im = PublicInbox::Import->new($git, 'test', $addr); +{ + my $mime = Email::MIME->new(< +To: You +Cc: $addr +Message-Id: +Subject: hihi +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +zzzzzz +EOF + $im->add($mime); + $im->done; +} + +my $www = PublicInbox::WWW->new($config); +my $app = builder { + enable 'Head'; + mount '/a' => builder { sub { $www->call(@_) } }; + mount '/b' => builder { sub { $www->call(@_) } }; +}; + +test_psgi($app, sub { + my ($cb) = @_; + my $res; + # Atom feed: + $res = $cb->(GET('/a/test/new.atom')); + like($res->content, qr!\bhttp://[^/]+/a/test/!, + 'URLs which exist in Atom feed are mount-aware'); + unlike($res->content, qr!\b\Qhttp://[^/]+/test/\E!, + 'No URLs which are not mount-aware'); + + # redirects + $res = $cb->(GET('/a/test/blah%40example.com/')); + is($res->code, 200, 'OK with URLMap mount'); + $res = $cb->(GET('/a/test/blah%40example.com/raw')); + is($res->code, 200, 'OK with URLMap mount'); + $res = $cb->(GET('/a/test/m/blah%40example.com.html')); + is($res->header('Location'), + 'http://localhost/a/test/blah%40example.com/', + 'redirect functions properly under mount'); + + $res = $cb->(GET('/test/blah%40example.com/')); + is($res->code, 404, 'intentional 404 with URLMap mount'); + +}); + +done_testing();