user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] www: fix for running under mount paths
Date: Mon, 16 May 2016 07:56:03 +0000	[thread overview]
Message-ID: <20160516075603.15700-4-e@80x24.org> (raw)
In-Reply-To: <20160516075603.15700-1-e@80x24.org>

We try to avoid issues like these by using relative URLs
in hrefs, but we can't avoid the problem with Location:
for redirects and Atom feeds which are likely to be
rehosted elsewhere.

We also reorder some of the code to work around a weird
issue on the psgi-plack mailing list:
<20160516073750.GA11931@dcvr.yhbt.net>
(Somewhere on https://groups.google.com/group/psgi-plack
 but it's probably not bookmarkable)
---
 lib/PublicInbox/Feed.pm  | 36 +++++++++++-----------
 lib/PublicInbox/Inbox.pm | 20 +++++++++++++
 lib/PublicInbox/WWW.pm   | 15 ++++++----
 t/inbox.t                | 12 ++++++++
 t/psgi_mount.t           | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 138 insertions(+), 23 deletions(-)
 create mode 100644 t/inbox.t
 create mode 100644 t/psgi_mount.t

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 0d3bc81..52fe0db 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -61,9 +61,9 @@ sub atom_header {
 
 sub emit_atom {
 	my ($cb, $ctx) = @_;
+	my $feed_opts = get_feedopts($ctx);
 	my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]);
 	my $max = $ctx->{max} || MAX_PER_PAGE;
-	my $feed_opts = get_feedopts($ctx);
 	my $x = atom_header($feed_opts);
 	my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir});
 	each_recent_blob($ctx, sub {
@@ -95,8 +95,8 @@ sub emit_atom_thread {
 	my ($cb, $ctx) = @_;
 	my $res = $ctx->{srch}->get_thread($ctx->{mid});
 	return _no_thread($cb) unless $res->{total};
-	my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]);
 	my $feed_opts = get_feedopts($ctx);
+	my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]);
 
 	my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
 	$html_url =~ s!/t\.atom\z!/!;
@@ -112,10 +112,10 @@ sub emit_atom_thread {
 
 sub emit_html_index {
 	my ($res, $ctx) = @_;
+	my $feed_opts = get_feedopts($ctx);
 	my $fh = $res->([200,['Content-Type'=>'text/html; charset=UTF-8']]);
 
 	my $max = $ctx->{max} || MAX_PER_PAGE;
-	my $feed_opts = get_feedopts($ctx);
 
 	my $title = ascii_html($feed_opts->{description} || '');
 	my ($footer, $param, $last);
@@ -261,15 +261,15 @@ sub get_feedopts {
 	my ($ctx) = @_;
 	my $pi_config = $ctx->{pi_config};
 	my $inbox = $ctx->{inbox};
+	my $obj = $ctx->{-inbox};
 	my $cgi = $ctx->{cgi};
-	my %rv;
-	if (open my $fh, '<', "$ctx->{git_dir}/description") {
-		chomp($rv{description} = <$fh>);
-	} else {
-		$rv{description} = '($GIT_DIR/description missing)';
-	}
+	my %rv = ( description => $obj ? $obj->description : 'FIXME' );
 
-	if ($pi_config && defined $inbox && $inbox ne '') {
+	if ($obj) {
+		$rv{address} = $obj->{address};
+		$rv{id_addr} = $obj->{-primary_address};
+	} elsif ($pi_config && defined $inbox && $inbox ne '') {
+		# TODO: remove
 		my $addr = $pi_config->get($inbox, 'address') || "";
 		$rv{address} = $addr;
 		$addr = $addr->[0] if ref($addr);
@@ -278,19 +278,19 @@ sub get_feedopts {
 	$rv{id_addr} ||= 'public-inbox@example.com';
 
 	my $url_base;
-	if ($cgi) {
-		$url_base = $cgi->base->as_string . $inbox;
+	if ($obj) {
+		$url_base = $obj->base_url($cgi); # CGI may be undef
 		if (my $mid = $ctx->{mid}) { # per-thread feed:
-			$rv{atomurl} = "$url_base/$mid/t.atom";
+			$rv{atomurl} = "$url_base$mid/t.atom";
 		} else {
-			$rv{atomurl} = "$url_base/new.atom";
+			$rv{atomurl} = $url_base."new.atom";
 		}
 	} else {
-		$url_base = "http://example.com";
-		$rv{atomurl} = "$url_base/new.atom";
+		$url_base = 'http://example.com/';
+		$rv{atomurl} = $url_base.'new.atom';
 	}
-	$rv{url} ||= "$url_base/";
-	$rv{midurl} = "$url_base/";
+	$rv{url} ||= $url_base;
+	$rv{midurl} = $url_base;
 
 	\%rv;
 }
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 5d9fdb3..4bcab96 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -64,6 +64,7 @@ sub cloneurl {
 	$self->{cloneurl} = \@url;
 }
 
+# TODO: can we remove this?
 sub footer_html {
 	my ($self) = @_;
 	my $footer = $self->{footer};
@@ -73,4 +74,23 @@ sub footer_html {
 	$self->{footer} = $footer;
 }
 
+sub base_url {
+	my ($self, $prq) = @_; # Plack::Request
+	if (defined $prq) {
+		my $url = $prq->base->as_string;
+		$url .= '/' if $url !~ m!/\z!; # for mount in Plack::Builder
+		$url .= $self->{name} . '/';
+	} else {
+		# either called from a non-PSGI environment (e.g. NNTP/POP3)
+		$self->{-base_url} ||= do {
+			my $url = $self->{url};
+			# expand protocol-relative URLs to HTTPS if we're
+			# not inside a web server
+			$url = "https:$url" if $url =~ m!\A//!;
+			$url .= '/' if $url !~ m!/\z!;
+			$url;
+		};
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 94ab032..95288a7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -39,7 +39,7 @@ sub run {
 sub call {
 	my ($self, $env) = @_;
 	my $cgi = Plack::Request->new($env);
-	my $ctx = { cgi => $cgi, pi_config => $self->{pi_config} };
+	my $ctx = {cgi => $cgi, pi_config => $self->{pi_config}, www => $self};
 	my $path_info = $cgi->path_info;
 
 	my $method = $cgi->method;
@@ -124,7 +124,7 @@ sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
 
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox {
-	my ($self, $ctx, $inbox, $mid) = @_;
+	my ($self, $ctx, $inbox) = @_;
 	my $obj = $ctx->{pi_config}->lookup_name($inbox);
 	if (defined $obj) {
 		$ctx->{git_dir} = $obj->{mainrepo};
@@ -144,7 +144,7 @@ sub invalid_inbox {
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox_mid {
 	my ($self, $ctx, $inbox, $mid) = @_;
-	my $ret = invalid_inbox($self, $ctx, $inbox, $mid);
+	my $ret = invalid_inbox($self, $ctx, $inbox);
 	return $ret if $ret;
 
 	$ctx->{mid} = $mid = uri_unescape($mid);
@@ -382,9 +382,14 @@ sub legacy_redirects {
 sub r301 {
 	my ($ctx, $inbox, $mid, $suffix) = @_;
 	my $cgi = $ctx->{cgi};
-	my $url;
+	my $obj = $ctx->{-inbox};
+	unless ($obj) {
+		my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox);
+		return $r404 if $r404;
+		$obj = $ctx->{-inbox};
+	}
+	my $url = $obj->base_url($cgi);
 	my $qs = $cgi->env->{QUERY_STRING};
-	$url = $cgi->base->as_string . $inbox . '/';
 	$url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
 	$url .= $suffix if (defined $suffix);
 	$url .= "?$qs" if $qs ne '';
diff --git a/t/inbox.t b/t/inbox.t
new file mode 100644
index 0000000..45ba1df
--- /dev/null
+++ b/t/inbox.t
@@ -0,0 +1,12 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::Inbox';
+my $x = PublicInbox::Inbox->new({url => '//example.com/test/'});
+is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative');
+$x = PublicInbox::Inbox->new({url => 'http://example.com/test'});
+is($x->base_url, 'http://example.com/test/', 'added trailing slash');
+
+done_testing();
diff --git a/t/psgi_mount.t b/t/psgi_mount.t
new file mode 100644
index 0000000..c1c1b0c
--- /dev/null
+++ b/t/psgi_mount.t
@@ -0,0 +1,78 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+my $tmpdir = tempdir('psgi-path-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $maindir = "$tmpdir/main.git";
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+my @mods = qw(HTTP::Request::Common Plack::Request Plack::Test URI::Escape);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for plack.t" if $@;
+}
+use_ok $_ foreach @mods;
+use PublicInbox::Import;
+use PublicInbox::Git;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+use Plack::Builder;
+use Plack::App::URLMap;
+my $config = PublicInbox::Config->new({
+	"$cfgpfx.address" => $addr,
+	"$cfgpfx.mainrepo" => $maindir,
+});
+is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+my $git = PublicInbox::Git->new($maindir);
+my $im = PublicInbox::Import->new($git, 'test', $addr);
+{
+	my $mime = Email::MIME->new(<<EOF);
+From: Me <me\@example.com>
+To: You <you\@example.com>
+Cc: $addr
+Message-Id: <blah\@example.com>
+Subject: hihi
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+zzzzzz
+EOF
+	$im->add($mime);
+	$im->done;
+}
+
+my $www = PublicInbox::WWW->new($config);
+my $app = builder {
+	enable 'Head';
+	mount '/a' => builder { sub { $www->call(@_) } };
+	mount '/b' => builder { sub { $www->call(@_) } };
+};
+
+test_psgi($app, sub {
+	my ($cb) = @_;
+	my $res;
+	# Atom feed:
+	$res = $cb->(GET('/a/test/new.atom'));
+	like($res->content, qr!\bhttp://[^/]+/a/test/!,
+		'URLs which exist in Atom feed are mount-aware');
+	unlike($res->content, qr!\b\Qhttp://[^/]+/test/\E!,
+		'No URLs which are not mount-aware');
+
+	# redirects
+	$res = $cb->(GET('/a/test/blah%40example.com/'));
+	is($res->code, 200, 'OK with URLMap mount');
+	$res = $cb->(GET('/a/test/blah%40example.com/raw'));
+	is($res->code, 200, 'OK with URLMap mount');
+	$res = $cb->(GET('/a/test/m/blah%40example.com.html'));
+	is($res->header('Location'),
+		'http://localhost/a/test/blah%40example.com/',
+		'redirect functions properly under mount');
+
+	$res = $cb->(GET('/test/blah%40example.com/'));
+	is($res->code, 404, 'intentional 404 with URLMap mount');
+
+});
+
+done_testing();

      parent reply	other threads:[~2016-05-16  7:56 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-16  7:56 [PATCH 0/3] inbox objectification cleanups + fixes Eric Wong
2016-05-16  7:56 ` [PATCH 1/3] declare Inbox object for reusability Eric Wong
2016-05-16  7:56 ` [PATCH 2/3] config: allow taking an existing reference Eric Wong
2016-05-16  7:56 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160516075603.15700-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).