user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] additional tests for bad Message-IDs in URLs
  2018-06-13 22:43 14%       ` [PATCH] www: use undecoded paths for Message-ID extraction Eric Wong
@ 2018-06-26  7:46  5%         ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2018-06-26  7:46 UTC (permalink / raw)
  To: Leah Neukirchen; +Cc: meta

Followup-to: 73cfed86d8a8287a
   ("www: use undecoded paths for Message-ID extraction")

Reported-by: Leah Neukirchen <leah@vuxu.org>
  https://public-inbox.org/meta/8736xsb5s5.fsf@vuxu.org/
---
 Oops, forgot this earlier :x

 MANIFEST          |  1 +
 t/psgi_bad_mids.t | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 t/psgi_bad_mids.t

diff --git a/MANIFEST b/MANIFEST
index 08a8ef4..68c79c9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -182,6 +182,7 @@ t/perf-threading.t
 t/plack.t
 t/precheck.t
 t/psgi_attach.t
+t/psgi_bad_mids.t
 t/psgi_mount.t
 t/psgi_search.t
 t/psgi_text.t
diff --git a/t/psgi_bad_mids.t b/t/psgi_bad_mids.t
new file mode 100644
index 0000000..5008f5b
--- /dev/null
+++ b/t/psgi_bad_mids.t
@@ -0,0 +1,85 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+		URI::Escape Plack::Builder);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for psgi_bad_mids.t" if $@;
+}
+use_ok($_) for @mods;
+use_ok 'PublicInbox::V2Writable';
+my $mainrepo = tempdir('pi-bad-mids-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $cfgpfx = "publicinbox.bad-mids";
+my $ibx = {
+	mainrepo => $mainrepo,
+	name => 'bad-mids',
+	version => 2,
+	-primary_address => 'test@example.com',
+};
+$ibx = PublicInbox::Inbox->new($ibx);
+my $im = PublicInbox::V2Writable->new($ibx, 1);
+$im->{parallel} = 0;
+
+my $msgs = <<'';
+F1V5OR6NMF.3M649JTLO9IXD@tux.localdomain/hehe1"'<foo
+F1V5NB0PTU.3U0DCVGAJ750Z@tux.localdomain"'<>/foo
+F1V5MIHGCU.2ABINKW6WBE8N@tux.localdomain/raw
+F1V5LF9D9C.2QT5PGXZQ050E@tux.localdomain/t.atom
+F1V58X3CMU.2DCCVAKQZGADV@tux.localdomain/../../../../foo
+F1TVKINT3G.2S6I36MXMHYG6@tux.localdomain" onclick="alert(1)"
+
+my @mids = split(/\n/, $msgs);
+my $i = 0;
+foreach my $mid (@mids) {
+	my $data = << "";
+Subject: test
+Message-ID: <$mid>
+From: a\@example.com
+To: b\@example.com
+Date: Fri, 02 Oct 1993 00:00:0$i +0000
+
+
+	my $mime = PublicInbox::MIME->new(\$data);
+	ok($im->add($mime), "added $mid");
+	$i++
+}
+$im->done;
+
+my $cfg = {
+	"$cfgpfx.address" => $ibx->{-primary_address},
+	"$cfgpfx.mainrepo" => $mainrepo,
+};
+my $config = PublicInbox::Config->new($cfg);
+my $www = PublicInbox::WWW->new($config);
+test_psgi(sub { $www->call(@_) }, sub {
+	my ($cb) = @_;
+	my $res = $cb->(GET('/bad-mids/'));
+	is($res->code, 200, 'got 200 OK listing');
+	my $raw = $res->content;
+	foreach my $mid (@mids) {
+		ok(index($raw, $mid) < 0, "escaped $mid");
+	}
+
+	my (@xmids) = ($raw =~ m!\bhref="([^"]+)/t\.mbox\.gz"!sg);
+	is(scalar(@xmids), scalar(@mids),
+		'got escaped links to all messages');
+
+	@xmids = reverse @xmids;
+	foreach my $i (0..$#xmids) {
+		$res = $cb->(GET("/bad-mids/$xmids[$i]/raw"));
+		is($res->code, 200, 'got 200 OK raw message');
+		like($res->content, qr/Message-ID: <\Q$mids[$i]\E>/s,
+			'retrieved correct message');
+	}
+});
+
+done_testing();
+
+1;
-- 
EW

^ permalink raw reply related	[relevance 5%]

* [PATCH] www: use undecoded paths for Message-ID extraction
  @ 2018-06-13 22:43 14%       ` Eric Wong
  2018-06-26  7:46  5%         ` [PATCH] additional tests for bad Message-IDs in URLs Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2018-06-13 22:43 UTC (permalink / raw)
  To: Leah Neukirchen; +Cc: meta

> Leah Neukirchen <leah@vuxu.org> wrote:
> > During testing, we also found another thing when obscure characters
> > are used in Message-IDs, esp. / and ?.
> > 
> > E.g. using a Message-ID of <F1WYEAZPOF.3LOD2T7ZHY9I1@localdomain/raw/T>
> > will create a corrupt link.  Some more "ideas" are at
> > https://inbox.vuxu.org/pi-test/

I guess I'm spoiled by Rack where PATH_INFO is undecoded :x
However, REQUEST_URI is specified in PSGI specs(*)

Very lightly tested, but this seems to work; additions to the
test suite will be necessary...

------8<----
Subject: [PATCH] www: use undecoded paths for Message-ID extraction

In PSGI, PATH_INFO contains URI-decoded paths which cause
problems when Message-IDs contain ambiguous characters for used
for routing.  Instead, extract the undecoded path from
REQUEST_URI and use that.

Reported-by: Leah Neukirchen <leah@vuxu.org>
  https://public-inbox.org/meta/8736xsb5s5.fsf@vuxu.org/
---
 lib/PublicInbox/WWW.pm | 40 ++++++++++++++++++++++++++++------------
 t/cgi.t                |  2 ++
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 24e24f1..c1c3926 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -36,6 +36,17 @@ sub run {
 	PublicInbox::WWW->new->call($req->env);
 }
 
+my %path_re_cache;
+
+sub path_re ($) {
+	my $sn = $_[0]->{SCRIPT_NAME};
+	$path_re_cache{$sn} ||= do {
+		$sn = '/'.$sn unless index($sn, '/') == 0;
+		$sn =~ s!/\z!!;
+		qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
+	};
+}
+
 sub call {
 	my ($self, $env) = @_;
 	my $ctx = { env => $env, www => $self };
@@ -50,7 +61,8 @@ sub call {
 	} split(/[&;]+/, $env->{QUERY_STRING});
 	$ctx->{qp} = \%qp;
 
-	my $path_info = $env->{PATH_INFO};
+	# not using $env->{PATH_INFO} here since that's already decoded
+	my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env));
 	my $method = $env->{REQUEST_METHOD};
 
 	if ($method eq 'POST') {
@@ -91,13 +103,13 @@ sub call {
 		invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn);
 	# in case people leave off the trailing slash:
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) {
-		my ($inbox, $mid, $suffix) = ($1, $2, $3);
+		my ($inbox, $mid_ue, $suffix) = ($1, $2, $3);
 		$suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/';
-		r301($ctx, $inbox, $mid, $suffix);
+		r301($ctx, $inbox, $mid_ue, $suffix);
 
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) {
-		my ($inbox, $mid) = ($1, $2);
-		r301($ctx, $inbox, $mid, '#R');
+		my ($inbox, $mid_ue) = ($1, $2);
+		r301($ctx, $inbox, $mid_ue, '#R');
 
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
 		r301($ctx, $1, $2);
@@ -164,11 +176,11 @@ sub invalid_inbox ($$) {
 
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox_mid {
-	my ($ctx, $inbox, $mid) = @_;
+	my ($ctx, $inbox, $mid_ue) = @_;
 	my $ret = invalid_inbox($ctx, $inbox);
 	return $ret if $ret;
 
-	$ctx->{mid} = $mid;
+	my $mid = $ctx->{mid} = uri_unescape($mid_ue);
 	my $ibx = $ctx->{-inbox};
 	if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
 		my ($x2, $x38) = ($1, $2);
@@ -177,7 +189,7 @@ sub invalid_inbox_mid {
 		require Email::Simple;
 		my $s = Email::Simple->new($str);
 		$mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
-		return r301($ctx, $inbox, $mid);
+		return r301($ctx, $inbox, mid_escape($mid));
 	}
 	undef;
 }
@@ -352,7 +364,7 @@ sub legacy_redirects {
 }
 
 sub r301 {
-	my ($ctx, $inbox, $mid, $suffix) = @_;
+	my ($ctx, $inbox, $mid_ue, $suffix) = @_;
 	my $obj = $ctx->{-inbox};
 	unless ($obj) {
 		my $r404 = invalid_inbox($ctx, $inbox);
@@ -361,7 +373,11 @@ sub r301 {
 	}
 	my $url = $obj->base_url($ctx->{env});
 	my $qs = $ctx->{env}->{QUERY_STRING};
-	$url .= (mid_escape($mid) . '/') if (defined $mid);
+	if (defined $mid_ue) {
+		# common, and much nicer as '@' than '%40':
+		$mid_ue =~ s/%40/@/g;
+		$url .= $mid_ue . '/';
+	}
 	$url .= $suffix if (defined $suffix);
 	$url .= "?$qs" if $qs ne '';
 
@@ -371,9 +387,9 @@ sub r301 {
 }
 
 sub msg_page {
-	my ($ctx, $inbox, $mid, $e) = @_;
+	my ($ctx, $inbox, $mid_ue, $e) = @_;
 	my $ret;
-	$ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret;
+	$ret = invalid_inbox_mid($ctx, $inbox, $mid_ue) and return $ret;
 	'' eq $e and return get_mid_html($ctx);
 	'T/' eq $e and return get_thread($ctx, 1);
 	't/' eq $e and return get_thread($ctx);
diff --git a/t/cgi.t b/t/cgi.t
index bd92ca3..2e2476d 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -225,6 +225,8 @@ sub cgi_run {
 	my %env = (
 		PATH_INFO => $_[0],
 		QUERY_STRING => $_[1] || "",
+		SCRIPT_NAME => '',
+		REQUEST_URI => $_[0] . ($_[1] ? "?$_[1]" : ''),
 		REQUEST_METHOD => $_[2] || "GET",
 		GATEWAY_INTERFACE => 'CGI/1.1',
 		HTTP_ACCEPT => '*/*',
-- 
(*) git clone https://github.com/plack/psgi-specs.git

^ permalink raw reply related	[relevance 14%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2018-06-09 17:06     Some points on public-inbox Leah Neukirchen
2018-06-12 10:09     ` Eric Wong
2018-06-12 11:31       ` Leah Neukirchen
2018-06-13 21:40         ` Eric Wong
2018-06-13 22:43 14%       ` [PATCH] www: use undecoded paths for Message-ID extraction Eric Wong
2018-06-26  7:46  5%         ` [PATCH] additional tests for bad Message-IDs in URLs Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).