From 577e421a0815e66f965bd4317adad5aeea3cc52a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 27 Nov 2023 10:23:48 +0000 Subject: www: qs_html: fix escaping of `q' param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our use of MID_ESC characters was only intended for the pathname component of URIs and not appropriate for the query string component. So use a different $unsafe parameter list for uri_escape to make the result appropriate for query strings by disallowing [\&\'\+=] characters. Most notably, this change also allows us to accept `/' (slash) unescaped to make dfn: queries nicer to look at. Finally, we'll also add a ascii_html call on the URI-escaped result as an extra safety measure even though it's not really needed. As far as I can tell, the code without this fix didn't result in in an HTML injection since all our uses of uri_escape did escape angle brackets. Reported-by: Ricardo Cañuelo Link: https://public-inbox.org/meta/87o7ff4nlk.fsf@collabora.com/ Tested-by: Ricardo Cañuelo --- t/psgi_search.t | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 't') diff --git a/t/psgi_search.t b/t/psgi_search.t index 289c34e7..8c981c6c 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -18,7 +18,8 @@ local $ENV{TZ} = 'UTC'; my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; -my $ibx = create_inbox 'git', indexlevel => 'full', tmpdir => "$tmpdir/1", sub { +my $ibx = create_inbox '26-git', indexlevel => 'full', tmpdir => "$tmpdir/1", +sub { my ($im) = @_; # n.b. these headers are not properly RFC2047-encoded $im->add(PublicInbox::Eml->new(< From: no subject at all To: git@vger.kernel.org +EOF + $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; +Message-ID: +From: +To: git@vger.kernel.org +Subject: git & ampersand + +hi +++ b/foo +x=y +s'more + EOF }; @@ -155,6 +167,19 @@ test_psgi(sub { $www->call(@_) }, sub { is($res->code, 200, 'successful mbox download w/ threads'); gunzip(\($res->content) => \(my $after)); isnt($before, $after); + + $res = $cb->(GET('/test/?q=git+%26+ampersand&x=A')); + is $res->code, 200, 'Atom hit with ampersand'; + unlike $res->content, qr/git\+&\+ampersand/, '& is HTML-escaped'; + + $res = $cb->(GET('/test/?q=%22hi+%2b%2b%2b+b/foo%22&x=A')); + is $res->code, 200, 'slashes and plusses search hit'; + like $res->content, qr!q=%22hi\+(?:%2[bB]){3}\+b/foo%22!, + '+ and " escaped, but slash not escaped in query'; + + $res = $cb->(GET(q{/test/?q=%22s'more%22&x=A})); + is $res->code, 200, 'single quote inside phrase'; + # TODO: more tests and odd cases }); done_testing(); -- cgit v1.2.3-24-ge0c7