about summary refs log tree commit homepage
path: root/t
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-11-27 10:23:48 +0000
committerEric Wong <e@80x24.org>2023-11-27 21:25:46 +0000
commit577e421a0815e66f965bd4317adad5aeea3cc52a (patch)
treec53ac7d844f0134138ada0605dc6ac0368d4a40b /t
parentbedd1b759b3bcaa471bffc97391d8c04cdcbd550 (diff)
downloadpublic-inbox-577e421a0815e66f965bd4317adad5aeea3cc52a.tar.gz
Our use of MID_ESC characters was only intended for the pathname
component of URIs and not appropriate for the query string
component.  So use a different $unsafe parameter list for
uri_escape to make the result appropriate for query strings by
disallowing [\&\'\+=] characters.  Most notably, this change
also allows us to accept `/' (slash) unescaped to make dfn: queries
nicer to look at.

Finally, we'll also add a ascii_html call on the URI-escaped
result as an extra safety measure even though it's not really
needed.

As far as I can tell, the code without this fix didn't result in
in an HTML injection since all our uses of uri_escape did escape
angle brackets.

Reported-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
Link: https://public-inbox.org/meta/87o7ff4nlk.fsf@collabora.com/
Tested-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
Diffstat (limited to 't')
-rw-r--r--t/psgi_search.t27
1 files changed, 26 insertions, 1 deletions
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 289c34e7..8c981c6c 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -18,7 +18,8 @@ local $ENV{TZ} = 'UTC';
 my $digits = '10010260936330';
 my $ua = 'Pine.LNX.4.10';
 my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
-my $ibx = create_inbox 'git', indexlevel => 'full', tmpdir => "$tmpdir/1", sub {
+my $ibx = create_inbox '26-git', indexlevel => 'full', tmpdir => "$tmpdir/1",
+sub {
         my ($im) = @_;
         # n.b. these headers are not properly RFC2047-encoded
         $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT;
@@ -49,6 +50,17 @@ From: no subject at all <no-subject-at-all@example.com>
 To: git@vger.kernel.org
 
 EOF
+        $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
+Message-ID: <ampersand@example.com>
+From: <e@example.com>
+To: git@vger.kernel.org
+Subject: git & ampersand
+
+hi +++ b/foo
+x=y
+s'more
+
+EOF
 };
 
 my $cfg = cfg_new $tmpdir, <<EOF;
@@ -155,6 +167,19 @@ test_psgi(sub { $www->call(@_) }, sub {
         is($res->code, 200, 'successful mbox download w/ threads');
         gunzip(\($res->content) => \(my $after));
         isnt($before, $after);
+
+        $res = $cb->(GET('/test/?q=git+%26+ampersand&x=A'));
+        is $res->code, 200, 'Atom hit with ampersand';
+        unlike $res->content, qr/git\+&\+ampersand/, '& is HTML-escaped';
+
+        $res = $cb->(GET('/test/?q=%22hi+%2b%2b%2b+b/foo%22&x=A'));
+        is $res->code, 200, 'slashes and plusses search hit';
+        like $res->content, qr!q=%22hi\+(?:%2[bB]){3}\+b/foo%22!,
+                '+ and " escaped, but slash not escaped in query';
+
+        $res = $cb->(GET(q{/test/?q=%22s'more%22&x=A}));
+        is $res->code, 200, 'single quote inside phrase';
+        # TODO: more tests and odd cases
 });
 
 done_testing();