From 577e421a0815e66f965bd4317adad5aeea3cc52a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 27 Nov 2023 10:23:48 +0000 Subject: www: qs_html: fix escaping of `q' param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our use of MID_ESC characters was only intended for the pathname component of URIs and not appropriate for the query string component. So use a different $unsafe parameter list for uri_escape to make the result appropriate for query strings by disallowing [\&\'\+=] characters. Most notably, this change also allows us to accept `/' (slash) unescaped to make dfn: queries nicer to look at. Finally, we'll also add a ascii_html call on the URI-escaped result as an extra safety measure even though it's not really needed. As far as I can tell, the code without this fix didn't result in in an HTML injection since all our uses of uri_escape did escape angle brackets. Reported-by: Ricardo Cañuelo Link: https://public-inbox.org/meta/87o7ff4nlk.fsf@collabora.com/ Tested-by: Ricardo Cañuelo --- lib/PublicInbox/MID.pm | 2 +- lib/PublicInbox/SearchQuery.pm | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index b1ae9939..97cf3a54 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -125,7 +125,7 @@ sub uniq_mids ($;$) { \@ret; } -# RFC3986, section 3.3: +# RFC3986, section 3.3 (pathnames only): sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\'\(\)\*\+,;=:@' } sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) } diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm index 96246c53..747e3249 100644 --- a/lib/PublicInbox/SearchQuery.pm +++ b/lib/PublicInbox/SearchQuery.pm @@ -6,7 +6,7 @@ package PublicInbox::SearchQuery; use strict; use v5.10.1; use URI::Escape qw(uri_escape); -use PublicInbox::MID qw(MID_ESC); +use PublicInbox::Hval qw(ascii_html); our $LIM = 200; sub new { @@ -35,9 +35,13 @@ sub qs_html { } my $qs = ''; if (defined(my $q = $self->{'q'})) { - $q = uri_escape($q, MID_ESC); + # not using MID_ESC since that's for the path component and + # this is for the query component. Unlike MID_ESC, + # this disallows [\&\'\+=] and allows slash [/] for + # nicer looking dfn: queries + $q = uri_escape($q, '^A-Za-z0-9\-\._~!\$\(\)\*,;:@/'); $q =~ s/%20/+/g; # improve URL readability - $qs .= "q=$q"; + $qs .= 'q='.ascii_html($q); } if (my $o = $self->{o}) { # ignore o == 0 $qs .= "&o=$o"; -- cgit v1.2.3-24-ge0c7