From 82bc926ebe1ceba78dffd330c6bac92732bb41e0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 31 May 2023 22:10:01 +0000 Subject: www: more restrictive query string parsing Only allow single-character query keys to prevent clients from wasting memory in Perl's hash tables. We'll also perform the utf8::decode and tr/+/ / calls once on the whole query string at once to reduce op calls. This also avoids creating an empty hash in the common case when the QUERY_STRING is empty and instead relies on auto-vivification of Perl. --- lib/PublicInbox/Feed.pm | 13 ++++--------- lib/PublicInbox/ViewVCS.pm | 3 +-- lib/PublicInbox/WWW.pm | 23 +++++++++++++++-------- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index de1e7dfe..69a92f8b 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -33,15 +33,11 @@ sub generate_html_index { my ($ctx) = @_; # if the 'r' query parameter is given, it is a legacy permalink # which we must continue supporting: - my $qp = $ctx->{qp}; - my $ibx = $ctx->{ibx}; - if ($qp && !$qp->{r} && $ibx->over) { + !$ctx->{qp}->{r} && $ctx->{ibx}->over and return PublicInbox::View::index_topics($ctx); - } - my $env = $ctx->{env}; - my $url = $ibx->base_url($env) . 'new.html'; - my $qs = $env->{QUERY_STRING}; + my $url = $ctx->{ibx}->base_url($ctx->{env}) . 'new.html'; + my $qs = $ctx->{env}->{QUERY_STRING}; $url .= "?$qs" if $qs ne ''; [302, [ 'Location', $url, 'Content-Type', 'text/plain'], [ "Redirecting to $url\n" ] ]; @@ -88,7 +84,6 @@ sub recent_msgs { return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over; # only for rare v1 inboxes which aren't indexed at all - my $qp = $ctx->{qp}; my $hex = '[a-f0-9]'; my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!; my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!; @@ -96,7 +91,7 @@ sub recent_msgs { # revision ranges may be specified my $range = 'HEAD'; - my $r = $qp->{r} if $qp; + my $r = $ctx->{qp}->{r}; if ($r && ($r =~ /\A(?:$refhex\.\.)?$refhex\z/o)) { $range = $r; } diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index eb757089..5529ed5b 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -596,10 +596,9 @@ sub show_blob { # git->cat_async callback # GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME sub show ($$;$) { my ($ctx, $oid_b, $fn) = @_; - my $qp = $ctx->{qp}; my $hints = $ctx->{hints} = {}; while (my ($from, $to) = each %QP_MAP) { - defined(my $v = $qp->{$from}) or next; + my $v = $ctx->{qp}->{$from} // next; $hints->{$to} = $v if $v ne ''; } $ctx->{fn} = $fn; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index a8f1ad17..9919f975 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -45,14 +45,21 @@ sub call { my $ctx = { env => $env, www => $self }; # we don't care about multi-value - %{$ctx->{qp}} = map { - utf8::decode($_); - tr/+/ /; - my ($k, $v) = split(/=/, $_, 2); - # none of the keys we care about will need escaping - ($k // '', uri_unescape($v // '')) - } split(/[&;]+/, $env->{QUERY_STRING}); - + # '0' isn't a QUERY_STRING we care about + if (my $qs = $env->{QUERY_STRING}) { + utf8::decode($qs); + $qs =~ tr/+/ /; + %{$ctx->{qp}} = map { + # we only use single-char query param keys + if (s/\A([A-Za-z])=//) { + $1 => uri_unescape($_) + } elsif (/\A[a-z]\z/) { # some boolean options + $_ => '' + } else { + () # ignored + } + } split(/[&;]+/, $qs); + } my $path_info = path_info_raw($env); my $method = $env->{REQUEST_METHOD}; -- cgit v1.2.3-24-ge0c7