From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DCD2C20281; Tue, 23 May 2017 22:02:32 +0000 (UTC) Date: Tue, 23 May 2017 22:02:32 +0000 From: Eric Wong To: =?utf-8?B?w4Z2YXIgQXJuZmrDtnLDsA==?= Bjarmason Cc: meta@public-inbox.org Subject: [PATCH] www: do not mangle characters from search queries Message-ID: <20170523220232.GA27792@dcvr> References: <20170523183940.GA9543@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20170523183940.GA9543@dcvr> List-Id: Eric Wong wrote: > Thanks for the report, I'm testing the patch below on > public-inbox.org and it seems fine. I'll need to write a test for > this... OK, I've pushed this out to public-inbox.git and deployed to all onions, too; will look at the MID unescaping in a bit. Thanks again. ----8<------ Subject: [PATCH] www: do not mangle characters from search queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Ævar Arnfjörð Bjarmason https://public-inbox.org/meta/CACBZZX5Gnow08r=0A1J_kt3a=zpGyMfvsqu8nAN7kacNnDm+dg@mail.gmail.com/ --- MANIFEST | 1 + lib/PublicInbox/MID.pm | 2 +- lib/PublicInbox/SearchView.pm | 9 ++++-- lib/PublicInbox/WWW.pm | 1 + t/psgi_search.t | 71 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 t/psgi_search.t diff --git a/MANIFEST b/MANIFEST index d1e0952..3bfd9a4 100644 --- a/MANIFEST +++ b/MANIFEST @@ -155,6 +155,7 @@ t/plack.t t/precheck.t t/psgi_attach.t t/psgi_mount.t +t/psgi_search.t t/psgi_text.t t/qspawn.t t/search-thr-index.t diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 1c2d75c..2613c8e 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -6,7 +6,7 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape/; +our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC/; use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; use constant MID_MAX => 40; # SHA-1 hex length diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index cec87c6..42bc648 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -222,7 +222,9 @@ sub mset_thread { sub ctx_prepare { my ($q, $ctx) = @_; - my $qh = ascii_html($q->{'q'}); + my $qh = $q->{'q'}; + utf8::decode($qh); + $qh = ascii_html($qh); $ctx->{-q_value_html} = $qh; $ctx->{-atom} = '?'.$q->qs_html(x => 'A', r => undef); $ctx->{-title_html} = "$qh - search results"; @@ -254,8 +256,9 @@ sub adump { package PublicInbox::SearchQuery; use strict; use warnings; +use URI::Escape qw(uri_escape); use PublicInbox::Hval; -use PublicInbox::MID qw(mid_escape); +use PublicInbox::MID qw(MID_ESC); sub new { my ($class, $qp) = @_; @@ -280,7 +283,7 @@ sub qs_html { $self = $tmp; } - my $q = mid_escape($self->{'q'}); + my $q = uri_escape($self->{'q'}, MID_ESC); $q =~ s/%20/+/g; # improve URL readability my $qs = "q=$q"; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 13b3921..f3c702e 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -42,6 +42,7 @@ sub call { # we don't care about multi-value my %qp = map { + utf8::decode($_); my ($k, $v) = split('=', uri_unescape($_), 2); $v = '' unless defined $v; $v =~ tr/+/ /; diff --git a/t/psgi_search.t b/t/psgi_search.t new file mode 100644 index 0000000..cc9c9cf --- /dev/null +++ b/t/psgi_search.t @@ -0,0 +1,71 @@ +# Copyright (C) 2017 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use Email::MIME; +use PublicInbox::Config; +use PublicInbox::WWW; +my @mods = qw(PublicInbox::SearchIdx HTTP::Request::Common Plack::Test + URI::Escape Plack::Builder); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_search.t" if $@; +} +use_ok $_ foreach @mods; +my $tmpdir = tempdir('pi-psgi-search.XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $git_dir = "$tmpdir/a.git"; + +is(0, system(qw(git init -q --bare), $git_dir), "git init (main)"); +my $rw = PublicInbox::SearchIdx->new($git_dir, 1); +ok($rw, "search indexer created"); +my $data = <<'EOF'; +Subject: test +Message-Id: +From: Ævar Arnfjörð Bjarmason +To: git@vger.kernel.org + +EOF + +my $num = 0; +# nb. using internal API, fragile! +my $xdb = $rw->_xdb_acquire; +$xdb->begin_transaction; + +foreach (reverse split(/\n\n/, $data)) { + $_ .= "\n"; + my $mime = Email::MIME->new(\$_); + my $bytes = bytes::length($mime->as_string); + my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored'); + my $mid = $mime->header('Message-Id'); + ok($doc_id, 'message added: '. $mid); +} + +$xdb->commit_transaction; +$rw = undef; + +my $cfgpfx = "publicinbox.test"; +my $config = PublicInbox::Config->new({ + "$cfgpfx.address" => 'git@vger.kernel.org', + "$cfgpfx.mainrepo" => $git_dir, +}); +my $www = PublicInbox::WWW->new($config); +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + my $res; + $res = $cb->(GET('/test/?q=%C3%86var')); + my $html = $res->content; + like($html, qr/Ævar - /, 'HTML escaped in title'); + my @res = ($html =~ m/\?q=(.+var)\b/g); + ok(scalar(@res), 'saw query strings'); + my %uniq = map { $_ => 1 } @res; + is(1, scalar keys %uniq, 'all query values identical in HTML'); + is('%C3%86var', (keys %uniq)[0], 'matches original query'); + ok(index($html, 'by Ævar Arnfjörð Bjarmason') >= 0, + "displayed Ævar's name properly in HTML"); +}); + +done_testing(); + +1; -- EW