diff options
Diffstat (limited to 't/psgi_search.t')
-rw-r--r-- | t/psgi_search.t | 155 |
1 files changed, 110 insertions, 45 deletions
diff --git a/t/psgi_search.t b/t/psgi_search.t index 4fe315a1..8c981c6c 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -1,80 +1,95 @@ -# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use warnings; -use Test::More; -use Email::MIME; -use PublicInbox::Config; -use PublicInbox::Inbox; -use PublicInbox::InboxWritable; -use bytes (); # only for bytes::length +use v5.12; use PublicInbox::TestCommon; -my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test +use IO::Uncompress::Gunzip qw(gunzip); +use PublicInbox::Eml; +use PublicInbox::Inbox; +my @mods = qw(DBD::SQLite Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; +use_ok 'PublicInbox::SearchIdx'; my ($tmpdir, $for_destroy) = tmpdir(); +local $ENV{TZ} = 'UTC'; -my $ibx = PublicInbox::Inbox->new({ - inboxdir => $tmpdir, - address => 'git@vger.kernel.org', - name => 'test', -}); -$ibx = PublicInbox::InboxWritable->new($ibx); -$ibx->init_inbox(1); -my $im = $ibx->importer(0); my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; -my $mime = PublicInbox::MIME->new(<<EOF); -Subject: test +my $ibx = create_inbox '26-git', indexlevel => 'full', tmpdir => "$tmpdir/1", +sub { + my ($im) = @_; + # n.b. these headers are not properly RFC2047-encoded + $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT; +Subject: test Ævar Message-ID: <$mid> From: Ævar Arnfjörð Bjarmason <avarab\@example> To: git\@vger.kernel.org EOF -$im->add($mime); -$mime = PublicInbox::MIME->new(<<'EOF'); + $im->add(PublicInbox::Eml->new(<<"")) or BAIL_OUT; +Message-ID: <reply\@asdf> +From: replier <r\@example.com> +In-Reply-To: <$mid> +Subject: mismatch + + $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; Subject: Message-ID: <blank-subject@example.com> From: blank subject <blank-subject@example.com> To: git@vger.kernel.org EOF -$im->add($mime); -$mime = PublicInbox::MIME->new(<<'EOF'); + $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; Message-ID: <no-subject-at-all@example.com> From: no subject at all <no-subject-at-all@example.com> To: git@vger.kernel.org EOF -$im->add($mime); + $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; +Message-ID: <ampersand@example.com> +From: <e@example.com> +To: git@vger.kernel.org +Subject: git & ampersand -$im->done; -PublicInbox::SearchIdx->new($ibx, 1)->index_sync; +hi +++ b/foo +x=y +s'more -my $cfgpfx = "publicinbox.test"; -my $config = PublicInbox::Config->new(\<<EOF); -$cfgpfx.address=git\@vger.kernel.org -$cfgpfx.inboxdir=$tmpdir EOF -my $www = PublicInbox::WWW->new($config); +}; + +my $cfg = cfg_new $tmpdir, <<EOF; +[publicinbox "test"] + address = git\@vger.kernel.org + inboxdir = $ibx->{inboxdir} +EOF +my $www = PublicInbox::WWW->new($cfg); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; - my $res; - $res = $cb->(GET('/test/?q=%C3%86var')); - my $html = $res->content; - like($html, qr/<title>Ævar - /, 'HTML escaped in title'); - my @res = ($html =~ m/\?q=(.+var)\b/g); - ok(scalar(@res), 'saw query strings'); - my %uniq = map { $_ => 1 } @res; - is(1, scalar keys %uniq, 'all query values identical in HTML'); - is('%C3%86var', (keys %uniq)[0], 'matches original query'); - ok(index($html, 'by Ævar Arnfjörð Bjarmason') >= 0, - "displayed Ævar's name properly in HTML"); + my ($html, $res); + my $approxidate = 'now'; + for my $req ('/test/?q=%C3%86var', '/test/?q=%25C3%2586var') { + $res = $cb->(GET($req."+d:..$approxidate")); + $html = $res->content; + like($html, qr/<title>Ævar d:\.\.\Q$approxidate\E/, + 'HTML escaped in title, "d:..$APPROXIDATE" preserved'); + my @res = ($html =~ m/\?q=(.+var)\+d:\.\.\Q$approxidate\E/g); + ok(scalar(@res), 'saw query strings'); + my %uniq = map { $_ => 1 } @res; + is(1, scalar keys %uniq, 'all query values identical in HTML'); + is('%C3%86var', (keys %uniq)[0], 'matches original query'); + ok(index($html, 'by Ævar Arnfjörð Bjarmason') + >= 0, "displayed Ævar's name properly in HTML"); + like($html, qr/download mbox\.gz: .*?"full threads"/s, + '"full threads" download option shown'); + } + like($html, qr/Initial query\b.*?returned no.results, used:.*instead/s, + 'noted retry on double-escaped query {-uxs_retried}'); my $warn = []; local $SIG{__WARN__} = sub { push @$warn, @_ }; @@ -82,8 +97,13 @@ test_psgi(sub { $www->call(@_) }, sub { is($res->code, 200, 'successful search result'); is_deeply([], $warn, 'no warnings from non-numeric comparison'); + $res = $cb->(GET('/test/?&q=s:test')); + is($res->code, 200, 'successful search result'); + is_deeply([], $warn, 'no warnings from black parameter'); + $res = $cb->(POST('/test/?q=s:bogus&x=m')); is($res->code, 404, 'failed search result gives 404'); + like($res->content, qr/No results found/, "`No results' shown"); is_deeply([], $warn, 'no warnings'); my $mid_re = qr/\Q$mid\E/o; @@ -94,6 +114,11 @@ test_psgi(sub { $www->call(@_) }, sub { like($res->content, $mid_re, 'found mid in response'); chop($digits); } + $res = $cb->(GET("/test/$mid/")); + $html = $res->content; + like($html, qr/\bFrom: Ævar /, + "displayed Ævar's name properly in permalink From:"); + unlike($html, qr/Ã/, 'no raw octets in permalink HTML'); $res = $cb->(GET('/test/')); $html = $res->content; @@ -101,6 +126,8 @@ test_psgi(sub { $www->call(@_) }, sub { 'subject-less message linked from "/$INBOX/"'); like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</, 'blank subject message linked from "/$INBOX/"'); + like($html, qr/test Ævar/, + "displayed Ævar's name properly in topic view"); $res = $cb->(GET('/test/?q=tc:git')); like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</, @@ -113,8 +140,46 @@ test_psgi(sub { $www->call(@_) }, sub { $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz')); like($res->header('Content-Disposition'), qr/filename=no-subject\.mbox\.gz/); + + # "full threads" mbox.gz download + $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t")); + is($res->code, 200, 'successful mbox download with threads'); + gunzip(\($res->content) => \(my $before)); + is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ], + [ grep(/^Message-ID:/m, split(/^/m, $before)) ], + 'got full thread'); + + # clobber has_threadid to emulate old versions: + { + my $sidx = PublicInbox::SearchIdx->new($ibx, 0); + my $xdb = $sidx->idx_acquire; + $xdb->set_metadata('has_threadid', '0'); + $sidx->idx_release; + } + $cfg->each_inbox(sub { delete $_[0]->{search} }); + $res = $cb->(GET('/test/?q=s:test')); + is($res->code, 200, 'successful search w/o has_threadid'); + unlike($html, qr/download mbox\.gz: .*?"full threads"/s, + '"full threads" download option not shown w/o has_threadid'); + + # in case somebody uses curl to bypass <form> + $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t")); + is($res->code, 200, 'successful mbox download w/ threads'); + gunzip(\($res->content) => \(my $after)); + isnt($before, $after); + + $res = $cb->(GET('/test/?q=git+%26+ampersand&x=A')); + is $res->code, 200, 'Atom hit with ampersand'; + unlike $res->content, qr/git\+&\+ampersand/, '& is HTML-escaped'; + + $res = $cb->(GET('/test/?q=%22hi+%2b%2b%2b+b/foo%22&x=A')); + is $res->code, 200, 'slashes and plusses search hit'; + like $res->content, qr!q=%22hi\+(?:%2[bB]){3}\+b/foo%22!, + '+ and " escaped, but slash not escaped in query'; + + $res = $cb->(GET(q{/test/?q=%22s'more%22&x=A})); + is $res->code, 200, 'single quote inside phrase'; + # TODO: more tests and odd cases }); done_testing(); - -1; |