about summary refs log tree commit homepage
path: root/t/psgi_search.t
diff options
context:
space:
mode:
Diffstat (limited to 't/psgi_search.t')
-rw-r--r-- t/psgi_search.t | 155
1 files changed, 110 insertions, 45 deletions
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 4fe315a1..8c981c6c 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -1,80 +1,95 @@
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use warnings;
-use Test::More;
-use Email::MIME;
-use PublicInbox::Config;
-use PublicInbox::Inbox;
-use PublicInbox::InboxWritable;
-use bytes (); # only for bytes::length
+use v5.12;
 use PublicInbox::TestCommon;
-my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+use IO::Uncompress::Gunzip qw(gunzip);
+use PublicInbox::Eml;
+use PublicInbox::Inbox;
+my @mods = qw(DBD::SQLite Xapian HTTP::Request::Common Plack::Test
                 URI::Escape Plack::Builder);
 require_mods(@mods);
 use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
 use_ok 'PublicInbox::WWW';
+use_ok 'PublicInbox::SearchIdx';
 my ($tmpdir, $for_destroy) = tmpdir();
+local $ENV{TZ} = 'UTC';
 
-my $ibx = PublicInbox::Inbox->new({
-        inboxdir => $tmpdir,
-        address => 'git@vger.kernel.org',
-        name => 'test',
-});
-$ibx = PublicInbox::InboxWritable->new($ibx);
-$ibx->init_inbox(1);
-my $im = $ibx->importer(0);
 my $digits = '10010260936330';
 my $ua = 'Pine.LNX.4.10';
 my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
-my $mime = PublicInbox::MIME->new(<<EOF);
-Subject: test
+my $ibx = create_inbox '26-git', indexlevel => 'full', tmpdir => "$tmpdir/1",
+sub {
+        my ($im) = @_;
+        # n.b. these headers are not properly RFC2047-encoded
+        $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT;
+Subject: test Ævar
 Message-ID: <$mid>
 From: Ævar Arnfjörð Bjarmason <avarab\@example>
 To: git\@vger.kernel.org
 
 EOF
-$im->add($mime);
 
-$mime = PublicInbox::MIME->new(<<'EOF');
+        $im->add(PublicInbox::Eml->new(<<"")) or BAIL_OUT;
+Message-ID: <reply\@asdf>
+From: replier <r\@example.com>
+In-Reply-To: <$mid>
+Subject: mismatch
+
+        $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
 Subject:
 Message-ID: <blank-subject@example.com>
 From: blank subject <blank-subject@example.com>
 To: git@vger.kernel.org
 
 EOF
-$im->add($mime);
 
-$mime = PublicInbox::MIME->new(<<'EOF');
+        $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
 Message-ID: <no-subject-at-all@example.com>
 From: no subject at all <no-subject-at-all@example.com>
 To: git@vger.kernel.org
 
 EOF
-$im->add($mime);
+        $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
+Message-ID: <ampersand@example.com>
+From: <e@example.com>
+To: git@vger.kernel.org
+Subject: git & ampersand
 
-$im->done;
-PublicInbox::SearchIdx->new($ibx, 1)->index_sync;
+hi +++ b/foo
+x=y
+s'more
 
-my $cfgpfx = "publicinbox.test";
-my $config = PublicInbox::Config->new(\<<EOF);
-$cfgpfx.address=git\@vger.kernel.org
-$cfgpfx.inboxdir=$tmpdir
 EOF
-my $www = PublicInbox::WWW->new($config);
+};
+
+my $cfg = cfg_new $tmpdir, <<EOF;
+[publicinbox "test"]
+        address = git\@vger.kernel.org
+        inboxdir = $ibx->{inboxdir}
+EOF
+my $www = PublicInbox::WWW->new($cfg);
 test_psgi(sub { $www->call(@_) }, sub {
         my ($cb) = @_;
-        my $res;
-        $res = $cb->(GET('/test/?q=%C3%86var'));
-        my $html = $res->content;
-        like($html, qr/<title>&#198;var - /, 'HTML escaped in title');
-        my @res = ($html =~ m/\?q=(.+var)\b/g);
-        ok(scalar(@res), 'saw query strings');
-        my %uniq = map { $_ => 1 } @res;
-        is(1, scalar keys %uniq, 'all query values identical in HTML');
-        is('%C3%86var', (keys %uniq)[0], 'matches original query');
-        ok(index($html, 'by &#198;var Arnfj&#246;r&#240; Bjarmason') >= 0,
-                "displayed Ævar's name properly in HTML");
+        my ($html, $res);
+        my $approxidate = 'now';
+        for my $req ('/test/?q=%C3%86var', '/test/?q=%25C3%2586var') {
+                $res = $cb->(GET($req."+d:..$approxidate"));
+                $html = $res->content;
+                like($html, qr/<title>&#198;var d:\.\.\Q$approxidate\E/,
+                        'HTML escaped in title, "d:..$APPROXIDATE" preserved');
+                my @res = ($html =~ m/\?q=(.+var)\+d:\.\.\Q$approxidate\E/g);
+                ok(scalar(@res), 'saw query strings');
+                my %uniq = map { $_ => 1 } @res;
+                is(scalar(keys %uniq), 1, 'all query values identical in HTML');
+                is((keys %uniq)[0], '%C3%86var', 'matches original query');
+                ok(index($html, 'by &#198;var Arnfj&#246;r&#240; Bjarmason')
+                        >= 0, "displayed Ævar's name properly in HTML");
+                like($html, qr/download mbox\.gz: .*?"full threads"/s,
+                        '"full threads" download option shown');
+        }
+        like($html, qr/Initial query\b.*?returned no.results, used:.*instead/s,
+                'noted retry on double-escaped query {-uxs_retried}');
 
         my $warn = [];
         local $SIG{__WARN__} = sub { push @$warn, @_ };
@@ -82,8 +97,13 @@ test_psgi(sub { $www->call(@_) }, sub {
         is($res->code, 200, 'successful search result');
         is_deeply([], $warn, 'no warnings from non-numeric comparison');
 
+        $res = $cb->(GET('/test/?&q=s:test'));
+        is($res->code, 200, 'successful search result');
+        is_deeply([], $warn, 'no warnings from blank parameter');
+
         $res = $cb->(POST('/test/?q=s:bogus&x=m'));
         is($res->code, 404, 'failed search result gives 404');
+        like($res->content, qr/No results found/, "`No results' shown");
         is_deeply([], $warn, 'no warnings');
 
         my $mid_re = qr/\Q$mid\E/o;
@@ -94,6 +114,11 @@ test_psgi(sub { $www->call(@_) }, sub {
                 like($res->content, $mid_re, 'found mid in response');
                 chop($digits);
         }
+        $res = $cb->(GET("/test/$mid/"));
+        $html = $res->content;
+        like($html, qr/\bFrom: &#198;var /,
+                "displayed Ævar's name properly in permalink From:");
+        unlike($html, qr/&#195;/, 'no raw octets in permalink HTML');
 
         $res = $cb->(GET('/test/'));
         $html = $res->content;
@@ -101,6 +126,8 @@ test_psgi(sub { $www->call(@_) }, sub {
                 'subject-less message linked from "/$INBOX/"');
         like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</,
                 'blank subject message linked from "/$INBOX/"');
+        like($html, qr/test &#198;var/,
+                "displayed Ævar's name properly in topic view");
 
         $res = $cb->(GET('/test/?q=tc:git'));
         like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</,
@@ -113,8 +140,46 @@ test_psgi(sub { $www->call(@_) }, sub {
         $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz'));
         like($res->header('Content-Disposition'),
                 qr/filename=no-subject\.mbox\.gz/);
+
+        # "full threads" mbox.gz download
+        $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t"));
+        is($res->code, 200, 'successful mbox download with threads');
+        gunzip(\($res->content) => \(my $before));
+        is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ],
+                [ grep(/^Message-ID:/m, split(/^/m, $before)) ],
+                'got full thread');
+
+        # clobber has_threadid to emulate old versions:
+        {
+                my $sidx = PublicInbox::SearchIdx->new($ibx, 0);
+                my $xdb = $sidx->idx_acquire;
+                $xdb->set_metadata('has_threadid', '0');
+                $sidx->idx_release;
+        }
+        $cfg->each_inbox(sub { delete $_[0]->{search} });
+        $res = $cb->(GET('/test/?q=s:test'));
+        is($res->code, 200, 'successful search w/o has_threadid');
+        unlike($res->content, qr/download mbox\.gz: .*?"full threads"/s,
+                '"full threads" download option not shown w/o has_threadid');
+
+        # in case somebody uses curl to bypass <form>
+        $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t"));
+        is($res->code, 200, 'successful mbox download w/ threads');
+        gunzip(\($res->content) => \(my $after));
+        isnt($before, $after, 'mbox differs from full thread w/o has_threadid');
+
+        $res = $cb->(GET('/test/?q=git+%26+ampersand&x=A'));
+        is $res->code, 200, 'Atom hit with ampersand';
+        unlike $res->content, qr/git\+&\+ampersand/, '& is HTML-escaped';
+
+        $res = $cb->(GET('/test/?q=%22hi+%2b%2b%2b+b/foo%22&x=A'));
+        is $res->code, 200, 'slashes and plusses search hit';
+        like $res->content, qr!q=%22hi\+(?:%2[bB]){3}\+b/foo%22!,
+                '+ and " escaped, but slash not escaped in query';
+
+        $res = $cb->(GET(q{/test/?q=%22s'more%22&x=A}));
+        is $res->code, 200, 'single quote inside phrase';
+        # TODO: more tests and odd cases
 });
 
 done_testing();
-
-1;