about summary refs log tree commit homepage
path: root/t/psgi_search.t
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-08-22 06:06:27 +0000
committer Eric Wong <e@yhbt.net> 2020-08-23 00:19:53 +0000
commit a367ec1b15a2458e532245f5308565dd84f8ca63 (patch)
tree 97f5c7fc52301282ff9a5f1c9913f3827edd3b49 /t/psgi_search.t
parent aad1b9e70529b78d3d7d62b0920ad82ca40f9592 (diff)
download public-inbox-a367ec1b15a2458e532245f5308565dd84f8ca63.tar.gz
mbox: disable "&t" on existing Xapian until full reindex
Expanding threads via over.sqlite3 for mbox.gz downloads without
Xapian effectively collapsing on the THREADID column leads to
repeated messages getting downloaded.

To avoid that situation, use a "has_threadid" Xapian metadata
flag that's only set on --reindex (and brand new Xapian DBs).

This allows admins to upgrade WWW or do --reindex in any order,
without worrying about users eating up bandwidth and CPU cycles.
Diffstat (limited to 't/psgi_search.t')
-rw-r--r-- t/psgi_search.t 39
1 files changed, 37 insertions, 2 deletions
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 5d537363..c1677eb3 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -3,6 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
+use IO::Uncompress::Gunzip qw(gunzip);
 use PublicInbox::Eml;
 use PublicInbox::Config;
 use PublicInbox::Inbox;
@@ -39,6 +40,12 @@ To: git\@vger.kernel.org
 EOF
 $im->add($mime);
 
+$im->add(PublicInbox::Eml->new(<<""));
+Message-ID: <reply\@asdf>
+From: replier <r\@example.com>
+In-Reply-To: <$mid>
+Subject: mismatch
+
 $mime = PublicInbox::Eml->new(<<'EOF');
 Subject:
 Message-ID: <blank-subject@example.com>
@@ -79,6 +86,9 @@ test_psgi(sub { $www->call(@_) }, sub {
         ok(index($html, 'by &#198;var Arnfj&#246;r&#240; Bjarmason') >= 0,
                 "displayed Ævar's name properly in HTML");
 
+        like($html, qr/download mbox\.gz: .*?"full threads"/s,
+                '"full threads" download option shown');
+
         my $warn = [];
         local $SIG{__WARN__} = sub { push @$warn, @_ };
         $res = $cb->(GET('/test/?q=s:test&l=5e'));
@@ -118,8 +128,33 @@ test_psgi(sub { $www->call(@_) }, sub {
         $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz'));
         like($res->header('Content-Disposition'),
                 qr/filename=no-subject\.mbox\.gz/);
+
+        # "full threads" mbox.gz download
+        $res = $cb->(POST('/test/?q=s:test&x=m&t'));
+        is($res->code, 200, 'successful mbox download with threads');
+        gunzip(\($res->content) => \(my $before));
+        is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ],
+                [ grep(/^Message-ID:/m, split(/^/m, $before)) ],
+                'got full thread');
+
+        # clobber has_threadid to emulate old versions:
+        {
+                my $sidx = PublicInbox::SearchIdx->new($ibx, 0);
+                my $xdb = $sidx->idx_acquire;
+                $xdb->set_metadata('has_threadid', '0');
+                $sidx->idx_release;
+        }
+        $config->each_inbox(sub { delete $_[0]->{search} });
+        $res = $cb->(GET('/test/?q=s:test'));
+        is($res->code, 200, 'successful search w/o has_threadid');
+        unlike($html, qr/download mbox\.gz: .*?"full threads"/s,
+                '"full threads" download option not shown w/o has_threadid');
+
+        # in case somebody uses curl to bypass <form>
+        $res = $cb->(POST('/test/?q=s:test&x=m&t'));
+        is($res->code, 200, 'successful mbox download w/ threads');
+        gunzip(\($res->content) => \(my $after));
+        isnt($before, $after);
 });
 
 done_testing();
-
-1;