about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--lib/PublicInbox/Mbox.pm2
-rw-r--r--lib/PublicInbox/Search.pm10
-rw-r--r--lib/PublicInbox/SearchIdx.pm16
-rw-r--r--lib/PublicInbox/SearchView.pm21
-rw-r--r--lib/PublicInbox/V2Writable.pm12
-rw-r--r--t/init.t1
-rw-r--r--t/psgi_search.t39
7 files changed, 88 insertions, 13 deletions
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index c9b11c21..0223bead 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -262,7 +262,7 @@ sub mbox_all {
         $ctx->{ids} = $srch->mset_to_artnums($mset);
         require PublicInbox::MboxGz;
         my $fn;
-        if ($q->{t}) {
+        if ($q->{t} && $srch->has_threadid) {
                 $fn = 'results-thread-'.$q_string;
                 PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn);
         } else {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index bc820b64..01bbe73d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -311,6 +311,12 @@ sub _do_enquire {
         retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
 }
 
+# returns true if all docs have the THREADID value
+sub has_threadid ($) {
+        my ($self) = @_;
+        (xdb($self)->get_metadata('has_threadid') // '') eq '1';
+}
+
 sub _enquire_once { # retry_reopen callback
         my ($self, $query, $opts) = @{$_[0]};
         my $xdb = xdb($self);
@@ -328,7 +334,9 @@ sub _enquire_once { # retry_reopen callback
         }
 
         # `mairix -t / --threads' or JMAP collapseThreads
-        $enquire->set_collapse_key(THREADID) if $opts->{thread};
+        if ($opts->{thread} && has_threadid($self)) {
+                $enquire->set_collapse_key(THREADID);
+        }
 
         my $offset = $opts->{offset} || 0;
         my $limit = $opts->{limit} || 50;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index baa6f41a..ade55756 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -131,6 +131,7 @@ sub idx_acquire {
                                 ($is_shard && need_xapian($self)))) {
                         File::Path::mkpath($dir);
                         nodatacow_dir($dir);
+                        $self->{-set_has_threadid_once} = 1;
                 }
         }
         return unless defined $flag;
@@ -590,9 +591,17 @@ sub v1_checkpoint ($$;$) {
 
         $self->{mm}->{dbh}->commit;
         if ($newest && need_xapian($self)) {
-                my $cur = $self->{xdb}->get_metadata('last_commit');
+                my $xdb = $self->{xdb};
+                my $cur = $xdb->get_metadata('last_commit');
                 if (need_update($self, $cur, $newest)) {
-                        $self->{xdb}->set_metadata('last_commit', $newest);
+                        $xdb->set_metadata('last_commit', $newest);
+                }
+
+                # let SearchView know a full --reindex was done so it can
+                # generate ->has_threadid-dependent links
+                if ($sync->{reindex} && !ref($sync->{reindex})) {
+                        my $n = $xdb->get_metadata('has_threadid');
+                        $xdb->set_metadata('has_threadid', '1') if $n ne '1';
                 }
         }
 
@@ -816,6 +825,9 @@ sub set_metadata_once {
         return if $self->{shard}; # only continue if undef or 0, not >0
         my $xdb = $self->{xdb};
 
+        if (delete($self->{-set_has_threadid_once})) {
+                $xdb->set_metadata('has_threadid', '1');
+        }
         if (delete($self->{-set_indexlevel_once})) {
                 my $level = $xdb->get_metadata('indexlevel');
                 if (!$level || $level ne 'medium') {
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index dd69564a..76428dfb 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -188,13 +188,20 @@ sub search_nav_top {
                 $rv .= qq{<a\nhref="?$s">summary</a>|<b>nested</b>};
         }
         my $A = $q->qs_html(x => 'A', r => undef);
-        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]} .
-                qq{\n\t\t\tdownload mbox.gz: } .
-                # we set name=z w/o using it since it seems required for
-                # lynx (but works fine for w3m).
-                qq{<input\ntype=submit\nname=z\nvalue="results only"/>|} .
-                qq{<input\ntype=submit\nname=x\nvalue="full threads"/>} .
-                qq{</pre></form><pre>};
+        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]};
+        if ($ctx->{-inbox}->search->has_threadid) {
+                $rv .= qq{\n\t\t\tdownload mbox.gz: } .
+                        # we set name=z w/o using it since it seems required for
+                        # lynx (but works fine for w3m).
+                        qq{<input\ntype=submit\nname=z\n} .
+                                q{value="results only"/>} .
+                        qq{|<input\ntype=submit\nname=x\n} .
+                                q{value="full threads"/>};
+        } else { # BOFH needs to --reindex
+                $rv .= qq{\n\t\t\t\t\t\tdownload: } .
+                        qq{<input\ntype=submit\nname=z\nvalue="mbox.gz"/>}
+        }
+        $rv .= qq{</pre></form><pre>};
 }
 
 sub search_nav_bot {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 0a91a132..b0148dba 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1337,6 +1337,18 @@ sub index_sync {
                 xapian_only($self, $opt, $sync, $art_beg);
         }
 
+        # --reindex on the command-line
+        if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') {
+                $self->lock_acquire;
+                my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0);
+                if (my $xdb = $s0->idx_acquire) {
+                        my $n = $xdb->get_metadata('has_threadid');
+                        $xdb->set_metadata('has_threadid', 1) if $n ne '1';
+                }
+                $s0->idx_release;
+                $self->lock_release;
+        }
+
         # reindex does not pick up new changes, so we rerun w/o it:
         if ($opt->{reindex}) {
                 my %again = %$opt;
diff --git a/t/init.t b/t/init.t
index dad09435..a5a9debc 100644
--- a/t/init.t
+++ b/t/init.t
@@ -108,6 +108,7 @@ SKIP: {
                 is(PublicInbox::Admin::detect_indexlevel($ibx), 'full',
                         "detected default indexlevel -V$v");
                 ok($ibx->{-skip_docdata}, "docdata skip set -V$v");
+                ok($ibx->search->has_threadid, 'has_threadid flag set on new inbox');
         }
 
         # loop for idempotency
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 5d537363..c1677eb3 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -3,6 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
+use IO::Uncompress::Gunzip qw(gunzip);
 use PublicInbox::Eml;
 use PublicInbox::Config;
 use PublicInbox::Inbox;
@@ -39,6 +40,12 @@ To: git\@vger.kernel.org
 EOF
 $im->add($mime);
 
+$im->add(PublicInbox::Eml->new(<<""));
+Message-ID: <reply\@asdf>
+From: replier <r\@example.com>
+In-Reply-To: <$mid>
+Subject: mismatch
+
 $mime = PublicInbox::Eml->new(<<'EOF');
 Subject:
 Message-ID: <blank-subject@example.com>
@@ -79,6 +86,9 @@ test_psgi(sub { $www->call(@_) }, sub {
         ok(index($html, 'by &#198;var Arnfj&#246;r&#240; Bjarmason') >= 0,
                 "displayed Ævar's name properly in HTML");
 
+        like($html, qr/download mbox\.gz: .*?"full threads"/s,
+                '"full threads" download option shown');
+
         my $warn = [];
         local $SIG{__WARN__} = sub { push @$warn, @_ };
         $res = $cb->(GET('/test/?q=s:test&l=5e'));
@@ -118,8 +128,33 @@ test_psgi(sub { $www->call(@_) }, sub {
         $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz'));
         like($res->header('Content-Disposition'),
                 qr/filename=no-subject\.mbox\.gz/);
+
+        # "full threads" mbox.gz download
+        $res = $cb->(POST('/test/?q=s:test&x=m&t'));
+        is($res->code, 200, 'successful mbox download with threads');
+        gunzip(\($res->content) => \(my $before));
+        is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ],
+                [ grep(/^Message-ID:/m, split(/^/m, $before)) ],
+                'got full thread');
+
+        # clobber has_threadid to emulate old versions:
+        {
+                my $sidx = PublicInbox::SearchIdx->new($ibx, 0);
+                my $xdb = $sidx->idx_acquire;
+                $xdb->set_metadata('has_threadid', '0');
+                $sidx->idx_release;
+        }
+        $config->each_inbox(sub { delete $_[0]->{search} });
+        $res = $cb->(GET('/test/?q=s:test'));
+        is($res->code, 200, 'successful search w/o has_threadid');
+        unlike($html, qr/download mbox\.gz: .*?"full threads"/s,
+                '"full threads" download option not shown w/o has_threadid');
+
+        # in case somebody uses curl to bypass <form>
+        $res = $cb->(POST('/test/?q=s:test&x=m&t'));
+        is($res->code, 200, 'successful mbox download w/ threads');
+        gunzip(\($res->content) => \(my $after));
+        isnt($before, $after);
 });
 
 done_testing();
-
-1;