about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-08-22 06:06:27 +0000
committerEric Wong <e@yhbt.net>2020-08-23 00:19:53 +0000
commita367ec1b15a2458e532245f5308565dd84f8ca63 (patch)
tree97f5c7fc52301282ff9a5f1c9913f3827edd3b49 /lib
parentaad1b9e70529b78d3d7d62b0920ad82ca40f9592 (diff)
downloadpublic-inbox-a367ec1b15a2458e532245f5308565dd84f8ca63.tar.gz
mbox: disable "&t" on existing Xapian until full reindex
Expanding threads via over.sqlite3 for mbox.gz downloads without
Xapian effectively collapsing on the THREADID column leads to
repeated messages getting downloaded.

To avoid that situation, use a "has_threadid" Xapian metadata
flag that's only set on --reindex (and brand new Xapian DBs).

This allows admins to upgrade WWW or do --reindex in any order;
without worrying about users eating up bandwidth and CPU cycles.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Mbox.pm2
-rw-r--r--lib/PublicInbox/Search.pm10
-rw-r--r--lib/PublicInbox/SearchIdx.pm16
-rw-r--r--lib/PublicInbox/SearchView.pm21
-rw-r--r--lib/PublicInbox/V2Writable.pm12
5 files changed, 50 insertions, 11 deletions
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index c9b11c21..0223bead 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -262,7 +262,7 @@ sub mbox_all {
         $ctx->{ids} = $srch->mset_to_artnums($mset);
         require PublicInbox::MboxGz;
         my $fn;
-        if ($q->{t}) {
+        if ($q->{t} && $srch->has_threadid) {
                 $fn = 'results-thread-'.$q_string;
                 PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn);
         } else {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index bc820b64..01bbe73d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -311,6 +311,12 @@ sub _do_enquire {
         retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
 }
 
+# returns true if all docs have the THREADID value
+sub has_threadid ($) {
+        my ($self) = @_;
+        (xdb($self)->get_metadata('has_threadid') // '') eq '1';
+}
+
 sub _enquire_once { # retry_reopen callback
         my ($self, $query, $opts) = @{$_[0]};
         my $xdb = xdb($self);
@@ -328,7 +334,9 @@ sub _enquire_once { # retry_reopen callback
         }
 
         # `mairix -t / --threads' or JMAP collapseThreads
-        $enquire->set_collapse_key(THREADID) if $opts->{thread};
+        if ($opts->{thread} && has_threadid($self)) {
+                $enquire->set_collapse_key(THREADID);
+        }
 
         my $offset = $opts->{offset} || 0;
         my $limit = $opts->{limit} || 50;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index baa6f41a..ade55756 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -131,6 +131,7 @@ sub idx_acquire {
                                 ($is_shard && need_xapian($self)))) {
                         File::Path::mkpath($dir);
                         nodatacow_dir($dir);
+                        $self->{-set_has_threadid_once} = 1;
                 }
         }
         return unless defined $flag;
@@ -590,9 +591,17 @@ sub v1_checkpoint ($$;$) {
 
         $self->{mm}->{dbh}->commit;
         if ($newest && need_xapian($self)) {
-                my $cur = $self->{xdb}->get_metadata('last_commit');
+                my $xdb = $self->{xdb};
+                my $cur = $xdb->get_metadata('last_commit');
                 if (need_update($self, $cur, $newest)) {
-                        $self->{xdb}->set_metadata('last_commit', $newest);
+                        $xdb->set_metadata('last_commit', $newest);
+                }
+
+                # let SearchView know a full --reindex was done so it can
+                # generate ->has_threadid-dependent links
+                if ($sync->{reindex} && !ref($sync->{reindex})) {
+                        my $n = $xdb->get_metadata('has_threadid');
+                        $xdb->set_metadata('has_threadid', '1') if $n ne '1';
                 }
         }
 
@@ -816,6 +825,9 @@ sub set_metadata_once {
         return if $self->{shard}; # only continue if undef or 0, not >0
         my $xdb = $self->{xdb};
 
+        if (delete($self->{-set_has_threadid_once})) {
+                $xdb->set_metadata('has_threadid', '1');
+        }
         if (delete($self->{-set_indexlevel_once})) {
                 my $level = $xdb->get_metadata('indexlevel');
                 if (!$level || $level ne 'medium') {
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index dd69564a..76428dfb 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -188,13 +188,20 @@ sub search_nav_top {
                 $rv .= qq{<a\nhref="?$s">summary</a>|<b>nested</b>};
         }
         my $A = $q->qs_html(x => 'A', r => undef);
-        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]} .
-                qq{\n\t\t\tdownload mbox.gz: } .
-                # we set name=z w/o using it since it seems required for
-                # lynx (but works fine for w3m).
-                qq{<input\ntype=submit\nname=z\nvalue="results only"/>|} .
-                qq{<input\ntype=submit\nname=x\nvalue="full threads"/>} .
-                qq{</pre></form><pre>};
+        $rv .= qq{|<a\nhref="?$A">Atom feed</a>]};
+        if ($ctx->{-inbox}->search->has_threadid) {
+                $rv .= qq{\n\t\t\tdownload mbox.gz: } .
+                        # we set name=z w/o using it since it seems required for
+                        # lynx (but works fine for w3m).
+                        qq{<input\ntype=submit\nname=z\n} .
+                                q{value="results only"/>} .
+                        qq{|<input\ntype=submit\nname=x\n} .
+                                q{value="full threads"/>};
+        } else { # BOFH needs to --reindex
+                $rv .= qq{\n\t\t\t\t\t\tdownload: } .
+                        qq{<input\ntype=submit\nname=z\nvalue="mbox.gz"/>}
+        }
+        $rv .= qq{</pre></form><pre>};
 }
 
 sub search_nav_bot {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 0a91a132..b0148dba 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1337,6 +1337,18 @@ sub index_sync {
                 xapian_only($self, $opt, $sync, $art_beg);
         }
 
+        # --reindex on the command-line
+        if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') {
+                $self->lock_acquire;
+                my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0);
+                if (my $xdb = $s0->idx_acquire) {
+                        my $n = $xdb->get_metadata('has_threadid');
+                        $xdb->set_metadata('has_threadid', 1) if $n ne '1';
+                }
+                $s0->idx_release;
+                $self->lock_release;
+        }
+
         # reindex does not pick up new changes, so we rerun w/o it:
         if ($opt->{reindex}) {
                 my %again = %$opt;