about summary refs log tree commit homepage
path: root/t
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-06-13 00:29:37 +0000
committerEric Wong <e@80x24.org>2019-06-14 01:31:25 +0000
commite9eb3af852778a67533e9579b14695763535d262 (patch)
tree09478c84234a4a7f99def3bbae8f4f7e19c390fe /t
parente665a4fa317bf9ceea812bc9ca3f486ec722dfea (diff)
downloadpublic-inbox-e9eb3af852778a67533e9579b14695763535d262.tar.gz
v2 repos are sometimes created on machines where CPU
parallelization exceeds the capability of the storage devices.

In that case, users may reshard the Xapian DB to any smaller,
positive integer to avoid excessive overhead and contention when
bottlenecked by slow storage.

Resharding can also be used to increase shard count after
hardware upgrades.
Diffstat (limited to 't')
-rw-r--r--t/xcpdb-reshard.t83
1 files changed, 83 insertions, 0 deletions
diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t
new file mode 100644
index 00000000..ce552f54
--- /dev/null
+++ b/t/xcpdb-reshard.t
@@ -0,0 +1,83 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+my @mods = qw(DBI DBD::SQLite Search::Xapian); # modules this test cannot run without
+foreach my $mod (@mods) {
+        eval "require $mod"; # string eval so the module name can come from a variable
+        plan skip_all => "missing $mod for $0" if $@; # skip the whole file when the require failed
+};
+require './t/common.perl'; # presumably where require_git() comes from -- confirm
+require_git('2.6');
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::InboxWritable;
+
+my $mime = PublicInbox::MIME->create( # template message; its Message-ID is set per iteration below
+        header => [
+                From => 'a@example.com',
+                To => 'test@example.com',
+                Subject => 'this is a subject',
+                Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+        ],
+        body => '',
+);
+
+my ($this) = (split('/', $0))[-1]; # last path component of the script name
+my $tmpdir = tempdir($this.'-XXXXXX', TMPDIR => 1, CLEANUP => 1); # scratch dir, removed at exit
+my $ibx = PublicInbox::Inbox->new({
+        mainrepo => "$tmpdir/testbox",
+        name => $this,
+        version => 2,
+        -primary_address => 'test@example.com',
+        indexlevel => 'medium',
+});
+my $path = 'blib/script';
+my @xcpdb = ("$path/public-inbox-xcpdb", '-q'); # command prefix; -R<n> and the repo path are appended per run
+my $nproc = 8; # handed to InboxWritable as nproc; also the shard directory count asserted below
+my $ndoc = 13; # number of messages imported
+my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer(1);
+for my $i (1..$ndoc) {
+        $mime->header_set('Message-ID', "<m$i\@example.com>"); # same message object, new Message-ID each time
+        ok($im->add($mime), "message $i added");
+}
+$im->done;
+my @parts = grep(m!/\d+\z!, glob("$ibx->{mainrepo}/xap*/*")); # entries under xap*/ whose name is all digits
+is(scalar(@parts), $nproc, 'got expected parts');
+my $orig = $ibx->over->query_xover(1, $ndoc);
+my %nums = map {; "$_->{num}" => 1 } @$orig; # set of article numbers, checked against Xapian docids later
+
+# ensure we can go up or down in partitions, or stay the same:
+for my $R (qw(2 4 1 3 3)) { # from 8 shards: shrink, grow, shrink, grow, then same count again
+        delete $ibx->{search}; # release old handles
+        is(system(@xcpdb, "-R$R", $ibx->{mainrepo}), 0, "xcpdb -R$R");
+        my @new_parts = grep(m!/\d+\z!, glob("$ibx->{mainrepo}/xap*/*"));
+        is(scalar(@new_parts), $R, "repartitioned to $R parts"); # description names the actual target
+        my $msgs = $ibx->search->query('s:this');
+        is(scalar(@$msgs), $ndoc, 'got expected docs after repartitioning');
+        my %by_mid = map {; "$_->{mid}" => $_ } @$msgs; # index results by Message-ID
+        ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc);
+
+        delete $ibx->{search}; # release old handles
+
+        # ensure docids in Xapian match NNTP article numbers
+        my $tot = 0;
+        my %tmp = %nums; # fresh copy so every pass starts with the full expected set
+        foreach my $d (@new_parts) {
+                my $xdb = Search::Xapian::Database->new($d); # open each shard directly
+                $tot += $xdb->get_doccount;
+                my $it = $xdb->postlist_begin(''); # empty term: iterates all documents in the shard
+                my $end = $xdb->postlist_end('');
+                for (; $it != $end; $it++) {
+                        my $docid = $it->get_docid;
+                        if ($xdb->get_document($docid)) {
+                                ok(delete($tmp{$docid}), "saw #$docid"); # fails on an unexpected or repeated docid
+                        }
+                }
+        }
+        is(scalar keys %tmp, 0, 'all docids seen'); # nothing left means every article number was found
+}
+
+done_testing();
+1;