diff options
author | Eric Wong <e@80x24.org> | 2019-06-13 00:29:37 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2019-06-14 01:31:25 +0000 |
commit | e9eb3af852778a67533e9579b14695763535d262 (patch) | |
tree | 09478c84234a4a7f99def3bbae8f4f7e19c390fe /t | |
parent | e665a4fa317bf9ceea812bc9ca3f486ec722dfea (diff) | |
download | public-inbox-e9eb3af852778a67533e9579b14695763535d262.tar.gz |
v2 repos are sometimes created on machines where CPU parallelization exceeds the capability of the storage devices. In that case, users may reshard the Xapian DB down to any smaller positive number of shards to avoid excessive overhead and contention when bottlenecked by slow storage. Resharding can also be used to increase the shard count after hardware upgrades.
Diffstat (limited to 't')
-rw-r--r-- | t/xcpdb-reshard.t | 83 |
1 files changed, 83 insertions, 0 deletions
# t/xcpdb-reshard.t (new file, blob ce552f54)
# Copyright (C) 2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Exercises "public-inbox-xcpdb -R<N>": a v2 inbox is created with $nproc
# Xapian shards and then resharded to several different shard counts.
# After each pass we verify the shard count on disk, that every message
# is still searchable, and that the docids stored in the shards are
# exactly the article numbers recorded before resharding.
use strict;
use warnings;
use Test::More;

# Skip the whole test when an optional dependency is unavailable.
# String eval is required here: "require $mod" with a variable would
# treat "DBD::SQLite" as a file name instead of a module name.
my @mods = qw(DBI DBD::SQLite Search::Xapian);
foreach my $mod (@mods) {
	eval "require $mod";
	plan skip_all => "missing $mod for $0" if $@;
}
require './t/common.perl';
require_git('2.6');
use File::Temp qw/tempdir/;
use PublicInbox::MIME;
use PublicInbox::InboxWritable;

# Template message; only the Message-ID is changed for each import below.
my $mime = PublicInbox::MIME->create(
	header => [
		From => 'a@example.com',
		To => 'test@example.com',
		Subject => 'this is a subject',
		Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
	],
	body => '',
);

# Name the temporary directory and the inbox after this test script.
my ($this) = (split('/', $0))[-1];
my $tmpdir = tempdir($this.'-XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $ibx = PublicInbox::Inbox->new({
	mainrepo => "$tmpdir/testbox",
	name => $this,
	version => 2,
	-primary_address => 'test@example.com',
	indexlevel => 'medium',
});
my $path = 'blib/script';
my @xcpdb = ("$path/public-inbox-xcpdb", '-q');
my $nproc = 8;
my $ndoc = 13;

# Import $ndoc messages using $nproc indexing processes.
my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer(1);
for my $i (1..$ndoc) {
	$mime->header_set('Message-ID', "<m$i\@example.com>");
	ok($im->add($mime), "message $i added");
}
$im->done;

# Shard directories are the purely numeric entries below xap*/
my @parts = grep(m!/\d+\z!, glob("$ibx->{mainrepo}/xap*/*"));
is(scalar(@parts), $nproc, 'got expected parts');

# Remember the article numbers so they can be compared to Xapian docids.
my $orig = $ibx->over->query_xover(1, $ndoc);
my %nums = map {; "$_->{num}" => 1 } @$orig;

# ensure we can go up or down in partitions, or stay the same:
for my $R (qw(2 4 1 3 3)) {
	delete $ibx->{search}; # release old handles
	is(system(@xcpdb, "-R$R", $ibx->{mainrepo}), 0, "xcpdb -R$R");
	my @new_parts = grep(m!/\d+\z!, glob("$ibx->{mainrepo}/xap*/*"));
	is(scalar(@new_parts), $R, "repartitioned to $R parts");

	# every message must still be found through the search interface
	my $msgs = $ibx->search->query('s:this');
	is(scalar(@$msgs), $ndoc, 'got expected docs after repartitioning');
	my %by_mid = map {; "$_->{mid}" => $_ } @$msgs;
	ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc);

	delete $ibx->{search}; # release old handles

	# ensure docids in Xapian match NNTP article numbers
	my $tot = 0;
	my %tmp = %nums; # consumed below; leftovers mean a docid went missing
	foreach my $d (@new_parts) {
		my $xdb = Search::Xapian::Database->new($d);
		$tot += $xdb->get_doccount;

		# the empty term's posting list is used to walk every
		# document in this shard
		my $it = $xdb->postlist_begin('');
		my $end = $xdb->postlist_end('');
		for (; $it != $end; $it++) {
			my $docid = $it->get_docid;
			if ($xdb->get_document($docid)) {
				ok(delete($tmp{$docid}), "saw #$docid");
			}
		}
	}
	is(scalar keys %tmp, 0, 'all docids seen');
	is($tot, $ndoc, "doccount across $R parts matches message count");
}

done_testing();
1;