diff options
author | Eric Wong <e@80x24.org> | 2020-12-27 20:02:51 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-12-31 13:20:54 +0000 |
commit | 08de05443804120a2663aa3611c47c84a18e0c35 (patch) | |
tree | abfd80cfba197d6a38b0cfa52c17ca3adaf22ed9 /t | |
parent | 12583f45f29f3acd6cd704df9a7e5aaff5acc3f7 (diff) | |
download | public-inbox-08de05443804120a2663aa3611c47c84a18e0c35.tar.gz |
While a single extindex combines multiple inboxes into a single search index, extindex still requires up-front indexing of items before they can be searched. XSearch has no on-disk footprint itself and exclusively uses the Xapian DBs of existing publicinbox and extindex ("extinbox") instances. XSearch still suffers from the multi-shard Xapian scalability problems which led to the creation of extindex, but I expect the number of shards to remain relatively low. I envision that users hosting public-inbox instances on their workstations will only have two extindexes combined by this: one read-only extindex for serving public archives, and one read-write extindex managed by LeiStore for private mail.
Diffstat (limited to 't')
-rw-r--r-- | t/lei_xsearch.t | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
new file mode 100644
index 00000000..c41213bd
--- /dev/null
+++ b/t/lei_xsearch.t
@@ -0,0 +1,73 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use List::Util qw(shuffle max);
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch'; # the module under test
+my ($home, $for_destroy) = tmpdir();
+my @ibx; # the 8 inboxes created below: versions 1..2, four of each
+for my $V (1..2) { # $V is passed as the inbox "version"
+	for my $i (3..6) { # four inboxes per version
+		my $ibx = PublicInbox::InboxWritable->new({
+			inboxdir => "$home/v$V-$i",
+			name => "test-v$V-$i",
+			version => $V,
+			indexlevel => 'medium',
+			-primary_address => "v$V-$i\@example.com",
+		}, { nproc => int(rand(8)) + 1 }); # random nproc in 1..8
+		push @ibx, $ibx;
+		my $im = $ibx->importer(0);
+		for my $j (0..9) { # ten messages per inbox
+			my $eml = PublicInbox::Eml->new(<<EOF); # $V/$i/$j keep each message unique
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000
+Subject: v${V}i${i}j$j
+Message-ID: <v${V}i${i}j$j\@example>
+
+${V}er ${i}on j$j
+EOF
+			$im->add($eml);
+		}
+		$im->done;
+	}
+}
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx"); # gets only $first and $last
+$eidx->attach_inbox($first);
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+my $es = PublicInbox::ExtSearch->new("$home/eidx"); # NOTE(review): no explicit `use`; presumably loaded via ExtSearchIdx - confirm
+my $lxs = PublicInbox::LeiXSearch->new;
+for my $ibxish (shuffle($es, @ibx)) { # $es plus the 6 remaining inboxes, in random order
+	$lxs->attach_extinbox($ibxish);
+}
+my $nr = $lxs->xdb->get_doccount; # used below as query limit and as expected hit count
+my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr });
+is($mset->size, $nr, 'got all messages'); # every message above is dated 02 Oct 1993
+my @msgs;
+for my $mi ($mset->items) {
+	if (my $smsg = $lxs->smsg_for($mi)) {
+		push @msgs, $smsg;
+	} else {
+		diag "E: ${\$mi->get_docid} missing"; # nothing pushed, so the count check below also fails
+	}
+}
+is(scalar(@msgs), $nr, 'smsgs retrieved for all');
+
+$mset = $lxs->recent(undef, { limit => 1 }); # limit => 1: expect a single result
+is($mset->size, 1, 'one result');
+my $max = max(map { $_->{docid} } @msgs); # highest docid among the smsgs retrieved above
+is($lxs->smsg_for(($mset->items)[0])->{docid}, $max,
+		'got highest docid');
+
+done_testing;