about summary refs log tree commit homepage
path: root/t
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-27 20:02:51 +0000
committer Eric Wong <e@80x24.org> 2020-12-31 13:20:54 +0000
commit 08de05443804120a2663aa3611c47c84a18e0c35 (patch)
tree abfd80cfba197d6a38b0cfa52c17ca3adaf22ed9 /t
parent 12583f45f29f3acd6cd704df9a7e5aaff5acc3f7 (diff)
download public-inbox-08de05443804120a2663aa3611c47c84a18e0c35.tar.gz
While a single extindex combines multiple inboxes into a single
search index, extindex still requires up-front indexing on items
which can be searched.  XSearch has no on-disk footprint itself
and uses Xapian DBs of existing publicinbox and extindex
("extinbox") exclusively.

XSearch still suffers from the multi-shard Xapian scalability
problems which led to the creation of extindex, but I expect the
number of shards to remain relatively low.

I envision that users hosting public-inbox instances on their
workstations will only have two extindexes combined by this: one
read-only extindex for serving public archives, and one
read-write extindex managed by LeiStore for private mail.
Diffstat (limited to 't')
-rw-r--r-- t/lei_xsearch.t 73
1 files changed, 73 insertions, 0 deletions
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
new file mode 100644
index 00000000..c41213bd
--- /dev/null
+++ b/t/lei_xsearch.t
@@ -0,0 +1,73 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use List::Util qw(shuffle max);
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch'; # module exercised via $lxs below
+my ($home, $for_destroy) = tmpdir(); # NOTE(review): $for_destroy is unused below; presumably keeps the tmpdir alive - confirm
+my @ibx;
+for my $V (1..2) { # $V is passed as the inbox "version"
+        for my $i (3..6) { # four inboxes per version, eight in total
+                my $ibx = PublicInbox::InboxWritable->new({
+                        inboxdir => "$home/v$V-$i",
+                        name => "test-v$V-$i",
+                        version => $V,
+                        indexlevel => 'medium',
+                        -primary_address => "v$V-$i\@example.com",
+                }, { nproc => int(rand(8)) + 1 }); # random integer in 1..8
+                push @ibx, $ibx;
+                my $im = $ibx->importer(0);
+                for my $j (0..9) { # ten messages per inbox
+                        my $eml = PublicInbox::Eml->new(<<EOF);
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000
+Subject: v${V}i${i}j$j
+Message-ID: <v${V}i${i}j$j\@example>
+
+${V}er ${i}on j$j
+EOF
+                        $im->add($eml);
+                }
+                $im->done;
+        }
+}
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx");
+$eidx->attach_inbox($first); # $first and $last were removed from @ibx above
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+my $es = PublicInbox::ExtSearch->new("$home/eidx"); # same path the ExtSearchIdx was created with
+my $lxs = PublicInbox::LeiXSearch->new;
+for my $ibxish (shuffle($es, @ibx)) { # random order: $es plus the six inboxes left in @ibx
+        $lxs->attach_extinbox($ibxish);
+}
+my $nr = $lxs->xdb->get_doccount; # used as both the query limit and the expected counts
+my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr }); # d: presumably a date range; every Date header above is 02 Oct 1993 - confirm
+is($mset->size, $nr, 'got all messages');
+my @msgs;
+for my $mi ($mset->items) {
+        if (my $smsg = $lxs->smsg_for($mi)) {
+                push @msgs, $smsg;
+        } else {
+                diag "E: ${\$mi->get_docid} missing"; # ${\ ...} interpolates the method call's result
+        }
+}
+is(scalar(@msgs), $nr, 'smsgs retrieved for all');
+
+$mset = $lxs->recent(undef, { limit => 1 }); # NOTE(review): undef first arg presumably means "no query" - confirm
+is($mset->size, 1, 'one result');
+my $max = max(map { $_->{docid} } @msgs); # largest docid among the smsgs collected above
+is($lxs->smsg_for(($mset->items)[0])->{docid}, $max,
+        'got highest docid');
+
+done_testing;