From 58e764d179131b0fba8590915e2528248be26329 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Mon, 23 Nov 2020 07:05:51 +0000
Subject: miscsearch: a new Xapian sub-DB for extindex

This will be used to index and search Inbox objects and perhaps
individual git repositories/epochs for grokmirror manifest.js.gz
generation.

There is no sharding planned for this at the moment since inbox
count should remain low (~100K to 1M) compared to message count.

Folding this into the existing sharded DBs could be possible;
but would likely increase query and maintenance costs, as well
as development complexity. So we'll use a few more inodes and
FDs at runtime, instead.
---
 t/miscsearch.t | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 t/miscsearch.t

(limited to 't/miscsearch.t')

diff --git a/t/miscsearch.t b/t/miscsearch.t
new file mode 100644
index 00000000..45a19da9
--- /dev/null
+++ b/t/miscsearch.t
@@ -0,0 +1,54 @@
+#!perl -w
+# Copyright (C) 2020 all contributors
+# License: AGPL-3.0+
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::InboxWritable;
+require_mods(qw(Search::Xapian DBD::SQLite)); # presumably skips if absent
+use_ok 'PublicInbox::MiscSearch'; # query side under test
+use_ok 'PublicInbox::MiscIdx'; # indexing side under test
+
+my ($tmp, $for_destroy) = tmpdir(); # $for_destroy unused below; guard? confirm
+my $eidx = { xpfx => "$tmp/eidx", -no_fsync => 1 }; # mock ExtSearchIdx
+{ # description file: the text the phrase and word queries below target
+	mkdir "$tmp/v1" or BAIL_OUT "mkdir $!";
+	open my $fh, '>', "$tmp/v1/description" or BAIL_OUT "open: $!";
+	print $fh "Everything sucks this year\n" or BAIL_OUT "print $!";
+	close $fh or BAIL_OUT "close $!";
+}
+{ # index one v1 inbox between begin_txn and commit_txn
+	my $v1 = PublicInbox::InboxWritable->new({
+		inboxdir => "$tmp/v1",
+		name => 'hope', # target of the 'match name' query
+		address => [ 'nope@example.com' ], # queried in full and as 'nope'
+		indexlevel => 'basic',
+		version => 1,
+	});
+	$v1->init_inbox; # presumably creates the inbox on disk -- confirm
+	my $mi = PublicInbox::MiscIdx->new($eidx);
+	$mi->begin_txn;
+	$mi->index_ibx($v1);
+	$mi->commit_txn;
+}
+
+my $ms = PublicInbox::MiscSearch->new("$tmp/eidx/misc"); # $eidx->{xpfx}/misc
+my $mset = $ms->mset('"everything sucks today"'); # "today" is not in description
+is(scalar($mset->items), 0, 'no match on description phrase'); # items in scalar ctx
+
+$mset = $ms->mset('"everything sucks this year"'); # full phrase, lowercase "e"
+is(scalar($mset->items), 1, 'match phrase on description');
+
+$mset = $ms->mset('everything sucks'); # unquoted, lowercase words
+is(scalar($mset->items), 1, 'match words in description');
+
+$mset = $ms->mset('nope@example.com');
+is(scalar($mset->items), 1, 'match full address');
+
+$mset = $ms->mset('nope'); # local part of the address only
+is(scalar($mset->items), 1, 'match partial address');
+
+$mset = $ms->mset('hope');
+is(scalar($mset->items), 1, 'match name');
+
+done_testing;
-- 
cgit v1.2.3-24-ge0c7