author Eric Wong <e@80x24.org> 2020-11-23 07:05:51 +0000
committer Eric Wong <e@80x24.org> 2020-11-24 05:03:55 +0000
commit 58e764d179131b0fba8590915e2528248be26329 (patch)
tree 18a24739cb8bcc1ec4704772475e8e2dcb225453 /lib/PublicInbox/SearchIdx.pm
parent d792a6e8029e4fe56977c5c5d76bae8fe8836cc4 (diff)
This will be used to index and search Inbox objects and perhaps
individual git repositories/epochs for grokmirror manifest.js.gz
generation.  There is no sharding planned for this at the moment
since inbox count should remain low (~100K to 1M) compared to
message count.

Folding this into the existing sharded DBs could be possible;
but would likely increase query and maintenance costs, as well
as development complexity.  So we'll use a few more inodes and
FDs at runtime, instead.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
1 file changed, 4 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 6ff2cf94..18390602 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -22,9 +22,10 @@ use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn nodatacow_dir);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack);
+our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
+        index_text term_generator add_val);
 my $X = \%PublicInbox::Search::X;
 our $DB_NO_SYNC = 0;
 our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
 use constant DEBUG => !!$ENV{DEBUG};
@@ -154,7 +155,7 @@ sub term_generator ($) { # write-only
         $self->{term_generator} //= do {
                 my $tg = $X->{TermGenerator}->new;
-                $tg->set_stemmer($self->stemmer);
+                $tg->set_stemmer(PublicInbox::Search::stemmer($self));