diff options
author | Eric Wong <e@80x24.org> | 2020-11-23 07:05:51 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-11-24 05:03:55 +0000 |
commit | 58e764d179131b0fba8590915e2528248be26329 (patch) | |
tree | 18a24739cb8bcc1ec4704772475e8e2dcb225453 /lib/PublicInbox/MiscSearch.pm | |
parent | d792a6e8029e4fe56977c5c5d76bae8fe8836cc4 (diff) | |
download | public-inbox-58e764d179131b0fba8590915e2528248be26329.tar.gz |
This will be used to index and search Inbox objects and perhaps individual git repositories/epochs for grokmirror manifest.js.gz generation. There is no sharding planned for this at the moment since inbox count should remain low (~100K to 1M) compared to message count. Folding this into the existing sharded DBs could be possible; but would likely increase query and maintenance costs, as well as development complexity. So we'll use a few more inodes and FDs at runtime, instead.
Diffstat (limited to 'lib/PublicInbox/MiscSearch.pm')
-rw-r--r-- | lib/PublicInbox/MiscSearch.pm | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm new file mode 100644 index 00000000..8beb8349 --- /dev/null +++ b/lib/PublicInbox/MiscSearch.pm @@ -0,0 +1,79 @@ +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# read-only counterpart to MiscIdx +package PublicInbox::MiscSearch; +use strict; +use v5.10.1; +use PublicInbox::Search qw(retry_reopen); + +# Xapian value columns: +our $MODIFIED = 0; + +# avoid conflicting with message Search::prob_prefix for UI/UX reasons +my %PROB_PREFIX = ( + description => 'S', # $INBOX_DIR/description + address => 'A', + listid => 'XLISTID', + url => 'XURL', + infourl => 'XINFOURL', + name => 'XNAME', + '' => 'S A XLISTID XNAME XURL XINFOURL' +); + +sub new { + my ($class, $dir) = @_; + bless { + xdb => $PublicInbox::Search::X{Database}->new($dir) + }, $class; +} + +# read-only +sub mi_qp_new ($) { + my ($self) = @_; + my $xdb = $self->{xdb}; + my $qp = $PublicInbox::Search::X{QueryParser}->new; + $qp->set_default_op(PublicInbox::Search::OP_AND()); + $qp->set_database($xdb); + $qp->set_stemmer(PublicInbox::Search::stemmer($self)); + $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME()); + my $cb = $qp->can('set_max_wildcard_expansion') // + $qp->can('set_max_expansion'); # Xapian 1.5.0+ + $cb->($qp, 100); + $cb = $qp->can('add_valuerangeprocessor') // + $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ + while (my ($name, $prefix) = each %PROB_PREFIX) { + $qp->add_prefix($name, $_) for split(/ /, $prefix); + } + $qp->add_boolean_prefix('type', 'T'); + $qp; +} + +sub misc_enquire_once { # retry_reopen callback + my ($self, $qr, $opt) = @{$_[0]}; + my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb}); + $eq->set_query($qr); + my $desc = !$opt->{asc}; + my $rel = $opt->{relevance} // 0; + if ($rel == -1) { # ORDER BY docid/UID + $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); + $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); + } elsif ($rel) { + $eq->set_sort_by_relevance_then_value($MODIFIED, $desc); + } else { + $eq->set_sort_by_value_then_relevance($MODIFIED, $desc); + } + $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200); +} + +sub mset { + my ($self, $qs, $opt) = @_; + $opt ||= {}; + my $qp = $self->{qp} //= mi_qp_new($self); + $qs = 'type:inbox' if $qs eq ''; + my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS); + $opt->{relevance} = 1 unless exists $opt->{relevance}; + retry_reopen($self, \&misc_enquire_once, [ $self, $qr, $opt ]); +} + +1; |