From 58e764d179131b0fba8590915e2528248be26329 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 23 Nov 2020 07:05:51 +0000 Subject: miscsearch: a new Xapian sub-DB for extindex This will be used to index and search Inbox objects and perhaps individual git repositories/epochs for grokmirror manifest.js.gz generation. There is no sharding planned for this at the moment since inbox count should remain low (~100K to 1M) compared to message count. Folding this into the existing sharded DBs could be possible; but would likely increase query and maintenance costs, as well as development complexity. So we'll use a few more inodes and FDs at runtime, instead. --- lib/PublicInbox/MiscSearch.pm | 79 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 lib/PublicInbox/MiscSearch.pm (limited to 'lib/PublicInbox/MiscSearch.pm') diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm new file mode 100644 index 00000000..8beb8349 --- /dev/null +++ b/lib/PublicInbox/MiscSearch.pm @@ -0,0 +1,79 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# read-only counterpart to MiscIdx +package PublicInbox::MiscSearch; +use strict; +use v5.10.1; +use PublicInbox::Search qw(retry_reopen); + +# Xapian value columns: +our $MODIFIED = 0; + +# avoid conflicting with message Search::prob_prefix for UI/UX reasons +my %PROB_PREFIX = ( + description => 'S', # $INBOX_DIR/description + address => 'A', + listid => 'XLISTID', + url => 'XURL', + infourl => 'XINFOURL', + name => 'XNAME', + '' => 'S A XLISTID XNAME XURL XINFOURL' +); + +sub new { + my ($class, $dir) = @_; + bless { + xdb => $PublicInbox::Search::X{Database}->new($dir) + }, $class; +} + +# read-only +sub mi_qp_new ($) { + my ($self) = @_; + my $xdb = $self->{xdb}; + my $qp = $PublicInbox::Search::X{QueryParser}->new; + $qp->set_default_op(PublicInbox::Search::OP_AND()); + $qp->set_database($xdb); + $qp->set_stemmer(PublicInbox::Search::stemmer($self)); + $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME()); + my $cb = $qp->can('set_max_wildcard_expansion') // + $qp->can('set_max_expansion'); # Xapian 1.5.0+ + $cb->($qp, 100); + $cb = $qp->can('add_valuerangeprocessor') // + $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ + while (my ($name, $prefix) = each %PROB_PREFIX) { + $qp->add_prefix($name, $_) for split(/ /, $prefix); + } + $qp->add_boolean_prefix('type', 'T'); + $qp; +} + +sub misc_enquire_once { # retry_reopen callback + my ($self, $qr, $opt) = @{$_[0]}; + my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb}); + $eq->set_query($qr); + my $desc = !$opt->{asc}; + my $rel = $opt->{relevance} // 0; + if ($rel == -1) { # ORDER BY docid/UID + $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); + $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); + } elsif ($rel) { + $eq->set_sort_by_relevance_then_value($MODIFIED, $desc); + } else { + $eq->set_sort_by_value_then_relevance($MODIFIED, $desc); + } + $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200); +} + +sub mset { + my ($self, $qs, $opt) = @_; + $opt ||= {}; + my $qp = $self->{qp} //= mi_qp_new($self); + $qs = 'type:inbox' if $qs eq ''; + my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS); + $opt->{relevance} = 1 unless exists $opt->{relevance}; + retry_reopen($self, \&misc_enquire_once, [ $self, $qr, $opt ]); +} + +1; -- cgit v1.2.3-24-ge0c7