# Extracted from public-inbox commit 721f7add0cf5ac6e6247483628e985742c09e45f
# (Eric Wong, Fri, 27 Nov 2020 09:52:48 +0000), lib/PublicInbox/MiscSearch.pm:
#
#   miscsearch: implement ->newsgroup_matches
#
#   This may be used to speed up newsgroup searches down-the-line,
#   but the grep perlop isn't too shabby, at the moment.
#
# Only the two subs added by that commit are reproduced below.  The diff
# context around them (the tail of `mset' and the head of `ibx_data_once')
# is incomplete in the patch and is therefore not repeated here.

# retry_reopen callback used by newsgroup_matches.
#
# Runs $qr through misc_enquire_once one page at a time and collects
# every hit whose `Q' term (the eidx_key), minus the `Q' prefix, is a
# key of %$by_newsgroup.
#
# Arguments:
#   $self         - the search object, handed to misc_enquire_once as-is
#   $qr           - parsed query, handed to misc_enquire_once as-is
#   $by_newsgroup - hashref of newsgroup name => inbox object
#
# Returns a hashref of newsgroup => $ibx holding only the matches.
sub ibx_matches_once {
    my ($self, $qr, $by_newsgroup) = @_;
    my $ret = {}; # newsgroup => $ibx of matches

    # Page size is twice the number of configured newsgroups.
    my $limit = scalar(keys %$by_newsgroup) * 2;

    # Nothing is configured, so nothing can match.  Returning here also
    # avoids an endless loop: with $limit == 0 the `$nr < $limit' exit
    # test below could never become true.
    return $ret if $limit == 0;

    my $opt = { limit => $limit, offset => 0, relevance => -1 };
    while (1) {
        my $mset = misc_enquire_once($self, $qr, $opt);
        for my $mi ($mset->items) {
            my $doc = $mi->get_document;
            my $end = $doc->termlist_end;
            my $cur = $doc->termlist_begin;
            $cur->skip_to('Q');
            if ($cur != $end) {
                my $ng = $cur->get_termname; # eidx_key
                # skip_to may stop on a later term that lacks the prefix
                $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
                if (my $ibx = $by_newsgroup->{$ng}) {
                    $ret->{$ng} = $ibx;
                }
            } else {
                # NOTE(review): the original heredoc was damaged when the
                # patch was extracted; only "...get_docid} has no `Q'
                # (eidx_key) term" survived, so the leading words of this
                # message are a reconstruction -- confirm against upstream.
                warn 'BUG: docid=', $mi->get_docid,
                    " has no `Q' (eidx_key) term\n";
            }
        }
        my $nr = $mset->size;
        return $ret if $nr < $limit; # short page: that was the last one
        $opt->{offset} += $nr;
    }
}

# returns a newsgroup => PublicInbox::Inbox mapping
#
# Arguments:
#   $self   - the search object; its query parser is created on first
#             use via mi_qp_new and cached in $self->{qp}
#   $qs     - query string; ' type:inbox' is appended to the local copy
#   $pi_cfg - config object whose {-by_newsgroup} hashref is handed to
#             ibx_matches_once
sub newsgroup_matches {
    my ($self, $qs, $pi_cfg) = @_;
    my $qp = $self->{qp} //= mi_qp_new($self);
    $qs .= ' type:inbox';
    my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
    retry_reopen($self, \&ibx_matches_once, $qr,
                 $pi_cfg->{-by_newsgroup});
}