From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] search: make xap_terms easier-to-use and use it more
Date: Wed, 23 Jun 2021 07:14:20 -0400 [thread overview]
Message-ID: <20210623111422.30182-2-e@80x24.org> (raw)
In-Reply-To: <20210623111422.30182-1-e@80x24.org>
This allows us to simplify callers throughout, and exceptions
can no longer be silently hidden. MiscSearch now uses xap_terms
for looking up eidx_key terms for a code reduction.
We also simplify LeiSearch->_msg_kw for runtime use by moving the
MsetIterator handling into the t/lei_store.t test case.
---
lib/PublicInbox/LeiSearch.pm | 16 +++++++---------
lib/PublicInbox/LeiXSearch.pm | 4 ++--
lib/PublicInbox/MiscSearch.pm | 23 +++++++----------------
lib/PublicInbox/Search.pm | 22 +++++++++-------------
lib/PublicInbox/SearchIdx.pm | 5 +++--
t/lei_store.t | 3 ++-
6 files changed, 30 insertions(+), 43 deletions(-)
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index d0963e92..06ea6299 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -19,16 +19,13 @@ sub num2docid ($$) {
}
sub _msg_kw { # retry_reopen callback
- my ($self, $num) = @_; # num_or_mitem
- my $xdb = $self->xdb; # set {nshard};
- my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
- my $kw = xap_terms('K', $xdb, $docid);
- warn "E: #$docid ($num): $@\n" if $@;
- wantarray ? sort(keys(%$kw)) : $kw;
+ my ($self, $num) = @_;
+ my $xdb = $self->xdb; # set {nshard} for num2docid;
+ xap_terms('K', $xdb, num2docid($self, $num));
}
-sub msg_keywords {
- my ($self, $num) = @_; # num_or_mitem
+sub msg_keywords { # array or hashref
+ my ($self, $num) = @_;
$self->retry_reopen(\&_msg_kw, $num);
}
@@ -138,7 +135,8 @@ sub kw_changed {
$docids //= [];
@$docids = sort { $a <=> $b } values %$xoids;
}
- my $cur_kw = msg_keywords($self, $docids->[0]);
+ my $cur_kw = eval { msg_keywords($self, $docids->[0]) };
+ die "E: #$docids->[0] keyword lookup failure: $@\n" if $@;
# RFC 5550 sec 5.9 on the $Forwarded keyword states:
# "Once set, the flag SHOULD NOT be cleared"
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index beb955bb..cac7fb7d 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -71,11 +71,11 @@ sub _mitem_kw { # retry_reopen callback
my $doc = $mitem->get_document;
my $kw = xap_terms('K', $doc);
$kw->{flagged} = 1 if $flagged;
- my $L = xap_terms('L', $doc);
+ my @L = xap_terms('L', $doc);
# we keep the empty {kw} array here to prevent expensive work in
# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
$smsg->{kw} = [ sort keys %$kw ];
- $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
+ $smsg->{L} = \@L if scalar(@L);
}
sub mitem_kw ($$$;$) {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index ead9a278..4e010453 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -5,7 +5,7 @@
package PublicInbox::MiscSearch;
use strict;
use v5.10.1;
-use PublicInbox::Search qw(retry_reopen int_val);
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
my $json;
# Xapian value columns:
@@ -90,15 +90,10 @@ sub ibx_matches_once { # retry_reopen callback
while (1) {
my $mset = misc_enquire_once($self, $qr, $opt);
for my $mi ($mset->items) {
- my $doc = $mi->get_document;
- my $end = $doc->termlist_end;
- my $cur = $doc->termlist_begin;
- $cur->skip_to('Q');
- if ($cur != $end) {
- my $ng = $cur->get_termname; # eidx_key
- $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
- if (my $ibx = $by_newsgroup->{$ng}) {
- $ret->{$ng} = $ibx;
+ my ($eidx_key) = xap_terms('Q', $mi->get_document);
+ if (defined($eidx_key)) {
+ if (my $ibx = $by_newsgroup->{$eidx_key}) {
+ $ret->{$eidx_key} = $ibx;
}
} else {
warn <<EOF;
@@ -144,12 +139,8 @@ sub inbox_data {
sub ibx_cache_load {
my ($doc, $cache) = @_;
- my $end = $doc->termlist_end;
- my $cur = $doc->termlist_begin;
- $cur->skip_to('Q');
- return if $cur == $end;
- my $eidx_key = $cur->get_termname;
- $eidx_key =~ s/\AQ// or return; # expired
+ my ($eidx_key) = xap_terms('Q', $doc);
+ return unless defined($eidx_key); # expired
my $ce = $cache->{$eidx_key} = {};
$ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
$ce->{-modified} = int_val($doc, $MODIFIED);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 59a5a3b0..7e19e616 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -557,19 +557,15 @@ sub get_pct ($) { # mset item
sub xap_terms ($$;@) {
my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
my %ret;
- eval {
- my $end = $xdb_or_doc->termlist_end(@docid);
- my $cur = $xdb_or_doc->termlist_begin(@docid);
- for (; $cur != $end; $cur++) {
- $cur->skip_to($pfx);
- last if $cur == $end;
- my $tn = $cur->get_termname;
- if (index($tn, $pfx) == 0) {
- $ret{substr($tn, length($pfx))} = undef;
- }
- }
- };
- \%ret;
+ my $end = $xdb_or_doc->termlist_end(@docid);
+ my $cur = $xdb_or_doc->termlist_begin(@docid);
+ for (; $cur != $end; $cur++) {
+ $cur->skip_to($pfx);
+ last if $cur == $end;
+ my $tn = $cur->get_termname;
+ $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+ }
+ wantarray ? sort(keys(%ret)) : \%ret;
}
1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f553eda6..65764cc8 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -435,8 +435,9 @@ sub add_xapian ($$$$) {
if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) {
my @x = @VMD_MAP;
while (my ($field, $pfx) = splice(@x, 0, 2)) {
- my $vals = xap_terms($pfx, $old);
- $doc->add_boolean_term($pfx.$_) for keys %$vals;
+ for my $term (xap_terms($pfx, $old)) {
+ $doc->add_boolean_term($pfx.$term);
+ }
}
}
$self->{xdb}->replace_document($smsg->{num}, $doc);
diff --git a/t/lei_store.t b/t/lei_store.t
index db94f6da..73b5c74d 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -31,7 +31,8 @@ $sto->done;
is($mset->size, 1, 'search works');
is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
'mset_to_artnums');
- my @kw = $es->msg_keywords(($mset->items)[0]);
+ my $mi = ($mset->items)[0];
+ my @kw = PublicInbox::Search::xap_terms('K', $mi->get_document);
is_deeply(\@kw, [], 'no flags');
}
next prev parent reply other threads:[~2021-06-23 11:14 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-23 11:14 [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14 ` Eric Wong [this message]
2021-06-23 11:14 ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
2021-06-23 11:14 ` [PATCH 3/3] www: do not warn on blank query parameters Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210623111422.30182-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).