user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] search: make xap_terms easier-to-use and use it more
Date: Wed, 23 Jun 2021 07:14:20 -0400	[thread overview]
Message-ID: <20210623111422.30182-2-e@80x24.org> (raw)
In-Reply-To: <20210623111422.30182-1-e@80x24.org>

This allows us to simplify callers throughout, and exceptions
can no longer be silently hidden.  MiscSearch now uses xap_terms
for looking up eidx_key terms for a code reduction.

We also simplify LeiSearch->_msg_kw for runtime use by moving the
MsetIterator handling into the t/lei_store.t test case.
---
 lib/PublicInbox/LeiSearch.pm  | 16 +++++++---------
 lib/PublicInbox/LeiXSearch.pm |  4 ++--
 lib/PublicInbox/MiscSearch.pm | 23 +++++++----------------
 lib/PublicInbox/Search.pm     | 22 +++++++++-------------
 lib/PublicInbox/SearchIdx.pm  |  5 +++--
 t/lei_store.t                 |  3 ++-
 6 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index d0963e92..06ea6299 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -19,16 +19,13 @@ sub num2docid ($$) {
 }
 
 sub _msg_kw { # retry_reopen callback
-	my ($self, $num) = @_; # num_or_mitem
-	my $xdb = $self->xdb; # set {nshard};
-	my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
-	my $kw = xap_terms('K', $xdb, $docid);
-	warn "E: #$docid ($num): $@\n" if $@;
-	wantarray ? sort(keys(%$kw)) : $kw;
+	my ($self, $num) = @_;
+	my $xdb = $self->xdb; # set {nshard} for num2docid;
+	xap_terms('K', $xdb, num2docid($self, $num));
 }
 
-sub msg_keywords {
-	my ($self, $num) = @_; # num_or_mitem
+sub msg_keywords { # array or hashref
+	my ($self, $num) = @_;
 	$self->retry_reopen(\&_msg_kw, $num);
 }
 
@@ -138,7 +135,8 @@ sub kw_changed {
 		$docids //= [];
 		@$docids = sort { $a <=> $b } values %$xoids;
 	}
-	my $cur_kw = msg_keywords($self, $docids->[0]);
+	my $cur_kw = eval { msg_keywords($self, $docids->[0]) };
+	die "E: #$docids->[0] keyword lookup failure: $@\n" if $@;
 
 	# RFC 5550 sec 5.9 on the $Forwarded keyword states:
 	# "Once set, the flag SHOULD NOT be cleared"
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index beb955bb..cac7fb7d 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -71,11 +71,11 @@ sub _mitem_kw { # retry_reopen callback
 	my $doc = $mitem->get_document;
 	my $kw = xap_terms('K', $doc);
 	$kw->{flagged} = 1 if $flagged;
-	my $L = xap_terms('L', $doc);
+	my @L = xap_terms('L', $doc);
 	# we keep the empty {kw} array here to prevent expensive work in
 	# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
 	$smsg->{kw} = [ sort keys %$kw ];
-	$smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
+	$smsg->{L} = \@L if scalar(@L);
 }
 
 sub mitem_kw ($$$;$) {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index ead9a278..4e010453 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -5,7 +5,7 @@
 package PublicInbox::MiscSearch;
 use strict;
 use v5.10.1;
-use PublicInbox::Search qw(retry_reopen int_val);
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
 my $json;
 
 # Xapian value columns:
@@ -90,15 +90,10 @@ sub ibx_matches_once { # retry_reopen callback
 	while (1) {
 		my $mset = misc_enquire_once($self, $qr, $opt);
 		for my $mi ($mset->items) {
-			my $doc = $mi->get_document;
-			my $end = $doc->termlist_end;
-			my $cur = $doc->termlist_begin;
-			$cur->skip_to('Q');
-			if ($cur != $end) {
-				my $ng = $cur->get_termname; # eidx_key
-				$ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
-				if (my $ibx = $by_newsgroup->{$ng}) {
-					$ret->{$ng} = $ibx;
+			my ($eidx_key) = xap_terms('Q', $mi->get_document);
+			if (defined($eidx_key)) {
+				if (my $ibx = $by_newsgroup->{$eidx_key}) {
+					$ret->{$eidx_key} = $ibx;
 				}
 			} else {
 				warn <<EOF;
@@ -144,12 +139,8 @@ sub inbox_data {
 
 sub ibx_cache_load {
 	my ($doc, $cache) = @_;
-	my $end = $doc->termlist_end;
-	my $cur = $doc->termlist_begin;
-	$cur->skip_to('Q');
-	return if $cur == $end;
-	my $eidx_key = $cur->get_termname;
-	$eidx_key =~ s/\AQ// or return; # expired
+	my ($eidx_key) = xap_terms('Q', $doc);
+	return unless defined($eidx_key); # expired
 	my $ce = $cache->{$eidx_key} = {};
 	$ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
 	$ce->{-modified} = int_val($doc, $MODIFIED);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 59a5a3b0..7e19e616 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -557,19 +557,15 @@ sub get_pct ($) { # mset item
 sub xap_terms ($$;@) {
 	my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
 	my %ret;
-	eval {
-		my $end = $xdb_or_doc->termlist_end(@docid);
-		my $cur = $xdb_or_doc->termlist_begin(@docid);
-		for (; $cur != $end; $cur++) {
-			$cur->skip_to($pfx);
-			last if $cur == $end;
-			my $tn = $cur->get_termname;
-			if (index($tn, $pfx) == 0) {
-				$ret{substr($tn, length($pfx))} = undef;
-			}
-		}
-	};
-	\%ret;
+	my $end = $xdb_or_doc->termlist_end(@docid);
+	my $cur = $xdb_or_doc->termlist_begin(@docid);
+	for (; $cur != $end; $cur++) {
+		$cur->skip_to($pfx);
+		last if $cur == $end;
+		my $tn = $cur->get_termname;
+		$ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+	}
+	wantarray ? sort(keys(%ret)) : \%ret;
 }
 
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f553eda6..65764cc8 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -435,8 +435,9 @@ sub add_xapian ($$$$) {
 	if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) {
 		my @x = @VMD_MAP;
 		while (my ($field, $pfx) = splice(@x, 0, 2)) {
-			my $vals = xap_terms($pfx, $old);
-			$doc->add_boolean_term($pfx.$_) for keys %$vals;
+			for my $term (xap_terms($pfx, $old)) {
+				$doc->add_boolean_term($pfx.$term);
+			}
 		}
 	}
 	$self->{xdb}->replace_document($smsg->{num}, $doc);
diff --git a/t/lei_store.t b/t/lei_store.t
index db94f6da..73b5c74d 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -31,7 +31,8 @@ $sto->done;
 	is($mset->size, 1, 'search works');
 	is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
 		'mset_to_artnums');
-	my @kw = $es->msg_keywords(($mset->items)[0]);
+	my $mi = ($mset->items)[0];
+	my @kw = PublicInbox::Search::xap_terms('K', $mi->get_document);
 	is_deeply(\@kw, [], 'no flags');
 }
 

  reply	other threads:[~2021-06-23 11:14 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-23 11:14 [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14 ` Eric Wong [this message]
2021-06-23 11:14 ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
2021-06-23 11:14 ` [PATCH 3/3] www: do not warn on blank query parameters Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210623111422.30182-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).