user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/2] search-related cleanups
@ 2025-03-05 23:26 Eric Wong
  2025-03-05 23:26 ` [PATCH 1/2] search: use common do_enquire for MiscSearch Eric Wong
  2025-03-05 23:26 ` [PATCH 2/2] search: unoverload {relevance} in options Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2025-03-05 23:26 UTC (permalink / raw)
  To: meta

2/2 is a long overdue cleanup to the insanity of overloading the
{relevance} field.

Eric Wong (2):
  search: use common do_enquire for MiscSearch
  search: unoverload {relevance} in options

 lib/PublicInbox/CodeSearch.pm |  3 ++-
 lib/PublicInbox/ExtMsg.pm     |  2 +-
 lib/PublicInbox/IMAP.pm       |  3 ++-
 lib/PublicInbox/Isearch.pm    |  2 +-
 lib/PublicInbox/LeiQuery.pm   |  4 ++--
 lib/PublicInbox/LeiUp.pm      |  2 +-
 lib/PublicInbox/Mbox.pm       |  2 +-
 lib/PublicInbox/MiscSearch.pm | 23 ++++----------------
 lib/PublicInbox/Search.pm     | 41 ++++++++++++-----------------------
 lib/PublicInbox/SearchView.pm |  2 +-
 lib/PublicInbox/WwwListing.pm |  2 +-
 lib/PublicInbox/XapHelper.pm  | 17 ++++++++++++---
 t/lei_xsearch.t               |  2 +-
 13 files changed, 45 insertions(+), 60 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] search: use common do_enquire for MiscSearch
  2025-03-05 23:26 [PATCH 0/2] search-related cleanups Eric Wong
@ 2025-03-05 23:26 ` Eric Wong
  2025-03-05 23:26 ` [PATCH 2/2] search: unoverload {relevance} in options Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2025-03-05 23:26 UTC (permalink / raw)
  To: meta

We can consistently use a {sort_col} search option as in
xap_helper to allow do_enquire to be more generic across
searches.  This lets us avoid the need for a specialized
implementation in MiscSearch.
---
 lib/PublicInbox/CodeSearch.pm |  3 ++-
 lib/PublicInbox/MiscSearch.pm | 21 +++------------------
 lib/PublicInbox/Search.pm     |  8 +++++---
 3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index 2200262d..a80fd4b7 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -176,7 +176,8 @@ sub mset {
 	$qry = $PublicInbox::Search::X{Query}->new(
 				PublicInbox::Search::OP_FILTER(),
 				$qry, 'T'.'c');
-	$self->do_enquire($qry, $opt, CT);
+	$opt->{sort_col} = CT;
+	$self->do_enquire($qry, $opt);
 }
 
 sub roots2paths { # for diagnostics
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index ad7ebeb4..f244e664 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -55,23 +55,6 @@ sub mi_qp_new ($) {
 	$qp;
 }
 
-sub misc_enquire_once { # retry_reopen callback
-	my ($self, $qr, $opt) = @_;
-	my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
-	$eq->set_query($qr);
-        my $desc = !$opt->{asc};
-	my $rel = $opt->{relevance} // 0;
-	if ($rel == -1) { # ORDER BY docid
-		$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
-		$eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
-	} elsif ($rel) {
-		$eq->set_sort_by_relevance_then_value($MODIFIED, $desc);
-	} else {
-		$eq->set_sort_by_value_then_relevance($MODIFIED, $desc);
-	}
-	$eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
-}
-
 sub mset {
 	my ($self, $qs, $opt) = @_;
 	$opt ||= {};
@@ -80,7 +63,9 @@ sub mset {
 	$qs = 'type:inbox' if $qs eq '';
 	my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
 	$opt->{relevance} = 1 unless exists $opt->{relevance};
-	retry_reopen($self, \&misc_enquire_once, $qr, $opt);
+	$opt->{sort_col} = $MODIFIED;
+	$opt->{limit} ||= 200;
+	PublicInbox::Search::do_enquire($self, $qr, $opt);
 }
 
 sub ibx_data_once {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index afcaabc7..9b35c8d4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -454,7 +454,8 @@ sub mset {
 		$qry = $X{Query}->new(OP_FILTER(), $qry,
 			$X{Query}->new(OP_VALUE_RANGE(), THREADID, $tid, $tid));
 	}
-	do_enquire($self, $qry, $opt, TS);
+	$opt->{sort_col} //= TS;
+	do_enquire($self, $qry, $opt);
 }
 
 my %QPMETHOD_2_SYM = (add_prefix => ':', add_boolean_prefix => '=');
@@ -508,10 +509,11 @@ sub async_mset {
 	}
 }
 
-sub do_enquire { # shared with CodeSearch
-	my ($self, $qry, $opt, $col) = @_;
+sub do_enquire { # shared with CodeSearch and MiscSearch
+	my ($self, $qry, $opt) = @_;
 	my $enq = $X{Enquire}->new(xdb($self));
 	$enq->set_query($qry);
+	my $col = $opt->{sort_col} // TS;
 	my $rel = $opt->{relevance} // 0;
 	if ($rel == -2) { # ORDER BY docid/UID (highest first)
 		$enq->set_weighting_scheme($X{BoolWeight}->new);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] search: unoverload {relevance} in options
  2025-03-05 23:26 [PATCH 0/2] search-related cleanups Eric Wong
  2025-03-05 23:26 ` [PATCH 1/2] search: use common do_enquire for MiscSearch Eric Wong
@ 2025-03-05 23:26 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2025-03-05 23:26 UTC (permalink / raw)
  To: meta

Take the lead from xap_helper and split out handling of {asc}
and {sort_col} fields while keeping the {relevance} field as a
boolean.  As with xap_helper, {sort_col} < 0 means
Xapian::BoolWeight will be used for docid ordering.
---
 lib/PublicInbox/ExtMsg.pm     |  2 +-
 lib/PublicInbox/IMAP.pm       |  3 ++-
 lib/PublicInbox/Isearch.pm    |  2 +-
 lib/PublicInbox/LeiQuery.pm   |  4 ++--
 lib/PublicInbox/LeiUp.pm      |  2 +-
 lib/PublicInbox/Mbox.pm       |  2 +-
 lib/PublicInbox/MiscSearch.pm |  2 +-
 lib/PublicInbox/Search.pm     | 33 +++++++++------------------------
 lib/PublicInbox/SearchView.pm |  2 +-
 lib/PublicInbox/WwwListing.pm |  2 +-
 lib/PublicInbox/XapHelper.pm  | 17 ++++++++++++++---
 t/lei_xsearch.t               |  2 +-
 12 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index d994b783..dd7240fb 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -33,7 +33,7 @@ sub search_partial ($$) {
 	my ($ibx, $mid) = @_;
 	return if length($mid) < $MIN_PARTIAL_LEN;
 	my $srch = $ibx->isrch or return;
-	my $opt = { limit => PARTIAL_MAX, relevance => -1 };
+	my $opt = { limit => PARTIAL_MAX, sort_col => -1, asc => 1 };
 	my @try = ("m:$mid*");
 	my $chop = $mid;
 	if ($chop =~ s/(\W+)(\w*)\z//) {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index e72c646c..c3b0b41e 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1117,7 +1117,8 @@ sub search_common {
 		my $srch = $self->{ibx}->isrch or
 			return "$tag BAD search not available for mailbox\r\n";
 		my $opt = {
-			relevance => -1,
+			sort_col => -1,
+			asc => 1,
 			limit => UID_SLICE,
 			uid_range => $range_info
 		};
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 5f22c2f2..8fe3c54a 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -83,7 +83,7 @@ sub mset_to_artnums {
 	my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
 	my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
 	my $qmarks = join(',', map { '?' } @$docids);
-	if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
+	if ($opt->{asc} && ($opt->{sort_col} // 0) < 0) { # docid ascending
 		my $range = '';
 		my @r;
 		if (my $r = $opt->{uid_range}) {
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index eadf811f..787cd3a5 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -45,7 +45,7 @@ sub _start_query { # used by "lei q" and "lei up"
 	};
 
 	# descending docid order is cheapest, MUA controls sorting order
-	$self->{mset_opt}->{relevance} //= -2 if $l2m || $opt->{threads};
+	$self->{mset_opt}->{sort_col} = -1 if $l2m || $opt->{threads};
 
 	my $tot = $self->{mset_opt}->{total} //= $self->{opt}->{limit} // 10000;
 	$self->{mset_opt}->{limit} = $tot > 10000 ? 10000 : $tot;
@@ -138,7 +138,7 @@ sub lei_q {
 		if ($sort eq 'relevance') {
 			$mset_opt{relevance} = 1;
 		} elsif ($sort eq 'docid') {
-			$mset_opt{relevance} = $mset_opt{asc} ? -1 : -2;
+			$mset_opt{sort_col} = -1;
 		} elsif ($sort =~ /\Areceived(?:-?[aA]t)?\z/) {
 			# the default
 		} else {
diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm
index 9931f017..3fe7841c 100644
--- a/lib/PublicInbox/LeiUp.pm
+++ b/lib/PublicInbox/LeiUp.pm
@@ -25,7 +25,7 @@ sub up1 ($$) {
 	my $cli_exclude = delete $lei->{opt}->{exclude};
 	my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return;
 	my $f = $lss->{'-f'};
-	my $mset_opt = $lei->{mset_opt} = { relevance => -2 };
+	my $mset_opt = $lei->{mset_opt} = { sort_col => -1 };
 	my $q = $lss->{-cfg}->get_all('lei.q') //
 				die("lei.q unset in $f (out=$out)\n");
 	my $lse = $lei->{lse} // die 'BUG: {lse} missing';
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index a5974a39..084eeb61 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -249,7 +249,7 @@ sub mbox_all {
 	return mbox_all_ids($ctx) if $qstr !~ /\S/;
 	my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
 		return PublicInbox::WWW::need($ctx, 'Search');
-	my $opt = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
+	my $opt = $ctx->{qopts} = { sort_col => -1 }; # ORDER BY docid DESC
 
 	# {threadid} limits results to a given thread
 	# {threads} collapses results from messages in the same thread,
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index f244e664..663c5352 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -116,7 +116,7 @@ sub ibx_cache_load {
 
 sub _nntpd_cache_load { # retry_reopen callback
 	my ($self) = @_;
-	my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 };
+	my $opt = { limit => $self->{xdb}->get_doccount * 10, sort_col => -1 };
 	my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
 	my $cache = {};
 	for my $it ($mset->items) {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 9b35c8d4..4fa02885 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -466,20 +466,9 @@ sub xh_opt ($$) {
 	my @ret;
 	push @ret, '-o', $opt->{offset} if $opt->{offset};
 	push @ret, '-m', $lim;
-	my $rel = $opt->{relevance} // 0;
-	if ($rel == -2) { # ORDER BY docid/UID (highest first)
-		push @ret, '-k', '-1';
-	} elsif ($rel == -1) { # ORDER BY docid/UID (lowest first)
-		push @ret, '-k', '-1';
-		push @ret, '-a';
-	} elsif ($rel == 0) {
-		push @ret, '-k', $opt->{sort_col} // TS;
-		push @ret, '-a' if $opt->{asc};
-	} else { # rel > 0
-		push @ret, '-r';
-		push @ret, '-k', $opt->{sort_col} // TS;
-		push @ret, '-a' if $opt->{asc};
-	}
+	push @ret, '-r' if $opt->{relevance};
+	push @ret, '-k', $opt->{sort_col} // TS;
+	push @ret, '-a' if $opt->{asc};
 	push @ret, '-t' if $opt->{threads};
 	push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
 	push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
@@ -514,19 +503,15 @@ sub do_enquire { # shared with CodeSearch and MiscSearch
 	my $enq = $X{Enquire}->new(xdb($self));
 	$enq->set_query($qry);
 	my $col = $opt->{sort_col} // TS;
-	my $rel = $opt->{relevance} // 0;
-	if ($rel == -2) { # ORDER BY docid/UID (highest first)
-		$enq->set_weighting_scheme($X{BoolWeight}->new);
-		$enq->set_docid_order($ENQ_DESCENDING);
-	} elsif ($rel == -1) { # ORDER BY docid/UID (lowest first)
+	if ($col < 0) {
 		$enq->set_weighting_scheme($X{BoolWeight}->new);
-		$enq->set_docid_order($ENQ_ASCENDING);
-	} elsif ($rel == 0) {
-		$enq->set_sort_by_value_then_relevance($col, !$opt->{asc});
-	} else { # rel > 0
+		$enq->set_docid_order(
+			$opt->{asc} ? $ENQ_ASCENDING : $ENQ_DESCENDING);
+	} elsif ($opt->{relevance}) {
 		$enq->set_sort_by_relevance_then_value($col, !$opt->{asc});
+	} else {
+		$enq->set_sort_by_value_then_relevance($col, !$opt->{asc});
 	}
-
 	# `lei q -t / --threads' or JMAP collapseThreads; but don't collapse
 	# on `-tt' ({threads} > 1) which sets the Flagged|Important keyword
 	(($opt->{threads} // 0) == 1 && has_threadid($self)) and
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 8464ae1b..5746e434 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -50,7 +50,7 @@ sub sres_top_html {
 	my $opt = {
 		limit => $q->{l},
 		offset => $o,
-		relevance => $q->{r},
+		relevance => !!$q->{r},
 		threads => $q->{t},
 		asc => $asc,
 	};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 5681377f..101da9b7 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -93,7 +93,7 @@ sub add_misc_ibx { # MiscSearch->retry_reopen callback
 		$asc = 1;
 		$o = -($o + 1); # so [-1] is the last element, like Perl lists
 	}
-	my $r = $q->{r};
+	my $r = !!$q->{r};
 	my $opt = {
 		offset => $o,
 		asc => $asc,
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index d3c98f03..e98553c8 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -80,7 +80,12 @@ sub cmd_dump_ibx {
 	$req->{A} or die 'dump_ibx requires -A PREFIX';
 	term_length_extract $req;
 	my $max = $req->{'m'} // $req->{srch}->{xdb}->get_doccount;
-	my $opt = { relevance => -1, limit => $max, offset => $req->{o} // 0 };
+	my $opt = {
+		sort_col => -1,
+		asc => 1,
+		limit => $max,
+		offset => $req->{o} // 0
+	};
 	$opt->{eidx_key} = $req->{O} if defined $req->{O};
 	my $mset = $req->{srch}->mset($qry_str, $opt);
 	$req->{0}->autoflush(1);
@@ -131,8 +136,12 @@ sub cmd_dump_roots {
 	while (defined(my $oidhex = shift @x)) {
 		$root2off->{$oidhex} = shift @x;
 	}
-	my $opt = { relevance => -1, limit => $req->{'m'},
-			offset => $req->{o} // 0 };
+	my $opt = {
+		sort_col => -1,
+		asc => 1,
+		limit => $req->{'m'},
+		offset => $req->{o} // 0
+	};
 	my $mset = $req->{srch}->mset($qry_str, $opt);
 	$req->{0}->autoflush(1);
 	$req->{wbuf} = '';
@@ -159,8 +168,10 @@ sub cmd_mset { # to be used by WWW + IMAP
 	$qry_str // die 'usage: mset [OPTIONS] QRY_STR';
 	my $opt = { limit => $req->{'m'}, offset => $req->{o} // 0 };
 	$opt->{relevance} = 1 if $req->{r};
+	$opt->{asc} = 1 if $req->{a};
 	$opt->{threads} = 1 if defined $req->{t};
 	$opt->{git_dir} = $req->{g} if defined $req->{g};
+	$opt->{sort_col} = $req->{k} if defined $req->{k};
 	$opt->{eidx_key} = $req->{O} if defined $req->{O};
 	my @uid_range = @$req{qw(u U)};
 	$opt->{uid_range} = \@uid_range if grep(defined, @uid_range) == 2;
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
index 977fb1e9..879256dc 100644
--- a/t/lei_xsearch.t
+++ b/t/lei_xsearch.t
@@ -61,7 +61,7 @@ for my $mi ($mset->items) {
 }
 is(scalar(@msgs), $nr, 'smsgs retrieved for all');
 
-$mset = $lxs->mset('z:1..', { relevance => -2, limit => 1 });
+$mset = $lxs->mset('z:1..', { sort_col => -1, limit => 1 });
 is($mset->size, 1, 'one result');
 
 my @ibxish = $lxs->locals;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-03-05 23:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-05 23:26 [PATCH 0/2] search-related cleanups Eric Wong
2025-03-05 23:26 ` [PATCH 1/2] search: use common do_enquire for MiscSearch Eric Wong
2025-03-05 23:26 ` [PATCH 2/2] search: unoverload {relevance} in options Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).