about summary refs log tree commit homepage
path: root/lib/PublicInbox/Search.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-12-08 03:54:33 +0000
committer Eric Wong <e@80x24.org> 2023-12-09 02:03:47 +0000
commit 305e237b9e22ffed1b009f8f2b12bf99e8856748 (patch)
tree ed5f2b4e3beb4d0d048088dc99483ac332f3841a /lib/PublicInbox/Search.pm
parent c5515401eb4f51998c8444afee9e8f6db1e8d514 (diff)
download public-inbox-305e237b9e22ffed1b009f8f2b12bf99e8856748.tar.gz
Xapian has always sorted termlist iterators, so we now:

1) break out of the iterator loop early on non-matches
2) avoid doing sorting ourselves

As a result, we'll also favor the wantarray forms of xap_terms
and all_terms to preserve sort order in most cases.

Confirmed by the Xapian maintainer: <20231201184844.GO4059@survex.com>

Link: https://lists.xapian.org/pipermail/xapian-discuss/2023-December/010013.html
Diffstat (limited to 'lib/PublicInbox/Search.pm')
-rw-r--r-- lib/PublicInbox/Search.pm 19
1 files changed, 10 insertions, 9 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 8ef17d58..678c8c5d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -614,16 +614,16 @@ sub get_pct ($) { # mset item
 
 sub xap_terms ($$;@) {
         my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
-        my %ret;
         my $end = $xdb_or_doc->termlist_end(@docid);
         my $cur = $xdb_or_doc->termlist_begin(@docid);
+        $cur->skip_to($pfx);
+        my (@ret, $tn);
+        my $pfxlen = length($pfx);
         for (; $cur != $end; $cur++) {
-                $cur->skip_to($pfx);
-                last if $cur == $end;
-                my $tn = $cur->get_termname;
-                $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+                $tn = $cur->get_termname;
+                index($tn, $pfx) ? last : push(@ret, substr($tn, $pfxlen));
         }
-        wantarray ? sort(keys(%ret)) : \%ret;
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 # get combined docid from over.num:
@@ -638,11 +638,12 @@ sub all_terms {
         my ($self, $pfx) = @_;
         my $cur = xdb($self)->allterms_begin($pfx);
         my $end = $self->{xdb}->allterms_end($pfx);
-        my %ret;
+        my $pfxlen = length($pfx);
+        my @ret;
         for (; $cur != $end; $cur++) {
-                $ret{substr($cur->get_termname, length($pfx))} = undef;
+                push @ret, substr($cur->get_termname, $pfxlen);
         }
-        wantarray ? (sort keys %ret) : \%ret;
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 sub xh_args { # prep getopt args to feed to xap_helper.h socket