about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- lib/PublicInbox/LeiInspect.pm | 1
-rw-r--r-- lib/PublicInbox/Search.pm | 19
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 13
-rw-r--r-- lib/PublicInbox/xh_cidx.h | 15
-rw-r--r-- lib/PublicInbox/xh_mset.h | 2
5 files changed, 22 insertions, 28 deletions
diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
index d4ad03eb..88d7949c 100644
--- a/lib/PublicInbox/LeiInspect.pm
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -97,7 +97,6 @@ sub _inspect_doc ($$) {
                 my $term = ($1 // '');
                 push @{$ent->{terms}->{$term}}, $tn;
         }
-        @$_ = sort(@$_) for values %{$ent->{terms} // {}};
         $cur = $doc->values_begin;
         $end = $doc->values_end;
         for (; $cur != $end; $cur++) {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 8ef17d58..678c8c5d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -614,16 +614,16 @@ sub get_pct ($) { # mset item
 
 sub xap_terms ($$;@) {
         my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
-        my %ret;
         my $end = $xdb_or_doc->termlist_end(@docid);
         my $cur = $xdb_or_doc->termlist_begin(@docid);
+        $cur->skip_to($pfx);
+        my (@ret, $tn);
+        my $pfxlen = length($pfx);
         for (; $cur != $end; $cur++) {
-                $cur->skip_to($pfx);
-                last if $cur == $end;
-                my $tn = $cur->get_termname;
-                $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+                $tn = $cur->get_termname;
+                index($tn, $pfx) ? last : push(@ret, substr($tn, $pfxlen));
         }
-        wantarray ? sort(keys(%ret)) : \%ret;
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 # get combined docid from over.num:
@@ -638,11 +638,12 @@ sub all_terms {
         my ($self, $pfx) = @_;
         my $cur = xdb($self)->allterms_begin($pfx);
         my $end = $self->{xdb}->allterms_end($pfx);
-        my %ret;
+        my $pfxlen = length($pfx);
+        my @ret;
         for (; $cur != $end; $cur++) {
-                $ret{substr($cur->get_termname, length($pfx))} = undef;
+                push @ret, substr($cur->get_termname, $pfxlen);
         }
-        wantarray ? (sort keys %ret) : \%ret;
+        wantarray ? @ret : +{ map { $_ => undef } @ret };
 }
 
 sub xh_args { # prep getopt args to feed to xap_helper.h socket
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1bf471fc..1ac8e33e 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -42,7 +42,7 @@ my $BASE85 = qr/[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+/;
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
-my @VMD_MAP = (kw => 'K', L => 'L');
+my @VMD_MAP = (kw => 'K', L => 'L'); # value order matters
 our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
 
 sub new {
@@ -608,17 +608,16 @@ sub set_vmd {
         my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my ($end, @rm, @add);
+        my ($v, @rm, @add);
         my @x = @VMD_MAP;
+        my ($cur, $end) = ($doc->termlist_begin, $doc->termlist_end);
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
                 my $set = $vmd->{$field} // next;
                 my %keep = map { $_ => 1 } @$set;
                 my %add = %keep;
-                $end //= $doc->termlist_end;
-                for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-                        $cur->skip_to($pfx);
-                        last if $cur == $end;
-                        my $v = $cur->get_termname;
+                $cur->skip_to($pfx); # works due to @VMD_MAP order
+                for (; $cur != $end; $cur++) {
+                        $v = $cur->get_termname;
                         $v =~ s/\A$pfx//s or next;
                         $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
                 }
diff --git a/lib/PublicInbox/xh_cidx.h b/lib/PublicInbox/xh_cidx.h
index 1980f9f6..2803b3a4 100644
--- a/lib/PublicInbox/xh_cidx.h
+++ b/lib/PublicInbox/xh_cidx.h
@@ -12,12 +12,9 @@ static void dump_ibx_term(struct req *req, const char *pfx,
 
         for (cur.skip_to(pfx); cur != end; cur++) {
                 std::string tn = *cur;
-
-                if (starts_with(&tn, pfx, pfx_len)) {
-                        fprintf(req->fp[0], "%s %s\n",
-                                tn.c_str() + pfx_len, ibx_id);
-                        ++req->nr_out;
-                }
+                if (!starts_with(&tn, pfx, pfx_len)) break;
+                fprintf(req->fp[0], "%s %s\n", tn.c_str() + pfx_len, ibx_id);
+                ++req->nr_out;
         }
 }
 
@@ -95,8 +92,7 @@ static bool root2offs_str(struct fbuf *root_offs, Xapian::Document *doc)
         fbuf_init(root_offs);
         for (cur.skip_to("G"); cur != end; cur++) {
                 std::string tn = *cur;
-                if (!starts_with(&tn, "G", 1))
-                        continue;
+                if (!starts_with(&tn, "G", 1)) break;
                 union { const char *in; char *out; } u;
                 u.in = tn.c_str() + 1;
                 e.key = u.out;
@@ -125,8 +121,7 @@ static void dump_roots_term(struct req *req, const char *pfx,
 
         for (cur.skip_to(pfx); cur != end; cur++) {
                 std::string tn = *cur;
-                if (!starts_with(&tn, pfx, pfx_len))
-                        continue;
+                if (!starts_with(&tn, pfx, pfx_len)) break;
                 fputs(tn.c_str() + pfx_len, drt->wbuf.fp);
                 fwrite(root_offs->ptr, root_offs->len, 1, drt->wbuf.fp);
                 ++req->nr_out;
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 056fe22b..4e97a284 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -11,7 +11,7 @@ static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc)
 
         for (cur.skip_to(pfx); cur != end; cur++) {
                 std::string tn = *cur;
-                if (!starts_with(&tn, pfx, pfx_len)) continue;
+                if (!starts_with(&tn, pfx, pfx_len)) break;
                 fputc(0, fp);
                 fwrite(tn.data(), tn.size(), 1, fp);
         }