about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2024-04-24 06:44:47 +0000
committerEric Wong <e@80x24.org>2024-04-24 21:34:47 +0000
commit9987cc1cc687f3b2d03a78938773c4f0c5b64d98 (patch)
tree89a65b32d436ae0b729ed583328aec7836d69b16
parent1682c7a2264b1083a9cd37151134667edbc31059 (diff)
downloadpublic-inbox-9987cc1cc687f3b2d03a78938773c4f0c5b64d98.tar.gz
The C++ version of xap_helper will allow more complex and
expensive queries.  Both the Perl and C++-only versions will
allow offloading search into a separate process which can be
killed via ITIMER_REAL or RLIMIT_CPU in the face of overload.

The xap_helper `mset' command wrapper is simplified to
unconditionally return rank, percentage, and estimated matches
information.  This may slightly penalize mbox retrievals and
lei users, but perhaps that can be a different command entirely.
-rw-r--r--MANIFEST2
-rw-r--r--lib/PublicInbox/Isearch.pm50
-rw-r--r--lib/PublicInbox/Mbox.pm115
-rw-r--r--lib/PublicInbox/MboxGz.pm4
-rw-r--r--lib/PublicInbox/Search.pm52
-rw-r--r--lib/PublicInbox/SearchView.pm56
-rw-r--r--lib/PublicInbox/XapClient.pm1
-rw-r--r--lib/PublicInbox/XapHelper.pm11
-rw-r--r--lib/PublicInbox/XhcMset.pm51
-rw-r--r--lib/PublicInbox/XhcMsetIterator.pm20
-rw-r--r--lib/PublicInbox/xap_helper.h9
-rw-r--r--lib/PublicInbox/xh_mset.h33
-rw-r--r--t/cindex.t8
-rw-r--r--t/xap_helper.t14
14 files changed, 287 insertions, 139 deletions
diff --git a/MANIFEST b/MANIFEST
index 4c974338..fb175e5f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -382,6 +382,8 @@ lib/PublicInbox/XapClient.pm
 lib/PublicInbox/XapHelper.pm
 lib/PublicInbox/XapHelperCxx.pm
 lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/XhcMset.pm
+lib/PublicInbox/XhcMsetIterator.pm
 lib/PublicInbox/gcf2_libgit2.h
 lib/PublicInbox/xap_helper.h
 lib/PublicInbox/xh_cidx.h
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 62112171..20808d6d 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -26,34 +26,44 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
 
 sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }
 
-sub mset {
-        my ($self, $str, $opt) = @_;
+sub eidx_mset_prep ($$) {
+        my ($self, $opt) = @_;
         my %opt = $opt ? %$opt : ();
         $opt{eidx_key} = $self->{eidx_key};
-        if (my $uid_range = $opt{uid_range}) {
-                my ($beg, $end) = @$uid_range;
-                my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
-                my $dbh = $self->{es}->over->dbh;
-                my $sth = $dbh->prepare_cached(<<'', undef, 1);
+        my $uid_range = $opt{uid_range} or return \%opt;
+        my ($beg, $end) = @$uid_range;
+        my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+        my $dbh = $self->{es}->over->dbh;
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
 
-                $sth->execute($ibx_id, $beg, $end);
-                my @r = ($sth->fetchrow_array);
+        $sth->execute($ibx_id, $beg, $end);
+        my @r = ($sth->fetchrow_array);
 
-                $sth = $dbh->prepare_cached(<<'', undef, 1);
+        $sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
 
-                $sth->execute($ibx_id, $beg, $end);
-                $r[1] = $sth->fetchrow_array;
-                if (defined($r[1]) && defined($r[0])) {
-                        $opt{limit} = $r[1] - $r[0] + 1;
-                } else {
-                        $r[1] //= $self->{es}->xdb->get_lastdocid;
-                        $r[0] //= 0;
-                }
-                $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+        $sth->execute($ibx_id, $beg, $end);
+        $r[1] = $sth->fetchrow_array;
+        if (defined($r[1]) && defined($r[0])) {
+                $opt{limit} = $r[1] - $r[0] + 1;
+        } else {
+                $r[1] //= $self->{es}->xdb->get_lastdocid;
+                $r[0] //= 0;
         }
-        $self->{es}->mset($str, \%opt);
+        $opt{uid_range} = \@r; # these are fed to Xapian and SQLite
+        \%opt;
+}
+
+sub mset {
+        my ($self, $str, $opt) = @_;
+        $self->{es}->mset($str, eidx_mset_prep $self, $opt);
+}
+
+sub async_mset {
+        my ($self, $str, $opt, $cb, @args) = @_;
+        $opt = eidx_mset_prep $self, $opt;
+        $self->{es}->async_mset($str, $opt, $cb, @args);
 }
 
 sub mset_to_artnums {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index ac565df9..82fba5c6 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -31,8 +31,8 @@ sub async_next {
         my ($http) = @_; # PublicInbox::HTTP
         my $ctx = $http->{forward} or return; # client aborted
         eval {
-                my $smsg = $ctx->{smsg} or return $ctx->close;
-                $ctx->smsg_blob($smsg);
+                my $smsg = $ctx->{smsg} // return $ctx->close;
+                $ctx->smsg_blob($smsg) if $smsg;
         };
         warn "E: $@" if $@;
 }
@@ -159,6 +159,7 @@ sub all_ids_cb {
                 }
                 $ctx->{ids} = $ids = $over->ids_after(\($ctx->{prev}));
         } while (@$ids);
+        undef;
 }
 
 sub mbox_all_ids {
@@ -175,52 +176,79 @@ sub mbox_all_ids {
         PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
 }
 
-sub refill_result_ids ($) {
-        my ($ctx) = @_;
+my $refill_ids_cb = sub { # async_mset cb
+        my ($ctx, $http, $mset, $err) = @_;
+        $http = undef unless $ctx->{-really_async};
+        if ($err) {
+                warn "E: $err";
+                $ctx->close if $http; # our async httpd
+                return;
+        }
         # refill result set, deprioritize since there's many results
-        my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search');
-        my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
-        my $size = $mset->size or return;
+        my $size = $mset->size or do {
+                $ctx->close if $http;
+                $ctx->{-mbox_done} = 1;
+                return;
+        };
         $ctx->{qopts}->{offset} += $size;
-        $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
+        $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts});
         $ctx->{-low_prio} = 1; # true
-}
+        return if !$http;
+        eval {
+                my $smsg = results_cb($ctx) // return $ctx->close;
+                return if !$smsg; # '' wait for async_mset
+                $ctx->smsg_blob($ctx->{smsg} = $smsg);
+        };
+        warn "E: $@" if $@;
+};
 
-sub results_cb {
-        my ($ctx) = @_;
+sub results_cb { # async_next or MboxGz->getline cb
+        my ($ctx, $http) = @_;
         my $over = $ctx->{ibx}->over or return $ctx->gone('over');
         while (1) {
-                while (defined(my $num = shift(@{$ctx->{ids}}))) {
+                my $ids = $ctx->{xids} // $ctx->{ids};
+                while (defined(my $num = shift(@$ids))) {
                         my $smsg = $over->get_art($num) or next;
                         return $smsg;
                 }
-                refill_result_ids($ctx) or return; # refill ctx->{ids}
+                next if $ctx->{xids} && $over->expand_thread($ctx);
+                return '' if $ctx->{srch}->async_mset(@$ctx{qw(query qopts)},
+                                                $refill_ids_cb, $ctx, $http);
+                return if $ctx->{-mbox_done};
         }
 }
 
-sub results_thread_cb {
-        my ($ctx) = @_;
-
-        my $over = $ctx->{ibx}->over or return $ctx->gone('over');
-        while (1) {
-                while (defined(my $num = shift(@{$ctx->{xids}}))) {
-                        my $smsg = $over->get_art($num) or next;
-                        return $smsg;
-                }
-                next if $over->expand_thread($ctx); # refills ctx->{xids}
-
-                refill_result_ids($ctx) or return; # refill ctx->{ids}
+sub mbox_qry_cb { # async_mset cb
+        my ($ctx, $q, $mset, $err) = @_;
+        my $wcb = delete $ctx->{wcb};
+        if ($err) {
+                warn "E: $err";
+                return $wcb->([500, [qw(Content-Type text/plain)],
+                                [ "Internal server error\n" ]])
         }
+        $ctx->{qopts}->{offset} = $mset->size or
+                        return $wcb->([404, [qw(Content-Type text/plain)],
+                                        ["No results found\n"]]);
+        $ctx->{ids} = $ctx->{srch}->mset_to_artnums($mset, $ctx->{qopts});
+        my $fn;
+        if ($q->{t} && $ctx->{srch}->has_threadid) {
+                $ctx->{xids} = []; # triggers over->expand_thread
+                $fn = "results-thread-$ctx->{query}";
+        } else {
+                $fn = "results-$ctx->{query}";
+        }
+        require PublicInbox::MboxGz;
+        my $res = PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn);
+        ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
 }
 
 sub mbox_all {
         my ($ctx, $q) = @_;
-        my $q_string = $q->{'q'};
-        return mbox_all_ids($ctx) if $q_string !~ /\S/;
-        my $srch = $ctx->{ibx}->isrch or
+        my $qstr = $q->{'q'};
+        return mbox_all_ids($ctx) if $qstr !~ /\S/;
+        my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
-
-        my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
+        my $opt = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
 
         # {threadid} limits results to a given thread
         # {threads} collapses results from messages in the same thread,
@@ -230,25 +258,16 @@ sub mbox_all {
                                 $ctx->{ibx}->{isrch}->{es}->over :
                                 $ctx->{ibx}->over) or
                         return PublicInbox::WWW::need($ctx, 'Overview');
-                $qopts->{threadid} = $over->mid2tid($ctx->{mid});
-        }
-        $qopts->{threads} = 1 if $q->{t};
-        $srch->query_approxidate($ctx->{ibx}->git, $q_string);
-        my $mset = $srch->mset($q_string, $qopts);
-        $qopts->{offset} = $mset->size or
-                        return [404, [qw(Content-Type text/plain)],
-                                ["No results found\n"]];
-        $ctx->{query} = $q_string;
-        $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts);
-        require PublicInbox::MboxGz;
-        my $fn;
-        if ($q->{t} && $srch->has_threadid) {
-                $fn = 'results-thread-'.$q_string;
-                PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn);
-        } else {
-                $fn = 'results-'.$q_string;
-                PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn);
+                $opt->{threadid} = $over->mid2tid($ctx->{mid});
         }
+        $opt->{threads} = 1 if $q->{t};
+        $srch->query_approxidate($ctx->{ibx}->git, $qstr);
+        $ctx->{query} = $qstr;
+        sub { # called by PSGI server
+                $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+                $srch->async_mset($qstr, $opt, \&mbox_qry_cb, $ctx, $q) and
+                        $ctx->{-really_async} = 1;
+        };
 }
 
 1;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 533d2ff1..864d701e 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -13,8 +13,8 @@ sub async_next ($) {
         my ($http) = @_; # PublicInbox::HTTP
         my $ctx = $http->{forward} or return;
         eval {
-                $ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close;
-                $ctx->smsg_blob($ctx->{smsg});
+                my $smsg = $ctx->{cb}->($ctx, $http) // return $ctx->close;
+                $smsg and $ctx->smsg_blob($ctx->{smsg} = $smsg);
         };
         warn "E: $@" if $@;
 }
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 0196dd45..60d12dbf 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -11,6 +11,7 @@ our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
 use POSIX qw(strftime);
 use Carp ();
+our $XHC;
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
@@ -85,7 +86,6 @@ our @XH_SPEC = (
         'k=i', # sort column (like sort(1))
         'm=i', # maximum number of results
         'o=i', # offset
-        'p', # show percent
         'r', # 1=relevance then column
         't', # collapse threads
         'A=s@', # prefixes
@@ -428,6 +428,56 @@ sub mset {
         do_enquire($self, $qry, $opt, TS);
 }
 
+sub xhc_start_maybe () {
+        require PublicInbox::XapClient;
+        my $xhc = PublicInbox::XapClient::start_helper();
+        require PublicInbox::XhcMset if $xhc;
+        $xhc;
+}
+
+sub xh_opt ($) {
+        my ($opt) = @_;
+        my $lim = $opt->{limit} || 50;
+        my @ret;
+        push @ret, '-o', $opt->{offset} if $opt->{offset};
+        push @ret, '-m', $lim;
+        my $rel = $opt->{relevance} // 0;
+        if ($rel == -2) { # ORDER BY docid/UID (highest first)
+                push @ret, '-k', '-1';
+        } elsif ($rel == -1) { # ORDER BY docid/UID (lowest first)
+                push @ret, '-k', '-1';
+                push @ret, '-a';
+        } elsif ($rel == 0) {
+                push @ret, '-k', $opt->{sort_col} // TS;
+                push @ret, '-a' if $opt->{asc};
+        } else { # rel > 0
+                push @ret, '-r';
+                push @ret, '-k', $opt->{sort_col} // TS;
+                push @ret, '-a' if $opt->{asc};
+        }
+        push @ret, '-t' if $opt->{threads};
+        push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
+        push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
+        @ret;
+}
+
+# returns a true value if actually handled asynchronously,
+# and a falsy value if handled synchronously
+sub async_mset {
+        my ($self, $qry_str, $opt, $cb, @args) = @_;
+        $XHC //= xhc_start_maybe;
+        if ($XHC) { # unconditionally retrieving pct + rank for now
+                xdb($self); # populate {nshards}
+                my @margs = ($self->xh_args, xh_opt($opt));
+                my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
+                PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
+        } else { # synchronous
+                my $mset = $self->mset($qry_str, $opt);
+                $cb->(@args, $mset);
+                undef;
+        }
+}
+
 sub do_enquire { # shared with CodeSearch
         my ($self, $qry, $opt, $col) = @_;
         my $enq = $X{Enquire}->new(xdb($self));
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 4016ddeb..9919e25c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -30,10 +30,9 @@ sub mbox_results {
 
 sub sres_top_html {
         my ($ctx) = @_;
-        my $srch = $ctx->{ibx}->isrch or
+        my $srch = $ctx->{srch} = $ctx->{ibx}->isrch or
                 return PublicInbox::WWW::need($ctx, 'Search');
         my $q = PublicInbox::SearchQuery->new($ctx->{qp});
-        my $x = $q->{x};
         my $o = $q->{o} // 0;
         my $asc;
         if ($o < 0) {
@@ -41,48 +40,57 @@ sub sres_top_html {
                 $o = -($o + 1); # so [-1] is the last element, like Perl lists
         }
 
-        my $code = 200;
         # double the limit for expanded views:
-        my $opts = {
+        my $opt = {
                 limit => $q->{l},
                 offset => $o,
                 relevance => $q->{r},
                 threads => $q->{t},
                 asc => $asc,
         };
-        my ($mset, $total, $err, $html);
-retry:
-        eval {
-                my $query = $q->{'q'};
-                $srch->query_approxidate($ctx->{ibx}->git, $query);
-                $mset = $srch->mset($query, $opts);
-                $total = $mset->get_matches_estimated;
-        };
-        $err = $@;
+        my $qs = $q->{'q'};
+        $srch->query_approxidate($ctx->{ibx}->git, $qs);
+        sub {
+                $ctx->{wcb} = $_[0]; # PSGI server supplied write cb
+                $srch->async_mset($qs, $opt, \&sres_html_cb, $ctx, $opt, $q);
+        }
+}
+
+sub sres_html_cb { # async_mset cb
+        my ($ctx, $opt, $q, $mset, $err) = @_;
+        my $code = 200;
+        my $total = $mset ? $mset->get_matches_estimated : undef;
         ctx_prepare($q, $ctx);
+        my ($res, $html);
         if ($err) {
                 $code = 400;
                 $html = '<pre>'.err_txt($ctx, $err).'</pre><hr>';
         } elsif ($total == 0) {
-                if (defined($ctx->{-uxs_retried})) {
-                        # undo retry damage:
+                if (defined($ctx->{-uxs_retried})) { # undo retry damage:
                         $q->{'q'} = $ctx->{-uxs_retried};
-                } elsif (index($q->{'q'}, '%') >= 0) {
+                } elsif (index($q->{'q'}, '%') >= 0) { # retry unescaped
                         $ctx->{-uxs_retried} = $q->{'q'};
-                        $q->{'q'} = uri_unescape($q->{'q'});
-                        goto retry;
+                        my $qs = $q->{'q'} = uri_unescape($q->{'q'});
+                        $ctx->{srch}->query_approxidate($ctx->{ibx}->git, $qs);
+                        return $ctx->{srch}->async_mset($qs, $opt,
+                                                \&sres_html_cb, $ctx, $opt, $q);
                 }
                 $code = 404;
                 $html = "<pre>\n[No results found]</pre><hr>";
+        } elsif ($q->{x} eq 'A') {
+                $res = adump($mset, $q, $ctx);
         } else {
-                return adump($mset, $q, $ctx) if $x eq 'A';
-
                 $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx);
-                return mset_thread($ctx, $mset, $q) if $x eq 't';
-                mset_summary($ctx, $mset, $q); # appends to {-html_tip}
-                $html = '';
+                if ($q->{x} eq 't') {
+                        $res = mset_thread($ctx, $mset, $q);
+                } else {
+                        mset_summary($ctx, $mset, $q); # appends to {-html_tip}
+                        $html = '';
+                }
         }
-        html_oneshot($ctx, $code, $html);
+        $res //= html_oneshot($ctx, $code, $html);
+        my $wcb = delete $ctx->{wcb};
+        ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
 }
 
 # display non-nested search results similar to what users expect from
diff --git a/lib/PublicInbox/XapClient.pm b/lib/PublicInbox/XapClient.pm
index 98034130..f0270091 100644
--- a/lib/PublicInbox/XapClient.pm
+++ b/lib/PublicInbox/XapClient.pm
@@ -26,6 +26,7 @@ sub mkreq {
 }
 
 sub start_helper {
+        $PublicInbox::IPC::send_cmd or return; # can't work w/o SCM_RIGHTS
         my @argv = @_;
         socketpair(my $sock, my $in, AF_UNIX, SOCK_SEQPACKET, 0);
         my $cls = 'PublicInbox::XapHelperCxx';
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 27d98ba1..c55a72ce 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -147,12 +147,8 @@ sub cmd_dump_roots {
 
 sub mset_iter ($$) {
         my ($req, $it) = @_;
-        eval {
-                my $buf = $it->get_docid;
-                $buf .= "\0".$it->get_percent if $req->{p};
-                say { $req->{0} } $buf;
-        };
-        $@ ? iter_retry_check($req) : 0;
+        say { $req->{0} } $it->get_docid, "\0",
+                        $it->get_percent, "\0", $it->get_rank;
 }
 
 sub cmd_mset { # to be used by WWW + IMAP
@@ -165,7 +161,8 @@ sub cmd_mset { # to be used by WWW + IMAP
         $opt->{eidx_key} = $req->{O} if defined $req->{O};
         $opt->{threadid} = $req->{T} if defined $req->{T};
         my $mset = $req->{srch}->mset($qry_str, $opt);
-        say { $req->{0} } 'mset.size=', $mset->size;
+        say { $req->{0} } 'mset.size=', $mset->size,
+                ' .get_matches_estimated=', $mset->get_matches_estimated;
         for my $it ($mset->items) {
                 for (my $t = 10; $t > 0; --$t) {
                         $t = mset_iter($req, $it) // $t;
diff --git a/lib/PublicInbox/XhcMset.pm b/lib/PublicInbox/XhcMset.pm
new file mode 100644
index 00000000..ac25eece
--- /dev/null
+++ b/lib/PublicInbox/XhcMset.pm
@@ -0,0 +1,51 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::Mset and prevents slow queries from blocking the event loop
+package PublicInbox::XhcMset;
+use v5.12;
+use parent qw(PublicInbox::DS);
+use PublicInbox::XhcMsetIterator;
+use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
+
+sub event_step {
+        my ($self) = @_;
+        my ($cb, @args) = @{delete $self->{cb_args} // return};
+        my $rd = $self->{sock};
+        eval {
+                my $hdr = <$rd> // die "E: reading mset header: $!";
+                for (split /\s+/, $hdr) { # read mset.size + estimated_matches
+                        my ($k, $v) = split /=/, $_, 2;
+                        $k =~ s/\A[^\.]*\.//; # s/(mset)?\./
+                        $self->{$k} = $v;
+                }
+                my $size = $self->{size} // die "E: bad xhc header: `$hdr'";
+                my @it = map { PublicInbox::XhcMsetIterator::make($_) } <$rd>;
+                $self->{items} = \@it;
+                scalar(@it) == $size or die
+                        'E: got ',scalar(@it),", expected mset.size=$size";
+        };
+        my $err = $@;
+        $self->close;
+        eval { $cb->(@args, $self, $err) };
+        warn "E: $@\n" if $@;
+}
+
+sub maybe_new {
+        my (undef, $rd, $srch, @cb_args) = @_;
+        my $self = bless { cb_args => \@cb_args, srch => $srch }, __PACKAGE__;
+        if ($PublicInbox::DS::in_loop) { # async
+                $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT);
+        } else { # synchronous
+                $self->{sock} = $rd;
+                event_step($self);
+                undef;
+        }
+}
+
+eval(join('', map { "sub $_ { \$_[0]->{$_} }\n" } qw(size
+        get_matches_estimated)));
+
+sub items { @{$_[0]->{items}} }
+
+1;
diff --git a/lib/PublicInbox/XhcMsetIterator.pm b/lib/PublicInbox/XhcMsetIterator.pm
new file mode 100644
index 00000000..dcfc61e4
--- /dev/null
+++ b/lib/PublicInbox/XhcMsetIterator.pm
@@ -0,0 +1,20 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# mocks Xapian::MsetIterator, there's many of these allocated at once
+package PublicInbox::XhcMsetIterator;
+use v5.12;
+
+sub make ($) {
+        chomp($_[0]);
+        my @self = map { $_ + 0 } split /\0/, $_[0]; # docid, pct, rank
+        # we don't store $xdb in self[4] since we avoid $it->get_document
+        # in favor of $xdb->get_document($it->get_docid)
+        bless \@self, __PACKAGE__;
+}
+
+sub get_docid { $_[0]->[0] }
+sub get_percent { $_[0]->[1] }
+sub get_rank { $_[0]->[2] }
+
+1;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 872f063d..5a89544a 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -141,7 +141,6 @@ struct req { // argv and pfxv point into global rbuf
         bool collapse_threads;
         bool code_search;
         bool relevance; // sort by relevance before column
-        bool emit_percent;
         bool asc; // ascending sort
 };
 
@@ -225,6 +224,13 @@ static Xapian::MSet mail_mset(struct req *req, const char *qry_str)
                 qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
                                         Xapian::Query(req->Oeidx_key));
         }
+        // TODO: uid_range
+        if (req->threadid != ULLONG_MAX) {
+                std::string tid = Xapian::sortable_serialise(req->threadid);
+                qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
+                        Xapian::Query(Xapian::Query::OP_VALUE_RANGE, THREADID,
+                                        tid, tid));
+        }
         Xapian::Enquire enq = prep_enquire(req);
         enq.set_query(qry);
         // THREADID is a CPP macro defined on CLI (see) XapHelperCxx.pm
@@ -632,7 +638,6 @@ static void dispatch(struct req *req)
                         if (*end || req->off == ULLONG_MAX)
                                 ABORT("-o %s", optarg);
                         break;
-                case 'p': req->emit_percent = true; break;
                 case 'r': req->relevance = true; break;
                 case 't': req->collapse_threads = true; break;
                 case 'A':
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 3727a932..db2692c9 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -3,25 +3,6 @@
 // This file is only intended to be included by xap_helper.h
 // it implements pieces used by WWW, IMAP and lei
 
-static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
-                                Xapian::MSetIterator *i)
-{
-        try {
-                fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid
-                if (req->emit_percent)
-                        fprintf(fp, "%c%d", 0, i->get_percent());
-                fputc('\n', fp);
-        } catch (const Xapian::DatabaseModifiedError & e) {
-                req->srch->db->reopen();
-                if (fseeko(fp, off, SEEK_SET) < 0) EABORT("fseeko");
-                return ITER_RETRY;
-        } catch (const Xapian::DocNotFoundError & e) { // oh well...
-                warnx("doc not found: %s", e.get_description().c_str());
-                if (fseeko(fp, off, SEEK_SET) < 0) EABORT("fseeko");
-        }
-        return ITER_OK;
-}
-
 #ifndef WBUF_FLUSH_THRESHOLD
 #        define WBUF_FLUSH_THRESHOLD (BUFSIZ - 1000)
 #endif
@@ -39,7 +20,9 @@ static bool cmd_mset(struct req *req)
         Xapian::MSet mset = req->code_search ? commit_mset(req, qry_str) :
                                                 mail_mset(req, qry_str);
         fbuf_init(&wbuf);
-        fprintf(wbuf.fp, "mset.size=%llu\n", (unsigned long long)mset.size());
+        fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n",
+                (unsigned long long)mset.size(),
+                (unsigned long long)mset.get_matches_estimated());
         int fd = fileno(req->fp[0]);
         for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
                 off_t off = ftello(wbuf.fp);
@@ -58,12 +41,10 @@ static bool cmd_mset(struct req *req)
                         if (fseeko(wbuf.fp, 0, SEEK_SET)) EABORT("fseeko");
                         off = 0;
                 }
-                for (int t = 10; t > 0; --t)
-                        switch (mset_iter(req, wbuf.fp, off, &i)) {
-                        case ITER_OK: t = 0; break; // leave inner loop
-                        case ITER_RETRY: break; // continue for-loop
-                        case ITER_ABORT: return false; // error
-                        }
+                fprintf(wbuf.fp, "%llu" "%c" "%d" "%c" "%llu\n",
+                        (unsigned long long)(*i), // get_docid
+                        0, i.get_percent(),
+                        0, (unsigned long long)i.get_rank());
         }
         off_t off = ftello(wbuf.fp);
         if (off < 0) EABORT("ftello");
diff --git a/t/cindex.t b/t/cindex.t
index acd74a5d..90236287 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -152,10 +152,12 @@ my $test_xhc = sub {
         my ($r, @l);
         $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
         chomp(@l = <$r>);
-        is(shift(@l), 'mset.size=2', "got expected header $impl");
+        like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";
         my %docid2data;
         my @got = sort map {
-                my ($docid, @extra) = split /\0/;
+                my ($docid, $pct, $rank, @extra) = split /\0/;
+                ok $pct >= 0 && $pct <= 100, 'pct in range';
+                ok $rank >= 0 && $rank <= 100000, 'rank ok';
                 is scalar(@extra), 0, 'no extra fields';
                 $docid2data{$docid} =
                         $csrch->xdb->get_document($docid)->get_data;
@@ -164,7 +166,7 @@ my $test_xhc = sub {
 
         $r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL');
         chomp(@l = <$r>);
-        is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl");
+        like shift(@l), qr/\bmset.size=0\b/, "got miss in wrong dir $impl";
         is_deeply(\@l, [], "no extra lines $impl");
 
         while (my ($did, $expect) = each %docid2data) {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 70c634ac..effe8bc5 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,10 +204,11 @@ for my $n (@NO_CXX) {
         $err = do { local $/; <$err_r> };
         is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
 
-        $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
+        $r = $xhc->mkreq([], qw(mset), @ibx_shard_args,
                                 'dfn:lib/PublicInbox/Search.pm');
         chomp((my $hdr, @res) = readline($r));
-        is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}";
+        like $hdr, qr/\bmset\.size=1\b/,
+                "got expected header via mset ($xhc->{impl}";
         is scalar(@res), 1, 'got one result';
         @res = split /\0/, $res[0];
         {
@@ -217,19 +218,20 @@ for my $n (@NO_CXX) {
                 is_deeply \@q, [ $mid ], 'docid usable';
         }
         ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
-        is scalar(@res), 2, 'only 2 columns in result';
+        is scalar(@res), 3, 'only 3 columns in result';
 
-        $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
+        $r = $xhc->mkreq([], qw(mset), @ibx_shard_args,
                                 'dt:19700101'.'000000..');
         chomp(($hdr, @res) = readline($r));
-        is $hdr, 'mset.size=6',
+        like $hdr, qr/\bmset\.size=6\b/,
                 "got expected header via multi-result mset ($xhc->{impl}";
         is(scalar(@res), 6, 'got 6 rows');
         for my $r (@res) {
-                my ($docid, $pct, @rest) = split /\0/, $r;
+                my ($docid, $pct, $rank, @rest) = split /\0/, $r;
                 my $doc = $v2->search->xdb->get_document($docid);
                 ok $pct > 0 && $pct <= 100,
                         "pct > 0 && <= 100 #$docid ($xhc->{impl})";
+                like $rank, qr/\A\d+\z/, 'rank is a digit';
                 is scalar(@rest), 0, 'no extra rows returned';
         }
         my $nr;