From 4b551c884a648b45ec6b5465efd9fb67f85f0055 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 5 Dec 2020 11:10:45 +0000 Subject: imap: support isearch and reduce Xapian queries Since IMAP search (either with Isearch or traditional per-Inbox search) only returns UIDs, we can safely set the limit to the UID slice size(*). With isearch, we can also trust the Xapian result to fit any docid range we specify. Limiting Xapian results to 1000 was making ->ALL docid <=> per-Inbox UID mapping impossible since results could overlap between ranges unpredictably. Finally, we can map the ->ALL docids into per-Inbox UIDs and show them to the client in the UID order of the Inbox, not the docid order of the ->ALL extindex. This also lets us get rid of the "uid:" query parser prefix and use the Xapian::Query API directly to reduce our search prefix footprint. For mbox.gz downloads in WWW, we'll also make a best effort to preserve the order from the Inbox, not the order of extindex; though it's possible large result sets can have non-overlapping windows. (*) by definition, UID slice size is a "safe" value which shouldn't OOM either the server or clients. --- lib/PublicInbox/Search.pm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index ba239255..7785d483 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -90,6 +90,7 @@ sub load_xapian () { $ENQ_ASCENDING = $x eq 'Xapian' ? 1 : Search::Xapian::ENQ_ASCENDING(); + *sortable_serialise = $x.'::sortable_serialise'; # n.b. 
FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert @@ -334,6 +335,12 @@ sub _enquire_once { # retry_reopen callback if (defined(my $eidx_key = $opts->{eidx_key})) { $query = $X{Query}->new(OP_FILTER(), $query, 'O'.$eidx_key); } + if (defined(my $uid_range = $opts->{uid_range})) { + my $range = $X{Query}->new(OP_VALUE_RANGE(), UID, + sortable_serialise($uid_range->[0]), + sortable_serialise($uid_range->[1])); + $query = $X{Query}->new(OP_FILTER(), $query, $range); + } my $enquire = $X{Enquire}->new($xdb); $enquire->set_query($query); $opts ||= {}; @@ -389,7 +396,6 @@ sub qparse_new ($) { # for IMAP, undocumented for WWW and may be split off go away $cb->($qp, $NVRP->new(BYTES, 'bytes:')); $cb->($qp, $NVRP->new(TS, 'ts:')); - $cb->($qp, $NVRP->new(UID, 'uid:')); while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); -- cgit v1.2.3-24-ge0c7