From 4a2e89007cb7b62151cb1869e49b27ebacfc27eb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Dec 2020 08:38:48 +0000 Subject: miscsearch: index UIDVALIDITY, use as startup cache This brings -nntpd startup time down from ~35s to ~5s with 50K inboxes. Further improvements ought to be possible with deeper changes to MiscIdx, since -mda having to load every inbox seems unreasonable; but this general change is fairly unintrusive. --- lib/PublicInbox/Search.pm | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1d38fb9..05c679c9 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -6,7 +6,7 @@ package PublicInbox::Search; use strict; use parent qw(Exporter); -our @EXPORT_OK = qw(retry_reopen); +our @EXPORT_OK = qw(retry_reopen int_val); use List::Util qw(max); # values for searching, changing the numeric value breaks @@ -91,6 +91,7 @@ sub load_xapian () { 1 : Search::Xapian::ENQ_ASCENDING(); *sortable_serialise = $x.'::sortable_serialise'; + *sortable_unserialise = $x.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert @@ -436,4 +437,10 @@ sub help { \@ret; } +sub int_val ($$) { + my ($doc, $col) = @_; + my $val = $doc->get_value($col) or return; # undefined is '' in Xapian + sortable_unserialise($val) + 0; # PV => IV conversion +} + 1; -- cgit v1.2.3-24-ge0c7 From d0e74a3591d9e701af6fea30baacf2ddb51475d5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Dec 2020 19:38:28 +0000 Subject: search: remove pointless {relevance} setting SearchView will set it to `undef', others will set the 'mset' option (for the ->mset method :P) to 2 which causes {relevance} to be ignored. And the 'mset' option is poorly named now that the method is named ->mset... 
--- lib/PublicInbox/Search.pm | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 05c679c9..ffd19a1f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -287,7 +287,6 @@ sub mset { $opts ||= {}; my $qp = $self->{qp} //= qparse_new($self); my $query = $qp->parse_query($query_string, $self->{qp_flags}); - $opts->{relevance} = 1 unless exists $opts->{relevance}; _do_enquire($self, $query, $opts); } -- cgit v1.2.3-24-ge0c7 From 5f875446975b1473c1ffd7196e572e13d58ba56f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Dec 2020 19:38:29 +0000 Subject: search: remove {mset} option for ->mset method The ->mset method always returns a Xapian mset nowadays, so naming a parameter {mset} is too confusing. As it does with MiscSearch, setting the {relevance} parameter to -1 now sorts by ascending docid order. -2 is now supported for descending docid order, too, since it may be useful for lei users. 
--- lib/PublicInbox/Search.pm | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index ffd19a1f..fb3e9975 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -58,7 +58,11 @@ our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Search::Xapian' or 'Xapian' our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') -our $ENQ_ASCENDING; + +# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16, +# let's hope the ABI is stable +our $ENQ_DESCENDING = 0; +our $ENQ_ASCENDING = 1; sub load_xapian () { return 1 if defined $Xap; @@ -84,12 +88,6 @@ sub load_xapian () { 'NumberRangeProcessor' : 'NumberValueRangeProcessor'); $X{$_} = $Xap.'::'.$_ for (keys %X); - # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm, - # so lets hope this part of the ABI is stable because it's - # just an integer: - $ENQ_ASCENDING = $x eq 'Xapian' ? - 1 : Search::Xapian::ENQ_ASCENDING(); - *sortable_serialise = $x.'::sortable_serialise'; *sortable_unserialise = $x.'::sortable_unserialise'; # n.b. 
FLAG_PURE_NOT is expensive not suitable for a public @@ -344,13 +342,17 @@ sub _enquire_once { # retry_reopen callback $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; - if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID + my $rel = $opts->{relevance} // 0; + if ($rel == -1) { # ORDER BY docid/UID + $enquire->set_weighting_scheme($X{BoolWeight}->new); $enquire->set_docid_order($ENQ_ASCENDING); + } elsif ($rel == 0) { + $enquire->set_sort_by_value_then_relevance(TS, $desc); + } elsif ($rel == -2) { $enquire->set_weighting_scheme($X{BoolWeight}->new); - } elsif ($opts->{relevance}) { + $enquire->set_docid_order($ENQ_DESCENDING); + } else { # rel > 0 $enquire->set_sort_by_relevance_then_value(TS, $desc); - } else { - $enquire->set_sort_by_value_then_relevance(TS, $desc); } # `mairix -t / --threads' or JMAP collapseThreads -- cgit v1.2.3-24-ge0c7