user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/4] use ALL to speedup -nntpd and -imapd
@ 2022-08-03 20:03  7% Eric Wong
  2022-08-03 20:03  6% ` [PATCH 2/4] miscidx: index inbox min/max article numbers Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2022-08-03 20:03 UTC (permalink / raw)
  To: meta

Just a normal "public-inbox-extindex --all" invocation should be
enough to trigger these optimizations; no --reindex is necessary.

-imapd startup is ~3x faster, NNTP LIST is ~40x faster.

Eric Wong (4):
  nntpd: do not delete newsgroup name from inbox object
  miscidx: index inbox min/max article numbers
  nntp: speed up group listings via ->ALL->misc
  imapd: use nntpd_cache to speed up startup/reload time

 lib/PublicInbox/IMAP.pm       |  17 +++---
 lib/PublicInbox/IMAPD.pm      | 100 +++++++++++-----------------------
 lib/PublicInbox/Inbox.pm      |  10 ++++
 lib/PublicInbox/MiscIdx.pm    |   8 ++-
 lib/PublicInbox/MiscSearch.pm |  21 ++++---
 lib/PublicInbox/Msgmap.pm     |  12 ++--
 lib/PublicInbox/NNTP.pm       |  26 ++++++---
 lib/PublicInbox/NNTPD.pm      |   1 -
 lib/PublicInbox/Search.pm     |   3 +-
 9 files changed, 96 insertions(+), 102 deletions(-)

^ permalink raw reply	[relevance 7%]

* [PATCH 2/4] miscidx: index inbox min/max article numbers
  2022-08-03 20:03  7% [PATCH 0/4] use ALL to speedup -nntpd and -imapd Eric Wong
@ 2022-08-03 20:03  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2022-08-03 20:03 UTC (permalink / raw)
  To: meta

This will be used to speed up NNTP group listings and IMAP startup
with thousands of inboxes.
---
 lib/PublicInbox/Inbox.pm      | 10 ++++++++++
 lib/PublicInbox/MiscIdx.pm    |  8 ++++++--
 lib/PublicInbox/MiscSearch.pm | 21 ++++++++++++---------
 lib/PublicInbox/Msgmap.pm     | 12 ++++++++----
 lib/PublicInbox/Search.pm     |  3 ++-
 5 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 0ad68810..3f70e69d 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -409,6 +409,16 @@ sub uidvalidity { $_[0]->{uidvalidity} //= eval { $_[0]->mm->created_at } }
 
 sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
 
+# only used by NNTP, so we need ->mm anyways
+sub art_min { $_[0]->{-art_min} //= eval { $_[0]->mm(1)->min } }
+
+# used by IMAP, too, which tries to avoid ->mm (but ->{mm} is likely
+# faster since it's smaller iff available)
+sub art_max {
+	$_[0]->{-art_max} //= eval { $_[0]->{mm}->max } //
+				eval { $_[0]->over(1)->max };
+}
+
 sub mailboxid { # rfc 8474, 8620, 8621
 	my ($self, $imap_slice) = @_;
 	my $pfx = defined($imap_slice) ? $self->{newsgroup} : $self->{name};
diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm
index 5faf5c66..76b33b16 100644
--- a/lib/PublicInbox/MiscIdx.pm
+++ b/lib/PublicInbox/MiscIdx.pm
@@ -108,12 +108,16 @@ EOF
 	$doc->add_boolean_term('Q'.$eidx_key); # uniQue id
 	$doc->add_boolean_term('T'.'inbox'); # Type
 
+	# force reread from disk, {description} could be loaded from {misc}
+	delete @$ibx{qw(-art_min -art_max description)};
 	if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
 		$doc->add_boolean_term('T'.'newsgroup'); # additional Type
+		my $n = $ibx->art_min;
+		add_val($doc, $PublicInbox::MiscSearch::ART_MIN, $n) if $n;
+		$n = $ibx->art_max;
+		add_val($doc, $PublicInbox::MiscSearch::ART_MAX, $n) if $n;
 	}
 
-	# force reread from disk, {description} could be loaded from {misc}
-	delete $ibx->{description};
 	my $desc = $ibx->description;
 
 	# description = S/Subject (or title)
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index c6d2a062..5fb47d03 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # read-only counterpart to MiscIdx
@@ -11,6 +11,8 @@ my $json;
 # Xapian value columns:
 our $MODIFIED = 0;
 our $UIDVALIDITY = 1; # (created time)
+our $ART_MIN = 2; # NNTP article number
+our $ART_MAX = 3; # NNTP article number
 
 # avoid conflicting with message Search::prob_prefix for UI/UX reasons
 my %PROB_PREFIX = (
@@ -87,14 +89,13 @@ sub ibx_data_once {
 	my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
 	my $head = $xdb->postlist_begin($term);
 	my $tail = $xdb->postlist_end($term);
-	if ($head != $tail) {
-		my $doc = $xdb->get_document($head->get_docid);
-		$ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
-		$ibx->{-modified} = int_val($doc, $MODIFIED);
-		$doc->get_data;
-	} else {
-		undef;
-	}
+	return if $head == $tail;
+	my $doc = $xdb->get_document($head->get_docid);
+	$ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+	$ibx->{-modified} = int_val($doc, $MODIFIED);
+	$ibx->{-art_min} = int_val($doc, $ART_MIN);
+	$ibx->{-art_max} = int_val($doc, $ART_MAX);
+	$doc->get_data;
 }
 
 sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
@@ -109,6 +110,8 @@ sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
 	{
 		uidvalidity => int_val($doc, $UIDVALIDITY),
 		-modified => int_val($doc, $MODIFIED),
+		-art_min => int_val($doc, $ART_MIN), # may be undef
+		-art_max => int_val($doc, $ART_MAX), # may be undef
 		# extract description from manifest.js.gz epoch description
 		description => $d
 	};
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 1041cd17..cb4bb295 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -144,13 +144,17 @@ sub max {
 	$sth->fetchrow_array // 0;
 }
 
-sub minmax {
-	# breaking MIN and MAX into separate queries speeds up from 250ms
-	# to around 700us with 2.7million messages.
+sub min {
 	my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap',
 						undef, 1);
 	$sth->execute;
-	($sth->fetchrow_array // 0, max($_[0]));
+	$sth->fetchrow_array // 0;
+}
+
+sub minmax {
+	# breaking MIN and MAX into separate queries speeds up from 250ms
+	# to around 700us with 2.7million messages.
+	(min($_[0]), max($_[0]));
 }
 
 sub mid_delete {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b6141f68..2feb3e13 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -543,9 +543,10 @@ sub help {
 	\@ret;
 }
 
+# always returns a scalar value
 sub int_val ($$) {
 	my ($doc, $col) = @_;
-	my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
+	my $val = $doc->get_value($col) or return undef; # undef is '' in Xapian
 	sortable_unserialise($val) + 0; # PV => IV conversion
 }
 

^ permalink raw reply related	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2022-08-03 20:03  7% [PATCH 0/4] use ALL to speedup -nntpd and -imapd Eric Wong
2022-08-03 20:03  6% ` [PATCH 2/4] miscidx: index inbox min/max article numbers Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).