user/dev discussion of public-inbox itself
 help / color / mirror / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 3/5] over: recent: remove expensive COUNT query
Date: Wed, 26 Aug 2020 08:17:40 +0000
Message-ID: <20200826081742.16642-4-e@yhbt.net> (raw)
In-Reply-To: <20200826081742.16642-1-e@yhbt.net>

As noted in commit 87dca6d8d5988c5eb54019cca342450b0b7dd6b7
("www: rework query responses to avoid COUNT in SQLite"),
COUNT on many rows is expensive on big SQLite DBs.

We've already stopped using that code path long ago in WWW
while -imapd and -nntpd never used it.  So we'll adjust our
remaining test cases to not need it, either.
---
 lib/PublicInbox/Over.pm |  8 +-------
 t/indexlevels-mirror.t  | 29 +++++++++++++++--------------
 t/v2writable.t          |  4 +++-
 t/watch_maildir_v2.t    | 30 +++++++++++++++---------------
 4 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index a2cf9f21..6b7d5216 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -244,15 +244,9 @@ sub recent {
 			$s = '+num > 0 ORDER BY ts DESC';
 		}
 	}
-	my $msgs = do_get($self, <<"", $opts, @v);
+	do_get($self, <<"", $opts, @v);
 SELECT ts,ds,ddd FROM over WHERE $s
 
-	return $msgs unless wantarray;
-
-	my $nr = $self->{dbh}->selectrow_array(<<'');
-SELECT COUNT(num) FROM over WHERE num > 0
-
-	($nr, $msgs);
 }
 
 sub get_art {
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 859c2c17..27533546 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -49,8 +49,8 @@ my $import_index_incremental = sub {
 		inboxdir => $ibx->{inboxdir},
 		indexlevel => $level
 	});
-	my ($nr, $msgs) = $ro_master->recent;
-	is($nr, 1, 'only one message in master, so far');
+	my $msgs = $ro_master->recent;
+	is(scalar(@$msgs), 1, 'only one message in master, so far');
 	is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed');
 
 	# clone
@@ -79,8 +79,8 @@ my $import_index_incremental = sub {
 		inboxdir => $mirror,
 		indexlevel => $level,
 	});
-	($nr, $msgs) = $ro_mirror->recent;
-	is($nr, 1, 'only one message, so far');
+	$msgs = $ro_mirror->recent;
+	is(scalar(@$msgs), 1, 'only one message, so far');
 	is($msgs->[0]->{mid}, 'm@1', 'read first message');
 
 	# update master
@@ -91,16 +91,16 @@ my $import_index_incremental = sub {
 	# mirror updates
 	is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
 	ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
-	($nr, $msgs) = $ro_mirror->recent;
-	is($nr, 2, '2nd message seen in mirror');
+	$msgs = $ro_mirror->recent;
+	is(scalar(@$msgs), 2, '2nd message seen in mirror');
 	is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
 		['m@1','m@2'], 'got both messages in mirror');
 
 	# incremental index master (required for v1)
 	ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
 		'index master OK');
-	($nr, $msgs) = $ro_master->recent;
-	is($nr, 2, '2nd message seen in master');
+	$msgs = $ro_master->recent;
+	is(scalar(@$msgs), 2, '2nd message seen in master');
 	is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
 		['m@1','m@2'], 'got both messages in master');
 
@@ -121,15 +121,15 @@ my $import_index_incremental = sub {
 		is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level,
 		   'indexlevel detectable by Admin after xcpdb v' .$v.$level);
 		delete $ro_mirror->{$_} for (qw(over search));
-		($nr, $msgs) = $ro_mirror->search->query('m:m@2');
-		is($nr, 1, "v$v found m\@2 via Xapian on $level");
+		$msgs = $ro_mirror->search->query('m:m@2');
+		is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level");
 	}
 
 	# sync the mirror
 	is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
 	ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
-	($nr, $msgs) = $ro_mirror->recent;
-	is($nr, 1, '2nd message gone from mirror');
+	$msgs = $ro_mirror->recent;
+	is(scalar(@$msgs), 1, '2nd message gone from mirror');
 	is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
 		'message unavailable in mirror');
 
@@ -138,8 +138,9 @@ my $import_index_incremental = sub {
 			 'no Xapian shard directories for v2 basic');
 	}
 	if ($level ne 'basic') {
-		($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2');
-		is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level");
+		$msgs = $ro_mirror->search->reopen->query('m:m@2');
+		is(scalar(@$msgs), 0,
+			"v$v m\@2 gone from Xapian in mirror on $level");
 	}
 
 	# add another message to master and have the mirror
diff --git a/t/v2writable.t b/t/v2writable.t
index 2bd7a400..9e4547ba 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -120,7 +120,9 @@ if ('ensure git configs are correct') {
 	$mime->header_set('References', '<zz-mid@b>');
 	ok($im->add($mime), 'message with multiple Message-ID');
 	$im->done;
-	my ($total, undef) = $ibx->over->recent;
+	my $total = $ibx->over->dbh->selectrow_array(<<'');
+SELECT COUNT(*) FROM over WHERE num > 0
+
 	is($ibx->mm->num_highwater, $total, 'got expected highwater value');
 	my $srch = $ibx->search;
 	my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 });
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index f5b8e932..59ec247e 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -50,7 +50,7 @@ ok($ibx, 'found inbox by name');
 my $srch = $ibx->search;
 
 PublicInbox::WatchMaildir->new($config)->scan('full');
-my ($total, undef) = $srch->reopen->query('');
+my $total = scalar @{$srch->reopen->query('')};
 is($total, 1, 'got one revision');
 
 # my $git = PublicInbox::Git->new("$inboxdir/git/0.git");
@@ -70,7 +70,7 @@ my $write_spam = sub {
 $write_spam->();
 is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
 PublicInbox::WatchMaildir->new($config)->scan('full');
-is(($srch->reopen->query(''))[0], 0, 'deleted file');
+is_deeply($srch->reopen->query(''), [], 'deleted file');
 is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
 
 # check with scrubbing
@@ -81,16 +81,16 @@ the body of a message to majordomo\@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
 	PublicInbox::WatchMaildir->new($config)->scan('full');
-	my ($nr, $msgs) = $srch->reopen->query('');
-	is($nr, 1, 'got one file back');
+	my $msgs = $srch->reopen->query('');
+	is(scalar(@$msgs), 1, 'got one file back');
 	my $mref = $ibx->msg_by_smsg($msgs->[0]);
 	like($$mref, qr/something\n\z/s, 'message scrubbed on import');
 
 	is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
 	$write_spam->();
 	PublicInbox::WatchMaildir->new($config)->scan('full');
-	($nr, $msgs) = $srch->reopen->query('');
-	is($nr, 0, 'inbox is empty again');
+	$msgs = $srch->reopen->query('');
+	is(scalar(@$msgs), 0, 'inbox is empty again');
 	is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
 }
 
@@ -105,8 +105,8 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 		local $SIG{__WARN__} = sub {}; # quiet spam check warning
 		PublicInbox::WatchMaildir->new($config)->scan('full');
 	}
-	my ($nr, $msgs) = $srch->reopen->query('');
-	is($nr, 0, 'inbox is still empty');
+	my $msgs = $srch->reopen->query('');
+	is(scalar(@$msgs), 0, 'inbox is still empty');
 	is(unlink(glob("$maildir/new/*")), 1);
 }
 
@@ -118,8 +118,8 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
 	$config->{'publicinboxwatch.spamcheck'} = 'spamc';
 	PublicInbox::WatchMaildir->new($config)->scan('full');
-	my ($nr, $msgs) = $srch->reopen->query('');
-	is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
+	my $msgs = $srch->reopen->query('');
+	is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message');
 	my $mref = $ibx->msg_by_smsg($msgs->[0]);
 	like($$mref, qr/something\n\z/s, 'message scrubbed on import');
 	delete $config->{'publicinboxwatch.spamcheck'};
@@ -131,11 +131,11 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	$msg = do { local $/; <$fh> };
 	PublicInbox::Emergency->new($maildir)->prepare(\$msg);
 	PublicInbox::WatchMaildir->new($config)->scan('full');
-	my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
-	is($nr, 1, 'diff postimage found');
+	my $msgs = $srch->reopen->query('dfpost:6e006fd7');
+	is(scalar(@$msgs), 1, 'diff postimage found');
 	my $post = $msgs->[0];
-	($nr, $msgs) = $srch->query('dfpre:090d998b6c2c');
-	is($nr, 1, 'diff preimage found');
+	$msgs = $srch->query('dfpre:090d998b6c2c');
+	is(scalar(@$msgs), 1, 'diff preimage found');
 	is($post->{blob}, $msgs->[0]->{blob}, 'same message');
 }
 
@@ -162,7 +162,7 @@ both
 EOF
 	PublicInbox::Emergency->new($maildir)->prepare(\$both);
 	PublicInbox::WatchMaildir->new($config)->scan('full');
-	my ($total, $msgs) = $srch->reopen->query('m:both@b.com');
+	my $msgs = $srch->reopen->query('m:both@b.com');
 	my $v1 = $config->lookup_name('v1');
 	my $msg = $v1->git->cat_file($msgs->[0]->{blob});
 	is($both, $$msg, 'got original message back from v1');

  parent reply	other threads:[~2020-08-26  8:17 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-26  8:17 [PATCH 0/5] some minor SQLite-related cleanups Eric Wong
2020-08-26  8:17 ` [PATCH 1/5] over: rename ->connect method to ->dbh Eric Wong
2020-08-26  8:17 ` [PATCH 2/5] over: rename ->disconnect to ->dbh_close Eric Wong
2020-08-26  8:17 ` Eric Wong [this message]
2020-08-26  8:17 ` [PATCH 4/5] over*: use v5.10.1, drop warnings Eric Wong
2020-08-26  8:17 ` [PATCH 5/5] msgmap: use v5.10.1 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200826081742.16642-4-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ http://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git