* [PATCH 3/5] over: recent: remove expensive COUNT query
2020-08-26 8:17 7% [PATCH 0/5] some minor SQLite-related cleanups Eric Wong
@ 2020-08-26 8:17 5% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-08-26 8:17 UTC (permalink / raw)
To: meta
As noted in commit 87dca6d8d5988c5eb54019cca342450b0b7dd6b7
("www: rework query responses to avoid COUNT in SQLite"),
COUNT on many rows is expensive on big SQLite DBs.
We've already stopped using that code path long ago in WWW
while -imapd and -nntpd never used it. So we'll adjust our
remaining test cases to not need it, either.
---
lib/PublicInbox/Over.pm | 8 +-------
t/indexlevels-mirror.t | 29 +++++++++++++++--------------
t/v2writable.t | 4 +++-
t/watch_maildir_v2.t | 30 +++++++++++++++---------------
4 files changed, 34 insertions(+), 37 deletions(-)
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index a2cf9f21..6b7d5216 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -244,15 +244,9 @@ sub recent {
$s = '+num > 0 ORDER BY ts DESC';
}
}
- my $msgs = do_get($self, <<"", $opts, @v);
+ do_get($self, <<"", $opts, @v);
SELECT ts,ds,ddd FROM over WHERE $s
- return $msgs unless wantarray;
-
- my $nr = $self->{dbh}->selectrow_array(<<'');
-SELECT COUNT(num) FROM over WHERE num > 0
-
- ($nr, $msgs);
}
sub get_art {
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 859c2c17..27533546 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -49,8 +49,8 @@ my $import_index_incremental = sub {
inboxdir => $ibx->{inboxdir},
indexlevel => $level
});
- my ($nr, $msgs) = $ro_master->recent;
- is($nr, 1, 'only one message in master, so far');
+ my $msgs = $ro_master->recent;
+ is(scalar(@$msgs), 1, 'only one message in master, so far');
is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed');
# clone
@@ -79,8 +79,8 @@ my $import_index_incremental = sub {
inboxdir => $mirror,
indexlevel => $level,
});
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 1, 'only one message, so far');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 1, 'only one message, so far');
is($msgs->[0]->{mid}, 'm@1', 'read first message');
# update master
@@ -91,16 +91,16 @@ my $import_index_incremental = sub {
# mirror updates
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 2, '2nd message seen in mirror');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 2, '2nd message seen in mirror');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
['m@1','m@2'], 'got both messages in mirror');
# incremental index master (required for v1)
ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
'index master OK');
- ($nr, $msgs) = $ro_master->recent;
- is($nr, 2, '2nd message seen in master');
+ $msgs = $ro_master->recent;
+ is(scalar(@$msgs), 2, '2nd message seen in master');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
['m@1','m@2'], 'got both messages in master');
@@ -121,15 +121,15 @@ my $import_index_incremental = sub {
is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level,
'indexlevel detectable by Admin after xcpdb v' .$v.$level);
delete $ro_mirror->{$_} for (qw(over search));
- ($nr, $msgs) = $ro_mirror->search->query('m:m@2');
- is($nr, 1, "v$v found m\@2 via Xapian on $level");
+ $msgs = $ro_mirror->search->query('m:m@2');
+ is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level");
}
# sync the mirror
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 1, '2nd message gone from mirror');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 1, '2nd message gone from mirror');
is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
'message unavailable in mirror');
@@ -138,8 +138,9 @@ my $import_index_incremental = sub {
'no Xapian shard directories for v2 basic');
}
if ($level ne 'basic') {
- ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2');
- is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level");
+ $msgs = $ro_mirror->search->reopen->query('m:m@2');
+ is(scalar(@$msgs), 0,
+ "v$v m\@2 gone from Xapian in mirror on $level");
}
# add another message to master and have the mirror
diff --git a/t/v2writable.t b/t/v2writable.t
index 2bd7a400..9e4547ba 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -120,7 +120,9 @@ if ('ensure git configs are correct') {
$mime->header_set('References', '<zz-mid@b>');
ok($im->add($mime), 'message with multiple Message-ID');
$im->done;
- my ($total, undef) = $ibx->over->recent;
+ my $total = $ibx->over->dbh->selectrow_array(<<'');
+SELECT COUNT(*) FROM over WHERE num > 0
+
is($ibx->mm->num_highwater, $total, 'got expected highwater value');
my $srch = $ibx->search;
my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 });
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index f5b8e932..59ec247e 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -50,7 +50,7 @@ ok($ibx, 'found inbox by name');
my $srch = $ibx->search;
PublicInbox::WatchMaildir->new($config)->scan('full');
-my ($total, undef) = $srch->reopen->query('');
+my $total = scalar @{$srch->reopen->query('')};
is($total, 1, 'got one revision');
# my $git = PublicInbox::Git->new("$inboxdir/git/0.git");
@@ -70,7 +70,7 @@ my $write_spam = sub {
$write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
PublicInbox::WatchMaildir->new($config)->scan('full');
-is(($srch->reopen->query(''))[0], 0, 'deleted file');
+is_deeply($srch->reopen->query(''), [], 'deleted file');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
# check with scrubbing
@@ -81,16 +81,16 @@ the body of a message to majordomo\@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 1, 'got one file back');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 1, 'got one file back');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
$write_spam->();
PublicInbox::WatchMaildir->new($config)->scan('full');
- ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 0, 'inbox is empty again');
+ $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 0, 'inbox is empty again');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
}
@@ -105,8 +105,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
local $SIG{__WARN__} = sub {}; # quiet spam check warning
PublicInbox::WatchMaildir->new($config)->scan('full');
}
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 0, 'inbox is still empty');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 0, 'inbox is still empty');
is(unlink(glob("$maildir/new/*")), 1);
}
@@ -118,8 +118,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
$config->{'publicinboxwatch.spamcheck'} = 'spamc';
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
delete $config->{'publicinboxwatch.spamcheck'};
@@ -131,11 +131,11 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
$msg = do { local $/; <$fh> };
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
- is($nr, 1, 'diff postimage found');
+ my $msgs = $srch->reopen->query('dfpost:6e006fd7');
+ is(scalar(@$msgs), 1, 'diff postimage found');
my $post = $msgs->[0];
- ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c');
- is($nr, 1, 'diff preimage found');
+ $msgs = $srch->query('dfpre:090d998b6c2c');
+ is(scalar(@$msgs), 1, 'diff preimage found');
is($post->{blob}, $msgs->[0]->{blob}, 'same message');
}
@@ -162,7 +162,7 @@ both
EOF
PublicInbox::Emergency->new($maildir)->prepare(\$both);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($total, $msgs) = $srch->reopen->query('m:both@b.com');
+ my $msgs = $srch->reopen->query('m:both@b.com');
my $v1 = $config->lookup_name('v1');
my $msg = $v1->git->cat_file($msgs->[0]->{blob});
is($both, $$msg, 'got original message back from v1');
^ permalink raw reply related [relevance 5%]
* [PATCH 0/5] some minor SQLite-related cleanups
@ 2020-08-26 8:17 7% Eric Wong
2020-08-26 8:17 5% ` [PATCH 3/5] over: recent: remove expensive COUNT query Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-08-26 8:17 UTC (permalink / raw)
To: meta
Dropping some expensive dead code and avoiding some overloaded
method names to reduce confusion.
Eric Wong (5):
over: rename ->connect method to ->dbh
over: rename ->disconnect to ->dbh_close
over: recent: remove expensive COUNT query
over*: use v5.10.1, drop warnings
msgmap: use v5.10.1
lib/PublicInbox/Inbox.pm | 2 +-
lib/PublicInbox/Msgmap.pm | 3 ++-
lib/PublicInbox/Over.pm | 33 +++++++++++++--------------------
lib/PublicInbox/OverIdx.pm | 15 +++++++--------
lib/PublicInbox/V2Writable.pm | 4 ++--
lib/PublicInbox/Xapcmd.pm | 2 +-
scripts/dupe-finder | 2 +-
t/indexlevels-mirror.t | 29 +++++++++++++++--------------
t/over.t | 11 +++++------
t/search-thr-index.t | 4 ++--
t/v1reindex.t | 2 +-
t/v2reindex.t | 2 +-
t/v2writable.t | 4 +++-
t/watch_maildir_v2.t | 30 +++++++++++++++---------------
14 files changed, 69 insertions(+), 74 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-08-26 8:17 7% [PATCH 0/5] some minor SQLite-related cleanups Eric Wong
2020-08-26 8:17 5% ` [PATCH 3/5] over: recent: remove expensive COUNT query Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).