From 87dca6d8d5988c5eb54019cca342450b0b7dd6b7 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 2 Apr 2018 00:04:55 +0000 Subject: www: rework query responses to avoid COUNT in SQLite In many cases, we do not care about the total number of messages. It's a rather expensive operation in SQLite (Xapian only provides an estimate). For LKML, this brings top-level /$INBOX/ loading time from ~375ms to around 60ms on my system. Days ago, this operation was taking 800-900ms(!) for me before introducing the SQLite overview DB. --- t/altid.t | 9 +++--- t/convert-compact.t | 6 ++-- t/over.t | 6 ++-- t/search-thr-index.t | 4 +-- t/search.t | 80 +++++++++++++++++++++++++-------------------------- t/v2-add-remove-add.t | 6 ++-- t/v2mda.t | 4 +-- t/v2writable.t | 2 +- t/watch_maildir_v2.t | 26 ++++++++--------- 9 files changed, 71 insertions(+), 72 deletions(-) (limited to 't') diff --git a/t/altid.t b/t/altid.t index 0f3b86c1..d4f6152e 100644 --- a/t/altid.t +++ b/t/altid.t @@ -50,12 +50,11 @@ my $altid = [ "serial:gmane:file=$alt_file" ]; { my $ro = PublicInbox::Search->new($git_dir, $altid); - my $res = $ro->query("gmane:1234"); - is($res->{total}, 1, 'got one match'); - is($res->{msgs}->[0]->mid, 'a@example.com'); + my $msgs = $ro->query("gmane:1234"); + is_deeply([map { $_->mid } @$msgs], ['a@example.com'], 'got one match'); - $res = $ro->query("gmane:666"); - is($res->{total}, 0, 'body did NOT match'); + $msgs = $ro->query("gmane:666"); + is_deeply([], $msgs, 'body did NOT match'); }; { diff --git a/t/convert-compact.t b/t/convert-compact.t index 92a6a9c5..e2ba40a5 100644 --- a/t/convert-compact.t +++ b/t/convert-compact.t @@ -99,8 +99,8 @@ foreach (@xdir) { is($st[2] & 07777, -f _ ? 0444 : 0755, 'sharedRepository respected after v2 compact'); } -my $res = $ibx->recent({limit => 1000}); -is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history'); -is(scalar @{$res->{msgs}}, 1, 'only one message in history'); +my $msgs = $ibx->recent({limit => 1000}); +is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); +is(scalar @$msgs, 1, 'only one message in history'); done_testing(); diff --git a/t/over.t b/t/over.t index bfe331ed..2a7e8d1d 100644 --- a/t/over.t +++ b/t/over.t @@ -40,7 +40,7 @@ my $ddd = compress(''); foreach my $s ('', undef) { $over->add_over([0, 98, [ 'a' ], [], $s, $ddd]); $over->add_over([0, 99, [ 'b' ], [], $s, $ddd]); - my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98], $msgs, 'messages not linked by empty subject'); } @@ -48,13 +48,13 @@ foreach my $s ('', undef) { $over->add_over([0, 98, [ 'a' ], [], 's', $ddd]); $over->add_over([0, 99, [ 'b' ], [], 's', $ddd]); foreach my $mid (qw(a b)) { - my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98, 99], $msgs, 'linked messages by subject'); } $over->add_over([0, 98, [ 'a' ], [], 's', $ddd]); $over->add_over([0, 99, [ 'b' ], ['a'], 'diff', $ddd]); foreach my $mid (qw(a b)) { - my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)} ]; is_deeply([98, 99], $msgs, "linked messages by Message-ID: <$mid>"); } diff --git a/t/search-thr-index.t b/t/search-thr-index.t index 3ddef809..2aa97bff 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -49,8 +49,8 @@ foreach (reverse split(/\n\n/, $data)) { my $prev; foreach my $mid (@mids) { - my $res = $rw->{over}->get_thread($mid); - is(3, $res->{total}, "got all messages from $mid"); + my $msgs = $rw->{over}->get_thread($mid); + is(3, scalar(@$msgs), "got all messages from $mid"); } $rw->commit_txn_lazy; diff --git a/t/search.t b/t/search.t index 51adb9fb..c9bef718 100644 --- a/t/search.t +++ b/t/search.t @@ -82,8 +82,8 @@ my $rw_commit = sub { } sub filter_mids { - my ($res) = @_; - sort(map { $_->mid } @{$res->{msgs}}); + my ($msgs) = @_; + sort(map { $_->mid } @$msgs); } { @@ -106,12 +106,12 @@ sub filter_mids { is_deeply(\@res, \@exp, 'got expected results for s:"" match'); $res = $ro->query('s:"Hello world"', {limit => 1}); - is(scalar @{$res->{msgs}}, 1, "limit works"); - my $first = $res->{msgs}->[0]; + is(scalar @$res, 1, "limit works"); + my $first = $res->[0]; $res = $ro->query('s:"Hello world"', {offset => 1}); - is(scalar @{$res->{msgs}}, 1, "offset works"); - my $second = $res->{msgs}->[0]; + is(scalar @$res, 1, "offset works"); + my $second = $res->[0]; isnt($first, $second, "offset returned different result from limit"); } @@ -147,7 +147,7 @@ sub filter_mids { my $ghost_id = $rw->add_message($was_ghost); is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id"); - my $msgs = $rw->{over}->get_thread('ghost-message@s')->{msgs}; + my $msgs = $rw->{over}->get_thread('ghost-message@s'); is(scalar(@$msgs), 2, 'got both messages in ghost thread'); foreach (qw(sid tid)) { is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match"); @@ -169,7 +169,7 @@ sub filter_mids { # body $res = $ro->query('goodbye'); - is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body'); + is($res->[0]->mid, 'last@s', 'got goodbye message body'); } # long message-id @@ -215,7 +215,7 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $t = $ro->get_thread('root@s'); - is($t->{total}, 4, "got all 4 mesages in thread"); + is(scalar(@$t), 4, "got all 4 mesages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @res = filter_mids($t); is_deeply(\@res, \@exp, "get_thread works"); @@ -244,13 +244,13 @@ sub filter_mids { ], body => "theatre\nfade\n")); my $res = $rw->query("theatre"); - is($res->{total}, 2, "got both matches"); - is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher"); - is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned"); + is(scalar(@$res), 2, "got both matches"); + is($res->[0]->mid, 'nquote@a', "non-quoted scores higher"); + is($res->[1]->mid, 'quote@a', "quoted result still returned"); $res = $rw->query("illusions"); - is($res->{total}, 1, "got a match for quoted text"); - is($res->{msgs}->[0]->mid, 'quote@a', + is(scalar(@$res), 1, "got a match for quoted text"); + is($res->[0]->mid, 'quote@a', "quoted result returned if nothing else"); } @@ -293,34 +293,34 @@ sub filter_mids { } { - my $res = $ro->query('d:19931002..20101002'); - ok(scalar @{$res->{msgs}} > 0, 'got results within range'); - $res = $ro->query('d:20101003..'); - is(scalar @{$res->{msgs}}, 0, 'nothing after 20101003'); - $res = $ro->query('d:..19931001'); - is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001'); + my $msgs = $ro->query('d:19931002..20101002'); + ok(scalar(@$msgs) > 0, 'got results within range'); + $msgs = $ro->query('d:20101003..'); + is(scalar(@$msgs), 0, 'nothing after 20101003'); + $msgs = $ro->query('d:..19931001'); + is(scalar(@$msgs), 0, 'nothing before 19931001'); } # names and addresses { my $res = $ro->query('t:list@example.com'); - is(scalar @{$res->{msgs}}, 6, 'searched To: successfully'); - foreach my $smsg (@{$res->{msgs}}) { + is(scalar @$res, 6, 'searched To: successfully'); + foreach my $smsg (@$res) { like($smsg->to, qr/\blist\@example\.com\b/, 'to appears'); } $res = $ro->query('tc:list@example.com'); - is(scalar @{$res->{msgs}}, 6, 'searched To+Cc: successfully'); - foreach my $smsg (@{$res->{msgs}}) { + is(scalar @$res, 6, 'searched To+Cc: successfully'); + foreach my $smsg (@$res) { my $tocc = join("\n", $smsg->to, $smsg->cc); like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); } foreach my $pfx ('tcf:', 'c:') { $res = $ro->query($pfx . 'foo@example.com'); - is(scalar @{$res->{msgs}}, 1, + is(scalar @$res, 1, "searched $pfx successfully for Cc:"); - foreach my $smsg (@{$res->{msgs}}) { + foreach my $smsg (@$res) { like($smsg->cc, qr/\bfoo\@example\.com\b/, 'cc appears'); } @@ -328,9 +328,9 @@ sub filter_mids { foreach my $pfx ('', 'tcf:', 'f:') { $res = $ro->query($pfx . 'Laggy'); - is(scalar @{$res->{msgs}}, 1, + is(scalar(@$res), 1, "searched $pfx successfully for From:"); - foreach my $smsg (@{$res->{msgs}}) { + foreach my $smsg (@$res) { like($smsg->from, qr/Laggy Sender/, "From appears with $pfx"); } @@ -341,23 +341,23 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $res = $ro->query('b:hello'); - is(scalar @{$res->{msgs}}, 0, 'no match on body search only'); + is(scalar(@$res), 0, 'no match on body search only'); $res = $ro->query('bs:smith'); - is(scalar @{$res->{msgs}}, 0, + is(scalar(@$res), 0, 'no match on body+subject search for From'); $res = $ro->query('q:theatre'); - is(scalar @{$res->{msgs}}, 1, 'only one quoted body'); - like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body'); + is(scalar(@$res), 1, 'only one quoted body'); + like($res->[0]->from, qr/\AQuoter/, 'got quoted body'); $res = $ro->query('nq:theatre'); - is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body'); - like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body'); + is(scalar @$res, 1, 'only one non-quoted body'); + like($res->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body'); foreach my $pfx (qw(b: bs:)) { $res = $ro->query($pfx . 'theatre'); - is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx"); - like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, + is(scalar @$res, 2, "searched both bodies for $pfx"); + like($res->[0]->from, qr/\ANon-Quoter/, "non-quoter first for $pfx"); } } @@ -396,13 +396,13 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $n = $ro->query('n:attached_fart.txt'); - is(scalar @{$n->{msgs}}, 1, 'got result for n:'); + is(scalar @$n, 1, 'got result for n:'); my $res = $ro->query('part_deux.txt'); - is(scalar @{$res->{msgs}}, 1, 'got result without n:'); - is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + is(scalar @$res, 1, 'got result without n:'); + is($n->[0]->mid, $res->[0]->mid, 'same result with and without'); my $txt = $ro->query('"inside another"'); - is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + is($txt->[0]->mid, $res->[0]->mid, 'search inside text attachments works'); } $rw->commit_txn_lazy; diff --git a/t/v2-add-remove-add.t b/t/v2-add-remove-add.t index b6c58872..c8d12d34 100644 --- a/t/v2-add-remove-add.t +++ b/t/v2-add-remove-add.t @@ -35,8 +35,8 @@ ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message added'); ok($im->add($mime), 'message added again'); $im->done; -my $res = $ibx->recent({limit => 1000}); -is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history'); -is(scalar @{$res->{msgs}}, 1, 'only one message in history'); +my $msgs = $ibx->recent({limit => 1000}); +is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); +is(scalar @$msgs, 1, 'only one message in history'); done_testing(); diff --git a/t/v2mda.t b/t/v2mda.t index be27ca07..ca1bb09c 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -52,8 +52,8 @@ ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr), 'mda delivered a message'); $ibx = PublicInbox::Inbox->new($ibx); -my $res = $ibx->search->query(''); -my $saved = $ibx->smsg_mime($res->{msgs}->[0]); +my $msgs = $ibx->search->query(''); +my $saved = $ibx->smsg_mime($msgs->[0]); is($saved->{mime}->as_string, $mime->as_string, 'injected message'); done_testing(); diff --git a/t/v2writable.t b/t/v2writable.t index 7e29ef76..1e8e4042 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -249,7 +249,7 @@ EOF ok($im->add($mime), 'add excessively long References'); $im->barrier; - my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs}; + my $msgs = $ibx->search->reopen->get_thread('x'x244); is(2, scalar(@$msgs), 'got both messages'); is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid'); is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref'); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index 85130e3c..a76e413f 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -50,8 +50,8 @@ ok($ibx, 'found inbox by name'); my $srch = $ibx->search; PublicInbox::WatchMaildir->new($config)->scan('full'); -my $res = $srch->reopen->query(''); -is($res->{total}, 1, 'got one revision'); +my ($total, undef) = $srch->reopen->query(''); +is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$mainrepo/git/0.git"); # my @list = $git->qx(qw(rev-list refs/heads/master)); @@ -70,7 +70,7 @@ my $write_spam = sub { $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::WatchMaildir->new($config)->scan('full'); -is($srch->reopen->query('')->{total}, 0, 'deleted file'); +is(($srch->reopen->query(''))[0], 0, 'deleted file'); # check with scrubbing { @@ -80,16 +80,16 @@ the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 1, 'got one file back'); - my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]); + my ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 1, 'got one file back'); + my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 0, 'inbox is empty again'); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 0, 'inbox is empty again'); } { @@ -103,8 +103,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::WatchMaildir->new($config)->scan('full'); } - $res = $srch->reopen->query(''); - is($res->{total}, 0, 'inbox is still empty'); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } @@ -116,9 +116,9 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 1, 'inbox has one mail after spamc OK-ed a message'); - my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 1, 'inbox has one mail after spamc OK-ed a message'); + my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); } -- cgit v1.2.3-24-ge0c7