From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/7] mbox: remove remaining OFFSET usage in SQLite
Date: Tue, 3 Apr 2018 11:09:10 +0000 [thread overview]
Message-ID: <20180403110912.24231-6-e@80x24.org> (raw)
In-Reply-To: <20180403110912.24231-1-e@80x24.org>
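We can use id_batch in the common case (an empty query, i.e. a
full mbox download) to speed up full mbox retrievals. Search
queries still page through their results with an offset, so
gigantic msets are still a problem, but will be fixed in
future commits.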
---
lib/PublicInbox/Mbox.pm | 37 +++++++++++++++++++++++++++++++------
lib/PublicInbox/Over.pm | 13 ++++++-------
lib/PublicInbox/Search.pm | 4 ++--
t/psgi_v2.t | 22 +++++++++++++++++++++-
4 files changed, 60 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 05de6be..0be1968 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -138,8 +138,12 @@ sub thread_mbox {
my ($ctx, $srch, $sfx) = @_;
eval { require IO::Compress::Gzip };
return sub { need_gzip(@_) } if $@;
-
- my $cb = sub { $srch->get_thread($ctx->{mid}, @_) };
+ my $prev = 0;
+ my $cb = sub {
+ my $msgs = $srch->get_thread($ctx->{mid}, $prev);
+ $prev = $msgs->[-1]->{num} if scalar(@$msgs);
+ $msgs;
+ };
PublicInbox::MboxGz->response($ctx, $cb);
}
@@ -160,7 +164,25 @@ sub mbox_all {
eval { require IO::Compress::Gzip };
return sub { need_gzip(@_) } if $@;
- my $cb = sub { $ctx->{srch}->query($query, @_) };
+ if ($query eq '') {
+ my $prev = 0;
+ my $msgs = [];
+ my $cb = sub {
+ $ctx->{-inbox}->mm->id_batch($prev, sub {
+ $msgs = $_[0];
+ });
+ $prev = $msgs->[-1] if @$msgs;
+ $msgs;
+ };
+ return PublicInbox::MboxGz->response($ctx, $cb, 'all');
+ }
+ my $opts = { offset => 0 };
+ my $srch = $ctx->{srch};
+ my $cb = sub { # called by MboxGz->getline
+ my $msgs = $srch->query($query, $opts);
+ $opts->{offset} += scalar @$msgs;
+ $msgs;
+ };
PublicInbox::MboxGz->response($ctx, $cb, 'results-'.$query);
}
@@ -192,7 +214,6 @@ sub new {
cb => $cb,
ctx => $ctx,
msgs => [],
- opts => { offset => 0 },
}, $class;
}
@@ -223,6 +244,10 @@ sub getline {
do {
# work on existing result set
while (defined(my $smsg = shift @$msgs)) {
+ # id_batch may return integers
+ ref($smsg) or
+ $smsg = $ctx->{srch}->{over_ro}->get_art($smsg);
+
my $msg = eval { $ibx->msg_by_smsg($smsg) } or next;
$msg = Email::Simple->new($msg);
$gz->write(PublicInbox::Mbox::msg_str($ctx, $msg,
@@ -247,10 +272,10 @@ sub getline {
}
# refill result set
- $msgs = $self->{msgs} = $self->{cb}->($self->{opts});
- $self->{opts}->{offset} += scalar @$msgs;
+ $msgs = $self->{msgs} = $self->{cb}->();
} while (@$msgs);
$gz->close;
+ # signal that we're done and can return undef next call:
delete $self->{ctx};
${delete $self->{buf}};
}
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index b230d44..0bd6008 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -50,9 +50,7 @@ sub do_get {
my ($self, $sql, $opts, @args) = @_;
my $dbh = $self->connect;
my $lim = (($opts->{limit} || 0) + 0) || 1000;
- my $off = (($opts->{offset} || 0) + 0) || 0;
$sql .= "LIMIT $lim";
- $sql .= " OFFSET $off" if $off > 0;
my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args);
load_from_row($_) for @$msgs;
$msgs
@@ -77,7 +75,7 @@ ORDER BY num ASC
sub nothing () { wantarray ? (0, []) : [] };
sub get_thread {
- my ($self, $mid, $opts) = @_;
+ my ($self, $mid, $prev) = @_;
my $dbh = $self->connect;
my $id = $dbh->selectrow_array(<<'', undef, $mid);
@@ -96,13 +94,14 @@ SELECT tid,sid FROM over WHERE num = ? LIMIT 1
defined $tid or return nothing; # $sid may be undef
- my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > 0';
- my $msgs = do_get($self, <<"", $opts, $tid, $sid);
-SELECT * $cond ORDER BY ts ASC
+ $prev ||= 0;
+ my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > ?';
+ my $msgs = do_get($self, <<"", {}, $tid, $sid, $prev);
+SELECT * $cond ORDER BY num ASC
return $msgs unless wantarray;
- my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid);
+ my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $prev);
SELECT COUNT(num) $cond
($nr, $msgs);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index f7fdf85..eca2b0f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -179,8 +179,8 @@ sub query {
}
sub get_thread {
- my ($self, $mid, $opts) = @_;
- $self->{over_ro}->get_thread($mid, $opts);
+ my ($self, $mid, $prev) = @_;
+ $self->{over_ro}->get_thread($mid, $prev);
}
sub retry_reopen {
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 31c4178..aa3279c 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -125,8 +125,28 @@ test_psgi(sub { $www->call(@_) }, sub {
like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
- @from_ = ($raw =~ m/^From /mg);
+ @from_ = ($out =~ m/^From /mg);
is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
+
+ # search interface
+ $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
+ $in = $res->content;
+ $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ like($out, qr/^hello world$/m, 'got first in mbox POST');
+ like($out, qr/^hello world!$/m, 'got second in mbox POST');
+ like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
+ @from_ = ($out =~ m/^From /mg);
+ is(scalar(@from_), 3, 'three From_ lines in mbox POST');
+
+ # all.mbox.gz interface
+ $res = $cb->(GET('/v2test/all.mbox.gz'));
+ $in = $res->content;
+ $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ like($out, qr/^hello world$/m, 'got first in all.mbox');
+ like($out, qr/^hello world!$/m, 'got second in all.mbox');
+ like($out, qr/^hello ghosts$/m, 'got third in all.mbox');
+ @from_ = ($out =~ m/^From /mg);
+ is(scalar(@from_), 3, 'three From_ lines in all.mbox');
};
local $SIG{__WARN__} = 'DEFAULT';
--
EW
next prev parent reply other threads:[~2018-04-03 11:09 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-03 11:09 [PATCH 0/7] optimize V2 Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 1/7] t/thread-all.t: modernize test to support modern inboxes Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 2/7] rename+rewrite test using Benchmark module Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 3/7] nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 4/7] view: avoid offset during pagination Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-04-03 11:09 ` [PATCH 6/7] msgmap: replace id_batch with ids_after Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 7/7] nntp: simplify the long_response API Eric Wong (Contractor, The Linux Foundation)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180403110912.24231-6-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).