user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/7] mbox: remove remaining OFFSET usage in SQLite
Date: Tue,  3 Apr 2018 11:09:10 +0000	[thread overview]
Message-ID: <20180403110912.24231-6-e@80x24.org> (raw)
In-Reply-To: <20180403110912.24231-1-e@80x24.org>

We can use id_batch in the common case to speed up full mbox
retrievals.  Gigantic msets are still a problem, but will
be fixed in future commits.
---
 lib/PublicInbox/Mbox.pm   | 37 +++++++++++++++++++++++++++++++------
 lib/PublicInbox/Over.pm   | 13 ++++++-------
 lib/PublicInbox/Search.pm |  4 ++--
 t/psgi_v2.t               | 22 +++++++++++++++++++++-
 4 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 05de6be..0be1968 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -138,8 +138,12 @@ sub thread_mbox {
 	my ($ctx, $srch, $sfx) = @_;
 	eval { require IO::Compress::Gzip };
 	return sub { need_gzip(@_) } if $@;
-
-	my $cb = sub { $srch->get_thread($ctx->{mid}, @_) };
+	my $prev = 0;
+	my $cb = sub {
+		my $msgs = $srch->get_thread($ctx->{mid}, $prev);
+		$prev = $msgs->[-1]->{num} if scalar(@$msgs);
+		$msgs;
+	};
 	PublicInbox::MboxGz->response($ctx, $cb);
 }
 
@@ -160,7 +164,25 @@ sub mbox_all {
 
 	eval { require IO::Compress::Gzip };
 	return sub { need_gzip(@_) } if $@;
-	my $cb = sub { $ctx->{srch}->query($query, @_) };
+	if ($query eq '') {
+		my $prev = 0;
+		my $msgs = [];
+		my $cb = sub {
+			$ctx->{-inbox}->mm->id_batch($prev, sub {
+				$msgs = $_[0];
+			});
+			$prev = $msgs->[-1] if @$msgs;
+			$msgs;
+		};
+		return PublicInbox::MboxGz->response($ctx, $cb, 'all');
+	}
+	my $opts = { offset => 0 };
+	my $srch = $ctx->{srch};
+	my $cb = sub { # called by MboxGz->getline
+		my $msgs = $srch->query($query, $opts);
+		$opts->{offset} += scalar @$msgs;
+		$msgs;
+	};
 	PublicInbox::MboxGz->response($ctx, $cb, 'results-'.$query);
 }
 
@@ -192,7 +214,6 @@ sub new {
 		cb => $cb,
 		ctx => $ctx,
 		msgs => [],
-		opts => { offset => 0 },
 	}, $class;
 }
 
@@ -223,6 +244,10 @@ sub getline {
 	do {
 		# work on existing result set
 		while (defined(my $smsg = shift @$msgs)) {
+			# id_batch may return integers
+			ref($smsg) or
+				$smsg = $ctx->{srch}->{over_ro}->get_art($smsg);
+
 			my $msg = eval { $ibx->msg_by_smsg($smsg) } or next;
 			$msg = Email::Simple->new($msg);
 			$gz->write(PublicInbox::Mbox::msg_str($ctx, $msg,
@@ -247,10 +272,10 @@ sub getline {
 		}
 
 		# refill result set
-		$msgs = $self->{msgs} = $self->{cb}->($self->{opts});
-		$self->{opts}->{offset} += scalar @$msgs;
+		$msgs = $self->{msgs} = $self->{cb}->();
 	} while (@$msgs);
 	$gz->close;
+	# signal that we're done and can return undef next call:
 	delete $self->{ctx};
 	${delete $self->{buf}};
 }
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index b230d44..0bd6008 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -50,9 +50,7 @@ sub do_get {
 	my ($self, $sql, $opts, @args) = @_;
 	my $dbh = $self->connect;
 	my $lim = (($opts->{limit} || 0) + 0) || 1000;
-	my $off = (($opts->{offset} || 0) + 0) || 0;
 	$sql .= "LIMIT $lim";
-	$sql .= " OFFSET $off" if $off > 0;
 	my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args);
 	load_from_row($_) for @$msgs;
 	$msgs
@@ -77,7 +75,7 @@ ORDER BY num ASC
 sub nothing () { wantarray ? (0, []) : [] };
 
 sub get_thread {
-	my ($self, $mid, $opts) = @_;
+	my ($self, $mid, $prev) = @_;
 	my $dbh = $self->connect;
 
 	my $id = $dbh->selectrow_array(<<'', undef, $mid);
@@ -96,13 +94,14 @@ SELECT tid,sid FROM over WHERE num = ? LIMIT 1
 
 	defined $tid or return nothing; # $sid may be undef
 
-	my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > 0';
-	my $msgs = do_get($self, <<"", $opts, $tid, $sid);
-SELECT * $cond ORDER BY ts ASC
+	$prev ||= 0;
+	my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > ?';
+	my $msgs = do_get($self, <<"", {}, $tid, $sid, $prev);
+SELECT * $cond ORDER BY num ASC
 
 	return $msgs unless wantarray;
 
-	my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid);
+	my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $prev);
 SELECT COUNT(num) $cond
 
 	($nr, $msgs);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index f7fdf85..eca2b0f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -179,8 +179,8 @@ sub query {
 }
 
 sub get_thread {
-	my ($self, $mid, $opts) = @_;
-	$self->{over_ro}->get_thread($mid, $opts);
+	my ($self, $mid, $prev) = @_;
+	$self->{over_ro}->get_thread($mid, $prev);
 }
 
 sub retry_reopen {
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 31c4178..aa3279c 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -125,8 +125,28 @@ test_psgi(sub { $www->call(@_) }, sub {
 		like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
 		like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
 		like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
-		@from_ = ($raw =~ m/^From /mg);
+		@from_ = ($out =~ m/^From /mg);
 		is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
+
+		# search interface
+		$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
+		$in = $res->content;
+		$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+		like($out, qr/^hello world$/m, 'got first in mbox POST');
+		like($out, qr/^hello world!$/m, 'got second in mbox POST');
+		like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
+		@from_ = ($out =~ m/^From /mg);
+		is(scalar(@from_), 3, 'three From_ lines in mbox POST');
+
+		# all.mbox.gz interface
+		$res = $cb->(GET('/v2test/all.mbox.gz'));
+		$in = $res->content;
+		$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+		like($out, qr/^hello world$/m, 'got first in all.mbox');
+		like($out, qr/^hello world!$/m, 'got second in all.mbox');
+		like($out, qr/^hello ghosts$/m, 'got third in all.mbox');
+		@from_ = ($out =~ m/^From /mg);
+		is(scalar(@from_), 3, 'three From_ lines in all.mbox');
 	};
 
 	local $SIG{__WARN__} = 'DEFAULT';
-- 
EW


  parent reply	other threads:[~2018-04-03 11:09 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-03 11:09 [PATCH 0/7] optimize V2 Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 1/7] t/thread-all.t: modernize test to support modern inboxes Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 2/7] rename+rewrite test using Benchmark module Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 3/7] nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 4/7] view: avoid offset during pagination Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-04-03 11:09 ` [PATCH 6/7] msgmap: replace id_batch with ids_after Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 7/7] nntp: simplify the long_response API Eric Wong (Contractor, The Linux Foundation)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180403110912.24231-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).