user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/7] nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
Date: Tue,  3 Apr 2018 11:09:08 +0000	[thread overview]
Message-ID: <20180403110912.24231-4-e@80x24.org> (raw)
In-Reply-To: <20180403110912.24231-1-e@80x24.org>

While SQLite is faster than Xapian for some queries we
use, it sucks at handling OFFSET.  Fortunately, we do
not need offsets when retrieving sorted results and
can bake it into the query.

For inbox.comp.version-control.git (v1 Xapian),
XOVER and XHDR are over 20x faster.
---
 MANIFEST                  |   1 +
 lib/PublicInbox/NNTP.pm   |  39 +++++++-------
 lib/PublicInbox/Over.pm   |  15 +++---
 lib/PublicInbox/Search.pm |   4 +-
 t/perf-nntpd.t            | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 159 insertions(+), 30 deletions(-)
 create mode 100644 t/perf-nntpd.t

diff --git a/MANIFEST b/MANIFEST
index 4e79a4c..2dad988 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -173,6 +173,7 @@ t/msgmap.t
 t/nntp.t
 t/nntpd.t
 t/over.t
+t/perf-nntpd.t
 t/perf-threading.t
 t/plack.t
 t/precheck.t
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 48ab7fc..ff6d895 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -331,20 +331,20 @@ sub cmd_newnews ($$$$;$$) {
 	};
 	return '.' unless @srch;
 
-	my $opts = { limit => 1000, offset => 0 };
+	my $prev = 0;
 	long_response($self, 0, long_response_limit, sub {
 		my ($i) = @_;
 		my $srch = $srch[0];
-		my $msgs = $srch->query_ts($ts, $opts);
-		if (my $nr = scalar @$msgs) {
+		my $msgs = $srch->query_ts($ts, $prev);
+		if (scalar @$msgs) {
 			more($self, '<' .
 				join(">\r\n<", map { $_->mid } @$msgs ).
 				'>');
-			$opts->{offset} += $nr;
+			$prev = $msgs->[-1]->{num};
 		} else {
 			shift @srch;
 			if (@srch) { # continue onto next newsgroup
-				$opts->{offset} = 0;
+				$prev = 0;
 			} else { # break out of the long response.
 				$$i = long_response_limit;
 			}
@@ -582,7 +582,7 @@ sub long_response ($$$$) {
 	$self->{long_res} = sub {
 		# limit our own running time for fairness with other
 		# clients and to avoid buffering too much:
-		my $lim = 100;
+		my $lim = $end == long_response_limit ? 1 : 100;
 
 		my $err;
 		do {
@@ -710,20 +710,19 @@ sub hdr_searchmsg ($$$$) {
 		return $r unless ref $r;
 		my ($beg, $end) = @$r;
 		more($self, $xhdr ? r221 : r225);
-		my $off = 0;
-		long_response($self, $beg, $end, sub {
+		my $cur = $beg;
+		long_response($self, 0, long_response_limit, sub {
 			my ($i) = @_;
-			my $msgs = $srch->query_xover($beg, $end, $off);
-			my $nr = scalar @$msgs or return;
-			$off += $nr;
+			my $msgs = $srch->query_xover($cur, $end);
+			my $nr = scalar @$msgs or
+					return ($$i = long_response_limit);
 			my $tmp = '';
 			foreach my $s (@$msgs) {
 				$tmp .= $s->num . ' ' . $s->$field . "\r\n";
 			}
 			utf8::encode($tmp);
 			do_more($self, $tmp);
-			# -1 to adjust for implicit increment in long_response
-			$$i = $nr ? $$i + $nr - 1 : long_response_limit;
+			$cur = $msgs->[-1]->{num} + 1;
 		});
 	}
 }
@@ -848,20 +847,18 @@ sub cmd_xover ($;$) {
 	my ($beg, $end) = @$r;
 	more($self, "224 Overview information follows for $beg to $end");
 	my $srch = $self->{ng}->search;
-	my $off = 0;
-	long_response($self, $beg, $end, sub {
+	my $cur = $beg;
+	long_response($self, 0, long_response_limit, sub {
 		my ($i) = @_;
-		my $msgs = $srch->query_xover($beg, $end, $off);
-		my $nr = scalar @$msgs or return;
-		$off += $nr;
+		my $msgs = $srch->query_xover($cur, $end);
+		my $nr = scalar @$msgs or return ($$i = long_response_limit);
 
 		# OVERVIEW.FMT
 		more($self, join("\r\n", map {
 			over_line($_->{num}, $_);
 			} @$msgs));
-
-		# -1 to adjust for implicit increment in long_response
-		$$i = $nr ? $$i + $nr - 1 : long_response_limit;
+		$cur = $msgs->[-1]->{num} + 1;
+		1;
 	});
 }
 
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 3d285ac..a7fd131 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -51,25 +51,26 @@ sub do_get {
 	my $dbh = $self->connect;
 	my $lim = (($opts->{limit} || 0) + 0) || 1000;
 	my $off = (($opts->{offset} || 0) + 0) || 0;
-	$sql .= "LIMIT $lim OFFSET $off";
+	$sql .= "LIMIT $lim";
+	$sql .= " OFFSET $off" if $off > 0;
 	my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args);
 	load_from_row($_) for @$msgs;
 	$msgs
 }
 
 sub query_xover {
-	my ($self, $beg, $end, $off) = @_;
-	do_get($self, <<'', { offset => $off }, $beg, $end);
+	my ($self, $beg, $end) = @_;
+	do_get($self, <<'', {}, $beg, $end);
 SELECT * FROM over WHERE num >= ? AND num <= ?
 ORDER BY num ASC
 
 }
 
 sub query_ts {
-	my ($self, $ts, $opts) = @_;
-	do_get($self, <<'', $opts, $ts);
-SELECT * FROM over WHERE num > 0 AND ts >= ?
-ORDER BY ts ASC
+	my ($self, $ts, $prev) = @_;
+	do_get($self, <<'', {}, $ts, $prev);
+SELECT num,ddd FROM over WHERE ts >= ? AND num > ?
+ORDER BY num ASC
 
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 84c0a22..f7fdf85 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -291,8 +291,8 @@ sub query_xover {
 }
 
 sub query_ts {
-	my ($self, $ts, $offset) = @_;
-	$self->{over_ro}->query_ts($ts, $offset);
+	my ($self, $ts, $prev) = @_;
+	$self->{over_ro}->query_ts($ts, $prev);
 }
 
 sub first_smsg_by_mid {
diff --git a/t/perf-nntpd.t b/t/perf-nntpd.t
new file mode 100644
index 0000000..4987f98
--- /dev/null
+++ b/t/perf-nntpd.t
@@ -0,0 +1,130 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Benchmark qw(:all);
+use PublicInbox::Inbox;
+use File::Temp qw/tempdir/;
+use POSIX qw(dup2);
+use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD);
+use Net::NNTP;
+my $pi_dir = $ENV{GIANT_PI_DIR};
+plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir;
+eval { require PublicInbox::Search };
+my ($host_port, $group, %opts, $s, $pid);
+END {
+	if ($s) {
+		$s->print("QUIT\r\n");
+		$s->getline;
+		$s = undef;
+	}
+	kill 'TERM', $pid if defined $pid;
+};
+
+if (($ENV{NNTP_TEST_URL} || '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) {
+	($host_port, $group) = ($1, $2);
+	$host_port .= ":119" unless index($host_port, ':') > 0;
+} else {
+	$group = 'inbox.test.perf.nntpd';
+	my $ibx = { mainrepo => $pi_dir, newsgroup => $group };
+	$ibx = PublicInbox::Inbox->new($ibx);
+	my $nntpd = 'blib/script/public-inbox-nntpd';
+	my $tmpdir = tempdir('perf-nntpd-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+
+	my $pi_config = "$tmpdir/config";
+	{
+		open my $fh, '>', $pi_config or die "open($pi_config): $!";
+		print $fh <<"" or die "print $pi_config: $!";
+[publicinbox "test"]
+	newsgroup = $group
+	mainrepo = $pi_dir
+	address = test\@example.com
+
+		close $fh or die "close($pi_config): $!";
+	}
+
+	%opts = (
+		LocalAddr => '127.0.0.1',
+		ReuseAddr => 1,
+		Proto => 'tcp',
+		Listen => 1024,
+	);
+	my $sock = IO::Socket::INET->new(%opts);
+
+	ok($sock, 'sock created');
+	$! = 0;
+	$pid = fork;
+	if ($pid == 0) {
+		# pretend to be systemd
+		my $fl = fcntl($sock, F_GETFD, 0);
+		dup2(fileno($sock), 3) or die "dup2 failed: $!\n";
+		dup2(1, 2) or die "dup2 failed: $!\n";
+		fcntl($sock, F_SETFD, $fl &= ~FD_CLOEXEC);
+		$ENV{LISTEN_PID} = $$;
+		$ENV{LISTEN_FDS} = 1;
+		$ENV{PI_CONFIG} = $pi_config;
+		exec $nntpd, '-W0';
+		die "FAIL: $!\n";
+	}
+	ok(defined $pid, 'forked nntpd process successfully');
+	$host_port = $sock->sockhost . ':' . $sock->sockport;
+}
+%opts = (
+	PeerAddr => $host_port,
+	Proto => 'tcp',
+	Timeout => 1,
+);
+$s = IO::Socket::INET->new(%opts);
+$s->autoflush(1);
+my $buf = $s->getline;
+is($buf, "201 server ready - post via email\r\n", 'got greeting');
+ok($s->print("GROUP $group\r\n"), 'changed group');
+$buf = $s->getline;
+my ($tot, $min, $max) = ($buf =~ /\A211 (\d+) (\d+) (\d+) /);
+ok($tot && $min && $max, 'got GROUP response');
+my $nr = $max - $min;
+my $nmax = 50000;
+my $nmin = $max - $nmax;
+$nmin = $min if $nmin < $min;
+my $res;
+my $spec = "$nmin-$max";
+my $n;
+
+sub read_until_dot ($) {
+	my $n = 0;
+	do {
+		$buf = $s->getline;
+		++$n
+	} until $buf eq ".\r\n";
+	$n;
+}
+
+my $t = timeit(1, sub {
+	$s->print("XOVER $spec\r\n");
+	$n = read_until_dot($s);
+});
+diag 'xover took: ' . timestr($t) . " for $n";
+
+$t = timeit(1, sub {
+	$s->print("HDR From $spec\r\n");
+	$n = read_until_dot($s);
+
+});
+diag "XHDR From ". timestr($t) . " for $n";
+
+my $date = $ENV{NEWNEWS_DATE};
+unless ($date) {
+	my (undef, undef, undef, $d, $m, $y) = gmtime(time - 30 * 86400);
+	$date = sprintf('%04u%02u%02u', $y + 1900, $m, $d);
+	diag "NEWNEWS_DATE undefined, using $date";
+}
+$t = timeit(1, sub {
+	$s->print("NEWNEWS * $date 000000 GMT\r\n");
+	$n = read_until_dot($s);
+});
+diag 'newnews took: ' . timestr($t) . " for $n";
+
+done_testing();
+
+1;
-- 
EW


  parent reply	other threads:[~2018-04-03 11:09 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-03 11:09 [PATCH 0/7] optimize V2 Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 1/7] t/thread-all.t: modernize test to support modern inboxes Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 2/7] rename+rewrite test using Benchmark module Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-04-03 11:09 ` [PATCH 4/7] view: avoid offset during pagination Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 5/7] mbox: remove remaining OFFSET usage in SQLite Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 6/7] msgmap: replace id_batch with ids_after Eric Wong (Contractor, The Linux Foundation)
2018-04-03 11:09 ` [PATCH 7/7] nntp: simplify the long_response API Eric Wong (Contractor, The Linux Foundation)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180403110912.24231-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).