about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong (Contractor, The Linux Foundation) <e@80x24.org>2018-04-03 11:09:08 +0000
committerEric Wong (Contractor, The Linux Foundation) <e@80x24.org>2018-04-03 12:06:11 +0000
commitf91753f00b760c0e06eb0384792e5b18bd99bb0f (patch)
tree2cc2f60b629b4947fcd4a02c39c7cfc688c2c243
parente932bd748750234331d5664873272e0cb7ba6025 (diff)
downloadpublic-inbox-f91753f00b760c0e06eb0384792e5b18bd99bb0f.tar.gz
While SQLite is faster than Xapian for some queries we
use, it sucks at handling OFFSET.  Fortunately, we do
not need offsets when retrieving sorted results and
can bake it into the query.

For inbox.comp.version-control.git (v1 Xapian),
XOVER and XHDR are over 20x faster.
-rw-r--r--MANIFEST1
-rw-r--r--lib/PublicInbox/NNTP.pm39
-rw-r--r--lib/PublicInbox/Over.pm15
-rw-r--r--lib/PublicInbox/Search.pm4
-rw-r--r--t/perf-nntpd.t130
5 files changed, 159 insertions, 30 deletions
diff --git a/MANIFEST b/MANIFEST
index 4e79a4c8..2dad988e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -173,6 +173,7 @@ t/msgmap.t
 t/nntp.t
 t/nntpd.t
 t/over.t
+t/perf-nntpd.t
 t/perf-threading.t
 t/plack.t
 t/precheck.t
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 48ab7fc2..ff6d8958 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -331,20 +331,20 @@ sub cmd_newnews ($$$$;$$) {
         };
         return '.' unless @srch;
 
-        my $opts = { limit => 1000, offset => 0 };
+        my $prev = 0;
         long_response($self, 0, long_response_limit, sub {
                 my ($i) = @_;
                 my $srch = $srch[0];
-                my $msgs = $srch->query_ts($ts, $opts);
-                if (my $nr = scalar @$msgs) {
+                my $msgs = $srch->query_ts($ts, $prev);
+                if (scalar @$msgs) {
                         more($self, '<' .
                                 join(">\r\n<", map { $_->mid } @$msgs ).
                                 '>');
-                        $opts->{offset} += $nr;
+                        $prev = $msgs->[-1]->{num};
                 } else {
                         shift @srch;
                         if (@srch) { # continue onto next newsgroup
-                                $opts->{offset} = 0;
+                                $prev = 0;
                         } else { # break out of the long response.
                                 $$i = long_response_limit;
                         }
@@ -582,7 +582,7 @@ sub long_response ($$$$) {
         $self->{long_res} = sub {
                 # limit our own running time for fairness with other
                 # clients and to avoid buffering too much:
-                my $lim = 100;
+                my $lim = $end == long_response_limit ? 1 : 100;
 
                 my $err;
                 do {
@@ -710,20 +710,19 @@ sub hdr_searchmsg ($$$$) {
                 return $r unless ref $r;
                 my ($beg, $end) = @$r;
                 more($self, $xhdr ? r221 : r225);
-                my $off = 0;
-                long_response($self, $beg, $end, sub {
+                my $cur = $beg;
+                long_response($self, 0, long_response_limit, sub {
                         my ($i) = @_;
-                        my $msgs = $srch->query_xover($beg, $end, $off);
-                        my $nr = scalar @$msgs or return;
-                        $off += $nr;
+                        my $msgs = $srch->query_xover($cur, $end);
+                        my $nr = scalar @$msgs or
+                                        return ($$i = long_response_limit);
                         my $tmp = '';
                         foreach my $s (@$msgs) {
                                 $tmp .= $s->num . ' ' . $s->$field . "\r\n";
                         }
                         utf8::encode($tmp);
                         do_more($self, $tmp);
-                        # -1 to adjust for implicit increment in long_response
-                        $$i = $nr ? $$i + $nr - 1 : long_response_limit;
+                        $cur = $msgs->[-1]->{num} + 1;
                 });
         }
 }
@@ -848,20 +847,18 @@ sub cmd_xover ($;$) {
         my ($beg, $end) = @$r;
         more($self, "224 Overview information follows for $beg to $end");
         my $srch = $self->{ng}->search;
-        my $off = 0;
-        long_response($self, $beg, $end, sub {
+        my $cur = $beg;
+        long_response($self, 0, long_response_limit, sub {
                 my ($i) = @_;
-                my $msgs = $srch->query_xover($beg, $end, $off);
-                my $nr = scalar @$msgs or return;
-                $off += $nr;
+                my $msgs = $srch->query_xover($cur, $end);
+                my $nr = scalar @$msgs or return ($$i = long_response_limit);
 
                 # OVERVIEW.FMT
                 more($self, join("\r\n", map {
                         over_line($_->{num}, $_);
                         } @$msgs));
-
-                # -1 to adjust for implicit increment in long_response
-                $$i = $nr ? $$i + $nr - 1 : long_response_limit;
+                $cur = $msgs->[-1]->{num} + 1;
+                1;
         });
 }
 
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 3d285ac2..a7fd1315 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -51,25 +51,26 @@ sub do_get {
         my $dbh = $self->connect;
         my $lim = (($opts->{limit} || 0) + 0) || 1000;
         my $off = (($opts->{offset} || 0) + 0) || 0;
-        $sql .= "LIMIT $lim OFFSET $off";
+        $sql .= "LIMIT $lim";
+        $sql .= " OFFSET $off" if $off > 0;
         my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args);
         load_from_row($_) for @$msgs;
         $msgs
 }
 
 sub query_xover {
-        my ($self, $beg, $end, $off) = @_;
-        do_get($self, <<'', { offset => $off }, $beg, $end);
+        my ($self, $beg, $end) = @_;
+        do_get($self, <<'', {}, $beg, $end);
 SELECT * FROM over WHERE num >= ? AND num <= ?
 ORDER BY num ASC
 
 }
 
 sub query_ts {
-        my ($self, $ts, $opts) = @_;
-        do_get($self, <<'', $opts, $ts);
-SELECT * FROM over WHERE num > 0 AND ts >= ?
-ORDER BY ts ASC
+        my ($self, $ts, $prev) = @_;
+        do_get($self, <<'', {}, $ts, $prev);
+SELECT num,ddd FROM over WHERE ts >= ? AND num > ?
+ORDER BY num ASC
 
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 84c0a22f..f7fdf854 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -291,8 +291,8 @@ sub query_xover {
 }
 
 sub query_ts {
-        my ($self, $ts, $offset) = @_;
-        $self->{over_ro}->query_ts($ts, $offset);
+        my ($self, $ts, $prev) = @_;
+        $self->{over_ro}->query_ts($ts, $prev);
 }
 
 sub first_smsg_by_mid {
diff --git a/t/perf-nntpd.t b/t/perf-nntpd.t
new file mode 100644
index 00000000..4987f981
--- /dev/null
+++ b/t/perf-nntpd.t
@@ -0,0 +1,130 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Benchmark qw(:all);
+use PublicInbox::Inbox;
+use File::Temp qw/tempdir/;
+use POSIX qw(dup2);
+use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD);
+use Net::NNTP;
+my $pi_dir = $ENV{GIANT_PI_DIR};
+plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir;
+eval { require PublicInbox::Search };
+my ($host_port, $group, %opts, $s, $pid);
+END {
+        if ($s) {
+                $s->print("QUIT\r\n");
+                $s->getline;
+                $s = undef;
+        }
+        kill 'TERM', $pid if defined $pid;
+};
+
+if (($ENV{NNTP_TEST_URL} || '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) {
+        ($host_port, $group) = ($1, $2);
+        $host_port .= ":119" unless index($host_port, ':') > 0;
+} else {
+        $group = 'inbox.test.perf.nntpd';
+        my $ibx = { mainrepo => $pi_dir, newsgroup => $group };
+        $ibx = PublicInbox::Inbox->new($ibx);
+        my $nntpd = 'blib/script/public-inbox-nntpd';
+        my $tmpdir = tempdir('perf-nntpd-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+
+        my $pi_config = "$tmpdir/config";
+        {
+                open my $fh, '>', $pi_config or die "open($pi_config): $!";
+                print $fh <<"" or die "print $pi_config: $!";
+[publicinbox "test"]
+        newsgroup = $group
+        mainrepo = $pi_dir
+        address = test\@example.com
+
+                close $fh or die "close($pi_config): $!";
+        }
+
+        %opts = (
+                LocalAddr => '127.0.0.1',
+                ReuseAddr => 1,
+                Proto => 'tcp',
+                Listen => 1024,
+        );
+        my $sock = IO::Socket::INET->new(%opts);
+
+        ok($sock, 'sock created');
+        $! = 0;
+        $pid = fork;
+        if ($pid == 0) {
+                # pretend to be systemd
+                my $fl = fcntl($sock, F_GETFD, 0);
+                dup2(fileno($sock), 3) or die "dup2 failed: $!\n";
+                dup2(1, 2) or die "dup2 failed: $!\n";
+                fcntl($sock, F_SETFD, $fl &= ~FD_CLOEXEC);
+                $ENV{LISTEN_PID} = $$;
+                $ENV{LISTEN_FDS} = 1;
+                $ENV{PI_CONFIG} = $pi_config;
+                exec $nntpd, '-W0';
+                die "FAIL: $!\n";
+        }
+        ok(defined $pid, 'forked nntpd process successfully');
+        $host_port = $sock->sockhost . ':' . $sock->sockport;
+}
+%opts = (
+        PeerAddr => $host_port,
+        Proto => 'tcp',
+        Timeout => 1,
+);
+$s = IO::Socket::INET->new(%opts);
+$s->autoflush(1);
+my $buf = $s->getline;
+is($buf, "201 server ready - post via email\r\n", 'got greeting');
+ok($s->print("GROUP $group\r\n"), 'changed group');
+$buf = $s->getline;
+my ($tot, $min, $max) = ($buf =~ /\A211 (\d+) (\d+) (\d+) /);
+ok($tot && $min && $max, 'got GROUP response');
+my $nr = $max - $min;
+my $nmax = 50000;
+my $nmin = $max - $nmax;
+$nmin = $min if $nmin < $min;
+my $res;
+my $spec = "$nmin-$max";
+my $n;
+
+sub read_until_dot ($) {
+        my $n = 0;
+        do {
+                $buf = $s->getline;
+                ++$n
+        } until $buf eq ".\r\n";
+        $n;
+}
+
+my $t = timeit(1, sub {
+        $s->print("XOVER $spec\r\n");
+        $n = read_until_dot($s);
+});
+diag 'xover took: ' . timestr($t) . " for $n";
+
+$t = timeit(1, sub {
+        $s->print("HDR From $spec\r\n");
+        $n = read_until_dot($s);
+
+});
+diag "XHDR From ". timestr($t) . " for $n";
+
+my $date = $ENV{NEWNEWS_DATE};
+unless ($date) {
+        my (undef, undef, undef, $d, $m, $y) = gmtime(time - 30 * 86400);
+        $date = sprintf('%04u%02u%02u', $y + 1900, $m, $d);
+        diag "NEWNEWS_DATE undefined, using $date";
+}
+$t = timeit(1, sub {
+        $s->print("NEWNEWS * $date 000000 GMT\r\n");
+        $n = read_until_dot($s);
+});
+diag 'newnews took: ' . timestr($t) . " for $n";
+
+done_testing();
+
+1;