From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 211A81FAE4 for ; Tue, 3 Apr 2018 11:09:13 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 3/7] nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster Date: Tue, 3 Apr 2018 11:09:08 +0000 Message-Id: <20180403110912.24231-4-e@80x24.org> In-Reply-To: <20180403110912.24231-1-e@80x24.org> References: <20180403110912.24231-1-e@80x24.org> List-Id: While SQLite is faster than Xapian for some queries we use, it sucks at handling OFFSET. Fortunately, we do not need offsets when retrieving sorted results and can bake it into the query. For inbox.comp.version-control.git (v1 Xapian), XOVER and XHDR are over 20x faster. --- MANIFEST | 1 + lib/PublicInbox/NNTP.pm | 39 +++++++------- lib/PublicInbox/Over.pm | 15 +++--- lib/PublicInbox/Search.pm | 4 +- t/perf-nntpd.t | 130 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 159 insertions(+), 30 deletions(-) create mode 100644 t/perf-nntpd.t diff --git a/MANIFEST b/MANIFEST index 4e79a4c..2dad988 100644 --- a/MANIFEST +++ b/MANIFEST @@ -173,6 +173,7 @@ t/msgmap.t t/nntp.t t/nntpd.t t/over.t +t/perf-nntpd.t t/perf-threading.t t/plack.t t/precheck.t diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 48ab7fc..ff6d895 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -331,20 +331,20 @@ sub cmd_newnews ($$$$;$$) { }; return '.' unless @srch; - my $opts = { limit => 1000, offset => 0 }; + my $prev = 0; long_response($self, 0, long_response_limit, sub { my ($i) = @_; my $srch = $srch[0]; - my $msgs = $srch->query_ts($ts, $opts); - if (my $nr = scalar @$msgs) { + my $msgs = $srch->query_ts($ts, $prev); + if (scalar @$msgs) { more($self, '<' . join(">\r\n<", map { $_->mid } @$msgs ). '>'); - $opts->{offset} += $nr; + $prev = $msgs->[-1]->{num}; } else { shift @srch; if (@srch) { # continue onto next newsgroup - $opts->{offset} = 0; + $prev = 0; } else { # break out of the long response. $$i = long_response_limit; } @@ -582,7 +582,7 @@ sub long_response ($$$$) { $self->{long_res} = sub { # limit our own running time for fairness with other # clients and to avoid buffering too much: - my $lim = 100; + my $lim = $end == long_response_limit ? 1 : 100; my $err; do { @@ -710,20 +710,19 @@ sub hdr_searchmsg ($$$$) { return $r unless ref $r; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - my $off = 0; - long_response($self, $beg, $end, sub { + my $cur = $beg; + long_response($self, 0, long_response_limit, sub { my ($i) = @_; - my $msgs = $srch->query_xover($beg, $end, $off); - my $nr = scalar @$msgs or return; - $off += $nr; + my $msgs = $srch->query_xover($cur, $end); + my $nr = scalar @$msgs or + return ($$i = long_response_limit); my $tmp = ''; foreach my $s (@$msgs) { $tmp .= $s->num . ' ' . $s->$field . "\r\n"; } utf8::encode($tmp); do_more($self, $tmp); - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; }); } } @@ -848,20 +847,18 @@ sub cmd_xover ($;$) { my ($beg, $end) = @$r; more($self, "224 Overview information follows for $beg to $end"); my $srch = $self->{ng}->search; - my $off = 0; - long_response($self, $beg, $end, sub { + my $cur = $beg; + long_response($self, 0, long_response_limit, sub { my ($i) = @_; - my $msgs = $srch->query_xover($beg, $end, $off); - my $nr = scalar @$msgs or return; - $off += $nr; + my $msgs = $srch->query_xover($cur, $end); + my $nr = scalar @$msgs or return ($$i = long_response_limit); # OVERVIEW.FMT more($self, join("\r\n", map { over_line($_->{num}, $_); } @$msgs)); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; + 1; }); } diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 3d285ac..a7fd131 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -51,25 +51,26 @@ sub do_get { my $dbh = $self->connect; my $lim = (($opts->{limit} || 0) + 0) || 1000; my $off = (($opts->{offset} || 0) + 0) || 0; - $sql .= "LIMIT $lim OFFSET $off"; + $sql .= "LIMIT $lim"; + $sql .= " OFFSET $off" if $off > 0; my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args); load_from_row($_) for @$msgs; $msgs } sub query_xover { - my ($self, $beg, $end, $off) = @_; - do_get($self, <<'', { offset => $off }, $beg, $end); + my ($self, $beg, $end) = @_; + do_get($self, <<'', {}, $beg, $end); SELECT * FROM over WHERE num >= ? AND num <= ? ORDER BY num ASC } sub query_ts { - my ($self, $ts, $opts) = @_; - do_get($self, <<'', $opts, $ts); -SELECT * FROM over WHERE num > 0 AND ts >= ? -ORDER BY ts ASC + my ($self, $ts, $prev) = @_; + do_get($self, <<'', {}, $ts, $prev); +SELECT num,ddd FROM over WHERE ts >= ? AND num > ? +ORDER BY num ASC } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 84c0a22..f7fdf85 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -291,8 +291,8 @@ sub query_xover { } sub query_ts { - my ($self, $ts, $offset) = @_; - $self->{over_ro}->query_ts($ts, $offset); + my ($self, $ts, $prev) = @_; + $self->{over_ro}->query_ts($ts, $prev); } sub first_smsg_by_mid { diff --git a/t/perf-nntpd.t b/t/perf-nntpd.t new file mode 100644 index 0000000..4987f98 --- /dev/null +++ b/t/perf-nntpd.t @@ -0,0 +1,130 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Benchmark qw(:all); +use PublicInbox::Inbox; +use File::Temp qw/tempdir/; +use POSIX qw(dup2); +use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); +use Net::NNTP; +my $pi_dir = $ENV{GIANT_PI_DIR}; +plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir; +eval { require PublicInbox::Search }; +my ($host_port, $group, %opts, $s, $pid); +END { + if ($s) { + $s->print("QUIT\r\n"); + $s->getline; + $s = undef; + } + kill 'TERM', $pid if defined $pid; +}; + +if (($ENV{NNTP_TEST_URL} || '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) { + ($host_port, $group) = ($1, $2); + $host_port .= ":119" unless index($host_port, ':') > 0; +} else { + $group = 'inbox.test.perf.nntpd'; + my $ibx = { mainrepo => $pi_dir, newsgroup => $group }; + $ibx = PublicInbox::Inbox->new($ibx); + my $nntpd = 'blib/script/public-inbox-nntpd'; + my $tmpdir = tempdir('perf-nntpd-XXXXXX', TMPDIR => 1, CLEANUP => 1); + + my $pi_config = "$tmpdir/config"; + { + open my $fh, '>', $pi_config or die "open($pi_config): $!"; + print $fh <<"" or die "print $pi_config: $!"; +[publicinbox "test"] + newsgroup = $group + mainrepo = $pi_dir + address = test\@example.com + + close $fh or die "close($pi_config): $!"; + } + + %opts = ( + LocalAddr => '127.0.0.1', + ReuseAddr => 1, + Proto => 'tcp', + Listen => 1024, + ); + my $sock = IO::Socket::INET->new(%opts); + + ok($sock, 'sock created'); + $! = 0; + $pid = fork; + if ($pid == 0) { + # pretend to be systemd + my $fl = fcntl($sock, F_GETFD, 0); + dup2(fileno($sock), 3) or die "dup2 failed: $!\n"; + dup2(1, 2) or die "dup2 failed: $!\n"; + fcntl($sock, F_SETFD, $fl &= ~FD_CLOEXEC); + $ENV{LISTEN_PID} = $$; + $ENV{LISTEN_FDS} = 1; + $ENV{PI_CONFIG} = $pi_config; + exec $nntpd, '-W0'; + die "FAIL: $!\n"; + } + ok(defined $pid, 'forked nntpd process successfully'); + $host_port = $sock->sockhost . ':' . $sock->sockport; +} +%opts = ( + PeerAddr => $host_port, + Proto => 'tcp', + Timeout => 1, +); +$s = IO::Socket::INET->new(%opts); +$s->autoflush(1); +my $buf = $s->getline; +is($buf, "201 server ready - post via email\r\n", 'got greeting'); +ok($s->print("GROUP $group\r\n"), 'changed group'); +$buf = $s->getline; +my ($tot, $min, $max) = ($buf =~ /\A211 (\d+) (\d+) (\d+) /); +ok($tot && $min && $max, 'got GROUP response'); +my $nr = $max - $min; +my $nmax = 50000; +my $nmin = $max - $nmax; +$nmin = $min if $nmin < $min; +my $res; +my $spec = "$nmin-$max"; +my $n; + +sub read_until_dot ($) { + my $n = 0; + do { + $buf = $s->getline; + ++$n + } until $buf eq ".\r\n"; + $n; +} + +my $t = timeit(1, sub { + $s->print("XOVER $spec\r\n"); + $n = read_until_dot($s); +}); +diag 'xover took: ' . timestr($t) . " for $n"; + +$t = timeit(1, sub { + $s->print("HDR From $spec\r\n"); + $n = read_until_dot($s); + +}); +diag "XHDR From ". timestr($t) . " for $n"; + +my $date = $ENV{NEWNEWS_DATE}; +unless ($date) { + my (undef, undef, undef, $d, $m, $y) = gmtime(time - 30 * 86400); + $date = sprintf('%04u%02u%02u', $y + 1900, $m, $d); + diag "NEWNEWS_DATE undefined, using $date"; +} +$t = timeit(1, sub { + $s->print("NEWNEWS * $date 000000 GMT\r\n"); + $n = read_until_dot($s); +}); +diag 'newnews took: ' . timestr($t) . " for $n"; + +done_testing(); + +1; -- EW