From b700fce60f25038ecd32551027e44b9b4b3ac544 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 27 Nov 2020 09:52:47 +0000 Subject: nntp: NEWNEWS: speed up filtering With 50K newsgroups, the filtering phase goes from ~2000 seconds to ~90 MILLISECONDS by relying on the grep perlop. This moves ->over checking out of the main dispatch and amortizes the cost via long_response. (Fairly scheduled) long_response time in newnews_i now takes ~360 seconds as opposed to ~30 seconds before this change, however; but the initial filtering speedup eliminating 2000s is more than worth it. --- lib/PublicInbox/NNTP.pm | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'lib/PublicInbox/NNTP.pm') diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 68222196..5cbf5a16 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -294,23 +294,28 @@ sub ngpat2re (;$) { } sub newnews_i { - my ($self, $overs, $ts, $prev) = @_; - my $over = $overs->[0]; - my $msgs = $over->query_ts($ts, $$prev); - if (scalar @$msgs) { - more($self, '<' . - join(">\r\n<", map { $_->{mid} } @$msgs ). - '>'); - $$prev = $msgs->[-1]->{num}; - } else { - shift @$overs; - if (@$overs) { # continue onto next newsgroup - $$prev = 0; - return 1; - } else { # break out of the long response. - return; + my ($self, $names, $ts, $prev) = @_; + my $ngname = $names->[0]; + if (my $ibx = $self->{nntpd}->{groups}->{$ngname}) { + if (my $over = $ibx->over) { + my $msgs = $over->query_ts($ts, $$prev); + if (scalar @$msgs) { + more($self, '<' . + join(">\r\n<", + map { $_->{mid} } @$msgs ) . + '>'); + $$prev = $msgs->[-1]->{num}; + return 1; # continue on current group + } } } + shift @$names; + if (@$names) { # continue onto next newsgroup + $$prev = 0; + 1; + } else { # all done, break out of the long_response + undef; + } } sub cmd_newnews ($$$$;$$) { @@ -321,17 +326,11 @@ sub cmd_newnews ($$$$;$$) { my ($keep, $skip) = split('!', $newsgroups, 2); ngpat2re($keep); ngpat2re($skip); - my @overs; - foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{newsgroup} =~ $keep or next; - $ng->{newsgroup} =~ $skip and next; - my $over = $ng->over or next; - push @overs, $over; - }; - return '.' unless @overs; - + my @names = grep(!/$skip/, grep(/$keep/, + @{$self->{nntpd}->{groupnames}})); + return '.' unless scalar(@names); my $prev = 0; - long_response($self, \&newnews_i, \@overs, $ts, \$prev); + long_response($self, \&newnews_i, \@names, $ts, \$prev); } sub cmd_group ($$) { -- cgit v1.2.3-24-ge0c7