about summary refs log tree commit homepage
path: root/lib/PublicInbox/NewsWWW.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2020-12-04 22:03:48 +0000
committer: Eric Wong <e@80x24.org> 2020-12-05 21:41:52 +0000
commit: 629e22b8351e96a831535baa2854d0c9ca014445 (patch)
tree: c0739fd29537cfec066daa3938a8dfbe688e7bff /lib/PublicInbox/NewsWWW.pm
parent: 6ef28d7e22a499b8a9d531d1c16acd4847496e8e (diff)
download: public-inbox-629e22b8351e96a831535baa2854d0c9ca014445.tar.gz
newswww: use ->ALL to avoid O(n) inbox scan
We can avoid doing a Message-ID lookup on every single inbox
by using ->ALL to scan its over.sqlite3 DB instead.  This mimics
NNTP behavior and picks the first message indexed, though
redirecting to /all/$MESSAGE_ID/ could be done instead.  When
->ALL (or its over DB) is unavailable, we fall back to scanning
every inbox as before.

With the current lore.kernel.org set of inboxes (~140), this
provides a 10-40% speedup depending on inbox ordering.
Diffstat (limited to 'lib/PublicInbox/NewsWWW.pm')
-rw-r--r-- lib/PublicInbox/NewsWWW.pm 30
1 files changed, 23 insertions, 7 deletions
diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm
index 6bed0103..ade8dfd1 100644
--- a/lib/PublicInbox/NewsWWW.pm
+++ b/lib/PublicInbox/NewsWWW.pm
@@ -63,7 +63,6 @@ sub call {
                 return redirect($code, $url);
         }
 
-        my $res;
         my @try = (join('/', @parts));
 
         # trailing slash is in the rest of our WWW, so maybe some users
@@ -72,13 +71,30 @@ sub call {
                 pop @parts;
                 push @try, join('/', @parts);
         }
-
-        foreach my $mid (@try) {
-                my $arg = [ $mid ];
-                $pi_config->each_inbox(\&try_inbox, $arg);
-                defined($res = $arg->[1]) and last;
+        my $ALL = $pi_config->ALL;
+        if (my $over = $ALL ? $ALL->over : undef) {
+                my $by_eidx_key = $pi_config->{-by_eidx_key};
+                for my $mid (@try) {
+                        my ($id, $prev);
+                        while (my $x = $over->next_by_mid($mid, \$id, \$prev)) {
+                                my $xr3 = $over->get_xref3($x->{num});
+                                for (@$xr3) {
+                                        s/:[0-9]+:$x->{blob}\z// or next;
+                                        my $ibx = $by_eidx_key->{$_} // next;
+                                        my $url = $ibx->base_url or next;
+                                        $url .= mid_escape($mid) . '/';
+                                        return redirect(302, $url);
+                                }
+                        }
+                }
+        } else { # slow path, scan every inbox
+                for my $mid (@try) {
+                        my $arg = [ $mid ]; # [1] => result
+                        $pi_config->each_inbox(\&try_inbox, $arg);
+                        return $arg->[1] if $arg->[1];
+                }
         }
-        $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
+        [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
 }
 
 1;