about summary refs log tree commit homepage
path: root/lib/PublicInbox/WWW.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/WWW.pm')
-rw-r--r-- lib/PublicInbox/WWW.pm 252
1 files changed, 163 insertions, 89 deletions
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 1e7d3c1e..289599b8 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Main web interface for mailing list archives
@@ -11,30 +11,28 @@
 # - Must not rely on static content
 # - UTF-8 is only for user-content, 7-bit US-ASCII for us
 package PublicInbox::WWW;
-use 5.010_001;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::Config;
+use PublicInbox::Git;
 use PublicInbox::Hval;
 use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw(mid_escape);
-require PublicInbox::Git;
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::UserContent;
 use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
 
 # TODO: consider a routing tree now that we have more endpoints:
-our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
+our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
 our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
-our $OID_RE = qr![a-f0-9]{7,40}!;
+our $OID_RE = qr![a-f0-9]{7,}!;
 
 sub new {
-        my ($class, $pi_config) = @_;
-        $pi_config ||= PublicInbox::Config->new;
-        bless { pi_config => $pi_config }, $class;
+        my ($class, $pi_cfg) = @_;
+        bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
 }
 
 # backwards compatibility, do not use
@@ -48,15 +46,21 @@ sub call {
         my $ctx = { env => $env, www => $self };
 
         # we don't care about multi-value
-        %{$ctx->{qp}} = map {
-                utf8::decode($_);
-                tr/+/ /;
-                my ($k, $v) = split('=', $_, 2);
-                $v = uri_unescape($v // '');
-                # none of the keys we care about will need escaping
-                $k => $v;
-        } split(/[&;]+/, $env->{QUERY_STRING});
-
+        # '0' isn't a QUERY_STRING we care about
+        if (my $qs = $env->{QUERY_STRING}) {
+                utf8::decode($qs);
+                $qs =~ tr/+/ /;
+                %{$ctx->{qp}} = map {
+                        # we only use single-char query param keys
+                        if (s/\A([A-Za-z])=//) {
+                                $1 => uri_unescape($_)
+                        } elsif (/\A[a-z]\z/) { # some boolean options
+                                $_ => ''
+                        } else {
+                                () # ignored
+                        }
+                } split(/[&;]+/, $qs);
+        }
         my $path_info = path_info_raw($env);
         my $method = $env->{REQUEST_METHOD};
 
@@ -66,7 +70,15 @@ sub call {
                         my ($epoch, $path) = ($2, $3);
                         return invalid_inbox($ctx, $1) ||
                                 serve_git($ctx, $epoch, $path);
-                } elsif ($path_info =~ m!$INBOX_RE/!o) {
+                } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+                        return get_altid_dump($ctx, $1, $2);
+                } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
+                        my ($idx, $fn) = ($3, $4);
+                        return invalid_inbox_mid($ctx, $1, $2) ||
+                                get_attach($ctx, $idx, $fn);
+                } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/\z!o) {
+                        return invalid_inbox_mid($ctx, $1, $2) || mbox_results($ctx);
+                } elsif ($path_info =~ m!$INBOX_RE/\z!o) {
                         return invalid_inbox($ctx, $1) || mbox_results($ctx);
                 }
         }
@@ -75,8 +87,12 @@ sub call {
         }
 
         # top-level indices and feeds
-        if ($path_info eq '/' || $path_info eq '/manifest.js.gz') {
-                www_listing($self)->call($env);
+        if ($path_info eq '/') {
+                require PublicInbox::WwwListing;
+                PublicInbox::WwwListing->response($ctx);
+        } elsif ($path_info eq '/manifest.js.gz') {
+                require PublicInbox::ManifestJsGz;
+                PublicInbox::ManifestJsGz->response($ctx);
         } elsif ($path_info =~ m!$INBOX_RE\z!o) {
                 invalid_inbox($ctx, $1) || r301($ctx, $1);
         } elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) {
@@ -85,6 +101,9 @@ sub call {
                 invalid_inbox($ctx, $1) || get_atom($ctx);
         } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
                 invalid_inbox($ctx, $1) || get_new($ctx);
+        } elsif ($path_info =~
+                        m!$INBOX_RE/topics_(new|active)\.(atom|html)\z!o) {
+                get_topics($ctx, $1, $2, $3);
         } elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
                 get_description($ctx, $1);
         } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
@@ -124,37 +143,55 @@ sub call {
                 get_vcs_object($ctx, $1, $2, $3);
         } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
                 r301($ctx, $1, $2, 's/');
+        } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+                get_altid_dump($ctx, $1, $2);
         # convenience redirects order matters
         } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
                 r301($ctx, $1, $2);
-
+        } elsif ($path_info =~ m!\A/\+/([a-zA-Z0-9_\-\.]+)\.css\z!) {
+                get_css($ctx, undef, $1); # for WwwListing
         } else {
                 legacy_redirects($ctx, $path_info);
         }
 }
 
-# for CoW-friendliness, MOOOOO!
+# for CoW-friendliness, MOOOOO!  Even for single-process setups,
+# we want to get all immortal allocations done early to avoid heap
+# fragmentation since common allocators favor a large contiguous heap.
 sub preload {
         my ($self) = @_;
+
+        # populate caches used by Encode internally, since emails
+        # may show up with any encoding.
+        require Encode;
+        Encode::find_encoding($_) for Encode->encodings(':all');
+
+        require PublicInbox::ExtMsg;
         require PublicInbox::Feed;
         require PublicInbox::View;
         require PublicInbox::SearchThread;
-        require PublicInbox::MIME;
-        require Digest::SHA;
-        require POSIX;
+        require PublicInbox::Eml;
+        require PublicInbox::Mbox;
+        require PublicInbox::ViewVCS;
+        require PublicInbox::WwwText;
+        require PublicInbox::WwwAttach;
         eval {
                 require PublicInbox::Search;
                 PublicInbox::Search::load_xapian();
         };
-        foreach (qw(PublicInbox::SearchView
-                        PublicInbox::Mbox IO::Compress::Gzip
-                        PublicInbox::NewsWWW)) {
-                eval "require $_;";
+        for (qw(SearchView MboxGz WwwAltId)) {
+                eval "require PublicInbox::$_;";
         }
         if (ref($self)) {
+                my $pi_cfg = $self->{pi_cfg};
+                if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
+                        $pi_cfg->limiter('-cgit');
+                }
+                $pi_cfg->ALL and require PublicInbox::Isearch;
                 $self->cgit;
+                $self->coderepo;
                 $self->stylesheets_prepare($_) for ('', '../', '../../');
-                $self->www_listing;
+                $self->news_www;
         }
 }
 
@@ -171,18 +208,29 @@ sub r404 {
 
 sub news_cgit_fallback ($) {
         my ($ctx) = @_;
-        my $www = $ctx->{www};
-        my $env = $ctx->{env};
-        my $res = $www->news_www->call($env);
-        $res->[0] == 404 ? $www->cgit->call($env) : $res;
+        my $res = $ctx->{www}->news_www->call($ctx->{env});
+
+        $res->[0] == 404 and ($ctx->{www}->{cgit_fallback} //= do {
+                my $c = $ctx->{www}->{pi_cfg}->{'publicinbox.cgit'} // 'first';
+                $c ne 'first' # `fallback' and `rewrite' => true
+        } // 0) and $res = $ctx->{www}->coderepo->srv($ctx);
+
+        ref($res) eq 'ARRAY' && $res->[0] == 404 and
+                $res = $ctx->{www}->cgit->call($ctx->{env}, $ctx);
+
+        ref($res) eq 'ARRAY' && $res->[0] == 404 &&
+                        !$ctx->{www}->{cgit_fallback} and
+                $res = $ctx->{www}->coderepo->srv($ctx);
+        $res;
 }
 
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox ($$) {
         my ($ctx, $inbox) = @_;
-        my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
+        my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
+                        $ctx->{www}->{pi_cfg}->lookup_ei($inbox);
         if (defined $ibx) {
-                $ctx->{-inbox} = $ibx;
+                $ctx->{ibx} = $ibx;
                 return;
         }
 
@@ -200,14 +248,13 @@ sub invalid_inbox_mid {
         return $ret if $ret;
 
         my $mid = $ctx->{mid} = uri_unescape($mid_ue);
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
                 my ($x2, $x38) = ($1, $2);
                 # this is horrifically wasteful for legacy URLs:
-                my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
-                require Email::Simple;
-                my $s = Email::Simple->new($str);
-                $mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+                my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
+                my $s = PublicInbox::Eml->new($str);
+                $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
                 return r301($ctx, $inbox, mid_escape($mid));
         }
         undef;
@@ -227,6 +274,13 @@ sub get_new {
         PublicInbox::Feed::new_html($ctx);
 }
 
+# /$INBOX/topics_(new|active).(html|atom)
+sub get_topics {
+        my ($ctx, $ibx_name, $category, $type) = @_;
+        require PublicInbox::WwwTopics;
+        PublicInbox::WwwTopics::response($ctx, $ibx_name, $category, $type);
+}
+
 # /$INBOX/?r=$GIT_COMMIT                 -> HTML only
 sub get_index {
         my ($ctx) = @_;
@@ -243,7 +297,7 @@ sub get_index {
 sub get_mid_txt {
         my ($ctx) = @_;
         require PublicInbox::Mbox;
-        PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
+        PublicInbox::Mbox::emit_raw($ctx) || r(404);
 }
 
 # /$INBOX/$MESSAGE_ID/                   -> HTML content (short quotes)
@@ -256,7 +310,7 @@ sub get_mid_html {
 # /$INBOX/$MESSAGE_ID/t/
 sub get_thread {
         my ($ctx, $flat) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         $ctx->{flat} = $flat;
         require PublicInbox::View;
         PublicInbox::View::thread_html($ctx);
@@ -275,25 +329,33 @@ sub get_text {
 }
 
 # show git objects (blobs and commits)
-# /$INBOX/_/$OBJECT_ID/show
-# /$INBOX/_/${OBJECT_ID}_${FILENAME}
-# KEY may contain slashes
+# /$INBOX/$GIT_OBJECT_ID/s/
+# /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
 sub get_vcs_object ($$$;$) {
         my ($ctx, $inbox, $oid, $filename) = @_;
         my $r404 = invalid_inbox($ctx, $inbox);
         return $r404 if $r404;
+        return r(404) if !$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx});
         require PublicInbox::ViewVCS;
         PublicInbox::ViewVCS::show($ctx, $oid, $filename);
 }
 
+sub get_altid_dump {
+        my ($ctx, $inbox, $altid_pfx) =@_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
+        PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
+}
+
 sub need {
-        my ($ctx, $extra) = @_;
-        my $msg = <<EOF;
-<html><head><title>$extra not available for this
-public-inbox</title><body><pre>$extra is not available for this public-inbox
-<a href="../">Return to index</a></pre></body></html>
+        my ($ctx, $extra, $upref) = @_;
+        require PublicInbox::WwwStream;
+        $upref //= '../';
+        PublicInbox::WwwStream::html_oneshot($ctx, 501, <<EOF);
+<pre>$extra is not available for this public-inbox
+<a\nhref="$upref">Return to index</a></pre>
 EOF
-        [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
 }
 
 # /$INBOX/$MESSAGE_ID/t.mbox           -> thread as mbox
@@ -303,7 +365,7 @@ EOF
 # especially on older systems.  Stick to zlib since that's what git uses.
 sub get_thread_mbox {
         my ($ctx, $sfx) = @_;
-        my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        my $over = $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Mbox;
         PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
 }
@@ -312,7 +374,7 @@ sub get_thread_mbox {
 # /$INBOX/$MESSAGE_ID/t.atom                  -> thread as Atom feed
 sub get_thread_atom {
         my ($ctx) = @_;
-        $ctx->{-inbox}->over or return need($ctx, 'Overview');
+        $ctx->{ibx}->over or return need($ctx, 'Overview');
         require PublicInbox::Feed;
         PublicInbox::Feed::generate_thread_atom($ctx);
 }
@@ -377,11 +439,11 @@ sub legacy_redirects {
 
 sub r301 {
         my ($ctx, $inbox, $mid_ue, $suffix) = @_;
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         unless ($ibx) {
                 my $r404 = invalid_inbox($ctx, $inbox);
                 return $r404 if $r404;
-                $ibx = $ctx->{-inbox};
+                $ibx = $ctx->{ibx};
         }
         my $url = $ibx->base_url($ctx->{env});
         my $qs = $ctx->{env}->{QUERY_STRING};
@@ -412,13 +474,17 @@ sub msg_page {
 
         # legacy, but no redirect for compatibility:
         'f/' eq $e and return get_mid_html($ctx);
+        if ($e eq 'd/') {
+                require PublicInbox::View;
+                return PublicInbox::View::diff_msg($ctx);
+        }
         r404($ctx);
 }
 
 sub serve_git {
         my ($ctx, $epoch, $path) = @_;
         my $env = $ctx->{env};
-        my $ibx = $ctx->{-inbox};
+        my $ibx = $ctx->{ibx};
         my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git;
         $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
 }
@@ -426,7 +492,7 @@ sub serve_git {
 sub mbox_results {
         my ($ctx) = @_;
         if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
-                $ctx->{-inbox}->search or return need($ctx, 'search');
+                $ctx->{ibx}->isrch or return need($ctx, 'search');
                 require PublicInbox::SearchView;
                 return PublicInbox::SearchView::mbox_results($ctx);
         }
@@ -443,32 +509,29 @@ sub serve_mbox_range {
 
 sub news_www {
         my ($self) = @_;
-        $self->{news_www} ||= do {
+        $self->{news_www} //= do {
                 require PublicInbox::NewsWWW;
-                PublicInbox::NewsWWW->new($self->{pi_config});
+                PublicInbox::NewsWWW->new($self->{pi_cfg});
         }
 }
 
 sub cgit {
         my ($self) = @_;
-        $self->{cgit} ||= do {
-                my $pi_config = $self->{pi_config};
-
-                if (defined($pi_config->{'publicinbox.cgitrc'})) {
+        $self->{cgit} //=
+                (defined($self->{pi_cfg}->{'publicinbox.cgitrc'}) ? do {
                         require PublicInbox::Cgit;
-                        PublicInbox::Cgit->new($pi_config);
-                } else {
+                        PublicInbox::Cgit->new($self->{pi_cfg});
+                } : undef) // do {
                         require Plack::Util;
                         Plack::Util::inline_object(call => sub { r404() });
-                }
-        }
+                };
 }
 
-sub www_listing {
+sub coderepo {
         my ($self) = @_;
-        $self->{www_listing} ||= do {
-                require PublicInbox::WwwListing;
-                PublicInbox::WwwListing->new($self);
+        $self->{coderepo} //= do {
+                require PublicInbox::WwwCoderepo;
+                PublicInbox::WwwCoderepo->new($self->{pi_cfg});
         }
 }
 
@@ -477,8 +540,8 @@ sub get_inbox_manifest ($$$) {
         my ($ctx, $inbox, $key) = @_;
         my $r404 = invalid_inbox($ctx, $inbox);
         return $r404 if $r404;
-        require PublicInbox::WwwListing;
-        PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]);
+        require PublicInbox::ManifestJsGz;
+        PublicInbox::ManifestJsGz::per_inbox($ctx);
 }
 
 sub get_attach {
@@ -510,7 +573,7 @@ sub stylesheets_prepare ($$) {
         } || sub { $_[0] };
 
         my $css_map = {};
-        my $stylesheets = $self->{pi_config}->{css} || [];
+        my $stylesheets = $self->{pi_cfg}->{css} || [];
         my $links = [];
         my $inline_ok = 1;
 
@@ -537,9 +600,9 @@ sub stylesheets_prepare ($$) {
                                 next;
                         };
                         my $ctime = 0;
-                        my $local = do { local $/; <$fh> };
+                        my $local = PublicInbox::IO::read_all $fh; # sets _
                         if ($local =~ /\S/) {
-                                $ctime = sprintf('%x',(stat($fh))[10]);
+                                $ctime = sprintf('%x',(stat(_))[10]);
                                 $local = $mini->($local);
                         }
 
@@ -601,24 +664,25 @@ sub style {
         };
 }
 
-# /$INBOX/$KEY.css endpoint
+# /$INBOX/$KEY.css and /+/$KEY.css endpoints
 # CSS is configured globally for all inboxes, but we access them on
 # a per-inbox basis.  This allows administrators to setup per-inbox
 # static routes to intercept the request before it hits PSGI
+# inbox == undef => top-level WwwListing
 sub get_css ($$$) {
         my ($ctx, $inbox, $key) = @_;
-        my $r404 = invalid_inbox($ctx, $inbox);
+        my $r404 = defined($inbox) ? invalid_inbox($ctx, $inbox) : undef;
         return $r404 if $r404;
         my $self = $ctx->{www};
-        my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+        my $css_map = $self->{-css_map} ||
+                stylesheets_prepare($self, defined($inbox) ? '' : '+/');
         my $css = $css_map->{$key};
-        if (!defined($css) && $key eq 'userContent') {
+        if (!defined($css) && defined($inbox) && $key eq 'userContent') {
                 my $env = $ctx->{env};
-                $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+                $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
         }
         defined $css or return r404();
-        my $h = [ 'Content-Length', bytes::length($css),
-                'Content-Type', 'text/css' ];
+        my $h = [ 'Content-Length', length($css), 'Content-Type', 'text/css' ];
         PublicInbox::GitHTTPBackend::cache_one_year($h);
         [ 200, $h, [ $css ] ];
 }
@@ -626,10 +690,20 @@ sub get_css ($$$) {
 sub get_description {
         my ($ctx, $inbox) = @_;
         invalid_inbox($ctx, $inbox) || do {
-                my $d = $ctx->{-inbox}->description . "\n";
-                [ 200, [ 'Content-Length', bytes::length($d),
+                my $d = $ctx->{ibx}->description . "\n";
+                utf8::encode($d);
+                [ 200, [ 'Content-Length', length($d),
                         'Content-Type', 'text/plain' ], [ $d ] ];
         };
 }
 
+sub event_step { # called via requeue
+        my ($self) = @_;
+        # gzf = PublicInbox::GzipFilter == $ctx
+        my $gzf = shift(@{$self->{-low_prio_q}}) // return;
+        PublicInbox::DS::requeue($self) if scalar(@{$self->{-low_prio_q}});
+        my $http = $gzf->{env}->{'psgix.io'}; # PublicInbox::HTTP
+        $http->next_step($gzf->can('async_next'));
+}
+
 1;