about summary refs log tree commit homepage
path: root/lib/PublicInbox/WwwText.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/WwwText.pm')
-rw-r--r-- lib/PublicInbox/WwwText.pm 334
1 files changed, 180 insertions, 154 deletions
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index bb9a0a0f..5e23005e 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # used for displaying help texts and other non-mail content
@@ -7,7 +7,8 @@ use strict;
 use v5.10.1;
 use PublicInbox::Linkify;
 use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html prurl);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
+use HTTP::Date qw(time2str);
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::GzipFilter qw(gzf_maybe);
 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
@@ -30,20 +31,17 @@ sub get_text {
         my $have_tslash = ($key =~ s!/\z!!) if !$raw;
 
         my $txt = '';
-        my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
-        if (!_default_text($ctx, $key, $hdr, \$txt)) {
+        if (!_default_text($ctx, $key, \$txt)) {
                 $code = 404;
                 $txt = "404 Not Found ($key)\n";
         }
         my $env = $ctx->{env};
         if ($raw) {
-                if ($code == 200) {
-                        my $gzf = gzf_maybe($hdr, $env);
-                        $txt = $gzf->translate($txt);
-                        $txt .= $gzf->zflush;
-                }
-                $hdr->[3] = length($txt);
-                return [ $code, $hdr, [ $txt ] ]
+                my $h = delete $ctx->{-res_hdr};
+                $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
+                push @$h, 'Content-Type', 'text/plain',
+                        'Content-Length', length($txt);
+                return [ $code, $h, [ $txt ] ]
         }
 
         # enforce trailing slash for "wget -r" compatibility
@@ -70,14 +68,18 @@ sub get_text {
                 $txt = ascii_html($txt);
         }
         $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
-        PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
+        $txt =~ s!^search$!<a\nid=search>search</a>!sm;
+        $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!;
+        $txt =~ s!\b(Newsgroups?)\b!<a\nid=nntp>$1</a>!;
+        $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!;
+        PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt);
 }
 
 sub _srch_prefix ($$) {
-        my ($srch, $txt) = @_;
+        my ($ibx, $txt) = @_;
         my $pad = 0;
         my $htxt = '';
-        my $help = $srch->help;
+        my $help = $ibx->isrch->help;
         my $i;
         for ($i = 0; $i < @$help; $i += 2) {
                 my $pfx = $help->[$i];
@@ -88,10 +90,9 @@ sub _srch_prefix ($$) {
                 $htxt .= "\f\n";
         }
         $pad += 2;
-        my $padding = ' ' x ($pad + 8);
+        my $padding = ' ' x ($pad + 4);
         $htxt =~ s/^/$padding/gms;
-        $htxt =~ s/^$padding(\S+)\0/"        $1".
-                                (' ' x ($pad - length($1)))/egms;
+        $htxt =~ s/^$padding(\S+)\0/"    $1".(' ' x ($pad - length($1)))/egms;
         $htxt =~ s/\f\n/\n/gs;
         $$txt .= $htxt;
         1;
@@ -112,7 +113,7 @@ Users of browsers such as dillo, Firefox, or some browser
 extensions may start by downloading the following sample CSS file
 to control the colors they see:
 
-        ${base_url}userContent.css
+  ${base_url}userContent.css
 
 CSS sample
 ----------
@@ -167,10 +168,13 @@ EOF
 }
 
 # n.b. this is a perfect candidate for memoization
-sub inbox_config ($$$) {
-        my ($ctx, $hdr, $txt) = @_;
+sub inbox_config ($$) {
+        my ($ctx, $txt) = @_;
         my $ibx = $ctx->{ibx};
-        push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
+        push @{$ctx->{-res_hdr}},
+                'Content-Disposition', 'inline; filename=inbox.config';
+        my $t = eval { $ibx->mm->created_at };
+        push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t;
         my $name = dq_escape($ibx->{name});
         my $inboxdir = '/path/to/top-level-inbox';
         my $base_url = $ibx->base_url($ctx->{env});
@@ -210,17 +214,18 @@ EOF
                 defined(my $v = $ibx->{$k}) or next;
                 $$txt .= "\t$k = $v\n";
         }
-        $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
         $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
+        $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
         _coderepo_config($ctx, $txt);
         1;
 }
 
 # n.b. this is a perfect candidate for memoization
-sub extindex_config ($$$) {
-        my ($ctx, $hdr, $txt) = @_;
+sub extindex_config ($$) {
+        my ($ctx, $txt) = @_;
         my $ibx = $ctx->{ibx};
-        push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
+        push @{$ctx->{-res_hdr}},
+                'Content-Disposition', 'inline; filename=extindex.config';
         my $name = dq_escape($ibx->{name});
         my $base_url = $ibx->base_url($ctx->{env});
         $$txt .= <<EOS;
@@ -243,31 +248,73 @@ EOS
 
 sub coderepos_raw ($$) {
         my ($ctx, $top_url) = @_;
-        my $cr = $ctx->{ibx}->{coderepo} // return ();
         my $cfg = $ctx->{www}->{pi_cfg};
-        my @ret;
-        for my $cr_name (@$cr) {
-                $ret[0] //= do {
-                        my $thing = $ctx->{ibx}->can('cloneurl') ?
-                                'public inbox' : 'external index';
-                        <<EOF;
-Code repositories for project(s) associated with this $thing
-EOF
-                };
-                my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
-                if ($urls) {
-                        for (@$urls) {
-                                # relative or absolute URL?, prefix relative
-                                # "foo.git" with appropriate number of "../"
-                                my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
-                                        $top_url.$_;
-                                $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
-                        }
-                } else {
-                        $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
+        my $cr = $cfg->repo_objs($ctx->{ibx}) or return ();
+        my $buf = 'Code repositories for project(s) associated with this '.
+                $ctx->{ibx}->thing_type . ":\n";
+        my @recs = PublicInbox::CodeSearch::repos_sorted($cfg, @$cr);
+        my $cr_score = $ctx->{ibx}->{-cr_score};
+        my $env = $ctx->{env};
+        for (@recs) {
+                my ($t, $git) = @$_;
+                for ($git->pub_urls($env)) {
+                        my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_;
+                        my $nr = $cr_score->{$git->{nick}};
+                        $buf .= "\n";
+                        $buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9);
+                        $buf .= ' '.fmt_ts($t).' '.prurl($env, $u);
                 }
         }
-        @ret; # may be empty, this sub is called as an arg for join()
+        ($buf);
+}
+
+sub _add_non_http_urls ($$) {
+        my ($ctx, $txt) = @_;
+        $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
+        my $urls = $ctx->{ibx}->imap_url($ctx);
+        if (@$urls) {
+                $urls = join("\n  ", @$urls);
+                $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg;
+                $$txt .= <<EOM
+
+IMAP subfolder(s) are available under:
+  $urls
+  # each subfolder (starting with `0') holds 50K messages at most
+EOM
+        }
+        $urls = $ctx->{ibx}->nntp_url($ctx);
+        if (@$urls) {
+                $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
+                $$txt .= ' available over NNTP:';
+                $$txt .= "\n  " . join("\n  ", @$urls) . "\n";
+        }
+        $urls = $ctx->{ibx}->pop3_url($ctx);
+        if (@$urls) {
+                $urls = join("\n  ", @$urls);
+                $$txt .= <<EOM;
+
+POP3 access is available:
+  $urls
+
+The POP3 password is: anonymous
+The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
+where \$(uuidgen) is the output of the `uuidgen' command on your system.
+The UUID in the username functions as a private cookie (don't share it).
+By default, only 1000 messages are retrieved.  You may download more
+by appending `?limit=NUM' (without quotes) to the username, where
+`NUM' is an integer between 1 and 50000.
+Idle accounts will expire periodically.
+EOM
+        }
+}
+
+sub _add_onion_note ($) {
+        my ($txt) = @_;
+        $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
+
+note: .onion URLs require Tor: https://www.torproject.org/
+
+EOM
 }
 
 sub _mirror_help ($$) {
@@ -298,8 +345,10 @@ sub _mirror_help ($$) {
                         }
                         my $nr = scalar(@urls);
                         if ($nr > 1) {
-                                $$txt .= "\n\t";
-                                $$txt .= "# this inbox consists of $nr epochs:";
+                                chomp($$txt .= <<EOM);
+
+  # this inbox consists of $nr epochs: (no need to clone all of them)
+EOM
                                 $urls[0] .= " # oldest";
                                 $urls[-1] .= " # newest";
                         }
@@ -313,19 +362,21 @@ sub _mirror_help ($$) {
                         push @urls, $u;
                 }
                 $$txt .= "\n";
-                $$txt .= join('', map { "\tgit clone --mirror $_\n" } @urls);
-                if (my $addrs = $ibx->{address}) {
-                        $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
-                        my $v = defined $max ? '-V2' : '-V1';
-                        $$txt .= <<EOF;
-
-        # If you have public-inbox 1.1+ installed, you may
-        # initialize and index your mirror using the following commands:
-        public-inbox-init $v $ibx->{name} $dir/ $base_url \\
-                $addrs
-        public-inbox-index $dir
+                $$txt .= join('', map { "  git clone --mirror $_\n" } @urls);
+                my $addrs = $ibx->{address} // 'inbox@example.com';
+                my $ng = $ibx->{newsgroup} // '';
+                substr($ng, 0, 0, ' --ng ') if $ng;
+                $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
+                my $v = defined $max ? '-V2' : '-V1';
+                $$txt .= <<EOF;
+
+  # If you have public-inbox 1.1+ installed, you may
+  # initialize and index your mirror using the following commands:
+  public-inbox-init $v$ng \\
+    $ibx->{name} ./$dir $base_url \\
+    $addrs
+  public-inbox-index ./$dir
 EOF
-                }
         } else { # PublicInbox::ExtSearch
                 $$txt .= <<EOM;
 This is an external index which is an amalgamation of several public inboxes.
@@ -343,173 +394,148 @@ EOM
 
 Example config snippet for mirrors: $cfg_link
 EOF
-        if ($ibx->can('imap_url')) {
-                my $imap = $ibx->imap_url($ctx);
-                if (@$imap) {
-                        $$txt .= "\n";
-                        $$txt .= 'IMAP subfolder(s) available under:';
-                        $$txt .= "\n\t" . join("\n\t", @$imap) . "\n";
-                        $$txt .= <<EOM
-        # each subfolder (starting with `0') holds 50K messages at most
-EOM
-                }
-        }
-        if ($ibx->can('nntp_url')) {
-                my $nntp = $ibx->nntp_url($ctx);
-                if (scalar @$nntp) {
-                        $$txt .= "\n";
-                        $$txt .= @$nntp == 1 ? 'Newsgroup' : 'Newsgroups are';
-                        $$txt .= ' available over NNTP:';
-                        $$txt .= "\n\t" . join("\n\t", @$nntp) . "\n";
-                }
-        }
-        if ($$txt =~ m!\b[^:]+://\w+\.onion/!) {
-                $$txt .= <<EOM
+        _add_non_http_urls($ctx, $txt);
+        _add_onion_note($txt);
 
-note: .onion URLs require Tor: https://www.torproject.org/
-
-EOM
-        }
         my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
         $$txt .= join("\n\n",
                 coderepos_raw($ctx, $top_url), # may be empty
-                "AGPL code for this site:\n\tgit clone $code_url");
+                "AGPL code for this site:\n  git clone $code_url");
         1;
 }
 
-sub _default_text ($$$$) {
-        my ($ctx, $key, $hdr, $txt) = @_;
+sub _default_text ($$$) {
+        my ($ctx, $key, $txt) = @_;
         if ($key eq 'mirror') {
                 return _mirror_help($ctx, $txt);
         } elsif ($key eq 'color') {
                 return _colors_help($ctx, $txt);
         } elsif ($key eq 'config') {
                 return $ctx->{ibx}->can('cloneurl') ?
-                        inbox_config($ctx, $hdr, $txt) :
-                        extindex_config($ctx, $hdr, $txt);
+                        inbox_config($ctx, $txt) :
+                        extindex_config($ctx, $txt);
         }
-
         return if $key ne 'help'; # TODO more keys?
 
         my $ibx = $ctx->{ibx};
         my $base_url = $ibx->base_url($ctx->{env});
-        $$txt .= "public-inbox help for $base_url\n";
         $$txt .= <<EOF;
+public-inbox help for $base_url
 
 overview
 --------
 
-    public-inbox uses Message-ID identifiers in URLs.
-    One may look up messages by substituting Message-IDs
-    (without the leading '<' or trailing '>') into the URL.
-    Forward slash ('/') characters in the Message-IDs
-    need to be escaped as "%2F" (without quotes).
-
-    Thus, it is possible to retrieve any message by its
-    Message-ID by going to:
+  public-inbox uses Message-ID identifiers in URLs.
+  One may look up messages by substituting Message-IDs
+  (without the leading '<' or trailing '>') into the URL.
+  Forward slash ('/') characters in the Message-IDs
+  need to be escaped as "%2F" (without quotes).
 
-        $base_url<Message-ID>/
+  Thus, it is possible to retrieve any message by its
+  Message-ID by going to:
 
-        (without the '<' or '>')
+    $base_url<Message-ID>/
+    (without the '<' or '>')
 
-    Message-IDs are described at:
+  Message-IDs are described at:
 
-        $WIKI_URL/Message-ID
+    $WIKI_URL/Message-ID
 
 EOF
 
         # n.b. we use the Xapian DB for any regeneratable,
         # order-of-arrival-independent data.
-        my $srch = $ibx->isrch;
-        if ($srch) {
+        if ($ibx->isrch) {
                 $$txt .= <<EOF;
 search
 ------
 
-    This public-inbox has search functionality provided by Xapian.
+  This public-inbox has search functionality provided by Xapian.
 
-    It supports typical AND, OR, NOT, '+', '-' queries present
-    in other search engines.
+  It supports typical AND, OR, NOT, '+', '-' queries present
+  in other search engines.
 
-    We also support search prefixes to limit the scope of the
-    search to certain fields.
+  We also support search prefixes to limit the scope of the
+  search to certain fields.
 
-    Prefixes supported in this installation include:
+  Prefixes supported in this installation include:
 
 EOF
-                _srch_prefix($srch, $txt);
-
+                _srch_prefix($ibx, $txt);
                 $$txt .= <<EOF;
 
-    Most prefixes are probabilistic, meaning they support stemming
-    and wildcards ('*').  Ranges (such as 'd:') and boolean prefixes
-    do not support stemming or wildcards.
-    The upstream Xapian query parser documentation fully explains
-    the query syntax:
+  Most prefixes are probabilistic, meaning they support stemming
+  and wildcards ('*').  Ranges (such as 'd:') and boolean prefixes
+  do not support stemming or wildcards.
+  The upstream Xapian query parser documentation fully explains
+  the query syntax:
 
-        $QP_URL
+    $QP_URL
 
 EOF
         } # $srch
-        my $over = $ibx->over;
-        if ($over) {
+        if ($ibx->over) {
                 $$txt .= <<EOF;
 message threading
 -----------------
 
-    Message threading is enabled for this public-inbox,
-    additional endpoints for message threads are available:
+  Message threading is enabled for this public-inbox,
+  additional endpoints for message threads are available:
 
-    * $base_url<Message-ID>/T/#u
+  * $base_url<Message-ID>/T/#u
 
-      Loads the thread belonging to the given <Message-ID>
-      in flat chronological order.  The "#u" anchor
-      focuses the browser on the given <Message-ID>.
+    Loads the thread belonging to the given <Message-ID>
+    in flat chronological order.  The "#u" anchor
+    focuses the browser on the given <Message-ID>.
 
-    * $base_url<Message-ID>/t/#u
+  * $base_url<Message-ID>/t/#u
 
-      Loads the thread belonging to the given <Message-ID>
-      in threaded order with nesting.  For deep threads,
-      this requires a wide display or horizontal scrolling.
+    Loads the thread belonging to the given <Message-ID>
+    in threaded order with nesting.  For deep threads,
+    this requires a wide display or horizontal scrolling.
 
-    Both of these HTML endpoints are suitable for offline reading
-    using the thread overview at the bottom of each page.
+  Both of these HTML endpoints are suitable for offline reading
+  using the thread overview at the bottom of each page.
 
-    Users of feed readers may follow a particular thread using:
+  The gzipped mbox for a thread is available for downloading and
+  importing into your favorite mail client:
 
-    * $base_url<Message-ID>/t.atom
+  * $base_url<Message-ID>/t.mbox.gz
 
-      Which loads the thread in Atom Syndication Standard
-      described at Wikipedia and RFC4287:
+    We use the mboxrd variant of the mbox format described at:
 
-        $WIKI_URL/Atom_(standard)
-        https://tools.ietf.org/html/rfc4287
+    $WIKI_URL/Mbox
 
-      Atom Threading Extensions (RFC4685) is supported:
+  Users of feed readers may follow a particular thread using:
 
-        https://tools.ietf.org/html/rfc4685
+  * $base_url<Message-ID>/t.atom
 
-    Finally, the gzipped mbox for a thread is available for
-    downloading and importing into your favorite mail client:
+    Which loads the thread in Atom Syndication Standard
+    described at Wikipedia and RFC4287:
 
-    * $base_url<Message-ID>/t.mbox.gz
+    $WIKI_URL/Atom_(standard)
+    https://tools.ietf.org/html/rfc4287
 
-    We use the mboxrd variant of the mbox format described
-    at:
+    Atom Threading Extensions (RFC4685) are supported:
 
-        $WIKI_URL/Mbox
+    https://tools.ietf.org/html/rfc4685
 
 EOF
         } # $over
 
+        _add_non_http_urls($ctx, \(my $note = ''));
+        $note and $note =~ s/^/  /gms and $$txt .= <<EOF;
+additional protocols
+--------------------
+$note
+EOF
         $$txt .= <<EOF;
 contact
 -------
 
-    This help text is maintained by public-inbox developers
-    reachable via plain-text email at: meta\@public-inbox.org
-    Their inbox is archived at: https://public-inbox.org/meta/
-
+  This help text is maintained by public-inbox developers
+  reachable via plain-text email at: meta\@public-inbox.org
+  Their inbox is archived at: https://public-inbox.org/meta/
 EOF
         # TODO: support admin contact info in ~/.public-inbox/config
         1;