diff options
Diffstat (limited to 'lib/PublicInbox/WwwText.pm')
-rw-r--r-- | lib/PublicInbox/WwwText.pm | 492 |
1 files changed, 360 insertions, 132 deletions
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index f6d831f6..5e23005e 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -1,15 +1,16 @@ -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # used for displaying help texts and other non-mail content package PublicInbox::WwwText; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use PublicInbox::Linkify; use PublicInbox::WwwStream; -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html prurl fmt_ts); +use HTTP::Date qw(time2str); use URI::Escape qw(uri_escape_utf8); +use PublicInbox::GzipFilter qw(gzf_maybe); our $QP_URL = 'https://xapian.org/docs/queryparser.html'; our $WIKI_URL = 'https://en.wikipedia.org/wiki'; my $hl = eval { @@ -23,26 +24,29 @@ sub get_text { my ($ctx, $key) = @_; my $code = 200; - $key = 'help' if !defined $key; # this 302s to _/text/help/ + $key //= 'help'; # this 302s to _/text/help/ # get the raw text the same way we get mboxrds my $raw = ($key =~ s!/raw\z!!); my $have_tslash = ($key =~ s!/\z!!) if !$raw; my $txt = ''; - my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ]; - if (!_default_text($ctx, $key, $hdr, \$txt)) { + if (!_default_text($ctx, $key, \$txt)) { $code = 404; $txt = "404 Not Found ($key)\n"; } + my $env = $ctx->{env}; if ($raw) { - $hdr->[3] = bytes::length($txt); - return [ $code, $hdr, [ $txt ] ] + my $h = delete $ctx->{-res_hdr}; + $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200; + push @$h, 'Content-Type', 'text/plain', + 'Content-Length', length($txt); + return [ $code, $h, [ $txt ] ] } # enforce trailing slash for "wget -r" compatibility if (!$have_tslash && $code == 200) { - my $url = $ctx->{-inbox}->base_url($ctx->{env}); + my $url = $ctx->{ibx}->base_url($env); $url .= "_/text/$key/"; return [ 302, [ 'Content-Type', 'text/plain', @@ -53,32 +57,29 @@ sub get_text { # Follow git commit message conventions, # first line is the Subject/title my ($title) = ($txt =~ /\A([^\n]*)/s); - $ctx->{txt} = \$txt; $ctx->{-title_html} = ascii_html($title); my $nslash = ($key =~ tr!/!/!); $ctx->{-upfx} = '../../../' . ('../' x $nslash); - PublicInbox::WwwStream->response($ctx, $code, \&_do_linkify); -} - -sub _do_linkify { - my ($nr, $ctx) = @_; - return unless $nr == 1; my $l = PublicInbox::Linkify->new; - my $txt = delete $ctx->{txt}; - $l->linkify_1($$txt); + $l->linkify_1($txt); if ($hl) { - $hl->do_hl_text($txt); + $hl->do_hl_text(\$txt); } else { - $$txt = ascii_html($$txt); + $txt = ascii_html($txt); } - '<pre>' . $l->linkify_2($$txt) . '</pre>'; + $txt = '<pre>' . $l->linkify_2($txt) . '</pre>'; + $txt =~ s!^search$!<a\nid=search>search</a>!sm; + $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!; + $txt =~ s!\b(Newsgroups?)\b!<a\nid=nntp>$1</a>!; + $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!; + PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt); } sub _srch_prefix ($$) { - my ($srch, $txt) = @_; + my ($ibx, $txt) = @_; my $pad = 0; my $htxt = ''; - my $help = $srch->help; + my $help = $ibx->isrch->help; my $i; for ($i = 0; $i < @$help; $i += 2) { my $pfx = $help->[$i]; @@ -89,10 +90,9 @@ sub _srch_prefix ($$) { $htxt .= "\f\n"; } $pad += 2; - my $padding = ' ' x ($pad + 8); + my $padding = ' ' x ($pad + 4); $htxt =~ s/^/$padding/gms; - $htxt =~ s/^$padding(\S+)\0/" $1". - (' ' x ($pad - length($1)))/egms; + $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms; $htxt =~ s/\f\n/\n/gs; $$txt .= $htxt; 1; @@ -100,7 +100,7 @@ sub _srch_prefix ($$) { sub _colors_help ($$) { my ($ctx, $txt) = @_; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $env = $ctx->{env}; my $base_url = $ibx->base_url($env); $$txt .= "color customization for $base_url\n"; @@ -113,7 +113,7 @@ Users of browsers such as dillo, Firefox, or some browser extensions may start by downloading the following sample CSS file to control the colors they see: - ${base_url}userContent.css + ${base_url}userContent.css CSS sample ---------- @@ -130,186 +130,414 @@ sub dq_escape ($) { $name; } -sub URI_PATH () { '^A-Za-z0-9\-\._~/' } +sub _coderepo_config ($$) { + my ($ctx, $txt) = @_; + my $cr = $ctx->{ibx}->{coderepo} // return; + # note: this doesn't preserve cgitrc layout, since we parse cgitrc + # and drop the original structure + $$txt .= "\tcoderepo = $_\n" for @$cr; + $$txt .= <<'EOF'; + +; `coderepo' entries allows blob reconstruction via patch emails if +; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@' +; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/, +; an HTTP endpoint which reconstructs git blobs via git-apply(1). +EOF + my $pi_cfg = $ctx->{www}->{pi_cfg}; + for my $cr_name (@$cr) { + my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl"); + my $path = "/path/to/$cr_name"; + $cr_name = dq_escape($cr_name); + + $$txt .= qq([coderepo "$cr_name"]\n); + if ($urls && scalar(@$urls)) { + $$txt .= "\t; "; + $$txt .= join(" ||\n\t;\t", map {; + my $dst = $path; + if ($path !~ m![a-z0-9_/\.\-]!i) { + $dst = '"'.dq_escape($dst).'"'; + } + qq(git clone $_ $dst); + } @$urls); + $$txt .= "\n"; + } + $$txt .= "\tdir = $path\n"; + $$txt .= "\tcgiturl = https://example.com/"; + $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n"; + } +} # n.b. this is a perfect candidate for memoization -sub inbox_config ($$$) { - my ($ctx, $hdr, $txt) = @_; - my $ibx = $ctx->{-inbox}; - push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config'; +sub inbox_config ($$) { + my ($ctx, $txt) = @_; + my $ibx = $ctx->{ibx}; + push @{$ctx->{-res_hdr}}, + 'Content-Disposition', 'inline; filename=inbox.config'; + my $t = eval { $ibx->mm->created_at }; + push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t; my $name = dq_escape($ibx->{name}); + my $inboxdir = '/path/to/top-level-inbox'; + my $base_url = $ibx->base_url($ctx->{env}); $$txt .= <<EOS; -; example public-inbox config snippet for "$name" -; see public-inbox-config(5) manpage for more details: +; Example public-inbox config snippet for a mirror of +; $base_url +; See public-inbox-config(5) manpage for more details: ; https://public-inbox.org/public-inbox-config.html [publicinbox "$name"] - inboxdir = /path/to/top-level-inbox - ; note: public-inbox before v1.2.0 used "mainrepo" - ; instead of "inboxdir", both remain supported after 1.2 - mainrepo = /path/to/top-level-inbox + inboxdir = $inboxdir + ; note: public-inbox before v1.2.0 used `mainrepo' instead of + ; `inboxdir', both remain supported after 1.2 + mainrepo = $inboxdir url = https://example.com/$name/ url = http://example.onion/$name/ EOS - for my $k (qw(address listid infourl)) { + for my $k (qw(address listid infourl watchheader)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } + if (my $altid = $ibx->{altid}) { + my $altid_map = $ibx->altid_map; + $$txt .= <<EOF; + ; altid DBs may be used to provide numeric article ID lookup from + ; old, pre-existing sources. You can recreate them via curl(1), + ; gzip(1), and sqlite3(1) as documented: +EOF + for (sort keys %$altid_map) { + $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" . + "\t;\tgzip -dc | \\\n" . + "\t;\tsqlite3 $inboxdir/$_.sqlite3\n"; + $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n"; + } + } - for my $k (qw(filter newsgroup obfuscate replyto watchheader)) { + for my $k (qw(filter newsgroup obfuscate replyto)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $v\n"; } - $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url}); + $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)}); + $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)}); + _coderepo_config($ctx, $txt); + 1; +} - # note: this doesn't preserve cgitrc layout, since we parse cgitrc - # and drop the original structure - if (defined(my $cr = $ibx->{coderepo})) { - $$txt .= "\tcoderepo = $_\n" for @$cr; - - my $pi_config = $ctx->{www}->{pi_config}; - for my $cr_name (@$cr) { - my $urls = $pi_config->{"coderepo.$cr_name.cgiturl"}; - my $path = "/path/to/$cr_name"; - $cr_name = dq_escape($cr_name); - - $$txt .= qq([coderepo "$cr_name"]\n); - if ($urls && scalar(@$urls)) { - $$txt .= "\t; "; - $$txt .= join(" ||\n\t;\t", map {; - my $cpath = $path; - if ($path !~ m![a-z0-9_/\.\-]!i) { - $cpath = dq_escape($cpath); - } - qq(git clone $_ "$cpath"); - } @$urls); - $$txt .= "\n"; +# n.b. this is a perfect candidate for memoization +sub extindex_config ($$) { + my ($ctx, $txt) = @_; + my $ibx = $ctx->{ibx}; + push @{$ctx->{-res_hdr}}, + 'Content-Disposition', 'inline; filename=extindex.config'; + my $name = dq_escape($ibx->{name}); + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= <<EOS; +; Example public-inbox config snippet for the external index (extindex) at: +; $base_url +; See public-inbox-config(5)manpage for more details: +; https://public-inbox.org/public-inbox-config.html +[extindex "$name"] + topdir = /path/to/extindex-topdir + url = https://example.com/$name/ + url = http://example.onion/$name/ +EOS + for my $k (qw(infourl)) { + defined(my $v = $ibx->{$k}) or next; + $$txt .= "\t$k = $v\n"; + } + _coderepo_config($ctx, $txt); + 1; +} + +sub coderepos_raw ($$) { + my ($ctx, $top_url) = @_; + my $cfg = $ctx->{www}->{pi_cfg}; + my $cr = $cfg->repo_objs($ctx->{ibx}) or return (); + my $buf = 'Code repositories for project(s) associated with this '. + $ctx->{ibx}->thing_type . ":\n"; + my @recs = PublicInbox::CodeSearch::repos_sorted($cfg, @$cr); + my $cr_score = $ctx->{ibx}->{-cr_score}; + my $env = $ctx->{env}; + for (@recs) { + my ($t, $git) = @$_; + for ($git->pub_urls($env)) { + my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_; + my $nr = $cr_score->{$git->{nick}}; + $buf .= "\n"; + $buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9); + $buf .= ' '.fmt_ts($t).' '.prurl($env, $u); + } + } + ($buf); +} + +sub _add_non_http_urls ($$) { + my ($ctx, $txt) = @_; + $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP + my $urls = $ctx->{ibx}->imap_url($ctx); + if (@$urls) { + $urls = join("\n ", @$urls); + $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg; + $$txt .= <<EOM + +IMAP subfolder(s) are available under: + $urls + # each subfolder (starting with `0') holds 50K messages at most +EOM + } + $urls = $ctx->{ibx}->nntp_url($ctx); + if (@$urls) { + $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are"; + $$txt .= ' available over NNTP:'; + $$txt .= "\n " . join("\n ", @$urls) . "\n"; + } + $urls = $ctx->{ibx}->pop3_url($ctx); + if (@$urls) { + $urls = join("\n ", @$urls); + $$txt .= <<EOM; + +POP3 access is available: + $urls + +The POP3 password is: anonymous +The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup} +where \$(uuidgen) in the output of the `uuidgen' command on your system. +The UUID in the username functions as a private cookie (don't share it). +By default, only 1000 messages are retrieved. You may download more +by appending `?limit=NUM' (without quotes) to the username, where +`NUM' is an integer between 1 and 50000. +Idle accounts will expire periodically. +EOM + } +} + +sub _add_onion_note ($) { + my ($txt) = @_; + $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM + +note: .onion URLs require Tor: https://www.torproject.org/ + +EOM +} + +sub _mirror_help ($$) { + my ($ctx, $txt) = @_; + my $ibx = $ctx->{ibx}; + my $base_url = $ibx->base_url($ctx->{env}); + chop $base_url; # no trailing slash for "git clone" + my $dir = (split(m!/!, $base_url))[-1]; + my %seen = ($base_url => 1); + my $top_url = $base_url; + $top_url =~ s!/[^/]+\z!/!; + $$txt .= "public-inbox mirroring instructions\n\n"; + if ($ibx->can('cloneurl')) { # PublicInbox::Inbox + $$txt .= + "This public inbox may be cloned and mirrored by anyone:\n"; + my @urls; + my $max = $ibx->max_git_epoch; + # TODO: some of these URLs may be too long and we may need to + # do something like code_footer() above, but these are local + # admin-defined + if (defined($max)) { # v2 + for my $i (0..$max) { + # old epochs my be deleted: + -d "$ibx->{inboxdir}/git/$i.git" or next; + my $url = "$base_url/$i"; + $seen{$url} = 1; + push @urls, "$url $dir/git/$i.git"; + } + my $nr = scalar(@urls); + if ($nr > 1) { + chomp($$txt .= <<EOM); + + # this inbox consists of $nr epochs: (no need to clone all of them) +EOM + $urls[0] .= " # oldest"; + $urls[-1] .= " # newest"; } - $$txt .= "\tdir = $path\n"; - $$txt .= "\tcgiturl = https://example.com/"; - $$txt .= uri_escape_utf8($cr_name, URI_PATH)."\n"; + } else { # v1 + push @urls, $base_url; + } + # FIXME: epoch splits can be different in other repositories, + # use the "cloneurl" file as-is for now: + for my $u (@{$ibx->cloneurl}) { + next if $seen{$u}++; + push @urls, $u; + } + $$txt .= "\n"; + $$txt .= join('', map { " git clone --mirror $_\n" } @urls); + my $addrs = $ibx->{address} // 'inbox@example.com'; + my $ng = $ibx->{newsgroup} // ''; + substr($ng, 0, 0, ' --ng ') if $ng; + $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY'; + my $v = defined $max ? '-V2' : '-V1'; + $$txt .= <<EOF; + + # If you have public-inbox 1.1+ installed, you may + # initialize and index your mirror using the following commands: + public-inbox-init $v$ng \\ + $ibx->{name} ./$dir $base_url \\ + $addrs + public-inbox-index ./$dir +EOF + } else { # PublicInbox::ExtSearch + $$txt .= <<EOM; +This is an external index which is an amalgamation of several public inboxes. +Each public inbox needs to be mirrored individually. +EOM + my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')}; + if (($v // '') =~ /\A(?:all|match=domain)\z/) { + $$txt .= <<EOM; +A list of them is available at $top_url +EOM } } + my $cfg_link = "$base_url/_/text/config/raw"; + $$txt .= <<EOF; + +Example config snippet for mirrors: $cfg_link +EOF + _add_non_http_urls($ctx, $txt); + _add_onion_note($txt); + + my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL); + $$txt .= join("\n\n", + coderepos_raw($ctx, $top_url), # may be empty + "AGPL code for this site:\n git clone $code_url"); 1; } -sub _default_text ($$$$) { - my ($ctx, $key, $hdr, $txt) = @_; - return _colors_help($ctx, $txt) if $key eq 'color'; - return inbox_config($ctx, $hdr, $txt) if $key eq 'config'; +sub _default_text ($$$) { + my ($ctx, $key, $txt) = @_; + if ($key eq 'mirror') { + return _mirror_help($ctx, $txt); + } elsif ($key eq 'color') { + return _colors_help($ctx, $txt); + } elsif ($key eq 'config') { + return $ctx->{ibx}->can('cloneurl') ? + inbox_config($ctx, $txt) : + extindex_config($ctx, $txt); + } return if $key ne 'help'; # TODO more keys? - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $base_url = $ibx->base_url($ctx->{env}); - $$txt .= "public-inbox help for $base_url\n"; $$txt .= <<EOF; +public-inbox help for $base_url overview -------- - public-inbox uses Message-ID identifiers in URLs. - One may look up messages by substituting Message-IDs - (without the leading '<' or trailing '>') into the URL. - Forward slash ('/') characters in the Message-IDs - need to be escaped as "%2F" (without quotes). - - Thus, it is possible to retrieve any message by its - Message-ID by going to: + public-inbox uses Message-ID identifiers in URLs. + One may look up messages by substituting Message-IDs + (without the leading '<' or trailing '>') into the URL. + Forward slash ('/') characters in the Message-IDs + need to be escaped as "%2F" (without quotes). - $base_url<Message-ID>/ + Thus, it is possible to retrieve any message by its + Message-ID by going to: - (without the '<' or '>') + $base_url<Message-ID>/ + (without the '<' or '>') - Message-IDs are described at: + Message-IDs are described at: - $WIKI_URL/Message-ID + $WIKI_URL/Message-ID EOF # n.b. we use the Xapian DB for any regeneratable, # order-of-arrival-independent data. - my $srch = $ibx->search; - if ($srch) { + if ($ibx->isrch) { $$txt .= <<EOF; search ------ - This public-inbox has search functionality provided by Xapian. + This public-inbox has search functionality provided by Xapian. - It supports typical AND, OR, NOT, '+', '-' queries present - in other search engines. + It supports typical AND, OR, NOT, '+', '-' queries present + in other search engines. - We also support search prefixes to limit the scope of the - search to certain fields. + We also support search prefixes to limit the scope of the + search to certain fields. - Prefixes supported in this installation include: + Prefixes supported in this installation include: EOF - _srch_prefix($srch, $txt); - + _srch_prefix($ibx, $txt); $$txt .= <<EOF; - Most prefixes are probabilistic, meaning they support stemming - and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes - do not support stemming or wildcards. - The upstream Xapian query parser documentation fully explains - the query syntax: + Most prefixes are probabilistic, meaning they support stemming + and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes + do not support stemming or wildcards. + The upstream Xapian query parser documentation fully explains + the query syntax: - $QP_URL + $QP_URL +EOF + } # $srch + if ($ibx->over) { + $$txt .= <<EOF; message threading ----------------- - Message threading is enabled for this public-inbox, - additional endpoints for message threads are available: + Message threading is enabled for this public-inbox, + additional endpoints for message threads are available: - * $base_url<Message-ID>/T/#u + * $base_url<Message-ID>/T/#u - Loads the thread belonging to the given <Message-ID> - in flat chronological order. The "#u" anchor - focuses the browser on the given <Message-ID>. + Loads the thread belonging to the given <Message-ID> + in flat chronological order. The "#u" anchor + focuses the browser on the given <Message-ID>. - * $base_url<Message-ID>/t/#u + * $base_url<Message-ID>/t/#u - Loads the thread belonging to the given <Message-ID> - in threaded order with nesting. For deep threads, - this requires a wide display or horizontal scrolling. + Loads the thread belonging to the given <Message-ID> + in threaded order with nesting. For deep threads, + this requires a wide display or horizontal scrolling. - Both of these HTML endpoints are suitable for offline reading - using the thread overview at the bottom of each page. + Both of these HTML endpoints are suitable for offline reading + using the thread overview at the bottom of each page. - Users of feed readers may follow a particular thread using: + The gzipped mbox for a thread is available for downloading and + importing into your favorite mail client: - * $base_url<Message-ID>/t.atom + * $base_url<Message-ID>/t.mbox.gz - Which loads the thread in Atom Syndication Standard - described at Wikipedia and RFC4287: + We use the mboxrd variant of the mbox format described at: - $WIKI_URL/Atom_(standard) - https://tools.ietf.org/html/rfc4287 + $WIKI_URL/Mbox - Atom Threading Extensions (RFC4685) is supported: + Users of feed readers may follow a particular thread using: - https://tools.ietf.org/html/rfc4685 + * $base_url<Message-ID>/t.atom - Finally, the gzipped mbox for a thread is available for - downloading and importing into your favorite mail client: + Which loads the thread in Atom Syndication Standard + described at Wikipedia and RFC4287: - * $base_url<Message-ID>/t.mbox.gz + $WIKI_URL/Atom_(standard) + https://tools.ietf.org/html/rfc4287 - We use the mboxrd variant of the mbox format described - at: + Atom Threading Extensions (RFC4685) are supported: - $WIKI_URL/Mbox + https://tools.ietf.org/html/rfc4685 + +EOF + } # $over + _add_non_http_urls($ctx, \(my $note = '')); + $note and $note =~ s/^/ /gms and $$txt .= <<EOF; +additional protocols +-------------------- +$note +EOF + $$txt .= <<EOF; contact ------- - This help text is maintained by public-inbox developers - reachable via plain-text email at: meta\@public-inbox.org - + This help text is maintained by public-inbox developers + reachable via plain-text email at: meta\@public-inbox.org + Their inbox is archived at: https://public-inbox.org/meta/ EOF # TODO: support admin contact info in ~/.public-inbox/config - } 1; } |