about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--Documentation/.gitignore1
-rw-r--r--Documentation/include.mk6
-rw-r--r--Documentation/public-inbox-config.pod9
-rwxr-xr-xDocumentation/standards.perl77
-rw-r--r--MANIFEST2
-rw-r--r--examples/cgit-commit-filter.lua10
-rw-r--r--examples/cgit-wwwhighlight-filter.lua105
-rw-r--r--lib/PublicInbox/Cgit.pm16
-rw-r--r--lib/PublicInbox/ExtMsg.pm4
-rw-r--r--lib/PublicInbox/View.pm3
-rw-r--r--lib/PublicInbox/ViewDiff.pm2
-rw-r--r--lib/PublicInbox/WwwHighlight.pm11
-rw-r--r--lib/PublicInbox/WwwStream.pm5
-rw-r--r--t/search.t12
14 files changed, 247 insertions, 16 deletions
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index 107ad36f..e78a0d33 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1 +1,2 @@
 /public-inbox-*.txt
+/standards.txt
diff --git a/Documentation/include.mk b/Documentation/include.mk
index 28fa7574..02cbef30 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -81,8 +81,12 @@ txt2pre = $(PERL) -I lib ./Documentation/txt2pre <$< >$@+ && \
         touch -r $< $@+ && mv $@+ $@
 txt := INSTALL README COPYING TODO
 dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt hosted.txt
+dtxt += standards.txt
 dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt)
 
+Documentation/standards.txt : Documentation/standards.perl
+        $(PERL) $< >$@+ && mv $@+ $@
+
 %.html: %.txt
         TITLE="$(basename $(<F))" $(txt2pre)
 %.html: %
@@ -91,7 +95,7 @@ dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt)
 docs_html := $(addsuffix .html, $(subst .txt,,$(dtxt)) $(txt))
 html: $(docs_html)
 gz_docs := $(addsuffix .gz, $(docs) $(docs_html))
-rsync_docs := $(gz_docs) $(docs) $(txt) $(docs_html)
+rsync_docs := $(gz_docs) $(docs) $(txt) $(docs_html) $(dtxt)
 %.gz: %
         gzip -9 --rsyncable <$< >$@+
         touch -r $< $@+
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 17b8bac7..d44c8f30 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -225,6 +225,15 @@ directive is configured.
 
 Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi
 
+=item publicinbox.cgitdata
+
+A path to the data directory used by cgit for storing static files.
+Typically guessed based on the location of C<cgit.cgi> (from
+C<publicinbox.cgitbin>), but may be overridden.
+
+Default: dirname of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/
+or /usr/share/cgit/
+
 =item publicinbox.wwwlisting
 
 Enable a HTML listing style when the root path of the URL '/' is accessed.
diff --git a/Documentation/standards.perl b/Documentation/standards.perl
new file mode 100755
index 00000000..f75c4122
--- /dev/null
+++ b/Documentation/standards.perl
@@ -0,0 +1,77 @@
+#!/usr/bin/perl -w
+use strict;
+# Copyright 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+print <<EOF;
+Relevant standards for public-inbox users and hackers
+-----------------------------------------------------
+
+Non-exhaustive list of standards public-inbox software attempts or
+intends to implement.  This list is intended to be a quick reference
+for hackers and users.
+
+Given the goals of interoperability and accessibility; strict
+conformance to standards is not always possible, but rather
+best-effort taking into account real-world cases.  In particular,
+"obsolete" standards remain relevant as long as clients and
+data exists.
+
+IETF RFCs
+---------
+
+EOF
+
+my $rfcs = [ # flat list of (RFC number => description) pairs, printed in this order
+        3977 => 'NNTP',
+        977 => 'NNTP (old)',
+        6048 => 'NNTP additions to LIST command (TODO)',
+        8054 => 'NNTP compression (TODO)',
+        4642 => 'NNTP TLS (TODO)',
+        8143 => 'NNTP TLS (TODO)',
+        2980 => 'NNTP extensions (obsolete, but NOT irrelevant)',
+        4287 => 'Atom syndication',
+        4685 => 'Atom threading extensions',
+        2919 => 'List-Id mail header',
+        5064 => 'Archived-At mail header',
+        3986 => 'URI escaping',
+        1521 => 'MIME extensions',
+        2616 => 'HTTP/1.1 (newer updates should apply, too)',
+        7230 => 'HTTP/1.1 message syntax and routing',
+        7231 => 'HTTP/1.1 semantics and content',
+        2822 => 'Internet message format',
+        # TODO: flesh this out
+
+];
+
+my @rfc_urls = qw(tools.ietf.org/html/rfc%d
+                  www.rfc-editor.org/errata_search.php?rfc=%d); # printf templates, https:// is prepended below
+
+for (my $i = 0; $i < $#$rfcs;) { # $#$rfcs is the last index, so this holds while a full pair remains
+        my $num = $rfcs->[$i++]; # no increment in the for(;;) header: each pass consumes two slots here
+        my $txt = $rfcs->[$i++];
+        print "rfc$num\t- $txt\n";
+
+        printf "\thttps://$_\n", $num foreach @rfc_urls; # $_ is the template, its %d receives $num
+        print "\n";
+}
+
+print <<'EOF'
+Other relevant documentation
+----------------------------
+
+* Documentation/technical/http-protocol.txt in git source code:
+  https://public-inbox.org/git/9c5b6f0fac/s
+
+* Various mbox formats (we currently emit and parse mboxrd)
+  https://en.wikipedia.org/wiki/Mbox
+
+* PSGI/Plack specifications (as long as our web frontend uses Perl5)
+  git clone https://github.com/plack/psgi-specs.git
+
+Copyright
+---------
+
+Copyright 2019 all contributors <meta@public-inbox.org>
+License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+EOF
diff --git a/MANIFEST b/MANIFEST
index 881d2f07..4bdcda3c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -19,6 +19,7 @@ Documentation/public-inbox-overview.pod
 Documentation/public-inbox-v1-format.pod
 Documentation/public-inbox-v2-format.pod
 Documentation/public-inbox-watch.pod
+Documentation/standards.perl
 Documentation/txt2pre
 HACKING
 INSTALL
@@ -38,6 +39,7 @@ examples/apache2_perl.conf
 examples/apache2_perl_old.conf
 examples/cgi-webrick.rb
 examples/cgit-commit-filter.lua
+examples/cgit-wwwhighlight-filter.lua
 examples/cgit.psgi
 examples/highlight.psgi
 examples/logrotate.conf
diff --git a/examples/cgit-commit-filter.lua b/examples/cgit-commit-filter.lua
index 7799befa..16772534 100644
--- a/examples/cgit-commit-filter.lua
+++ b/examples/cgit-commit-filter.lua
@@ -13,14 +13,19 @@
 local urls = {}
 urls['public-inbox.git'] = 'https://public-inbox.org/meta/'
 -- additional URLs here...
+-- TODO we should be able to auto-generate this based on "coderepo"
+-- directives in the public-inbox config file; but keep in mind
+-- the mapping is M:N between inboxes and coderepos
 
 function filter_open(...)
         lineno = 0
         buffer = ""
-        subject = ""
 end
 
 function filter_close()
+        -- cgit opens and closes this filter for the commit subject
+        -- and body separately, and we only generate the link based
+        -- on the commit subject:
         if lineno == 1 and string.find(buffer, "\n") == nil then
                 u = urls[os.getenv('CGIT_REPO_URL')]
                 if u == nil then
@@ -33,6 +38,9 @@ function filter_close()
                         html('</tt></a>')
                 end
         else
+                -- pass the body through as-is
+                -- TODO: optionally use WwwHighlight for linkification like
+                -- cgit-wwwhighlight-filter.lua
                 html(buffer)
         end
         return 0
diff --git a/examples/cgit-wwwhighlight-filter.lua b/examples/cgit-wwwhighlight-filter.lua
new file mode 100644
index 00000000..a267d1c8
--- /dev/null
+++ b/examples/cgit-wwwhighlight-filter.lua
@@ -0,0 +1,105 @@
+-- Copyright (C) 2019 all contributors <meta@public-inbox.org>
+-- License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+--
+-- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint
+-- (see examples/highlight.psgi)
+--
+-- Dependencies: lua-http
+--
+-- disclaimer: written by someone who does not know Lua.
+--
+-- This requires cgit linked with Lua
+-- Usage (in your cgitrc(5) config file):
+--
+--   source-filter=lua:/path/to/this/script.lua
+--   about-filter=lua:/path/to/this/script.lua
+--
+local wwwhighlight_url = 'http://127.0.0.1:9090/' -- PSGI endpoint; filter_open appends the filename
+local req_timeout = 10 -- handed to req:go() in filter_close
+local too_big = false -- set by filter_write once the input exceeds max_len
+
+-- match $PublicInbox::HTTP::MAX_REQUEST_BUFFER
+local max_len = 10 * 1024 * 1024 -- bytes (10 MiB)
+
+-- about-filter needs surrounding <pre> tags if all we do is
+-- highlight and linkify
+local pre = true -- filter_open overwrites this whenever CGIT_REPO_URL is set
+
+function filter_open(...) -- called with the filter arguments; the first one is used as filename
+        req_body = "" -- deliberately global: filter_write, fail and filter_close use it
+
+        -- detect when we're used in an about-filter (only then do we emit <pre>)
+        local repo_url = os.getenv('CGIT_REPO_URL')
+        if repo_url then
+                local path_info = os.getenv('PATH_INFO') or '' -- may be unset; never index nil
+                local rurl = path_info:match("^/(.+)/about/?$") -- nil unless this is an about page
+                pre = rurl == repo_url
+        end
+
+        -- hand filename off for language detection
+        local fn = select(1, ...)
+        if fn then
+                local http_util = require 'http.util'
+                wwwhighlight_url = wwwhighlight_url .. http_util.encodeURI(fn)
+        end
+end
+
+-- buffer the source in memory; past max_len, flush it and pass the rest through
+function filter_write(str)
+        if too_big then
+                html(str)
+        elseif (req_body:len() + str:len()) > max_len then
+                too_big = true
+                html(req_body) -- emit what was buffered BEFORE discarding it
+                req_body = ""
+                html(str)
+        else
+                req_body = req_body .. str
+        end
+end
+
+function fail(err) -- fallback: log err to stderr, emit the unhighlighted source, return 1
+        io.stderr:write(tostring(err), "\n")
+        if pre then
+                html("<pre>")
+        end
+        html_txt(req_body) -- cgit-provided; presumably HTML-escapes the text (confirm in cgitrc(5))
+        if pre then
+                html("</pre>")
+        end
+        return 1 -- filter_close returns this value to its caller
+end
+
+function filter_close() -- PUT the buffered source to WwwHighlight and emit the HTML it returns
+        if too_big then
+                return 0 -- nothing to send: filter_write stopped buffering
+        end
+        local request = require 'http.request'
+        local req = request.new_from_uri(wwwhighlight_url)
+        req.headers:upsert(':method', 'PUT') -- WwwHighlight answers 405 to anything but PUT
+        req:set_body(req_body)
+
+        -- don't wait for 100-Continue message from the PSGI app
+        req.headers:delete('expect')
+
+        local headers, stream = req:go(req_timeout)
+        if headers == nil then
+                return fail(stream) -- with no headers, the second value is treated as the error
+        end
+        local status = headers:get(':status')
+        if status ~= '200' then -- compared as a string, not a number
+                return fail('status ' .. status)
+        end
+        local body, err = stream:get_body_as_string()
+        if not body and err then
+                return fail(err)
+        end
+        if pre then
+                html("<pre>")
+        end
+        html(body) -- written unescaped: the response is already HTML (text/html from WwwHighlight)
+        if pre then
+                html("</pre>")
+        end
+        return 0
+end
diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index 8922ec56..353f4162 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -35,7 +35,15 @@ sub locate_cgit ($) {
                 }
         }
         unless (defined $cgit_data) {
-                foreach my $d (qw(/var/www/htdocs/cgit /usr/share/cgit)) {
+                my @dirs = qw(/var/www/htdocs/cgit /usr/share/cgit);
+
+                # local installs of cgit from source have
+                # CGIT_SCRIPT_PATH==CGIT_DATA_PATH by default,
+                # so we can usually infer the cgit_data path from cgit_bin
+                if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) {
+                        unshift @dirs, $1 if -d $1;
+                }
+                foreach my $d (@dirs) {
                         my $f = "$d/cgit.css";
                         next unless -f $f;
                         $cgit_data = $d;
@@ -90,6 +98,7 @@ my @PASS_ENV = qw(
 sub call {
         my ($self, $env) = @_;
         my $path_info = $env->{PATH_INFO};
+        my $cgit_data;
 
         # handle requests without spawning cgit iff possible:
         if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
@@ -97,10 +106,11 @@ sub call {
                 if (my $git = $self->{"\0$nick"}) {
                         return serve($env, $git, $path);
                 }
-        } elsif ($path_info =~ m!$self->{static}!) {
+        } elsif ($path_info =~ m!$self->{static}! &&
+                 defined($cgit_data = $self->{cgit_data})) {
                 my $f = $1;
                 my $type = Plack::MIME->mime_type($f);
-                return static_result($env, [], "$self->{cgit_data}$f", $type);
+                return static_result($env, [], $cgit_data.$f, $type);
         }
 
         my $cgi_env = { PATH_INFO => $path_info };
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 14d49cc5..d07d5a79 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -8,13 +8,13 @@
 package PublicInbox::ExtMsg;
 use strict;
 use warnings;
-use PublicInbox::Hval;
+use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::MID qw/mid2path/;
 use PublicInbox::WwwStream;
 our $MIN_PARTIAL_LEN = 16;
 
 # TODO: user-configurable
-our @EXT_URL = (
+our @EXT_URL = map { ascii_html($_) } (
         # leading "//" denotes protocol-relative (http:// or https://)
         '//marc.info/?i=%s',
         '//www.mail-archive.com/search?l=mid&q=%s',
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 62bdf0a1..47a2046e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -458,7 +458,8 @@ sub thread_html {
         $ctx->{prev_level} = 0;
         $ctx->{root_anchor} = anchor_for($mid);
         $ctx->{mapping} = {};
-        $ctx->{s_nr} = "$nr+ messages in thread";
+        $ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message')
+                       .' in thread';
 
         my $rootset = thread_results($ctx, $msgs);
 
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 0cce952d..6b8d9437 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -146,7 +146,7 @@ sub flush_diff ($$$) {
                 if ($s =~ /^---$/) {
                         to_state($dst, $state, DSTATE_STAT);
                         $$dst .= $s;
-                } elsif ($s =~ /^ /) {
+                } elsif ($s =~ /^ / || ($s =~ /^$/ && $state >= DSTATE_CTX)) {
                         # works for common cases, but not weird/long filenames
                         if ($state == DSTATE_STAT &&
                                         $s =~ /^ (.+)( +\| .*\z)/s) {
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
index 01916401..bc349f8a 100644
--- a/lib/PublicInbox/WwwHighlight.pm
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -24,6 +24,8 @@ use warnings;
 use bytes (); # only for bytes::length
 use HTTP::Status qw(status_message);
 use parent qw(PublicInbox::HlMod);
+use PublicInbox::Linkify qw();
+use PublicInbox::Hval qw(ascii_html);
 
 # TODO: support highlight(1) for distros which don't package the
 # SWIG extension.  Also, there may be admins who don't want to
@@ -64,7 +66,14 @@ sub call {
         return r(405) if $req_method ne 'PUT';
 
         my $bref = read_in_full($env) or return r(500);
-        $bref = $self->do_hl($bref, $env->{PATH_INFO});
+        my $l = PublicInbox::Linkify->new;
+        $l->linkify_1($$bref);
+        if (my $res = $self->do_hl($bref, $env->{PATH_INFO})) {
+                $bref = $res;
+        } else {
+                $$bref = ascii_html($$bref);
+        }
+        $l->linkify_2($$bref);
 
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
         push @$h, 'Content-Length', bytes::length($$bref);
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c708c21f..2893138d 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -12,7 +12,6 @@ use warnings;
 use PublicInbox::Hval qw(ascii_html);
 use URI;
 our $TOR_URL = 'https://www.torproject.org/';
-our $TOR2WEB_URL = 'https://www.tor2web.org/';
 our $CODE_URL = 'https://public-inbox.org/';
 our $PROJECT = 'public-inbox';
 
@@ -140,10 +139,6 @@ EOF
         if ($urls =~ m!\b[^:]+://\w+\.onion/!) {
                 $urls .= "\n note: .onion URLs require Tor: ";
                 $urls .= qq[<a\nhref="$TOR_URL">$TOR_URL</a>];
-                if ($TOR2WEB_URL) {
-                        $urls .= "\n       or Tor2web: ";
-                        $urls .= qq[<a\nhref="$TOR2WEB_URL">$TOR2WEB_URL</a>];
-                }
         }
         '<hr><pre>'.join("\n\n",
                 $desc,
diff --git a/t/search.t b/t/search.t
index 6415a644..35d71473 100644
--- a/t/search.t
+++ b/t/search.t
@@ -430,13 +430,23 @@ $ibx->with_umask(sub {
         is($ro->lookup_article($art->{num}), undef, 'gone from OVER DB') if defined($art);
 });
 
+my $all_mask = 07777;
+my $dir_mask = 02770;
+
+# FreeBSD does not allow non-root users to set S_ISGID, so
+# git doesn't set it, either (see DIR_HAS_BSD_GROUP_SEMANTICS in git.git)
+if ($^O =~ /freebsd/i) {
+        $all_mask = 0777;
+        $dir_mask = 0770;
+}
+
 foreach my $f ("$git_dir/public-inbox/msgmap.sqlite3",
                 "$git_dir/public-inbox",
                 glob("$git_dir/public-inbox/xapian*/"),
                 glob("$git_dir/public-inbox/xapian*/*")) {
         my @st = stat($f);
         my ($bn) = (split(m!/!, $f))[-1];
-        is($st[2] & 07777, -f _ ? 0660 : 02770,
+        is($st[2] & $all_mask, -f _ ? 0660 : $dir_mask,
                 "sharedRepository respected for $bn");
 }