about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Config.pm118
-rw-r--r--lib/PublicInbox/ExtMsg.pm16
-rw-r--r--lib/PublicInbox/GetlineBody.pm16
-rw-r--r--lib/PublicInbox/Git.pm142
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm64
-rw-r--r--lib/PublicInbox/HTTPD.pm9
-rw-r--r--lib/PublicInbox/HTTPD/Async.pm27
-rw-r--r--lib/PublicInbox/HlMod.pm125
-rw-r--r--lib/PublicInbox/Hval.pm46
-rw-r--r--lib/PublicInbox/Inbox.pm21
-rw-r--r--lib/PublicInbox/MID.pm25
-rw-r--r--lib/PublicInbox/NNTP.pm1
-rw-r--r--lib/PublicInbox/Qspawn.pm208
-rw-r--r--lib/PublicInbox/SearchIdx.pm21
-rw-r--r--lib/PublicInbox/SolverGit.pm573
-rw-r--r--lib/PublicInbox/UserContent.pm98
-rw-r--r--lib/PublicInbox/View.pm60
-rw-r--r--lib/PublicInbox/ViewDiff.pm161
-rw-r--r--lib/PublicInbox/ViewVCS.pm174
-rw-r--r--lib/PublicInbox/WWW.pm156
-rw-r--r--lib/PublicInbox/WwwHighlight.pm74
-rw-r--r--lib/PublicInbox/WwwStream.pm4
-rw-r--r--lib/PublicInbox/WwwText.pm35
23 files changed, 1991 insertions, 183 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index a2b721d2..da443e58 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -2,12 +2,19 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used throughout the project for reading configuration
+#
+# Note: I hate camelCase; but git-config(1) uses it, but it's better
+# than alllowercasewithoutunderscores, so use lc('configKey') where
+# applicable for readability
+
 package PublicInbox::Config;
 use strict;
 use warnings;
 require PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
 
+sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
+
 # returns key-value pairs of config directives in a hash
 # if keys may be multi-value, the value is an array ref containing all values
 sub new {
@@ -22,9 +29,10 @@ sub new {
         $self->{-by_newsgroup} ||= {};
         $self->{-no_obfuscate} ||= {};
         $self->{-limiters} ||= {};
+        $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object
 
         if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
-                $no = [ $no ] if ref($no) ne 'ARRAY';
+                $no = _array($no);
                 my @domains;
                 foreach my $n (@$no) {
                         my @n = split(/\s+/, $n);
@@ -41,6 +49,9 @@ sub new {
                 my $nod = join('|', @domains);
                 $self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
         }
+        if (my $css = delete $self->{'publicinbox.css'}) {
+                $self->{css} = _array($css);
+        }
 
         $self;
 }
@@ -79,13 +90,22 @@ sub lookup_name ($$) {
 
 sub each_inbox {
         my ($self, $cb) = @_;
-        my %seen;
-        foreach my $k (keys %$self) {
-                $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
-                next if $seen{$1};
-                $seen{$1} = 1;
-                my $ibx = lookup_name($self, $1) or next;
-                $cb->($ibx);
+        if (my $section_order = $self->{-section_order}) {
+                foreach my $section (@$section_order) {
+                        next if $section !~ m!\Apublicinbox\.([^/]+)\z!;
+                        $self->{"publicinbox.$1.mainrepo"} or next;
+                        my $ibx = lookup_name($self, $1) or next;
+                        $cb->($ibx);
+                }
+        } else {
+                my %seen;
+                foreach my $k (keys %$self) {
+                        $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
+                        next if $seen{$1};
+                        $seen{$1} = 1;
+                        my $ibx = lookup_name($self, $1) or next;
+                        $cb->($ibx);
+                }
         }
 }
 
@@ -126,7 +146,7 @@ sub default_file {
 
 sub git_config_dump {
         my ($file) = @_;
-        my ($in, $out);
+        my (%section_seen, @section_order);
         my @cmd = (qw/git config/, "--file=$file", '-l');
         my $cmd = join(' ', @cmd);
         my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
@@ -135,8 +155,14 @@ sub git_config_dump {
         while (defined(my $line = <$fh>)) {
                 chomp $line;
                 my ($k, $v) = split(/=/, $line, 2);
-                my $cur = $rv{$k};
 
+                my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
+                unless (defined $section_seen{$section}) {
+                        $section_seen{$section} = 1;
+                        push @section_order, $section;
+                }
+
+                my $cur = $rv{$k};
                 if (defined $cur) {
                         if (ref($cur) eq "ARRAY") {
                                 push @$cur, $v;
@@ -148,10 +174,63 @@ sub git_config_dump {
                 }
         }
         close $fh or die "failed to close ($cmd) pipe: $?";
+        $rv{-section_order} = \@section_order;
 
         \%rv;
 }
 
+sub valid_inbox_name ($) {
+        my ($name) = @_;
+
+        # Similar rules found in git.git/remote.c::valid_remote_nick
+        # and git.git/refs.c::check_refname_component
+        # We don't reject /\.lock\z/, however, since we don't lock refs
+        if ($name eq '' || $name =~ /\@\{/ ||
+            $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! ||
+            $name =~ /\A\./ || $name =~ /\.\z/) {
+                return 0;
+        }
+
+        # Note: we allow URL-unfriendly characters; users may configure
+        # non-HTTP-accessible inboxes
+        1;
+}
+
+# parse a code repo
+# Only git is supported at the moment, but SVN and Hg are possibilities
+sub _fill_code_repo {
+        my ($self, $nick) = @_;
+        my $pfx = "coderepo.$nick";
+
+        my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR"
+        unless (defined $dir) {
+                warn "$pfx.repodir unset";
+                return;
+        }
+
+        my $git = PublicInbox::Git->new($dir);
+        foreach my $t (qw(blob commit tree tag)) {
+                $git->{$t.'_url_format'} =
+                                _array($self->{lc("$pfx.${t}UrlFormat")});
+        }
+
+        if (my $cgits = $self->{lc("$pfx.cgitUrl")}) {
+                $git->{cgit_url} = $cgits = _array($cgits);
+
+                # cgit supports "/blob/?id=%s", but it's only a plain-text
+                # display and requires an unabbreviated id=
+                foreach my $t (qw(blob commit tag)) {
+                        $git->{$t.'_url_format'} ||= map {
+                                "$_/$t/?id=%s"
+                        } @$cgits;
+                }
+        }
+        # TODO: support gitweb and other repository viewers?
+        # TODO: parse cgitrc
+
+        $git;
+}
+
 sub _fill {
         my ($self, $pfx) = @_;
         my $rv = {};
@@ -175,9 +254,9 @@ sub _fill {
         }
         # TODO: more arrays, we should support multi-value for
         # more things to encourage decentralization
-        foreach my $k (qw(address altid nntpmirror)) {
+        foreach my $k (qw(address altid nntpmirror coderepo)) {
                 if (defined(my $v = $self->{"$pfx.$k"})) {
-                        $rv->{$k} = ref($v) eq 'ARRAY' ? $v : [ $v ];
+                        $rv->{$k} = _array($v);
                 }
         }
 
@@ -185,8 +264,7 @@ sub _fill {
         my $name = $pfx;
         $name =~ s/\Apublicinbox\.//;
 
-        # same rules as git.git/remote.c::valid_remote_nick
-        if ($name eq '' || $name =~ m!/! || $name eq '.' || $name eq '..') {
+        if (!valid_inbox_name($name)) {
                 warn "invalid inbox name: '$name'\n";
                 return;
         }
@@ -208,6 +286,18 @@ sub _fill {
                 $rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
                 each_inbox($self, sub {}); # noop to populate -no_obfuscate
         }
+
+        if (my $ibx_code_repos = $rv->{coderepo}) {
+                my $code_repos = $self->{-code_repos};
+                my $repo_objs = $rv->{-repo_objs} = [];
+                foreach my $nick (@$ibx_code_repos) {
+                        valid_inbox_name($nick) or next;
+                        my $repo = $code_repos->{$nick} ||=
+                                                _fill_code_repo($self, $nick);
+                        push @$repo_objs, $repo if $repo;
+                }
+        }
+
         $rv
 }
 
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 51e7799d..14d49cc5 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -11,6 +11,7 @@ use warnings;
 use PublicInbox::Hval;
 use PublicInbox::MID qw/mid2path/;
 use PublicInbox::WwwStream;
+our $MIN_PARTIAL_LEN = 16;
 
 # TODO: user-configurable
 our @EXT_URL = (
@@ -30,6 +31,7 @@ sub PARTIAL_MAX () { 100 }
 
 sub search_partial ($$) {
         my ($srch, $mid) = @_;
+        return if length($mid) < $MIN_PARTIAL_LEN;
         my $opt = { limit => PARTIAL_MAX, mset => 2 };
         my @try = ("m:$mid*");
         my $chop = $mid;
@@ -58,12 +60,12 @@ sub search_partial ($$) {
         }
 
         foreach my $m (@try) {
-                my $mset = eval { $srch->query($m, $opt) };
-                if (ref($@) eq 'Search::Xapian::QueryParserError') {
-                        # If Xapian can't handle the wildcard since it
-                        # has too many results.
-                        next;
-                }
+                # If Xapian can't handle the wildcard since it
+                # has too many results.  $@ can be
+                # Search::Xapian::QueryParserError or even:
+                # "something terrible happened at ../Search/Xapian/Enquire.pm"
+                my $mset = eval { $srch->query($m, $opt) } or next;
+
                 my @mids = map {
                         my $doc = $_->get_document;
                         PublicInbox::SearchMsg->load_doc($doc)->mid;
@@ -112,7 +114,7 @@ sub ext_msg {
         }
 
         # can't find a partial match in current inbox, try the others:
-        if (!$n_partial && length($mid) >= 16) {
+        if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) {
                 foreach my $ibx (@ibx) {
                         $srch = $ibx->search or next;
                         $mids = search_partial($srch, $mid) or next;
diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm
index ea07f3d6..0a922fd2 100644
--- a/lib/PublicInbox/GetlineBody.pm
+++ b/lib/PublicInbox/GetlineBody.pm
@@ -13,8 +13,13 @@ use strict;
 use warnings;
 
 sub new {
-        my ($class, $rpipe, $end, $buf) = @_;
-        bless { rpipe => $rpipe, end => $end, buf => $buf }, $class;
+        my ($class, $rpipe, $end, $buf, $filter) = @_;
+        bless {
+                rpipe => $rpipe,
+                end => $end,
+                buf => $buf,
+                filter => $filter || 0,
+        }, $class;
 }
 
 # close should always be called after getline returns undef,
@@ -24,8 +29,13 @@ sub DESTROY { $_[0]->close }
 
 sub getline {
         my ($self) = @_;
+        my $filter = $self->{filter};
+        return if $filter == -1; # last call was EOF
+
         my $buf = delete $self->{buf}; # initial buffer
-        defined $buf ? $buf : $self->{rpipe}->getline;
+        $buf = $self->{rpipe}->getline unless defined $buf;
+        $self->{filter} = -1 unless defined $buf; # set EOF for next call
+        $filter ? $filter->($buf) : $buf;
 }
 
 sub close {
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 16117277..a756684a 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -12,17 +12,60 @@ use warnings;
 use POSIX qw(dup2);
 require IO::Handle;
 use PublicInbox::Spawn qw(spawn popen_rd);
+use base qw(Exporter);
+our @EXPORT_OK = qw(git_unquote git_quote);
+
+my %GIT_ESC = (
+        a => "\a",
+        b => "\b",
+        f => "\f",
+        n => "\n",
+        r => "\r",
+        t => "\t",
+        v => "\013",
+        '"' => '"',
+        '\\' => '\\',
+);
+my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
+
+
+# unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
+sub git_unquote ($) {
+        return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
+        $_[0] = $1;
+        $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
+        $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+        $_[0];
+}
+
+sub git_quote ($) {
+        if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
+                      '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+                return qq{"$_[0]"};
+        }
+        $_[0];
+}
 
 sub new {
         my ($class, $git_dir) = @_;
         my @st;
         $st[7] = $st[10] = 0;
-        bless { git_dir => $git_dir, st => \@st }, $class
+        # may contain {-tmp} field for File::Temp::Dir
+        bless { git_dir => $git_dir, st => \@st, -git_path => {} }, $class
+}
+
+sub git_path ($$) {
+        my ($self, $path) = @_;
+        $self->{-git_path}->{$path} ||= do {
+                local $/ = "\n";
+                chomp(my $str = $self->qx(qw(rev-parse --git-path), $path));
+                $str;
+        };
 }
 
 sub alternates_changed {
         my ($self) = @_;
-        my $alt = "$self->{git_dir}/objects/info/alternates";
+        my $alt = git_path($self, 'objects/info/alternates');
         my @st = stat($alt) or return 0;
         my $old_st = $self->{st};
         # 10 - ctime, 7 - size
@@ -30,9 +73,25 @@ sub alternates_changed {
         $self->{st} = \@st;
 }
 
+sub last_check_err {
+        my ($self) = @_;
+        my $fh = $self->{err_c} or return;
+        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+        defined(sysread($fh, my $buf, -s $fh)) or
+                        fail($self, "sysread failed: $!");
+        $buf;
+}
+
 sub _bidi_pipe {
-        my ($self, $batch, $in, $out, $pid) = @_;
-        return if $self->{$pid};
+        my ($self, $batch, $in, $out, $pid, $err) = @_;
+        if ($self->{$pid}) {
+                if (defined $err) { # "err_c"
+                        my $fh = $self->{$err};
+                        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+                        truncate($fh, 0) or fail($self, "truncate failed: $!");
+                }
+                return;
+        }
         my ($in_r, $in_w, $out_r, $out_w);
 
         pipe($in_r, $in_w) or fail($self, "pipe failed: $!");
@@ -42,8 +101,14 @@ sub _bidi_pipe {
                 fcntl($in_w, 1031, 4096) if $batch eq '--batch-check';
         }
 
-        my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
+        my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
+                        qw(-c core.abbrev=40 cat-file), $batch);
         my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+        if ($err) {
+                open(my $fh, '+>', undef) or fail($self, "open.err failed: $!");
+                $self->{$err} = $fh;
+                $redir->{2} = fileno($fh);
+        }
         my $p = spawn(\@cmd, undef, $redir);
         defined $p or fail($self, "spawn failed: $!");
         $self->{$pid} = $p;
@@ -118,17 +183,38 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
 
 sub check {
         my ($self, $obj) = @_;
-        $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c));
+        _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
         $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
         local $/ = "\n";
         chomp(my $line = $self->{in_c}->getline);
         my ($hex, $type, $size) = split(' ', $line);
-        return if $type eq 'missing';
+
+        # Future versions of git.git may show 'ambiguous', but for now,
+        # we must handle 'dangling' below (and maybe some other oddball
+        # stuff):
+        # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
+        return if $type eq 'missing' || $type eq 'ambiguous';
+
+        if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
+                $size = $type + length("\n");
+                my $r = read($self->{in_c}, my $buf, $size);
+                defined($r) or fail($self, "read failed: $!");
+                return;
+        }
+
         ($hex, $type, $size);
 }
 
 sub _destroy {
-        my ($self, $in, $out, $pid) = @_;
+        my ($self, $in, $out, $pid, $expire) = @_;
+        my $rfh = $self->{$in} or return;
+        if (defined $expire) {
+                # at least FreeBSD 11.2 and Linux 4.20 update mtime of the
+                # read end of a pipe when the pipe is written to; dunno
+                # about other OSes.
+                my $mtime = (stat($rfh))[9];
+                return if $mtime > $expire;
+        }
         my $p = delete $self->{$pid} or return;
         foreach my $f ($in, $out) {
                 delete $self->{$f};
@@ -158,10 +244,12 @@ sub qx {
         <$fh>
 }
 
+# returns true if there are pending "git cat-file" processes
 sub cleanup {
-        my ($self) = @_;
-        _destroy($self, qw(in out pid));
-        _destroy($self, qw(in_c out_c pid_c));
+        my ($self, $expire) = @_;
+        _destroy($self, qw(in out pid), $expire);
+        _destroy($self, qw(in_c out_c pid_c), $expire);
+        !!($self->{pid} || $self->{pid_c});
 }
 
 # assuming a well-maintained repo, this should be a somewhat
@@ -170,7 +258,8 @@ sub cleanup {
 sub packed_bytes {
         my ($self) = @_;
         my $n = 0;
-        foreach my $p (glob("$self->{git_dir}/objects/pack/*.pack")) {
+        my $pack_dir = git_path($self, 'objects/pack');
+        foreach my $p (glob("$pack_dir/*.pack")) {
                 $n += -s $p;
         }
         $n
@@ -178,6 +267,35 @@ sub packed_bytes {
 
 sub DESTROY { cleanup(@_) }
 
+sub local_nick ($) {
+        my ($self) = @_;
+        my $ret = '???';
+        # don't show full FS path, basename should be OK:
+        if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
+                $ret = "/path/to/$1";
+        }
+        wantarray ? ($ret) : $ret;
+}
+
+# show the blob URL for cgit/gitweb/whatever
+sub src_blob_url {
+        my ($self, $oid) = @_;
+        # blob_url_format = "https://example.com/foo.git/blob/%s"
+        if (my $bfu = $self->{blob_url_format}) {
+                return map { sprintf($_, $oid) } @$bfu if wantarray;
+                return sprintf($bfu->[0], $oid);
+        }
+        local_nick($self);
+}
+
+sub pub_urls {
+        my ($self) = @_;
+        if (my $urls = $self->{cgit_url}) {
+                return @$urls;
+        }
+        local_nick($self);
+}
+
 1;
 __END__
 =pod
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 54ccfa05..ab43a009 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -200,69 +200,15 @@ sub serve_smart {
                 $env{$name} = $val if defined $val;
         }
         my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
-        my $git_dir = $git->{git_dir};
         $env{GIT_HTTP_EXPORT_ALL} = '1';
-        $env{PATH_TRANSLATED} = "$git_dir/$path";
+        $env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
         my $rdr = { 0 => fileno($in) };
         my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
-        my ($fh, $rpipe);
-        my $end = sub {
-                if (my $err = $qsp->finish) {
-                        err($env, "git http-backend ($git_dir): $err");
-                }
-                $fh->close if $fh; # async-only
-        };
-
-        # Danga::Socket users, we queue up the read_enable callback to
-        # fire after pending writes are complete:
-        my $buf = '';
-        my $rd_hdr = sub {
-                my $r = sysread($rpipe, $buf, 1024, length($buf));
-                return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
-                return r(500, 'http-backend error') unless $r;
-                $r = parse_cgi_headers(\$buf) or return; # incomplete headers
+        $qsp->psgi_return($env, $limiter, sub {
+                my ($r, $bref) = @_;
+                $r = parse_cgi_headers($bref) or return; # incomplete headers
                 $r->[0] == 403 ? serve_dumb($env, $git, $path) : $r;
-        };
-        my $res;
-        my $async = $env->{'pi-httpd.async'}; # XXX unstable API
-        my $cb = sub {
-                my $r = $rd_hdr->() or return;
-                $rd_hdr = undef;
-                if (scalar(@$r) == 3) { # error:
-                        if ($async) {
-                                $async->close; # calls rpipe->close
-                        } else {
-                                $rpipe->close;
-                                $end->();
-                        }
-                        $res->($r);
-                } elsif ($async) {
-                        $fh = $res->($r);
-                        $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
-                } else { # for synchronous PSGI servers
-                        require PublicInbox::GetlineBody;
-                        $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
-                                                                $buf);
-                        $res->($r);
-                }
-        };
-        sub {
-                ($res) = @_;
-
-                # hopefully this doesn't break any middlewares,
-                # holding the input here is a waste of FDs and memory
-                $env->{'psgi.input'} = undef;
-
-                $qsp->start($limiter, sub { # may run later, much later...
-                        ($rpipe) = @_;
-                        $in = undef;
-                        if ($async) {
-                                $async = $async->($rpipe, $cb, $end);
-                        } else { # generic PSGI
-                                $cb->() while $rd_hdr;
-                        }
-                });
-        };
+        });
 }
 
 sub input_to_file {
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index 38517710..b0bf94ad 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -29,9 +29,16 @@ sub new {
                 'psgi.run_once'         => Plack::Util::FALSE,
                 'psgi.multithread' => Plack::Util::FALSE,
                 'psgi.multiprocess' => Plack::Util::TRUE,
+
+                # We don't use this anywhere, but we can support
+                # other PSGI apps which might use it:
                 'psgix.input.buffered' => Plack::Util::TRUE,
 
-                # XXX unstable API!
+                # XXX unstable API!, only GitHTTPBackend needs
+                # this to limit git-http-backend(1) parallelism.
+                # The rest of our PSGI code is generic, relying
+                # on "pull" model using "getline" to prevent
+                # over-buffering.
                 'pi-httpd.async' => do {
                         no warnings 'once';
                         *pi_httpd_async
diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm
index 842aaf62..a647f10d 100644
--- a/lib/PublicInbox/HTTPD/Async.pm
+++ b/lib/PublicInbox/HTTPD/Async.pm
@@ -14,6 +14,15 @@ require PublicInbox::EvCleanup;
 
 sub new {
         my ($class, $io, $cb, $cleanup) = @_;
+
+        # no $io? call $cb at the top of the next event loop to
+        # avoid recursion:
+        unless (defined($io)) {
+                PublicInbox::EvCleanup::asap($cb) if $cb;
+                PublicInbox::EvCleanup::next_tick($cleanup) if $cleanup;
+                return;
+        }
+
         my $self = fields::new($class);
         IO::Handle::blocking($io, 0);
         $self->SUPER::new($io);
@@ -23,6 +32,7 @@ sub new {
         $self;
 }
 
+# fires after pending writes are complete:
 sub restart_read_cb ($) {
         my ($self) = @_;
         sub { $self->watch_read(1) }
@@ -35,14 +45,16 @@ sub main_cb ($$$) {
                 my $r = sysread($self->{sock}, $$bref, 8192);
                 if ($r) {
                         $fh->write($$bref);
-                        return if $http->{closed};
-                        if ($http->{write_buf_size}) {
-                                $self->watch_read(0);
-                                $http->write(restart_read_cb($self));
+                        unless ($http->{closed}) { # Danga::Socket sets this
+                                if ($http->{write_buf_size}) {
+                                        $self->watch_read(0);
+                                        $http->write(restart_read_cb($self));
+                                }
+                                # stay in watch_read, but let other clients
+                                # get some work done, too.
+                                return;
                         }
-                        # stay in watch_read, but let other clients
-                        # get some work done, too.
-                        return;
+                        # fall through to close below...
                 } elsif (!defined $r) {
                         return if $!{EAGAIN} || $!{EINTR};
                 }
@@ -66,7 +78,6 @@ sub async_pass {
 sub event_read { $_[0]->{cb}->(@_) }
 sub event_hup { $_[0]->{cb}->(@_) }
 sub event_err { $_[0]->{cb}->(@_) }
-sub sysread { shift->{sock}->sysread(@_) }
 
 sub close {
         my $self = shift;
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
new file mode 100644
index 00000000..237ffaca
--- /dev/null
+++ b/lib/PublicInbox/HlMod.pm
@@ -0,0 +1,125 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# I have no idea how stable or safe this is for handling untrusted
+# input, but it seems to have been around for a while, and the
+# highlight(1) executable is supported by gitweb and cgit.
+#
+# I'm also unsure about API stability, but highlight 3.x seems to
+# have been around a few years and ikiwiki (apparently the only
+# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
+# major changes to support it in recent years.
+#
+# Some code stolen from ikiwiki (GPL-2.0+)
+# wrapper for SWIG-generated highlight.pm bindings
+package PublicInbox::HlMod;
+use strict;
+use warnings;
+use highlight; # SWIG-generated stuff
+
+sub _parse_filetypes ($) {
+        my $ft_conf = $_[0]->searchFile('filetypes.conf') or
+                                die 'filetypes.conf not found by highlight';
+        open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
+        local $/;
+        my $cfg = <$fh>;
+        my %ext2lang;
+        my @shebang; # order matters
+
+        # Hrm... why isn't this exposed by the highlight API?
+        # highlight >= 3.2 format (bind-style) (from ikiwiki)
+        while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+                         Extensions\s*=\s*{([^}]+)}/sgx) {
+                my $lang = $1;
+                foreach my $bit (split(/,/, $2)) {
+                        $bit =~ s/.*"(.*)".*/$1/s;
+                        $ext2lang{$bit} = $lang;
+                }
+        }
+        # AFAIK, all the regexps used by in filetypes.conf distributed
+        # by highlight work as Perl REs
+        while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+                        Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
+                my ($lang, $re) = ($1, $2);
+                eval {
+                        my $perl_re = qr/$re/;
+                        push @shebang, [ $lang, $perl_re ];
+                };
+                if ($@) {
+                        warn "$lang shebang=[[$re]] did not work in Perl: $@";
+                }
+        }
+        (\%ext2lang, \@shebang);
+}
+
+sub new {
+        my ($class) = @_;
+        my $dir = highlight::DataDir->new;
+        $dir->initSearchDirectories('');
+        my ($ext2lang, $shebang) = _parse_filetypes($dir);
+        bless {
+                -dir => $dir,
+                -ext2lang => $ext2lang,
+                -shebang => $shebang,
+        }, $class;
+}
+
+sub _shebang2lang ($$) {
+        my ($self, $str) = @_;
+        my $shebang = $self->{-shebang};
+        foreach my $s (@$shebang) {
+                return $s->[0] if $$str =~ $s->[1];
+        }
+        undef;
+}
+
+sub _path2lang ($$) {
+        my ($self, $path) = @_;
+        my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
+        $ext = lc($ext);
+        $self->{-ext2lang}->{$ext} || $ext;
+}
+
+sub do_hl {
+        my ($self, $str, $path) = @_;
+        my $lang = _path2lang($self, $path) if defined $path;
+        my $dir = $self->{-dir};
+        my $langpath;
+        if (defined $lang) {
+                $langpath = $dir->getLangPath("$lang.lang") or return;
+                $langpath = undef unless -f $langpath;
+        }
+        unless (defined $langpath) {
+                $lang = _shebang2lang($self, $str) or return;
+                $langpath = $dir->getLangPath("$lang.lang") or return;
+                $langpath = undef unless -f $langpath;
+        }
+        return unless defined $langpath;
+
+        my $gen = $self->{$langpath} ||= do {
+                my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+                $g->setFragmentCode(1); # generate html fragment
+
+                # whatever theme works
+                my $themepath = $dir->getThemePath('print.theme');
+                $g->initTheme($themepath);
+                $g->loadLanguage($langpath);
+                $g->setEncoding('utf-8');
+                $g;
+        };
+        \($gen->generateString($$str))
+}
+
+# SWIG instances aren't reference-counted, but $self is;
+# so we need to delete all the CodeGenerator instances manually
+# at our own destruction
+sub DESTROY {
+        my ($self) = @_;
+        foreach my $gen (values %$self) {
+                if (ref($gen) eq 'highlight::CodeGenerator') {
+                        highlight::CodeGenerator::deleteInstance($gen);
+                }
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index ccfa3242..53810b33 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -9,17 +9,7 @@ use warnings;
 use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
-our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/;
-
-# User-generated content (UGC) may have excessively long lines
-# and screw up rendering on some browsers, so we use pre-wrap.
-#
-# We also force everything to the same scaled font-size because GUI
-# browsers (tested both Firefox and surf (webkit)) uses a larger font
-# for the Search <form> element than the rest of the page.  Font size
-# uniformity is important to people who rely on gigantic fonts.
-use constant STYLE =>
-        '<style>pre{white-space:pre-wrap}*{font-size:100%}</style>';
+our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape/;
 
 my $enc_ascii = find_encoding('us-ascii');
 
@@ -47,6 +37,21 @@ sub new_oneline {
         $class->new($raw);
 }
 
+# some of these overrides are standard C escapes so they're
+# easy-to-understand when rendered.
+my %escape_sequence = (
+        "\x00" => '\\0', # NUL
+        "\x07" => '\\a', # bell
+        "\x08" => '\\b', # backspace
+        "\x09" => "\t", # obvious to show as-is
+        "\x0a" => "\n", # obvious to show as-is
+        "\x0b" => '\\v', # vertical tab
+        "\x0c" => '\\f', # form feed
+        "\x0d" => '\\r', # carriage ret (not preceding \n)
+        "\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
+        "\x7f" => '\\x7f', # DEL
+);
+
 my %xhtml_map = (
         '"' => '&#34;',
         '&' => '&#38;',
@@ -56,18 +61,13 @@ my %xhtml_map = (
 );
 
 $xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
-# some of these overrides are standard C escapes so they're
-# easy-to-understand when rendered.
-$xhtml_map{"\x00"} = '\\0'; # NUL
-$xhtml_map{"\x07"} = '\\a'; # bell
-$xhtml_map{"\x08"} = '\\b'; # backspace
-$xhtml_map{"\x09"} = "\t"; # obvious to show as-is
-$xhtml_map{"\x0a"} = "\n"; # obvious to show as-is
-$xhtml_map{"\x0b"} = '\\v'; # vertical tab
-$xhtml_map{"\x0c"} = '\\f'; # form feed
-$xhtml_map{"\x0d"} = '\\r'; # carriage ret (not preceding \n)
-$xhtml_map{"\x1b"} = '^['; # ASCII escape (mutt seems to escape this way)
-$xhtml_map{"\x7f"} = '\\x7f'; # DEL
+%xhtml_map = (%xhtml_map, %escape_sequence);
+
+sub src_escape ($) {
+        $_[0] =~ s/\r\n/\n/sg;
+        $_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
+        $_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF);
+}
 
 sub ascii_html {
         my ($s) = @_;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index d57e46d2..cde46258 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -22,12 +22,25 @@ my $cleanup_broken = $@;
 my $CLEANUP = {}; # string(inbox) -> inbox
 sub cleanup_task () {
         $cleanup_timer = undef;
+        my $next = {};
         for my $ibx (values %$CLEANUP) {
-                foreach my $f (qw(git mm search)) {
+                my $again;
+                foreach my $f (qw(mm search)) {
                         delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
                 }
+                my $expire = time - 60;
+                if (my $git = $ibx->{git}) {
+                        $again = $git->cleanup($expire);
+                }
+                if (my $gits = $ibx->{-repo_objs}) {
+                        foreach my $git (@$gits) {
+                                $again = 1 if $git->cleanup($expire);
+                        }
+                }
+                $again ||= !!($ibx->{mm} || $ibx->{search});
+                $next->{"$ibx"} = $ibx if $again;
         }
-        $CLEANUP = {};
+        $CLEANUP = $next;
 }
 
 sub _cleanup_later ($) {
@@ -189,8 +202,8 @@ sub cloneurl {
 
 sub base_url {
         my ($self, $env) = @_;
-        if ($env) { # PSGI env
-                my $scheme = $env->{'psgi.url_scheme'};
+        my $scheme;
+        if ($env && ($scheme = $env->{'psgi.url_scheme'})) { # PSGI env
                 my $host_port = $env->{HTTP_HOST} ||
                         "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
                 my $url = "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/');
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index cd56f272..7f1ab15e 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -10,6 +10,7 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
         mids references/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
+require PublicInbox::Address;
 use constant {
         MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
         MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
@@ -79,22 +80,34 @@ sub references ($) {
                         push(@mids, ($v =~ /<([^>]+)>/sg));
                 }
         }
-        uniq_mids(\@mids);
+
+        # old versions of git-send-email would prompt users for
+        # In-Reply-To and users' muscle memory would use 'y' or 'n'
+        # as responses:
+        my %addr = ( y => 1, n => 1 );
+
+        foreach my $f (qw(To From Cc)) {
+                my @v = $hdr->header_raw($f);
+                foreach my $v (@v) {
+                        $addr{$_} = 1 for (PublicInbox::Address::emails($v));
+                }
+        }
+        uniq_mids(\@mids, \%addr);
 }
 
-sub uniq_mids ($) {
-        my ($mids) = @_;
+sub uniq_mids ($;$) {
+        my ($mids, $seen) = @_;
         my @ret;
-        my %seen;
+        $seen ||= {};
         foreach my $mid (@$mids) {
                 $mid =~ tr/\n\t\r//d;
                 if (length($mid) > MAX_MID_SIZE) {
                         warn "Message-ID: <$mid> too long, truncating\n";
                         $mid = substr($mid, 0, MAX_MID_SIZE);
                 }
-                next if $seen{$mid};
+                next if $seen->{$mid};
                 push @ret, $mid;
-                $seen{$mid} = 1;
+                $seen->{$mid} = 1;
         }
         \@ret;
 }
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 623ffd35..63d5870b 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -515,6 +515,7 @@ sub set_art {
 sub _header ($) {
         my $hdr = $_[0]->header_obj->as_string;
         utf8::encode($hdr);
+        $hdr =~ s/(?<!\r)\n/\r\n/sg;
         $hdr
 }
 
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 3500f8a4..509a4412 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -1,15 +1,40 @@
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Limits the number of processes spawned
+# Like most Perl modules in public-inbox, this is internal and
+# NOT subject to any stability guarantees!  It is only documented
+# for other hackers.
+#
+# This is used to limit the number of processes spawned by the
+# PSGI server, so it acts like a semaphore and queues up extra
+# commands to be run if currently at the limit.  Multiple "limiters"
+# may be configured which give inboxes different channels to
+# operate in.  This can be useful to ensure smaller inboxes can
+# be cloned while cloning of large inboxes is maxed out.
+#
 # This does not depend on Danga::Socket or any other external
-# scheduling mechanism, you just need to call start and finish
-# appropriately
+# scheduling mechanism, you just need to call start() and finish()
+# appropriately. However, public-inbox-httpd (which uses Danga::Socket)
+# will be able to schedule this based on readability of stdout from
+# the spawned process.  See GitHTTPBackend.pm and SolverGit.pm for
+# usage examples.  It does not depend on any form of threading.
+#
+# This is useful for scheduling CGI execution of both long-lived
+# git-http-backend(1) process (for "git clone") as well as short-lived
+# processes such as git-apply(1).
+
 package PublicInbox::Qspawn;
 use strict;
 use warnings;
 use PublicInbox::Spawn qw(popen_rd);
+require Plack::Util;
+
+my $def_limiter;
 
+# declares a command to spawn (but does not spawn it).
+# $cmd is the command to spawn
+# $env is the environ for the child process
+# $opt can include redirects and perhaps other process spawning options
 sub new ($$$;) {
         my ($class, $cmd, $env, $opt) = @_;
         bless { args => [ $cmd, $env, $opt ] }, $class;
@@ -28,13 +53,22 @@ sub _do_spawn {
         $cb->($self->{rpipe});
 }
 
+sub child_err ($) {
+        my ($child_error) = @_; # typically $?
+        my $exitstatus = ($child_error >> 8) or return;
+        my $sig = $child_error & 127;
+        my $msg = "exit status=$exitstatus";
+        $msg .= " signal=$sig" if $sig;
+        $msg;
+}
+
 sub finish ($) {
         my ($self) = @_;
         my $limiter = $self->{limiter};
         my $running;
         if (delete $self->{rpipe}) {
                 my $pid = delete $self->{pid};
-                $self->{err} = $pid == waitpid($pid, 0) ? $? :
+                $self->{err} = $pid == waitpid($pid, 0) ? child_err($?) :
                                 "PID:$pid still running?";
                 $running = --$limiter->{running};
         }
@@ -59,6 +93,153 @@ sub start {
         }
 }
 
+sub _psgi_finish ($$) {
+        my ($self, $env) = @_;
+        my $err = $self->finish;
+        if ($err && !$env->{'qspawn.quiet'}) {
+                $err = join(' ', @{$self->{args}->[0]}).": $err\n";
+                $env->{'psgi.errors'}->print($err);
+        }
+}
+
+# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with
+# the stdout of the given command when done; but respects the given limiter
+# $env is the PSGI env.  As with ``/qx; only use this when output is small
+# and safe to slurp.
+sub psgi_qx {
+        my ($self, $env, $limiter, $qx_cb) = @_;
+        my $qx = PublicInbox::Qspawn::Qx->new;
+        my $end = sub {
+                _psgi_finish($self, $env);
+                eval { $qx_cb->($qx) };
+                $qx = undef;
+        };
+        my $rpipe;
+        my $async = $env->{'pi-httpd.async'};
+        my $cb = sub {
+                my $r = sysread($rpipe, my $buf, 8192);
+                if ($async) {
+                        $async->async_pass($env->{'psgix.io'}, $qx, \$buf);
+                } elsif (defined $r) {
+                        $r ? $qx->write($buf) : $end->();
+                } else {
+                        return if $!{EAGAIN} || $!{EINTR}; # loop again
+                        $end->();
+                }
+        };
+        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+        $self->start($limiter, sub { # may run later, much later...
+                ($rpipe) = @_;
+                if ($async) {
+                # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+                        $async = $async->($rpipe, $cb, $end);
+                } else { # generic PSGI
+                        $cb->() while $qx;
+                }
+        });
+}
+
+# create a filter for "push"-based streaming PSGI writes used by HTTPD::Async
+sub filter_fh ($$) {
+        my ($fh, $filter) = @_;
+        Plack::Util::inline_object(
+                close => sub {
+                        $fh->write($filter->(undef));
+                        $fh->close;
+                },
+                write => sub {
+                        $fh->write($filter->($_[0]));
+                });
+}
+
+# Used for streaming the stdout of one process as a PSGI response.
+#
+# $env is the PSGI env.
+# optional keys in $env:
+#   $env->{'qspawn.wcb'} - the write callback from the PSGI server
+#                          optional, use this if you've already
+#                          captured it elsewhere.  If not given,
+#                          psgi_return will return an anonymous
+#                          sub for the PSGI server to call
+#
+#   $env->{'qspawn.filter'} - filter callback, receives a string as input,
+#                             undef on EOF
+#
+# $limiter - the Limiter object to use (uses the def_limiter if not given)
+#
+# $parse_hdr - Initial read function; often for parsing CGI header output.
+#              It will be given the return value of sysread from the pipe
+#              and a string ref of the current buffer.  Returns an arrayref
+#              for PSGI responses.  2-element arrays in PSGI mean the
+#              body will be streamed, later, via writes (push-based) to
+#              psgix.io.  3-element arrays means the body is available
+#              immediately (or streamed via ->getline (pull-based)).
+sub psgi_return {
+        my ($self, $env, $limiter, $parse_hdr) = @_;
+        my ($fh, $rpipe);
+        my $end = sub {
+                _psgi_finish($self, $env);
+                $fh->close if $fh; # async-only
+        };
+
+        my $buf = '';
+        my $rd_hdr = sub {
+                my $r = sysread($rpipe, $buf, 1024, length($buf));
+                return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
+                $parse_hdr->($r, \$buf);
+        };
+
+        my $wcb = delete $env->{'qspawn.wcb'};
+        my $async = $env->{'pi-httpd.async'};
+
+        my $cb = sub {
+                my $r = $rd_hdr->() or return;
+                $rd_hdr = undef;
+                my $filter = delete $env->{'qspawn.filter'};
+                if (scalar(@$r) == 3) { # error
+                        if ($async) {
+                                $async->close; # calls rpipe->close and $end
+                        } else {
+                                $rpipe->close;
+                                $end->();
+                        }
+                        $wcb->($r);
+                } elsif ($async) {
+                        $fh = $wcb->($r); # scalar @$r == 2
+                        $fh = filter_fh($fh, $filter) if $filter;
+                        $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
+                } else { # for synchronous PSGI servers
+                        require PublicInbox::GetlineBody;
+                        $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
+                                                                $buf, $filter);
+                        $wcb->($r);
+                }
+        };
+        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+        my $start_cb = sub { # may run later, much later...
+                ($rpipe) = @_;
+                if ($async) {
+                        # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+                        $async = $async->($rpipe, $cb, $end);
+                } else { # generic PSGI
+                        $cb->() while $rd_hdr;
+                }
+        };
+
+        # the caller already captured the PSGI write callback from
+        # the PSGI server, so we can call ->start, here:
+        return $self->start($limiter, $start_cb) if $wcb;
+
+        # the caller will return this sub to the PSGI server, so
+        # it can set the response callback (that is, for PublicInbox::HTTP,
+        # the chunked_wcb or identity_wcb callback), but other HTTP servers
+        # are supported:
+        sub {
+                ($wcb) = @_;
+                $self->start($limiter, $start_cb);
+        };
+}
+
 package PublicInbox::Qspawn::Limiter;
 use strict;
 use warnings;
@@ -73,4 +254,21 @@ sub new {
         }, $class;
 }
 
+# captures everything into a buffer and executes a callback when done
+package PublicInbox::Qspawn::Qx;
+use strict;
+use warnings;
+
+sub new {
+        my ($class) = @_;
+        my $buf = '';
+        bless \$buf, $class;
+}
+
+# called by PublicInbox::HTTPD::Async ($fh->write)
+sub write {
+        ${$_[0]} .= $_[1];
+        undef;
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 8810fe76..db0495bc 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -18,7 +18,7 @@ use Carp qw(croak);
 use POSIX qw(strftime);
 use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn);
-require PublicInbox::Git;
+use PublicInbox::Git qw(git_unquote);
 use Compress::Zlib qw(compress);
 
 use constant {
@@ -29,25 +29,6 @@ use constant {
 
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 
-my %GIT_ESC = (
-        a => "\a",
-        b => "\b",
-        f => "\f",
-        n => "\n",
-        r => "\r",
-        t => "\t",
-        v => "\013",
-);
-
-sub git_unquote ($) {
-        my ($s) = @_;
-        return $s unless ($s =~ /\A"(.*)"\z/);
-        $s = $1;
-        $s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
-        $s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
-        $s;
-}
-
 sub new {
         my ($class, $ibx, $creat, $part) = @_;
         my $levels = qr/\A(?:full|medium|basic)\z/;
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
new file mode 100644
index 00000000..a13ae9e5
--- /dev/null
+++ b/lib/PublicInbox/SolverGit.pm
@@ -0,0 +1,573 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "Solve" blobs which don't exist in git code repositories by
+# searching inboxes for post-image blobs.
+
+# this emits a lot of debugging/tracing information which may be
+# publically viewed over HTTP(S).  Be careful not to expose
+# local filesystem layouts in the process.
+package PublicInbox::SolverGit;
+use strict;
+use warnings;
+use File::Temp qw();
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::MsgIter qw(msg_iter msg_part_text);
+use PublicInbox::Qspawn;
+use URI::Escape qw(uri_escape_utf8);
+
+# POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
+# subtract 2048 bytes.  We also don't factor in environment variable
+# headroom into this.
+use POSIX qw(sysconf _SC_ARG_MAX);
+my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
+my $OID_MIN = 7;
+
+# By default, "git format-patch" generates filenames with a four-digit
+# prefix, so that means 9999 patch series are OK, right? :>
+# Maybe we can make this configurable, main concern is disk space overhead
+# for uncompressed patch fragments.  Aside from space, public-inbox-httpd
+# is otherwise unaffected by having many patches, here, as it can share
+# work fairly.  Other PSGI servers may have trouble, though.
+my $MAX_PATCH = 9999;
+
+# di = diff info / a hashref with information about a diff ($di):
+# {
+#        oid_a => abbreviated pre-image oid,
+#        oid_b => abbreviated post-image oid,
+#        tmp => anonymous file handle with the diff,
+#        hdr_lines => arrayref of various header lines for mode information
+#        mode_a => original mode of oid_a (string, not integer),
+#        ibx => PublicInbox::Inbox object containing the diff
+#        smsg => PublicInbox::SearchMsg object containing diff
+#        path_a => pre-image path
+#        path_b => post-image path
+# }
+
+# don't bother if somebody sends us a patch with these path components,
+# it's junk at best, an attack attempt at worse:
+my %bad_component = map { $_ => 1 } ('', '.', '..');
+
+sub dbg ($$) {
+        print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
+}
+
+sub ERR ($$) {
+        my ($self, $err) = @_;
+        print { $self->{out} } $err, "\n";
+        my $ucb = delete($self->{user_cb});
+        eval { $ucb->($err) } if $ucb;
+        die $err;
+}
+
+# look for existing blobs already in git repos
+sub solve_existing ($$) {
+        my ($self, $want) = @_;
+        my $oid_b = $want->{oid_b};
+        my @ambiguous; # Array of [ git, $oids]
+        foreach my $git (@{$self->{gits}}) {
+                my ($oid_full, $type, $size) = $git->check($oid_b);
+                if (defined($type) && $type eq 'blob') {
+                        return [ $git, $oid_full, $type, int($size) ];
+                }
+
+                next if length($oid_b) == 40;
+
+                # parse stderr of "git cat-file --batch-check"
+                my $err = $git->last_check_err;
+                my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g);
+                next unless scalar(@oids);
+
+                # TODO: do something with the ambiguous array?
+                # push @ambiguous, [ $git, @oids ];
+
+                dbg($self, "`$oid_b' ambiguous in " .
+                                join("\n\t", $git->pub_urls) . "\n" .
+                                join('', map { "$_ blob\n" } @oids));
+        }
+        scalar(@ambiguous) ? \@ambiguous : undef;
+}
+
+sub extract_diff ($$$$$) {
+        my ($self, $p, $re, $ibx, $smsg) = @_;
+        my ($part) = @$p; # ignore $depth and @idx;
+        my $hdr_lines; # diff --git a/... b/...
+        my $tmp;
+        my $ct = $part->content_type || 'text/plain';
+        my ($s, undef) = msg_part_text($part, $ct);
+        defined $s or return;
+        my $di = {};
+
+        # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+        # change it back to LF:
+        my $cte = $part->header('Content-Transfer-Encoding') || '';
+        if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+                $s =~ s/\r\n/\n/sg;
+        }
+
+        foreach my $l (split(/^/m, $s)) {
+                if ($l =~ $re) {
+                        $di->{oid_a} = $1;
+                        $di->{oid_b} = $2;
+                        if (defined($3)) {
+                                my $mode_a = $3;
+                                if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+                                        $di->{mode_a} = $mode_a;
+                                }
+                        }
+
+
+                        # start writing the diff out to a tempfile
+                        my $pn = ++$self->{tot};
+                        open($tmp, '>', $self->{tmp}->dirname . "/$pn") or
+                                                        die "open(tmp): $!";
+
+                        push @$hdr_lines, $l;
+                        $di->{hdr_lines} = $hdr_lines;
+                        utf8::encode($_) for @$hdr_lines;
+                        print $tmp @$hdr_lines or die "print(tmp): $!";
+
+                        # for debugging/diagnostics:
+                        $di->{ibx} = $ibx;
+                        $di->{smsg} = $smsg;
+                } elsif ($l =~ m!\Adiff --git ("?[^/]+/.+) ("?[^/]+/.+)$!) {
+                        last if $tmp; # got our blob, done!
+
+                        my ($path_a, $path_b) = ($1, $2);
+
+                        # diff header lines won't have \r because git
+                        # will quote them, but Email::MIME gives CRLF
+                        # for quoted-printable:
+                        $path_b =~ tr/\r//d;
+
+                        # don't care for leading 'a/' and 'b/'
+                        my (undef, @a) = split(m{/}, git_unquote($path_a));
+                        my (undef, @b) = split(m{/}, git_unquote($path_b));
+
+                        # get rid of path-traversal attempts and junk patches:
+                        foreach (@a, @b) {
+                                return if $bad_component{$_};
+                        }
+
+                        $di->{path_a} = join('/', @a);
+                        $di->{path_b} = join('/', @b);
+                        $hdr_lines = [ $l ];
+                } elsif ($tmp) {
+                        utf8::encode($l);
+                        print $tmp $l or die "print(tmp): $!";
+                } elsif ($hdr_lines) {
+                        push @$hdr_lines, $l;
+                        if ($l =~ /\Anew file mode (100644|120000|100755)$/) {
+                                $di->{mode_a} = $1;
+                        }
+                }
+        }
+        return undef unless $tmp;
+        close $tmp or die "close(tmp): $!";
+        $di;
+}
+
+sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }
+
+# ".." appears in path names, which confuses Xapian into treating
+# it as a range query.  So we split on ".." since Xapian breaks
+# on punctuation anyways:
+sub filename_query ($) {
+        join('', map { qq( dfn:"$_") } split(/\.\./, $_[0]));
+}
+
+sub find_extract_diff ($$$) {
+        my ($self, $ibx, $want) = @_;
+        my $srch = $ibx->search or return;
+
+        my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
+        $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
+
+        my $q = "dfpost:$post";
+        my $pre = $want->{oid_a};
+        if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
+                $q .= " dfpre:$pre";
+        } else {
+                $pre = '[a-f0-9]{7}'; # for $re below
+        }
+
+        my $path_b = $want->{path_b};
+        if (path_searchable($path_b)) {
+                $q .= filename_query($path_b);
+
+                my $path_a = $want->{path_a};
+                if (path_searchable($path_a) && $path_a ne $path_b) {
+                        $q .= filename_query($path_a);
+                }
+        }
+
+        my $msgs = $srch->query($q, { relevance => 1 });
+        my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+
+        my $di;
+        foreach my $smsg (@$msgs) {
+                $ibx->smsg_mime($smsg) or next;
+                msg_iter(delete($smsg->{mime}), sub {
+                        $di ||= extract_diff($self, $_[0], $re, $ibx, $smsg);
+                });
+                return $di if $di;
+        }
+}
+
+sub prepare_index ($) {
+        my ($self) = @_;
+        my $patches = $self->{patches};
+        $self->{nr} = 0;
+
+        my $di = $patches->[0] or die 'no patches';
+        my $oid_a = $di->{oid_a} or die '{oid_a} unset';
+        my $existing = $self->{found}->{$oid_a};
+
+        # no index creation for added files
+        $oid_a =~ /\A0+\z/ and return next_step($self);
+
+        die "BUG: $oid_a not not found" unless $existing;
+
+        my $oid_full = $existing->[1];
+        my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
+        my $mode_a = $di->{mode_a} || extract_old_mode($di);
+
+        open my $in, '+>', undef or die "open: $!";
+        print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!";
+        $in->flush or die "flush: $!";
+        sysseek($in, 0, 0) or die "seek: $!";
+
+        dbg($self, 'preparing index');
+        my $rdr = { 0 => fileno($in) };
+        my $cmd = [ qw(git update-index -z --index-info) ];
+        my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                if (my $err = $qsp->{err}) {
+                        ERR($self, "git update-index error: $err");
+                }
+                dbg($self, "index prepared:\n" .
+                        "$mode_a $oid_full\t" . git_quote($path_a));
+                next_step($self); # onto do_git_apply
+        });
+}
+
+# pure Perl "git init"
+sub do_git_init ($) {
+        my ($self) = @_;
+        my $dir = $self->{tmp}->dirname;
+        my $git_dir = "$dir/git";
+
+        foreach ('', qw(objects refs objects/info refs/heads)) {
+                mkdir("$git_dir/$_") or die "mkdir $_: $!";
+        }
+        open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
+        print $fh <<'EOF' or die "print git/config $!";
+[core]
+        repositoryFormatVersion = 0
+        filemode = true
+        bare = false
+        fsyncObjectfiles = false
+        logAllRefUpdates = false
+EOF
+        close $fh or die "close git/config: $!";
+
+        open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
+        print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
+        close $fh or die "close git/HEAD: $!";
+
+        my $f = 'objects/info/alternates';
+        open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
+        foreach my $git (@{$self->{gits}}) {
+                print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
+        }
+        close $fh or die "close: $f: $!";
+        my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
+        $tmp_git->{-tmp} = $self->{tmp};
+        $self->{git_env} = {
+                GIT_DIR => $git_dir,
+                GIT_INDEX_FILE => "$git_dir/index",
+        };
+        prepare_index($self);
+}
+
+sub extract_old_mode ($) {
+        my ($di) = @_;
+        if (join('', @{$di->{hdr_lines}}) =~
+                        /^old mode (100644|100755|120000)\b/) {
+                return $1;
+        }
+        '100644';
+}
+
+sub do_finish ($$) {
+        my ($self, $user_cb) = @_;
+        my $found = $self->{found};
+        my $oid_want = $self->{oid_want};
+        if (my $exists = $found->{$oid_want}) {
+                return $user_cb->($exists);
+        }
+
+        # let git disambiguate if oid_want was too short,
+        # but long enough to be unambiguous:
+        my $tmp_git = $self->{tmp_git};
+        if (my @res = $tmp_git->check($oid_want)) {
+                return $user_cb->($found->{$res[0]});
+        }
+        if (my $err = $tmp_git->last_check_err) {
+                dbg($self, $err);
+        }
+        $user_cb->(undef);
+}
+
+sub do_step ($) {
+        my ($self) = @_;
+        eval {
+                # step 1: resolve blobs to patches in the todo queue
+                if (my $want = pop @{$self->{todo}}) {
+                        # this populates {patches} and {todo}
+                        resolve_patch($self, $want);
+
+                # step 2: then we instantiate a working tree once
+                # the todo queue is finally empty:
+                } elsif (!defined($self->{tmp_git})) {
+                        do_git_init($self);
+
+                # step 3: apply each patch in the stack
+                } elsif (scalar @{$self->{patches}}) {
+                        do_git_apply($self);
+
+                # step 4: execute the user-supplied callback with
+                # our result: (which may be undef)
+                # Other steps may call user_cb to terminate prematurely
+                # on error
+                } elsif (my $user_cb = delete($self->{user_cb})) {
+                        do_finish($self, $user_cb);
+                } else {
+                        die 'about to call user_cb twice'; # Oops :x
+                }
+        }; # eval
+        my $err = $@;
+        if ($err) {
+                $err =~ s/^\s*Exception:\s*//; # bad word to show users :P
+                dbg($self, "E: $err");
+                my $ucb = delete($self->{user_cb});
+                eval { $ucb->($err) } if $ucb;
+        }
+}
+
+sub step_cb ($) {
+        my ($self) = @_;
+        sub { do_step($self) };
+}
+
+sub next_step ($) {
+        my ($self) = @_;
+        # if outside of public-inbox-httpd, caller is expected to be
+        # looping step_cb, anyways
+        my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
+        # PublicInbox::HTTPD::Async->new
+        $async->(undef, step_cb($self));
+}
+
+sub mark_found ($$$) {
+        my ($self, $oid, $found_info) = @_;
+        my $found = $self->{found};
+        $found->{$oid} = $found_info;
+        my $oid_cur = $found_info->[1];
+        while ($oid_cur ne $oid && length($oid_cur) > $OID_MIN) {
+                $found->{$oid_cur} = $found_info;
+                chop($oid_cur);
+        }
+}
+
+sub parse_ls_files ($$$$) {
+        my ($self, $qsp, $bref, $di) = @_;
+        if (my $err = $qsp->{err}) {
+                die "git ls-files error: $err";
+        }
+
+        my ($line, @extra) = split(/\0/, $$bref);
+        scalar(@extra) and die "BUG: extra files in index: <",
+                                join('> <', @extra), ">";
+
+        my ($info, $file) = split(/\t/, $line, 2);
+        my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
+        if ($file ne $di->{path_b}) {
+                die
+"BUG: index mismatch: file=$file != path_b=$di->{path_b}";
+        }
+
+        my $tmp_git = $self->{tmp_git} or die 'no git working tree';
+        my (undef, undef, $size) = $tmp_git->check($oid_b_full);
+        defined($size) or die "check $oid_b_full failed";
+
+        dbg($self, "index at:\n$mode_b $oid_b_full\t$file");
+        my $created = [ $tmp_git, $oid_b_full, 'blob', $size, $di ];
+        mark_found($self, $di->{oid_b}, $created);
+        next_step($self); # onto the next patch
+}
+
+sub start_ls_files ($$) {
+        my ($self, $di) = @_;
+        my $cmd = [qw(git ls-files -s -z)];
+        my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env});
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                eval { parse_ls_files($self, $qsp, $bref, $di) };
+                ERR($self, $@) if $@;
+        });
+}
+
+sub do_git_apply ($) {
+        my ($self) = @_;
+        my $dn = $self->{tmp}->dirname;
+        my $patches = $self->{patches};
+
+        # we need --ignore-whitespace because some patches are CRLF
+        my @cmd = (qw(git -C), $dn, qw(apply --cached --ignore-whitespace
+                        --whitespace=warn --verbose));
+        my $len = length(join(' ', @cmd));
+        my $total = $self->{tot};
+        my $di; # keep track of the last one for "git ls-files"
+
+        do {
+                my $i = ++$self->{nr};
+                $di = shift @$patches;
+                dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
+                        "\n" . join('', @{$di->{hdr_lines}}));
+                my $path = $total + 1 - $i;
+                $len += length($path) + 1;
+                push @cmd, $path;
+        } while (@$patches && $len < $ARG_SIZE_MAX);
+
+        my $rdr = { 2 => 1 };
+        my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $rdr);
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                dbg($self, $$bref);
+                if (my $err = $qsp->{err}) {
+                        ERR($self, "git apply error: $err");
+                }
+                eval { start_ls_files($self, $di) };
+                ERR($self, $@) if $@;
+        });
+}
+
+sub di_url ($$) {
+        my ($self, $di) = @_;
+        # note: we don't pass the PSGI env unconditionally, here,
+        # different inboxes can have different HTTP_HOST on the same instance.
+        my $ibx = $di->{ibx};
+        my $env = $self->{psgi_env} if $ibx eq $self->{inboxes}->[0];
+        my $url = $ibx->base_url($env);
+        my $mid = $di->{smsg}->{mid};
+        defined($url) ? "$url$mid/" : "<$mid>";
+}
+
+sub resolve_patch ($$) {
+        my ($self, $want) = @_;
+
+        if (scalar(@{$self->{patches}}) > $MAX_PATCH) {
+                die "Aborting, too many steps to $self->{oid_want}";
+        }
+
+        # see if we can find the blob in an existing git repo:
+        my $cur_want = $want->{oid_b};
+        if ($self->{seen_oid}->{$cur_want}++) {
+                die "Loop detected solving $cur_want\n";
+        }
+        if (my $existing = solve_existing($self, $want)) {
+                dbg($self, "found $cur_want in " .
+                        join("\n", $existing->[0]->pub_urls));
+
+                if ($cur_want eq $self->{oid_want}) { # all done!
+                        eval { delete($self->{user_cb})->($existing) };
+                        die "E: $@" if $@;
+                        return;
+                }
+                mark_found($self, $cur_want, $existing);
+                return next_step($self); # onto patch application
+        }
+
+        # scan through inboxes to look for emails which results in
+        # the oid we want:
+        my $di;
+        foreach my $ibx (@{$self->{inboxes}}) {
+                $di = find_extract_diff($self, $ibx, $want) or next;
+
+                unshift @{$self->{patches}}, $di;
+                dbg($self, "found $cur_want in ".di_url($self, $di));
+
+                # good, we can find a path to the oid we $want, now
+                # lets see if we need to apply more patches:
+                my $src = $di->{oid_a};
+
+                unless ($src =~ /\A0+\z/) {
+                        # we have to solve it using another oid, fine:
+                        my $job = { oid_b => $src, path_b => $di->{path_a} };
+                        push @{$self->{todo}}, $job;
+                }
+                return next_step($self); # onto the next todo item
+        }
+        if (length($cur_want) > $OID_MIN) {
+                chop($cur_want);
+                dbg($self, "retrying $want->{oid_b} as $cur_want");
+                $want->{oid_b} = $cur_want;
+                push @{$self->{todo}}, $want;
+                return next_step($self); # retry with shorter abbrev
+        }
+
+        dbg($self, "could not find $cur_want");
+        eval { delete($self->{user_cb})->(undef) }; # not found! :<
+        die "E: $@" if $@;
+}
+
+# this API is designed to avoid creating self-referential structures;
+# so user_cb never references the SolverGit object
+sub new {
+        my ($class, $ibx, $user_cb) = @_;
+
+        bless {
+                gits => $ibx->{-repo_objs},
+                user_cb => $user_cb,
+
+                # TODO: config option for searching related inboxes
+                inboxes => [ $ibx ],
+        }, $class;
+}
+
+# recreate $oid_want using $hints
+# Calls {user_cb} with: [ ::Git object, oid_full, type, size, di (diff_info) ]
+# with found object, or undef if nothing was found
+# Calls {user_cb} with a string error on fatal errors
+sub solve ($$$$$) {
+        my ($self, $env, $out, $oid_want, $hints) = @_;
+
+        # should we even get here? Probably not, but somebody
+        # could be manually typing URLs:
+        return (delete $self->{user_cb})->(undef) if $oid_want =~ /\A0+\z/;
+
+        $self->{oid_want} = $oid_want;
+        $self->{out} = $out;
+        $self->{seen_oid} = {};
+        $self->{tot} = 0;
+        $self->{psgi_env} = $env;
+        $self->{todo} = [ { %$hints, oid_b => $oid_want } ];
+        $self->{patches} = []; # [ $di, $di, ... ]
+        $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
+        $self->{tmp} = File::Temp->newdir('solver.tmp-XXXXXXXX', TMPDIR => 1);
+
+        dbg($self, "solving $oid_want ...");
+        my $step_cb = step_cb($self);
+        if (my $async = $env->{'pi-httpd.async'}) {
+                # PublicInbox::HTTPD::Async->new
+                $async->(undef, $step_cb);
+        } else {
+                $step_cb->() while $self->{user_cb};
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
new file mode 100644
index 00000000..df0429c3
--- /dev/null
+++ b/lib/PublicInbox/UserContent.pm
@@ -0,0 +1,98 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Self-updating module containing a sample CSS for client-side
+# customization by users of public-inbox.  Used by Makefile.PL
+package PublicInbox::UserContent;
+use strict;
+use warnings;
+
+# this sub is updated automatically:
+sub CSS () {
+        <<'_'
+        /*
+         * Dark color scheme using 216 web-safe colors, inspired
+         * somewhat by the default color scheme in mutt.
+         * It reduces eyestrain for me, and energy usage for all:
+         * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+         */
+        * { background:#000; color:#ccc }
+
+        /*
+         * Underlined links add visual noise which make them hard-to-read.
+         * Use colors to make them stand out, instead.
+         */
+        a { color:#69f; text-decoration:none }
+        a:visited { color:#96f }
+
+        /* quoted text gets a different color */
+        *.q { color:#09f }
+
+        /*
+         * these may be used with cgit, too
+         * (cgit uses <div>, public-inbox uses <span>)
+         */
+        *.add { color:#0ff }
+        *.del { color:#f0f }
+        *.head { color:#fff }
+        *.hunk { color:#c93 }
+
+        /*
+         * highlight 3.x colors (tested 3.18)
+         * this doesn't use most of the colors available (I find too many
+         * colors overwhelming).  So the #ccc default is commented out.
+         */
+        .hl.num { color:#f30 } /* number */
+        .hl.esc { color:#f0f } /* escape character */
+        .hl.str { color:#f30 } /* string */
+        .hl.ppc { color:#f0f } /* preprocessor */
+        .hl.pps { color:#f30 } /* preprocessor string */
+        .hl.slc { color:#09f } /* single-line comment */
+        .hl.com { color:#09f }
+        /* .hl.opt { color:#ccc } */
+        /* .hl.ipl { color:#ccc } */
+        /* .hl.lin { color:#ccc } */
+        .hl.kwa { color:#ff0 }
+        .hl.kwb { color:#0f0 }
+        .hl.kwc { color:#ff0 }
+        /* .hl.kwd { color:#ccc } */
+_
+}
+# end of auto-updated sub
+
+# return a sample CSS
+sub sample ($$) {
+        my ($ibx, $env) = @_;
+        my $url_prefix = $ibx->base_url($env);
+        my $preamble = <<"";
+/*
+ * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css
+ * where \$PROFILE_FOLDER is platform-specific
+ *
+ * cf. http://kb.mozillazine.org/UserContent.css
+ *     http://kb.mozillazine.org/Profile_folder_-_Firefox
+ *
+ * Users of dillo can remove the entire lines with "moz-only"
+ * in them and place the resulting file in ~/.dillo/style.css
+ */
+\@-moz-document url-prefix($url_prefix) { /* moz-only */
+
+        $preamble . CSS() . "\n} /* moz-only */\n";
+}
+
+# Auto-update this file based on the contents of a CSS file:
+# usage: perl -I lib __FILE__ contrib/css/216dark.css
+# (See Makefile.PL)
+if (scalar(@ARGV) == 1 && -r __FILE__) {
+        use autodie;
+        open my $ro, '<', $ARGV[0];
+        my $css = do { local $/; <$ro> };
+        $css =~ s/^([ \t]*\S)/\t$1/smg;
+        open my $rw, '+<', __FILE__;
+        my $out = do { local $/; <$rw> };
+        $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm;
+        seek $rw, 0, 0;
+        print $rw $out;
+}
+
+1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index cd125e00..ca9b9550 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -14,6 +14,7 @@ use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
 use PublicInbox::Reply;
+use PublicInbox::ViewDiff qw(flush_diff);
 require POSIX;
 use Time::Local qw(timegm);
 
@@ -28,7 +29,7 @@ sub msg_html {
         my ($ctx, $mime, $more, $smsg) = @_;
         my $hdr = $mime->header_obj;
         my $ibx = $ctx->{-inbox};
-        my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
+        $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
         my $tip = _msg_html_prepare($hdr, $ctx, $more, 0);
         my $end = 2;
         PublicInbox::WwwStream->response($ctx, 200, sub {
@@ -36,7 +37,7 @@ sub msg_html {
                 if ($nr == 1) {
                         # $more cannot be true w/o $smsg being defined:
                         my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : '';
-                        $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) .
+                        $tip . multipart_text_as_html($mime, $upfx, $ibx) .
                                 '</pre><hr>'
                 } elsif ($more && @$more) {
                         ++$end;
@@ -81,15 +82,15 @@ sub msg_html_more {
         my $str = eval {
                 my ($id, $prev, $smsg) = @$more;
                 my $mid = $ctx->{mid};
-                $smsg = $ctx->{-inbox}->smsg_mime($smsg);
+                my $ibx = $ctx->{-inbox};
+                $smsg = $ibx->smsg_mime($smsg);
                 my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
                 @$more = $next ? ($id, $prev, $next) : ();
                 if ($smsg) {
                         my $mime = $smsg->{mime};
                         my $upfx = '../' . mid_escape($smsg->mid) . '/';
                         _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
-                                multipart_text_as_html($mime, $upfx,
-                                                        $ctx->{-obfs_ibx}) .
+                                multipart_text_as_html($mime, $upfx, $ibx) .
                                 '</pre><hr>'
                 } else {
                         '';
@@ -260,7 +261,8 @@ sub index_entry {
         $rv .= "\n";
 
         # scan through all parts, looking for displayable text
-        msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
+        my $ibx = $ctx->{-inbox};
+        msg_iter($mime, sub { $rv .= add_text_body($mhref, $ibx, $_[0]) });
 
         # add the footer
         $rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -488,11 +490,11 @@ sub thread_html {
 }
 
 sub multipart_text_as_html {
-        my ($mime, $upfx, $obfs_ibx) = @_;
+        my ($mime, $upfx, $ibx) = @_;
         my $rv = "";
 
         # scan through all parts, looking for displayable text
-        msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
+        msg_iter($mime, sub { $rv .= add_text_body($upfx, $ibx, $_[0]) });
         $rv;
 }
 
@@ -545,7 +547,8 @@ sub attach_link ($$$$;$) {
 }
 
 sub add_text_body {
-        my ($upfx, $obfs_ibx, $p) = @_;
+        my ($upfx, $ibx, $p) = @_;
+        my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
         # $p - from msg_iter: [ Email::MIME, depth, @idx ]
         my ($part, $depth) = @$p; # attachment @idx is unused
         my $ct = $part->content_type || 'text/plain';
@@ -554,6 +557,25 @@ sub add_text_body {
 
         return attach_link($upfx, $ct, $p, $fn) unless defined $s;
 
+        # makes no difference to browsers, and don't screw up filename
+        # link generation in diffs with the extra '%0D'
+        $s =~ s/\r\n/\n/sg;
+
+        my ($diff, $spfx);
+        if ($s =~ /^(?:diff|---|\+{3}) /ms) {
+                $diff = [];
+                if ($ibx->{-repo_objs}) {
+                        my $n_slash = $upfx =~ tr!/!/!;
+                        if ($n_slash == 0) {
+                                $spfx = '../';
+                        } elsif ($n_slash == 1) {
+                                $spfx = '';
+                        } else { # nslash == 2
+                                $spfx = '../../';
+                        }
+                }
+        };
+
         my @lines = split(/^/m, $s);
         $s = '';
         if (defined($fn) || $depth > 0 || $err) {
@@ -568,19 +590,26 @@ sub add_text_body {
                         # show the previously buffered quote inline
                         flush_quote(\$s, $l, \@quot) if @quot;
 
-                        # regular line, OK
-                        $l->linkify_1($cur);
-                        $s .= $l->linkify_2(ascii_html($cur));
+                        if ($diff) {
+                                push @$diff, $cur;
+                        } else {
+                                # regular line, OK
+                                $l->linkify_1($cur);
+                                $s .= $l->linkify_2(ascii_html($cur));
+                        }
                 } else {
+                        flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                         push @quot, $cur;
                 }
         }
 
         if (@quot) { # ugh, top posted
                 flush_quote(\$s, $l, \@quot);
+                flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                 obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
                 $s;
         } else {
+                flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                 obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
                 if ($s =~ /\n\z/s) { # common, last line ends with a newline
                         $s;
@@ -867,11 +896,6 @@ sub missing_thread {
         PublicInbox::ExtMsg::ext_msg($ctx);
 }
 
-sub _msg_date {
-        my ($hdr) = @_;
-        fmt_ts(msg_datestamp($hdr));
-}
-
 sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
 sub dedupe_subject {
@@ -1075,7 +1099,7 @@ sub dump_topics {
 
                 my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>);
                 my $atom = qq(<a\nhref="$href/t.atom">Atom</a>);
-                my $s = "<a\nhref=\"$href/T/$anchor\"><b>$top</b></a>\n" .
+                my $s = "<a\nhref=\"$href/T/$anchor\">$top</a>\n" .
                         " $ds UTC $n - $mbox / $atom\n";
                 for (my $i = 0; $i < scalar(@ex); $i += 2) {
                         my $level = $ex[$i];
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
new file mode 100644
index 00000000..a8045687
--- /dev/null
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -0,0 +1,161 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# used by PublicInbox::View
+# This adds CSS spans for diff highlighting.
+# It also generates links for ViewVCS + SolverGit to show
+# (or reconstruct) blobs.
+
+package PublicInbox::ViewDiff;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(flush_diff);
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Git qw(git_unquote);
+
+sub DSTATE_INIT () { 0 }
+sub DSTATE_STAT () { 1 } # TODO
+sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ /
+sub DSTATE_CTX () { 3 } # /^ /
+sub DSTATE_ADD () { 4 } # /^\+/
+sub DSTATE_DEL () { 5 } # /^\-/
+my @state2class = (
+        '', # init
+        '', # stat
+        'head',
+        '', # ctx
+        'add',
+        'del'
+);
+
+sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
+
+my $OID_NULL = '0{7,40}';
+my $OID_BLOB = '[a-f0-9]{7,40}';
+my $PATH_A = '"?a/.+|/dev/null';
+my $PATH_B = '"?b/.+|/dev/null';
+
+sub to_html ($$) {
+        $_[0]->linkify_1($_[1]);
+        $_[0]->linkify_2(ascii_html($_[1]));
+}
+
+# link to line numbers in blobs
+sub diff_hunk ($$$$) {
+        my ($dctx, $spfx, $ca, $cb) = @_;
+        my $oid_a = $dctx->{oid_a};
+        my $oid_b = $dctx->{oid_b};
+
+        (defined($spfx) && defined($oid_a) && defined($oid_b)) or
+                return "@@ $ca $cb @@";
+
+        my ($n) = ($ca =~ /^-(\d+)/);
+        $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+        my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
+
+        ($n) = ($cb =~ /^\+(\d+)/);
+        $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+        $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
+}
+
+sub oid ($$$) {
+        my ($dctx, $spfx, $oid) = @_;
+        defined($spfx) ? qq(<a\nhref="$spfx$oid/s/$dctx->{Q}">$oid</a>) : $oid;
+}
+
+sub to_state ($$$) {
+        my ($dst, $state, $new_state) = @_;
+        $$dst .= '</span>' if $state2class[$state];
+        $_[1] = $new_state;
+        my $class = $state2class[$new_state] or return;
+        $$dst .= qq(<span\nclass="$class">);
+}
+
+sub flush_diff ($$$$) {
+        my ($dst, $spfx, $linkify, $diff) = @_;
+        my $state = DSTATE_INIT;
+        my $dctx = { Q => '' }; # {}, keys: oid_a, oid_b, path_a, path_b
+
+        foreach my $s (@$diff) {
+                if ($s =~ /^ /) {
+                        if ($state2class[$state]) {
+                                to_state($dst, $state, DSTATE_CTX);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^-- $/) { # email signature begins
+                        $state == DSTATE_INIT or
+                                to_state($dst, $state, DSTATE_INIT);
+                        $$dst .= $s;
+                } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
+                        if ($state != DSTATE_HEAD) {
+                                my ($pa, $pb) = ($1, $2);
+                                to_state($dst, $state, DSTATE_HEAD);
+                                $pa = (split('/', git_unquote($pa), 2))[1];
+                                $pb = (split('/', git_unquote($pb), 2))[1];
+                                $dctx = {
+                                        Q => "?b=".uri_escape_utf8($pb, UNSAFE),
+                                };
+                                if ($pa ne $pb) {
+                                        $dctx->{Q} .=
+                                             "&a=".uri_escape_utf8($pa, UNSAFE);
+                                }
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
+                        $$dst .= $1 . oid($dctx, $spfx, $2);
+                        $dctx = { Q => '' };
+                        $$dst .= to_html($linkify, $s) ;
+                } elsif ($s =~ s/^index ($OID_BLOB)(\.\.$OID_NULL)\b//o) {
+                        $$dst .= 'index ' . oid($dctx, $spfx, $1) . $2;
+                        $dctx = { Q => '' };
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
+                        $dctx->{oid_a} = $1;
+                        $dctx->{oid_b} = $2;
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
+                        $$dst .= '</span>' if $state2class[$state];
+                        $$dst .= qq(<span\nclass="hunk">);
+                        $$dst .= diff_hunk($dctx, $spfx, $1, $2);
+                        $$dst .= '</span>';
+                        $state = DSTATE_CTX;
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ m!^--- $PATH_A! ||
+                         $s =~ m!^\+{3} $PATH_B!)  {
+                        # color only (no oid link)
+                        $state == DSTATE_INIT and
+                                to_state($dst, $state, DSTATE_HEAD);
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^\+/) {
+                        if ($state != DSTATE_ADD && $state != DSTATE_INIT) {
+                                to_state($dst, $state, DSTATE_ADD);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^-/) {
+                        if ($state != DSTATE_DEL && $state != DSTATE_INIT) {
+                                to_state($dst, $state, DSTATE_DEL);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                # ignore the following lines in headers:
+                } elsif ($s =~ /^(?:dis)similarity index/ ||
+                         $s =~ /^(?:old|new) mode/ ||
+                         $s =~ /^(?:deleted|new) file mode/ ||
+                         $s =~ /^(?:copy|rename) (?:from|to) / ||
+                         $s =~ /^(?:dis)?similarity index /) {
+                        $$dst .= to_html($linkify, $s);
+                } else {
+                        $state == DSTATE_INIT or
+                                to_state($dst, $state, DSTATE_INIT);
+                        $$dst .= to_html($linkify, $s);
+                }
+        }
+        @$diff = ();
+        $$dst .= '</span>' if $state2class[$state];
+        undef;
+}
+
+1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
new file mode 100644
index 00000000..eecc51e6
--- /dev/null
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -0,0 +1,174 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# show any VCS object, similar to "git show"
+# FIXME: we only show blobs for now
+#
+# This can use a "solver" to reconstruct blobs based on git
+# patches (with abbreviated OIDs in the header).  However, the
+# abbreviated OIDs must match exactly what's in the original
+# email (unless a normal code repo already has the blob).
+#
+# In other words, we can only reliably reconstruct blobs based
+# on links generated by ViewDiff (and only if the emailed
+# patches apply 100% cleanly to published blobs).
+
+package PublicInbox::ViewVCS;
+use strict;
+use warnings;
+use Encode qw(find_encoding);
+use PublicInbox::SolverGit;
+use PublicInbox::WwwStream;
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html to_filename src_escape);
+my $hl = eval {
+        require PublicInbox::HlMod;
+        PublicInbox::HlMod->new;
+};
+
+# we need to trigger highlight::CodeGenerator::deleteInstance
+# in HlMod::DESTROY before the rest of Perl shuts down to avoid
+# a segfault at shutdown
+END { $hl = undef };
+
+my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
+my $max_size = 1024 * 1024; # TODO: configurable
+my $enc_utf8 = find_encoding('UTF-8');
+my $BIN_DETECT = 8000; # same as git
+
+sub html_page ($$$) {
+        my ($ctx, $code, $strref) = @_;
+        my $wcb = delete $ctx->{-wcb};
+        $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
+        my $res = PublicInbox::WwwStream->response($ctx, $code, sub {
+                my ($nr, undef) =  @_;
+                $nr == 1 ? $$strref : undef;
+        });
+        $wcb ? $wcb->($res) : $res;
+}
+
+sub stream_large_blob ($$$$) {
+        my ($ctx, $res, $logref, $fn) = @_;
+        my ($git, $oid, $type, $size, $di) = @$res;
+        my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid];
+        my $qsp = PublicInbox::Qspawn->new($cmd);
+        my @cl = ('Content-Length', $size);
+        my $env = $ctx->{env};
+        $env->{'qspawn.wcb'} = delete $ctx->{-wcb};
+        $qsp->psgi_return($env, undef, sub {
+                my ($r, $bref) = @_;
+                if (!defined $r) { # error
+                        html_page($ctx, 500, $logref);
+                } elsif (index($$bref, "\0") >= 0) {
+                        my $ct = 'application/octet-stream';
+                        [200, ['Content-Type', $ct, @cl ] ];
+                } else {
+                        my $n = bytes::length($$bref);
+                        if ($n >= $BIN_DETECT || $n == $size) {
+                                my $ct = 'text/plain; charset=UTF-8';
+                                return [200, ['Content-Type', $ct, @cl] ];
+                        }
+                        undef; # bref keeps growing
+                }
+        });
+}
+
+sub solve_result {
+        my ($ctx, $res, $log, $hints, $fn) = @_;
+
+        unless (seek($log, 0, 0)) {
+                $ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n");
+                return html_page($ctx, 500, \'seek error');
+        }
+        $log = do { local $/; <$log> };
+
+        my $ref = ref($res);
+        my $l = PublicInbox::Linkify->new;
+        $l->linkify_1($log);
+        $log = '<pre>debug log:</pre><hr /><pre>' .
+                $l->linkify_2(ascii_html($log)) . '</pre>';
+
+        $res or return html_page($ctx, 404, \$log);
+        $ref eq 'ARRAY' or return html_page($ctx, 500, \$log);
+
+        my ($git, $oid, $type, $size, $di) = @$res;
+        my $path = to_filename($di->{path_b} || $hints->{path_b} || 'blob');
+        my $raw_link = "(<a\nhref=$path>raw</a>)";
+        if ($size > $max_size) {
+                return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn;
+                $log = "<pre><b>Too big to show, download available</b>\n" .
+                        "$oid $type $size bytes $raw_link</pre>" . $log;
+                return html_page($ctx, 500, \$log);
+        }
+
+        my $blob = $git->cat_file($oid);
+        if (!$blob) { # WTF?
+                my $e = "Failed to retrieve generated blob ($oid)";
+                $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n");
+                $log = "<pre><b>$e</b></pre>" . $log;
+                return html_page($ctx, 500, \$log);
+        }
+
+        my $binary = index($$blob, "\0") >= 0;
+        if ($fn) {
+                my $h = [ 'Content-Length', $size, 'Content-Type' ];
+                push(@$h, ($binary ? 'application/octet-stream' : 'text/plain'));
+                return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
+        }
+
+        if ($binary) {
+                $log = "<pre>$oid $type $size bytes (binary)" .
+                        " $raw_link</pre>" . $log;
+                return html_page($ctx, 200, \$log);
+        }
+
+        $$blob = $enc_utf8->decode($$blob);
+        my $nl = ($$blob =~ tr/\n/\n/);
+        my $pad = length($nl);
+
+        $l->linkify_1($$blob);
+        my $ok = $hl->do_hl($blob, $path) if $hl;
+        if ($ok) {
+                $$ok = $enc_utf8->decode($$ok);
+                src_escape($$ok);
+                $blob = $ok;
+        } else {
+                $$blob = ascii_html($$blob);
+        }
+
+        # using some of the same CSS class names and ids as cgit
+        $log = "<pre>$oid $type $size bytes $raw_link</pre>" .
+                "<hr /><table\nclass=blob>".
+                "<tr><td\nclass=linenumbers><pre>" . join('', map {
+                        sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
+                } (1..$nl)) . '</pre></td>' .
+                '<td><pre> </pre></td>'. # pad for non-CSS users
+                "<td\nclass=lines><pre\nstyle='white-space:pre'><code>" .
+                $l->linkify_2($$blob) .
+                '</code></pre></td></tr></table>' . $log;
+
+        html_page($ctx, 200, \$log);
+}
+
+sub show ($$;$) {
+        my ($ctx, $oid_b, $fn) = @_;
+        my $qp = $ctx->{qp};
+        my $hints = {};
+        while (my ($from, $to) = each %QP_MAP) {
+                defined(my $v = $qp->{$from}) or next;
+                $hints->{$to} = $v;
+        }
+
+        open my $log, '+>', undef or die "open: $!";
+        my $solver = PublicInbox::SolverGit->new($ctx->{-inbox}, sub {
+                solve_result($ctx, $_[0], $log, $hints, $fn);
+        });
+
+        # PSGI server will call this and give us a callback
+        sub {
+                $ctx->{-wcb} = $_[0]; # HTTP write callback
+                $solver->solve($ctx->{env}, $log, $oid_b, $hints);
+        };
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index c1c39265..406802a9 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -6,6 +6,7 @@
 # We focus on the lowest common denominators here:
 # - targeted at text-only console browsers (w3m, links, etc..)
 # - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
+#   and diff/syntax-highlighting (optional)
 # - No JavaScript, graphics or icons allowed.
 # - Must not rely on static content
 # - UTF-8 is only for user-content, 7-bit US-ASCII for us
@@ -19,10 +20,14 @@ use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw(mid_escape);
 require PublicInbox::Git;
 use PublicInbox::GitHTTPBackend;
-our $INBOX_RE = qr!\A/([\w\.\-]+)!;
+use PublicInbox::UserContent;
+
+# TODO: consider a routing tree now that we have more endpoints:
+our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $OID_RE = qr![a-f0-9]{7,40}!;
 
 sub new {
         my ($class, $pi_config) = @_;
@@ -115,7 +120,14 @@ sub call {
                 r301($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
                 get_text($ctx, $1, $2);
-
+        } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+                get_css($ctx, $1, $2);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
+                get_vcs_object($ctx, $1, $2);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
+                get_vcs_object($ctx, $1, $2, $3);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+                r301($ctx, $1, $2, 's/');
         # convenience redirects order matters
         } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
                 r301($ctx, $1, $2);
@@ -127,6 +139,7 @@ sub call {
 
 # for CoW-friendliness, MOOOOO!
 sub preload {
+        my ($self) = @_;
         require PublicInbox::Feed;
         require PublicInbox::View;
         require PublicInbox::SearchThread;
@@ -139,6 +152,9 @@ sub preload {
                         PublicInbox::NewsWWW)) {
                 eval "require $_;";
         }
+        if (ref($self)) {
+                $self->stylesheets_prepare($_) for ('', '../', '../../');
+        }
 }
 
 # private functions below
@@ -257,6 +273,18 @@ sub get_text {
         PublicInbox::WwwText::get_text($ctx, $key);
 }
 
+# show git objects (blobs and commits)
+# /$INBOX/_/$OBJECT_ID/show
+# /$INBOX/_/${OBJECT_ID}_${FILENAME}
+# KEY may contain slashes
+sub get_vcs_object ($$$;$) {
+        my ($ctx, $inbox, $oid, $filename) = @_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        require PublicInbox::ViewVCS;
+        PublicInbox::ViewVCS::show($ctx, $oid, $filename);
+}
+
 sub ctx_get {
         my ($ctx, $key) = @_;
         my $val = $ctx->{$key};
@@ -444,4 +472,128 @@ sub get_attach {
         PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn);
 }
 
+# User-generated content (UGC) may have excessively long lines
+# and screw up rendering on some browsers, so we use pre-wrap.
+#
+# We also force everything to the same scaled font-size because GUI
+# browsers (tested both Firefox and surf (webkit)) uses a larger font
+# for the Search <form> element than the rest of the page.  Font size
+# uniformity is important to people who rely on gigantic fonts.
+# Finally, we use monospace to ensure the Search field and button
+# has the same size and spacing as everything else which is
+# <pre>-formatted anyways.
+our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
+
+sub stylesheets_prepare ($$) {
+        my ($self, $upfx) = @_;
+        my $mini = eval {
+                require CSS::Minifier;
+                sub { CSS::Minifier::minify(input => $_[0]) };
+        } || eval {
+                require CSS::Minifier::XS;
+                sub { CSS::Minifier::XS::minify($_[0]) };
+        } || sub { $_[0] };
+
+        my $css_map = {};
+        my $stylesheets = $self->{pi_config}->{css} || [];
+        my $links = [];
+        my $inline_ok = 1;
+
+        foreach my $s (@$stylesheets) {
+                my $attr = {};
+                local $_ = $s;
+                foreach my $k (qw(media title href)) {
+                        if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
+                                $attr->{$k} = $1;
+                        }
+                }
+
+                if (defined $attr->{href}) {
+                        $inline_ok = 0;
+                } else {
+                        open(my $fh, '<', $_) or do {
+                                warn "failed to open $_: $!\n";
+                                next;
+                        };
+                        my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+                        my $ctime = 0;
+                        my $local = do { local $/; <$fh> };
+                        if ($local =~ /\S/) {
+                                $ctime = sprintf('%x',(stat($fh))[10]);
+                                $local = $mini->($local);
+                        }
+                        $css_map->{$key} = $local;
+                        $attr->{href} = "$upfx$key.css?$ctime";
+                        if (defined($attr->{title})) {
+                                $inline_ok = 0;
+                        } elsif (($attr->{media}||'screen') eq 'screen') {
+                                $attr->{-inline} = $local;
+                        }
+                }
+                push @$links, $attr;
+        }
+
+        my $buf = "<style>$STYLE";
+        if ($inline_ok) {
+                my @ext; # for media=print and whatnot
+                foreach my $attr (@$links) {
+                        if (defined(my $str = delete $attr->{-inline})) {
+                                $buf .= $str;
+                        } else {
+                                push @ext, $attr;
+                        }
+                }
+                $links = \@ext;
+        }
+        $buf .= '</style>';
+
+        if (@$links) {
+                foreach my $attr (@$links) {
+                        delete $attr->{-inline};
+                        $buf .= "<link\ntype=text/css\nrel=stylesheet";
+                        while (my ($k, $v) = each %$attr) {
+                                $v = qq{"$v"} if $v =~ /[\s=]/;
+                                $buf .= qq{\n$k=$v};
+                        }
+                        $buf .= ' />';
+                }
+                $self->{"-style-$upfx"} = $buf;
+        } else {
+                $self->{-style_inline} = $buf;
+        }
+        $self->{-css_map} = $css_map;
+}
+
+# returns an HTML fragment with <style> or <link> tags in them
+# Called by WwwStream by nearly every HTML page
+sub style {
+        my ($self, $upfx) = @_;
+        $self->{-style_inline} || $self->{"-style-$upfx"} || do {
+                stylesheets_prepare($self, $upfx);
+                $self->{-style_inline} || $self->{"-style-$upfx"}
+        };
+}
+
+# /$INBOX/$KEY.css endpoint
+# CSS is configured globally for all inboxes, but we access them on
+# a per-inbox basis.  This allows administrators to setup per-inbox
+# static routes to intercept the request before it hits PSGI
+sub get_css ($$$) {
+        my ($ctx, $inbox, $key) = @_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        my $self = $ctx->{www};
+        my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+        my $css = $css_map->{$key};
+        if (!defined($css) && $key eq 'userContent') {
+                my $env = $ctx->{env};
+                $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+        }
+        defined $css or return r404();
+        my $h = [ 'Content-Length', bytes::length($css),
+                'Content-Type', 'text/css' ];
+        PublicInbox::GitHTTPBackend::cache_one_year($h);
+        [ 200, $h, [ $css ] ];
+}
+
 1;
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
new file mode 100644
index 00000000..09fc48ab
--- /dev/null
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -0,0 +1,74 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Standalone PSGI app to provide syntax highlighting as-a-service
+# via "highlight" Perl module ("libhighlight-perl" in Debian).
+#
+# This allows exposing highlight as a persistent HTTP service for
+# other scripts via HTTP PUT requests.  PATH_INFO will be used
+# as a hint for detecting the language for highlight.
+#
+# The following example using curl(1) will do the right thing
+# regarding the file extension:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/
+#
+# You can also force a file extension by giving a path
+# (in this case, "c") via:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/x.c
+
+package PublicInbox::WwwHighlight;
+use strict;
+use warnings;
+use HTTP::Status qw(status_message);
+use parent qw(PublicInbox::HlMod);
+
+# TODO: support highlight(1) for distros which don't package the
+# SWIG extension.  Also, there may be admins who don't want to
+# have ugly SWIG-generated code in a long-lived Perl process.
+
+sub r ($) {
+        my ($code) = @_;
+        my $msg = status_message($code);
+        my $len = length($msg);
+        [ $code, [qw(Content-Type text/plain Content-Length), $len], [$msg] ]
+}
+
+# another slurp API hogging up all my memory :<
+# This is capped by whatever the PSGI server allows,
+# $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} for PublicInbox::HTTP (10 MB)
+sub read_in_full ($) {
+        my ($env) = @_;
+
+        my $in = $env->{'psgi.input'};
+        my $off = 0;
+        my $buf = '';
+        my $len = $env->{CONTENT_LENGTH} || 8192;
+        while (1) {
+                my $r = $in->read($buf, $len, $off);
+                last unless defined $r;
+                return \$buf if $r == 0;
+                $off += $r;
+        }
+        $env->{'psgi.errors'}->print("input read error: $!\n");
+        undef;
+}
+
+# entry point for PSGI
+sub call {
+        my ($self, $env) = @_;
+        my $req_method = $env->{REQUEST_METHOD};
+
+        return r(405) if $req_method ne 'PUT';
+
+        my $bref = read_in_full($env) or return r(500);
+        $bref = $self->do_hl($bref, $env->{PATH_INFO});
+
+        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+        push @$h, 'Content-Length', bytes::length($$bref);
+
+        [ 200, $h, [ $$bref ] ]
+}
+
+1;
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index e548f00f..8ae35c73 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -38,10 +38,12 @@ sub _html_top ($) {
         my $title = $ctx->{-title_html} || $desc;
         my $upfx = $ctx->{-upfx} || '';
         my $help = $upfx.'_/text/help';
+        my $color = $upfx.'_/text/color';
         my $atom = $ctx->{-atom} || $upfx.'new.atom';
         my $tip = $ctx->{-html_tip} || '';
         my $top = "<b>$desc</b>";
         my $links = "<a\nhref=\"$help\">help</a> / ".
+                        "<a\nhref=\"$color\">color</a> / ".
                         "<a\nhref=\"$atom\">Atom feed</a>";
         if ($obj->search) {
                 my $q_val = $ctx->{-q_value_html};
@@ -65,7 +67,7 @@ sub _html_top ($) {
         "<html><head><title>$title</title>" .
                 "<link\nrel=alternate\ntitle=\"Atom feed\"\n".
                 "href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
-                PublicInbox::Hval::STYLE .
+                $ctx->{www}->style($upfx) .
                 "</head><body>". $top . $tip;
 }
 
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index b5874cf6..d3413ad7 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -88,9 +88,44 @@ sub _srch_prefix ($$) {
         1;
 }
 
+sub _colors_help ($$) {
+        my ($ctx, $txt) = @_;
+        my $ibx = $ctx->{-inbox};
+        my $base_url = $ibx->base_url($ctx->{env});
+        $$txt .= "color customization for $base_url\n";
+        $$txt .= <<EOF;
+
+public-inbox provides a stable set of CSS classes for users to
+customize colors for highlighting diffs and code.
+
+Users of browsers such as dillo, Firefox, or some browser
+extensions may start by downloading the following sample CSS file
+to control the colors they see:
+
+        ${base_url}userContent.css
+
+CSS classes
+-----------
+
+           span.q - quoted text in email messages
+
+For diff highlighting, we try to match class names with those
+used by cgit: https://git.zx2c4.com/cgit/
+
+         span.add - diff post-image lines
+
+         span.del - diff pre-image lines
+
+        span.head - diff header (metainformation)
+
+        span.hunk - diff hunk-header
+
+EOF
+}
 
 sub _default_text ($$$) {
         my ($ctx, $key, $txt) = @_;
+        return _colors_help($ctx, $txt) if $key eq 'color';
         return if $key ne 'help'; # TODO more keys?
 
         my $ibx = $ctx->{-inbox};