about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-01-30 07:40:35 +0000
committerEric Wong <e@80x24.org>2019-01-30 07:40:35 +0000
commitcf439203c31836e4c48f632f902aa58db6924972 (patch)
treea46bf739c5318db857783a1b2773ed51ad2f0fde /lib
parent41bb8a962d4d2d8f6e7ec9988908aaf7d55f09c7 (diff)
parent11f984fa36647b0ab107596bcec2dd9b963f1379 (diff)
downloadpublic-inbox-cf439203c31836e4c48f632f902aa58db6924972.tar.gz
* origin/viewvcs: (66 commits)
  solvergit: deal with alternative diff prefixes
  solvergit: extract mode from diff headers properly
  solvergit: avoid "Wide character" warnings
  solvergit: do not show full path names to "git apply"
  css/216dark: add comments and tweak highlight colors
  viewvcs: avoid segfault with highlight.pm at shutdown
  solvergit: do not solve blobs twice
  t/check-www-inbox: disable history
  t/check-www-inbox: don't follow mboxes
  t/check-www-inbox: replace IPC::Run with PublicInbox::Spawn
  hval: add src_escape for highlight post-processing
  viewvcs: wire up syntax-highlighting for blobs
  hlmod: disable enclosing <pre> tag
  t/hl_mod: extra check to ensure we escape HTML
  wwwhighlight: read_in_full returns undef on errors
  solver: crank up max patches to 9999
  viewvcs: do not show final error message twice
  qspawn: decode $? for user-friendliness
  solver: reduce "git apply" invocations
  solver: hold patches in temporary directory
  ...
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Config.pm96
-rw-r--r--lib/PublicInbox/GetlineBody.pm16
-rw-r--r--lib/PublicInbox/Git.pm87
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm64
-rw-r--r--lib/PublicInbox/HTTPD/Async.pm27
-rw-r--r--lib/PublicInbox/HlMod.pm125
-rw-r--r--lib/PublicInbox/Hval.pm46
-rw-r--r--lib/PublicInbox/Qspawn.pm143
-rw-r--r--lib/PublicInbox/SolverGit.pm530
-rw-r--r--lib/PublicInbox/UserContent.pm98
-rw-r--r--lib/PublicInbox/View.pm55
-rw-r--r--lib/PublicInbox/ViewDiff.pm161
-rw-r--r--lib/PublicInbox/ViewVCS.pm146
-rw-r--r--lib/PublicInbox/WWW.pm152
-rw-r--r--lib/PublicInbox/WwwHighlight.pm74
-rw-r--r--lib/PublicInbox/WwwStream.pm4
-rw-r--r--lib/PublicInbox/WwwText.pm35
17 files changed, 1733 insertions, 126 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index bea26176..ccfc114f 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -2,12 +2,19 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Used throughout the project for reading configuration
+#
+# Note: I hate camelCase; but git-config(1) uses it, but it's better
+# than alllowercasewithoutunderscores, so use lc('configKey') where
+# applicable for readability
+
 package PublicInbox::Config;
 use strict;
 use warnings;
 require PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
 
+sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
+
 # returns key-value pairs of config directives in a hash
 # if keys may be multi-value, the value is an array ref containing all values
 sub new {
@@ -22,6 +29,7 @@ sub new {
         $self->{-by_newsgroup} ||= {};
         $self->{-no_obfuscate} ||= {};
         $self->{-limiters} ||= {};
+        $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object
 
         if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
                 $no = [ $no ] if ref($no) ne 'ARRAY';
@@ -41,6 +49,9 @@ sub new {
                 my $nod = join('|', @domains);
                 $self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
         }
+        if (my $css = delete $self->{'publicinbox.css'}) {
+                $self->{css} = _array($css);
+        }
 
         $self;
 }
@@ -79,13 +90,22 @@ sub lookup_name ($$) {
 
 sub each_inbox {
         my ($self, $cb) = @_;
-        my %seen;
-        foreach my $k (keys %$self) {
-                $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
-                next if $seen{$1};
-                $seen{$1} = 1;
-                my $ibx = lookup_name($self, $1) or next;
-                $cb->($ibx);
+        if (my $section_order = $self->{-section_order}) {
+                foreach my $section (@$section_order) {
+                        next if $section !~ m!\Apublicinbox\.([^/]+)\z!;
+                        $self->{"publicinbox.$1.mainrepo"} or next;
+                        my $ibx = lookup_name($self, $1) or next;
+                        $cb->($ibx);
+                }
+        } else {
+                my %seen;
+                foreach my $k (keys %$self) {
+                        $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
+                        next if $seen{$1};
+                        $seen{$1} = 1;
+                        my $ibx = lookup_name($self, $1) or next;
+                        $cb->($ibx);
+                }
         }
 }
 
@@ -126,7 +146,7 @@ sub default_file {
 
 sub git_config_dump {
         my ($file) = @_;
-        my ($in, $out);
+        my (%section_seen, @section_order);
         my @cmd = (qw/git config/, "--file=$file", '-l');
         my $cmd = join(' ', @cmd);
         my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
@@ -135,8 +155,14 @@ sub git_config_dump {
         while (defined(my $line = <$fh>)) {
                 chomp $line;
                 my ($k, $v) = split(/=/, $line, 2);
-                my $cur = $rv{$k};
 
+                my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
+                unless (defined $section_seen{$section}) {
+                        $section_seen{$section} = 1;
+                        push @section_order, $section;
+                }
+
+                my $cur = $rv{$k};
                 if (defined $cur) {
                         if (ref($cur) eq "ARRAY") {
                                 push @$cur, $v;
@@ -148,6 +174,7 @@ sub git_config_dump {
                 }
         }
         close $fh or die "failed to close ($cmd) pipe: $?";
+        $rv{-section_order} = \@section_order;
 
         \%rv;
 }
@@ -169,6 +196,41 @@ sub valid_inbox_name ($) {
         1;
 }
 
+# parse a code repo
+# Only git is supported at the moment, but SVN and Hg are possibilities
+sub _fill_code_repo {
+        my ($self, $nick) = @_;
+        my $pfx = "coderepo.$nick";
+
+        my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR"
+        unless (defined $dir) {
+                warn "$pfx.repodir unset";
+                return;
+        }
+
+        my $git = PublicInbox::Git->new($dir);
+        foreach my $t (qw(blob commit tree tag)) {
+                $git->{$t.'_url_format'} =
+                                _array($self->{lc("$pfx.${t}UrlFormat")});
+        }
+
+        if (my $cgits = $self->{lc("$pfx.cgitUrl")}) {
+                $git->{cgit_url} = $cgits = _array($cgits);
+
+                # cgit supports "/blob/?id=%s", but it's only a plain-text
+                # display and requires an unabbreviated id=
+                foreach my $t (qw(blob commit tag)) {
+                        $git->{$t.'_url_format'} ||= map {
+                                "$_/$t/?id=%s"
+                        } @$cgits;
+                }
+        }
+        # TODO: support gitweb and other repository viewers?
+        # TODO: parse cgitrc
+
+        $git;
+}
+
 sub _fill {
         my ($self, $pfx) = @_;
         my $rv = {};
@@ -192,9 +254,9 @@ sub _fill {
         }
         # TODO: more arrays, we should support multi-value for
         # more things to encourage decentralization
-        foreach my $k (qw(address altid nntpmirror)) {
+        foreach my $k (qw(address altid nntpmirror coderepo)) {
                 if (defined(my $v = $self->{"$pfx.$k"})) {
-                        $rv->{$k} = ref($v) eq 'ARRAY' ? $v : [ $v ];
+                        $rv->{$k} = _array($v);
                 }
         }
 
@@ -224,6 +286,18 @@ sub _fill {
                 $rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
                 each_inbox($self, sub {}); # noop to populate -no_obfuscate
         }
+
+        if (my $ibx_code_repos = $rv->{coderepo}) {
+                my $code_repos = $self->{-code_repos};
+                my $repo_objs = $rv->{-repo_objs} = [];
+                foreach my $nick (@$ibx_code_repos) {
+                        valid_inbox_name($nick) or next;
+                        my $repo = $code_repos->{$nick} ||=
+                                                _fill_code_repo($self, $nick);
+                        push @$repo_objs, $repo if $repo;
+                }
+        }
+
         $rv
 }
 
diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm
index ea07f3d6..0a922fd2 100644
--- a/lib/PublicInbox/GetlineBody.pm
+++ b/lib/PublicInbox/GetlineBody.pm
@@ -13,8 +13,13 @@ use strict;
 use warnings;
 
 sub new {
-        my ($class, $rpipe, $end, $buf) = @_;
-        bless { rpipe => $rpipe, end => $end, buf => $buf }, $class;
+        my ($class, $rpipe, $end, $buf, $filter) = @_;
+        bless {
+                rpipe => $rpipe,
+                end => $end,
+                buf => $buf,
+                filter => $filter || 0,
+        }, $class;
 }
 
 # close should always be called after getline returns undef,
@@ -24,8 +29,13 @@ sub DESTROY { $_[0]->close }
 
 sub getline {
         my ($self) = @_;
+        my $filter = $self->{filter};
+        return if $filter == -1; # last call was EOF
+
         my $buf = delete $self->{buf}; # initial buffer
-        defined $buf ? $buf : $self->{rpipe}->getline;
+        $buf = $self->{rpipe}->getline unless defined $buf;
+        $self->{filter} = -1 unless defined $buf; # set EOF for next call
+        $filter ? $filter->($buf) : $buf;
 }
 
 sub close {
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 90b9214a..3ad08112 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -13,7 +13,7 @@ use POSIX qw(dup2);
 require IO::Handle;
 use PublicInbox::Spawn qw(spawn popen_rd);
 use base qw(Exporter);
-our @EXPORT_OK = qw(git_unquote);
+our @EXPORT_OK = qw(git_unquote git_quote);
 
 my %GIT_ESC = (
         a => "\a",
@@ -26,6 +26,8 @@ my %GIT_ESC = (
         '"' => '"',
         '\\' => '\\',
 );
+my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
+
 
 # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
 sub git_unquote ($) {
@@ -36,10 +38,19 @@ sub git_unquote ($) {
         $_[0];
 }
 
+sub git_quote ($) {
+        if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
+                      '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+                return qq{"$_[0]"};
+        }
+        $_[0];
+}
+
 sub new {
         my ($class, $git_dir) = @_;
         my @st;
         $st[7] = $st[10] = 0;
+        # may contain {-tmp} field for File::Temp::Dir
         bless { git_dir => $git_dir, st => \@st }, $class
 }
 
@@ -53,9 +64,25 @@ sub alternates_changed {
         $self->{st} = \@st;
 }
 
+sub last_check_err {
+        my ($self) = @_;
+        my $fh = $self->{err_c} or return;
+        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+        defined(sysread($fh, my $buf, -s $fh)) or
+                        fail($self, "sysread failed: $!");
+        $buf;
+}
+
 sub _bidi_pipe {
-        my ($self, $batch, $in, $out, $pid) = @_;
-        return if $self->{$pid};
+        my ($self, $batch, $in, $out, $pid, $err) = @_;
+        if ($self->{$pid}) {
+                if (defined $err) { # "err_c"
+                        my $fh = $self->{$err};
+                        sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+                        truncate($fh, 0) or fail($self, "truncate failed: $!");
+                }
+                return;
+        }
         my ($in_r, $in_w, $out_r, $out_w);
 
         pipe($in_r, $in_w) or fail($self, "pipe failed: $!");
@@ -65,8 +92,14 @@ sub _bidi_pipe {
                 fcntl($in_w, 1031, 4096) if $batch eq '--batch-check';
         }
 
-        my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
+        my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
+                        qw(-c core.abbrev=40 cat-file), $batch);
         my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+        if ($err) {
+                open(my $fh, '+>', undef) or fail($self, "open.err failed: $!");
+                $self->{$err} = $fh;
+                $redir->{2} = fileno($fh);
+        }
         my $p = spawn(\@cmd, undef, $redir);
         defined $p or fail($self, "spawn failed: $!");
         $self->{$pid} = $p;
@@ -141,12 +174,25 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
 
 sub check {
         my ($self, $obj) = @_;
-        $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c));
+        _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
         $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
         local $/ = "\n";
         chomp(my $line = $self->{in_c}->getline);
         my ($hex, $type, $size) = split(' ', $line);
-        return if $type eq 'missing';
+
+        # Future versions of git.git may show 'ambiguous', but for now,
+        # we must handle 'dangling' below (and maybe some other oddball
+        # stuff):
+        # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
+        return if $type eq 'missing' || $type eq 'ambiguous';
+
+        if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
+                $size = $type + length("\n");
+                my $r = read($self->{in_c}, my $buf, $size);
+                defined($r) or fail($self, "read failed: $!");
+                return;
+        }
+
         ($hex, $type, $size);
 }
 
@@ -201,6 +247,35 @@ sub packed_bytes {
 
 sub DESTROY { cleanup(@_) }
 
+sub local_nick ($) {
+        my ($self) = @_;
+        my $ret = '???';
+        # don't show full FS path, basename should be OK:
+        if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
+                $ret = "/path/to/$1";
+        }
+        wantarray ? ($ret) : $ret;
+}
+
+# show the blob URL for cgit/gitweb/whatever
+sub src_blob_url {
+        my ($self, $oid) = @_;
+        # blob_url_format = "https://example.com/foo.git/blob/%s"
+        if (my $bfu = $self->{blob_url_format}) {
+                return map { sprintf($_, $oid) } @$bfu if wantarray;
+                return sprintf($bfu->[0], $oid);
+        }
+        local_nick($self);
+}
+
+sub pub_urls {
+        my ($self) = @_;
+        if (my $urls = $self->{cgit_url}) {
+                return @$urls;
+        }
+        local_nick($self);
+}
+
 1;
 __END__
 =pod
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 54ccfa05..ab43a009 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -200,69 +200,15 @@ sub serve_smart {
                 $env{$name} = $val if defined $val;
         }
         my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
-        my $git_dir = $git->{git_dir};
         $env{GIT_HTTP_EXPORT_ALL} = '1';
-        $env{PATH_TRANSLATED} = "$git_dir/$path";
+        $env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
         my $rdr = { 0 => fileno($in) };
         my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
-        my ($fh, $rpipe);
-        my $end = sub {
-                if (my $err = $qsp->finish) {
-                        err($env, "git http-backend ($git_dir): $err");
-                }
-                $fh->close if $fh; # async-only
-        };
-
-        # Danga::Socket users, we queue up the read_enable callback to
-        # fire after pending writes are complete:
-        my $buf = '';
-        my $rd_hdr = sub {
-                my $r = sysread($rpipe, $buf, 1024, length($buf));
-                return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
-                return r(500, 'http-backend error') unless $r;
-                $r = parse_cgi_headers(\$buf) or return; # incomplete headers
+        $qsp->psgi_return($env, $limiter, sub {
+                my ($r, $bref) = @_;
+                $r = parse_cgi_headers($bref) or return; # incomplete headers
                 $r->[0] == 403 ? serve_dumb($env, $git, $path) : $r;
-        };
-        my $res;
-        my $async = $env->{'pi-httpd.async'}; # XXX unstable API
-        my $cb = sub {
-                my $r = $rd_hdr->() or return;
-                $rd_hdr = undef;
-                if (scalar(@$r) == 3) { # error:
-                        if ($async) {
-                                $async->close; # calls rpipe->close
-                        } else {
-                                $rpipe->close;
-                                $end->();
-                        }
-                        $res->($r);
-                } elsif ($async) {
-                        $fh = $res->($r);
-                        $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
-                } else { # for synchronous PSGI servers
-                        require PublicInbox::GetlineBody;
-                        $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
-                                                                $buf);
-                        $res->($r);
-                }
-        };
-        sub {
-                ($res) = @_;
-
-                # hopefully this doesn't break any middlewares,
-                # holding the input here is a waste of FDs and memory
-                $env->{'psgi.input'} = undef;
-
-                $qsp->start($limiter, sub { # may run later, much later...
-                        ($rpipe) = @_;
-                        $in = undef;
-                        if ($async) {
-                                $async = $async->($rpipe, $cb, $end);
-                        } else { # generic PSGI
-                                $cb->() while $rd_hdr;
-                        }
-                });
-        };
+        });
 }
 
 sub input_to_file {
diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm
index 842aaf62..a647f10d 100644
--- a/lib/PublicInbox/HTTPD/Async.pm
+++ b/lib/PublicInbox/HTTPD/Async.pm
@@ -14,6 +14,15 @@ require PublicInbox::EvCleanup;
 
 sub new {
         my ($class, $io, $cb, $cleanup) = @_;
+
+        # no $io? call $cb at the top of the next event loop to
+        # avoid recursion:
+        unless (defined($io)) {
+                PublicInbox::EvCleanup::asap($cb) if $cb;
+                PublicInbox::EvCleanup::next_tick($cleanup) if $cleanup;
+                return;
+        }
+
         my $self = fields::new($class);
         IO::Handle::blocking($io, 0);
         $self->SUPER::new($io);
@@ -23,6 +32,7 @@ sub new {
         $self;
 }
 
+# fires after pending writes are complete:
 sub restart_read_cb ($) {
         my ($self) = @_;
         sub { $self->watch_read(1) }
@@ -35,14 +45,16 @@ sub main_cb ($$$) {
                 my $r = sysread($self->{sock}, $$bref, 8192);
                 if ($r) {
                         $fh->write($$bref);
-                        return if $http->{closed};
-                        if ($http->{write_buf_size}) {
-                                $self->watch_read(0);
-                                $http->write(restart_read_cb($self));
+                        unless ($http->{closed}) { # Danga::Socket sets this
+                                if ($http->{write_buf_size}) {
+                                        $self->watch_read(0);
+                                        $http->write(restart_read_cb($self));
+                                }
+                                # stay in watch_read, but let other clients
+                                # get some work done, too.
+                                return;
                         }
-                        # stay in watch_read, but let other clients
-                        # get some work done, too.
-                        return;
+                        # fall through to close below...
                 } elsif (!defined $r) {
                         return if $!{EAGAIN} || $!{EINTR};
                 }
@@ -66,7 +78,6 @@ sub async_pass {
 sub event_read { $_[0]->{cb}->(@_) }
 sub event_hup { $_[0]->{cb}->(@_) }
 sub event_err { $_[0]->{cb}->(@_) }
-sub sysread { shift->{sock}->sysread(@_) }
 
 sub close {
         my $self = shift;
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
new file mode 100644
index 00000000..237ffaca
--- /dev/null
+++ b/lib/PublicInbox/HlMod.pm
@@ -0,0 +1,125 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# I have no idea how stable or safe this is for handling untrusted
+# input, but it seems to have been around for a while, and the
+# highlight(1) executable is supported by gitweb and cgit.
+#
+# I'm also unsure about API stability, but highlight 3.x seems to
+# have been around a few years and ikiwiki (apparently the only
+# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
+# major changes to support it in recent years.
+#
+# Some code stolen from ikiwiki (GPL-2.0+)
+# wrapper for SWIG-generated highlight.pm bindings
+package PublicInbox::HlMod;
+use strict;
+use warnings;
+use highlight; # SWIG-generated stuff
+
+sub _parse_filetypes ($) {
+        my $ft_conf = $_[0]->searchFile('filetypes.conf') or
+                                die 'filetypes.conf not found by highlight';
+        open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
+        local $/;
+        my $cfg = <$fh>;
+        my %ext2lang;
+        my @shebang; # order matters
+
+        # Hrm... why isn't this exposed by the highlight API?
+        # highlight >= 3.2 format (bind-style) (from ikiwiki)
+        while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+                         Extensions\s*=\s*{([^}]+)}/sgx) {
+                my $lang = $1;
+                foreach my $bit (split(/,/, $2)) {
+                        $bit =~ s/.*"(.*)".*/$1/s;
+                        $ext2lang{$bit} = $lang;
+                }
+        }
+        # AFAIK, all the regexps used by in filetypes.conf distributed
+        # by highlight work as Perl REs
+        while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+                        Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
+                my ($lang, $re) = ($1, $2);
+                eval {
+                        my $perl_re = qr/$re/;
+                        push @shebang, [ $lang, $perl_re ];
+                };
+                if ($@) {
+                        warn "$lang shebang=[[$re]] did not work in Perl: $@";
+                }
+        }
+        (\%ext2lang, \@shebang);
+}
+
+sub new {
+        my ($class) = @_;
+        my $dir = highlight::DataDir->new;
+        $dir->initSearchDirectories('');
+        my ($ext2lang, $shebang) = _parse_filetypes($dir);
+        bless {
+                -dir => $dir,
+                -ext2lang => $ext2lang,
+                -shebang => $shebang,
+        }, $class;
+}
+
+sub _shebang2lang ($$) {
+        my ($self, $str) = @_;
+        my $shebang = $self->{-shebang};
+        foreach my $s (@$shebang) {
+                return $s->[0] if $$str =~ $s->[1];
+        }
+        undef;
+}
+
+sub _path2lang ($$) {
+        my ($self, $path) = @_;
+        my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
+        $ext = lc($ext);
+        $self->{-ext2lang}->{$ext} || $ext;
+}
+
+sub do_hl {
+        my ($self, $str, $path) = @_;
+        my $lang = _path2lang($self, $path) if defined $path;
+        my $dir = $self->{-dir};
+        my $langpath;
+        if (defined $lang) {
+                $langpath = $dir->getLangPath("$lang.lang") or return;
+                $langpath = undef unless -f $langpath;
+        }
+        unless (defined $langpath) {
+                $lang = _shebang2lang($self, $str) or return;
+                $langpath = $dir->getLangPath("$lang.lang") or return;
+                $langpath = undef unless -f $langpath;
+        }
+        return unless defined $langpath;
+
+        my $gen = $self->{$langpath} ||= do {
+                my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+                $g->setFragmentCode(1); # generate html fragment
+
+                # whatever theme works
+                my $themepath = $dir->getThemePath('print.theme');
+                $g->initTheme($themepath);
+                $g->loadLanguage($langpath);
+                $g->setEncoding('utf-8');
+                $g;
+        };
+        \($gen->generateString($$str))
+}
+
+# SWIG instances aren't reference-counted, but $self is;
+# so we need to delete all the CodeGenerator instances manually
+# at our own destruction
+sub DESTROY {
+        my ($self) = @_;
+        foreach my $gen (values %$self) {
+                if (ref($gen) eq 'highlight::CodeGenerator') {
+                        highlight::CodeGenerator::deleteInstance($gen);
+                }
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index ccfa3242..53810b33 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -9,17 +9,7 @@ use warnings;
 use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
-our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/;
-
-# User-generated content (UGC) may have excessively long lines
-# and screw up rendering on some browsers, so we use pre-wrap.
-#
-# We also force everything to the same scaled font-size because GUI
-# browsers (tested both Firefox and surf (webkit)) uses a larger font
-# for the Search <form> element than the rest of the page.  Font size
-# uniformity is important to people who rely on gigantic fonts.
-use constant STYLE =>
-        '<style>pre{white-space:pre-wrap}*{font-size:100%}</style>';
+our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape/;
 
 my $enc_ascii = find_encoding('us-ascii');
 
@@ -47,6 +37,21 @@ sub new_oneline {
         $class->new($raw);
 }
 
+# some of these overrides are standard C escapes so they're
+# easy-to-understand when rendered.
+my %escape_sequence = (
+        "\x00" => '\\0', # NUL
+        "\x07" => '\\a', # bell
+        "\x08" => '\\b', # backspace
+        "\x09" => "\t", # obvious to show as-is
+        "\x0a" => "\n", # obvious to show as-is
+        "\x0b" => '\\v', # vertical tab
+        "\x0c" => '\\f', # form feed
+        "\x0d" => '\\r', # carriage ret (not preceding \n)
+        "\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
+        "\x7f" => '\\x7f', # DEL
+);
+
 my %xhtml_map = (
         '"' => '&#34;',
         '&' => '&#38;',
@@ -56,18 +61,13 @@ my %xhtml_map = (
 );
 
 $xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
-# some of these overrides are standard C escapes so they're
-# easy-to-understand when rendered.
-$xhtml_map{"\x00"} = '\\0'; # NUL
-$xhtml_map{"\x07"} = '\\a'; # bell
-$xhtml_map{"\x08"} = '\\b'; # backspace
-$xhtml_map{"\x09"} = "\t"; # obvious to show as-is
-$xhtml_map{"\x0a"} = "\n"; # obvious to show as-is
-$xhtml_map{"\x0b"} = '\\v'; # vertical tab
-$xhtml_map{"\x0c"} = '\\f'; # form feed
-$xhtml_map{"\x0d"} = '\\r'; # carriage ret (not preceding \n)
-$xhtml_map{"\x1b"} = '^['; # ASCII escape (mutt seems to escape this way)
-$xhtml_map{"\x7f"} = '\\x7f'; # DEL
+%xhtml_map = (%xhtml_map, %escape_sequence);
+
+sub src_escape ($) {
+        $_[0] =~ s/\r\n/\n/sg;
+        $_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
+        $_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF);
+}
 
 sub ascii_html {
         my ($s) = @_;
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 3500f8a4..913fac81 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -9,6 +9,8 @@ package PublicInbox::Qspawn;
 use strict;
 use warnings;
 use PublicInbox::Spawn qw(popen_rd);
+require Plack::Util;
+my $def_limiter;
 
 sub new ($$$;) {
         my ($class, $cmd, $env, $opt) = @_;
@@ -28,13 +30,22 @@ sub _do_spawn {
         $cb->($self->{rpipe});
 }
 
+sub child_err ($) {
+        my ($child_error) = @_; # typically $?
+        my $exitstatus = ($child_error >> 8) or return;
+        my $sig = $child_error & 127;
+        my $msg = "exit status=$exitstatus";
+        $msg .= " signal=$sig" if $sig;
+        $msg;
+}
+
 sub finish ($) {
         my ($self) = @_;
         my $limiter = $self->{limiter};
         my $running;
         if (delete $self->{rpipe}) {
                 my $pid = delete $self->{pid};
-                $self->{err} = $pid == waitpid($pid, 0) ? $? :
+                $self->{err} = $pid == waitpid($pid, 0) ? child_err($?) :
                                 "PID:$pid still running?";
                 $running = --$limiter->{running};
         }
@@ -59,6 +70,119 @@ sub start {
         }
 }
 
+sub _psgi_finish ($$) {
+        my ($self, $env) = @_;
+        my $err = $self->finish;
+        if ($err && !$env->{'qspawn.quiet'}) {
+                $err = join(' ', @{$self->{args}->[0]}).": $err\n";
+                $env->{'psgi.errors'}->print($err);
+        }
+}
+
+sub psgi_qx {
+        my ($self, $env, $limiter, $qx_cb) = @_;
+        my $qx = PublicInbox::Qspawn::Qx->new;
+        my $end = sub {
+                _psgi_finish($self, $env);
+                eval { $qx_cb->($qx) };
+                $qx = undef;
+        };
+        my $rpipe;
+        my $async = $env->{'pi-httpd.async'};
+        my $cb = sub {
+                my $r = sysread($rpipe, my $buf, 8192);
+                if ($async) {
+                        $async->async_pass($env->{'psgix.io'}, $qx, \$buf);
+                } elsif (defined $r) {
+                        $r ? $qx->write($buf) : $end->();
+                } else {
+                        return if $!{EAGAIN} || $!{EINTR}; # loop again
+                        $end->();
+                }
+        };
+        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+        $self->start($limiter, sub { # may run later, much later...
+                ($rpipe) = @_;
+                if ($async) {
+                # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+                        $async = $async->($rpipe, $cb, $end);
+                } else { # generic PSGI
+                        $cb->() while $qx;
+                }
+        });
+}
+
+# create a filter for "push"-based streaming PSGI writes used by HTTPD::Async
+sub filter_fh ($$) {
+        my ($fh, $filter) = @_;
+        Plack::Util::inline_object(
+                close => sub {
+                        $fh->write($filter->(undef));
+                        $fh->close;
+                },
+                write => sub {
+                        $fh->write($filter->($_[0]));
+                });
+}
+
+sub psgi_return {
+        my ($self, $env, $limiter, $parse_hdr) = @_;
+        my ($fh, $rpipe);
+        my $end = sub {
+                _psgi_finish($self, $env);
+                $fh->close if $fh; # async-only
+        };
+
+        my $buf = '';
+        my $rd_hdr = sub {
+                my $r = sysread($rpipe, $buf, 1024, length($buf));
+                return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
+                $parse_hdr->($r, \$buf);
+        };
+        my $res = delete $env->{'qspawn.response'};
+        my $async = $env->{'pi-httpd.async'};
+        my $cb = sub {
+                my $r = $rd_hdr->() or return;
+                $rd_hdr = undef;
+                my $filter = delete $env->{'qspawn.filter'};
+                if (scalar(@$r) == 3) { # error
+                        if ($async) {
+                                $async->close; # calls rpipe->close and $end
+                        } else {
+                                $rpipe->close;
+                                $end->();
+                        }
+                        $res->($r);
+                } elsif ($async) {
+                        $fh = $res->($r); # scalar @$r == 2
+                        $fh = filter_fh($fh, $filter) if $filter;
+                        $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
+                } else { # for synchronous PSGI servers
+                        require PublicInbox::GetlineBody;
+                        $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
+                                                                $buf, $filter);
+                        $res->($r);
+                }
+        };
+        $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+        my $start_cb = sub { # may run later, much later...
+                ($rpipe) = @_;
+                if ($async) {
+                        # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+                        $async = $async->($rpipe, $cb, $end);
+                } else { # generic PSGI
+                        $cb->() while $rd_hdr;
+                }
+        };
+
+        return $self->start($limiter, $start_cb) if $res;
+
+        sub {
+                ($res) = @_;
+                $self->start($limiter, $start_cb);
+        };
+}
+
 package PublicInbox::Qspawn::Limiter;
 use strict;
 use warnings;
@@ -73,4 +197,21 @@ sub new {
         }, $class;
 }
 
+# captures everything into a buffer and executes a callback when done
+package PublicInbox::Qspawn::Qx;
+use strict;
+use warnings;
+
+sub new {
+        my ($class) = @_;
+        my $buf = '';
+        bless \$buf, $class;
+}
+
+# called by PublicInbox::HTTPD::Async ($fh->write)
+sub write {
+        ${$_[0]} .= $_[1];
+        undef;
+}
+
 1;
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
new file mode 100644
index 00000000..d7875333
--- /dev/null
+++ b/lib/PublicInbox/SolverGit.pm
@@ -0,0 +1,530 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "Solve" blobs which don't exist in git code repositories by
+# searching inboxes for post-image blobs.
+
+# this emits a lot of debugging/tracing information which may be
+# publically viewed over HTTP(S).  Be careful not to expose
+# local filesystem layouts in the process.
+package PublicInbox::SolverGit;
+use strict;
+use warnings;
+use File::Temp qw();
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::MsgIter qw(msg_iter msg_part_text);
+use PublicInbox::Qspawn;
+use URI::Escape qw(uri_escape_utf8);
+
+# POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
+# subtract 2048 bytes.  We also don't factor in environment variable
+# headroom into this.
+use POSIX qw(sysconf _SC_ARG_MAX);
+my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
+
+# By default, "git format-patch" generates filenames with a four-digit
+# prefix, so that means 9999 patch series are OK, right? :>
+# Maybe we can make this configurable, main concern is disk space overhead
+# for uncompressed patch fragments.  Aside from space, public-inbox-httpd
+# is otherwise unaffected by having many patches, here, as it can share
+# work fairly.  Other PSGI servers may have trouble, though.
+my $MAX_PATCH = 9999;
+
+# di = diff info / a hashref with information about a diff ($di):
+# {
+#        oid_a => abbreviated pre-image oid,
+#        oid_b => abbreviated post-image oid,
+#        tmp => anonymous file handle with the diff,
+#        hdr_lines => arrayref of various header lines for mode information
+#        mode_a => original mode of oid_a (string, not integer),
+#        ibx => PublicInbox::Inbox object containing the diff
+#        smsg => PublicInbox::SearchMsg object containing diff
+#        path_a => pre-image path
+#        path_b => post-image path
+# }
+
+# don't bother if somebody sends us a patch with these path components,
+# it's junk at best, an attack attempt at worse:
+my %bad_component = map { $_ => 1 } ('', '.', '..');
+
+sub dbg ($$) {
+        print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
+}
+
+sub ERR ($$) {
+        my ($self, $err) = @_;
+        print { $self->{out} } $err, "\n";
+        my $ucb = delete($self->{user_cb});
+        eval { $ucb->($err) } if $ucb;
+        die $err;
+}
+
+# look for existing blobs already in git repos
+sub solve_existing ($$) {
+        my ($self, $want) = @_;
+        my $oid_b = $want->{oid_b};
+        my @ambiguous; # Array of [ git, $oids]
+        foreach my $git (@{$self->{gits}}) {
+                my ($oid_full, $type, $size) = $git->check($oid_b);
+                if (defined($type) && $type eq 'blob') {
+                        return [ $git, $oid_full, $type, int($size) ];
+                }
+
+                next if length($oid_b) == 40;
+
+                # parse stderr of "git cat-file --batch-check"
+                my $err = $git->last_check_err;
+                my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g);
+                next unless scalar(@oids);
+
+                # TODO: do something with the ambiguous array?
+                # push @ambiguous, [ $git, @oids ];
+
+                dbg($self, "`$oid_b' ambiguous in " .
+                                join("\n\t", $git->pub_urls) . "\n" .
+                                join('', map { "$_ blob\n" } @oids));
+        }
+        scalar(@ambiguous) ? \@ambiguous : undef;
+}
+
+sub extract_diff ($$$$$) {
+        my ($self, $p, $re, $ibx, $smsg) = @_;
+        my ($part) = @$p; # ignore $depth and @idx;
+        my $hdr_lines; # diff --git a/... b/...
+        my $tmp;
+        my $ct = $part->content_type || 'text/plain';
+        my ($s, undef) = msg_part_text($part, $ct);
+        defined $s or return;
+        my $di = {};
+
+        # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+        # change it back to LF:
+        my $cte = $part->header('Content-Transfer-Encoding') || '';
+        if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+                $s =~ s/\r\n/\n/sg;
+        }
+
+        foreach my $l (split(/^/m, $s)) {
+                if ($l =~ $re) {
+                        $di->{oid_a} = $1;
+                        $di->{oid_b} = $2;
+                        if (defined($3)) {
+                                my $mode_a = $3;
+                                if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+                                        $di->{mode_a} = $mode_a;
+                                }
+                        }
+
+
+                        # start writing the diff out to a tempfile
+                        my $pn = ++$self->{tot};
+                        open($tmp, '>', $self->{tmp}->dirname . "/$pn") or
+                                                        die "open(tmp): $!";
+
+                        push @$hdr_lines, $l;
+                        $di->{hdr_lines} = $hdr_lines;
+                        utf8::encode($_) for @$hdr_lines;
+                        print $tmp @$hdr_lines or die "print(tmp): $!";
+
+                        # for debugging/diagnostics:
+                        $di->{ibx} = $ibx;
+                        $di->{smsg} = $smsg;
+                } elsif ($l =~ m!\Adiff --git ("?[^/]+/.+) ("?[^/]+/.+)$!) {
+                        last if $tmp; # got our blob, done!
+
+                        my ($path_a, $path_b) = ($1, $2);
+
+                        # diff header lines won't have \r because git
+                        # will quote them, but Email::MIME gives CRLF
+                        # for quoted-printable:
+                        $path_b =~ tr/\r//d;
+
+                        # don't care for leading 'a/' and 'b/'
+                        my (undef, @a) = split(m{/}, git_unquote($path_a));
+                        my (undef, @b) = split(m{/}, git_unquote($path_b));
+
+                        # get rid of path-traversal attempts and junk patches:
+                        foreach (@a, @b) {
+                                return if $bad_component{$_};
+                        }
+
+                        $di->{path_a} = join('/', @a);
+                        $di->{path_b} = join('/', @b);
+                        $hdr_lines = [ $l ];
+                } elsif ($tmp) {
+                        utf8::encode($l);
+                        print $tmp $l or die "print(tmp): $!";
+                } elsif ($hdr_lines) {
+                        push @$hdr_lines, $l;
+                        if ($l =~ /\Anew file mode (100644|120000|100755)$/) {
+                                $di->{mode_a} = $1;
+                        }
+                }
+        }
+        return undef unless $tmp;
+        close $tmp or die "close(tmp): $!";
+        $di;
+}
+
+sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }
+
+sub find_extract_diff ($$$) {
+        my ($self, $ibx, $want) = @_;
+        my $srch = $ibx->search or return;
+
+        my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
+        $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
+
+        my $q = "dfpost:$post";
+        my $pre = $want->{oid_a};
+        if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
+                $q .= " dfpre:$pre";
+        } else {
+                $pre = '[a-f0-9]{7}'; # for $re below
+        }
+
+        my $path_b = $want->{path_b};
+        if (path_searchable($path_b)) {
+                $q .= qq{ dfn:"$path_b"};
+
+                my $path_a = $want->{path_a};
+                if (path_searchable($path_a) && $path_a ne $path_b) {
+                        $q .= qq{ dfn:"$path_a"};
+                }
+        }
+
+        my $msgs = $srch->query($q, { relevance => 1 });
+        my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+
+        my $di;
+        foreach my $smsg (@$msgs) {
+                $ibx->smsg_mime($smsg) or next;
+                msg_iter(delete($smsg->{mime}), sub {
+                        $di ||= extract_diff($self, $_[0], $re, $ibx, $smsg);
+                });
+                return $di if $di;
+        }
+}
+
+sub prepare_index ($) {
+        my ($self) = @_;
+        my $patches = $self->{patches};
+        $self->{nr} = 0;
+
+        my $di = $patches->[0] or die 'no patches';
+        my $oid_a = $di->{oid_a} or die '{oid_a} unset';
+        my $existing = $self->{found}->{$oid_a};
+
+        # no index creation for added files
+        $oid_a =~ /\A0+\z/ and return next_step($self);
+
+        die "BUG: $oid_a not not found" unless $existing;
+
+        my $oid_full = $existing->[1];
+        my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
+        my $mode_a = $di->{mode_a} || extract_old_mode($di);
+
+        open my $in, '+>', undef or die "open: $!";
+        print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!";
+        $in->flush or die "flush: $!";
+        sysseek($in, 0, 0) or die "seek: $!";
+
+        dbg($self, 'preparing index');
+        my $rdr = { 0 => fileno($in) };
+        my $cmd = [ qw(git update-index -z --index-info) ];
+        my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                if (my $err = $qsp->{err}) {
+                        ERR($self, "git update-index error: $err");
+                }
+                dbg($self, "index prepared:\n" .
+                        "$mode_a $oid_full\t" . git_quote($path_a));
+                next_step($self); # onto do_git_apply
+        });
+}
+
+# pure Perl "git init"
+sub do_git_init ($) {
+        my ($self) = @_;
+        my $dir = $self->{tmp}->dirname;
+        my $git_dir = "$dir/git";
+
+        foreach ('', qw(objects refs objects/info refs/heads)) {
+                mkdir("$git_dir/$_") or die "mkdir $_: $!";
+        }
+        open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
+        print $fh <<'EOF' or die "print git/config $!";
+[core]
+        repositoryFormatVersion = 0
+        filemode = true
+        bare = false
+        fsyncObjectfiles = false
+        logAllRefUpdates = false
+EOF
+        close $fh or die "close git/config: $!";
+
+        open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
+        print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
+        close $fh or die "close git/HEAD: $!";
+
+        my $f = 'objects/info/alternates';
+        open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
+        print($fh (map { "$_->{git_dir}/objects\n" } @{$self->{gits}})) or
+                die "print $f: $!";
+        close $fh or die "close: $f: $!";
+        my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
+        $tmp_git->{-tmp} = $self->{tmp};
+        $self->{git_env} = {
+                GIT_DIR => $git_dir,
+                GIT_INDEX_FILE => "$git_dir/index",
+        };
+        prepare_index($self);
+}
+
+sub extract_old_mode ($) {
+        my ($di) = @_;
+        if (join('', @{$di->{hdr_lines}}) =~
+                        /^old mode (100644|100755|120000)\b/) {
+                return $1;
+        }
+        '100644';
+}
+
+sub do_step ($) {
+        my ($self) = @_;
+        eval {
+                # step 1: resolve blobs to patches in the todo queue
+                if (my $want = pop @{$self->{todo}}) {
+                        # this populates {patches} and {todo}
+                        resolve_patch($self, $want);
+
+                # step 2: then we instantiate a working tree once
+                # the todo queue is finally empty:
+                } elsif (!defined($self->{tmp_git})) {
+                        do_git_init($self);
+
+                # step 3: apply each patch in the stack
+                } elsif (scalar @{$self->{patches}}) {
+                        do_git_apply($self);
+
+                # step 4: execute the user-supplied callback with
+                # our result: (which may be undef)
+                # Other steps may call user_cb to terminate prematurely
+                # on error
+                } elsif (my $ucb = delete($self->{user_cb})) {
+                        $ucb->($self->{found}->{$self->{oid_want}});
+                } else {
+                        die 'about to call user_cb twice'; # Oops :x
+                }
+        }; # eval
+        my $err = $@;
+        if ($err) {
+                $err =~ s/^\s*Exception:\s*//; # bad word to show users :P
+                dbg($self, "E: $err");
+                my $ucb = delete($self->{user_cb});
+                eval { $ucb->($err) } if $ucb;
+        }
+}
+
+sub step_cb ($) {
+        my ($self) = @_;
+        sub { do_step($self) };
+}
+
+sub next_step ($) {
+        my ($self) = @_;
+        # if outside of public-inbox-httpd, caller is expected to be
+        # looping step_cb, anyways
+        my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
+        # PublicInbox::HTTPD::Async->new
+        $async->(undef, step_cb($self));
+}
+
+sub mark_found ($$$) {
+        my ($self, $oid, $found_info) = @_;
+        $self->{found}->{$oid} = $found_info;
+}
+
+sub parse_ls_files ($$$$) {
+        my ($self, $qsp, $bref, $di) = @_;
+        if (my $err = $qsp->{err}) {
+                die "git ls-files error: $err";
+        }
+
+        my ($line, @extra) = split(/\0/, $$bref);
+        scalar(@extra) and die "BUG: extra files in index: <",
+                                join('> <', @extra), ">";
+
+        my ($info, $file) = split(/\t/, $line, 2);
+        my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
+        if ($file ne $di->{path_b}) {
+                die
+"BUG: index mismatch: file=$file != path_b=$di->{path_b}";
+        }
+
+        my $tmp_git = $self->{tmp_git} or die 'no git working tree';
+        my (undef, undef, $size) = $tmp_git->check($oid_b_full);
+        defined($size) or die "check $oid_b_full failed";
+
+        dbg($self, "index at:\n$mode_b $oid_b_full\t$file");
+        my $created = [ $tmp_git, $oid_b_full, 'blob', $size, $di ];
+        mark_found($self, $di->{oid_b}, $created);
+        next_step($self); # onto the next patch
+}
+
+sub start_ls_files ($$) {
+        my ($self, $di) = @_;
+        my $cmd = [qw(git ls-files -s -z)];
+        my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env});
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                eval { parse_ls_files($self, $qsp, $bref, $di) };
+                ERR($self, $@) if $@;
+        });
+}
+
+sub do_git_apply ($) {
+        my ($self) = @_;
+        my $dn = $self->{tmp}->dirname;
+        my $patches = $self->{patches};
+
+        # we need --ignore-whitespace because some patches are CRLF
+        my @cmd = (qw(git -C), $dn, qw(apply --cached --ignore-whitespace
+                        --whitespace=warn --verbose));
+        my $len = length(join(' ', @cmd));
+        my $total = $self->{tot};
+        my $di; # keep track of the last one for "git ls-files"
+
+        do {
+                my $i = ++$self->{nr};
+                $di = shift @$patches;
+                dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
+                        "\n" . join('', @{$di->{hdr_lines}}));
+                my $path = $total + 1 - $i;
+                $len += length($path) + 1;
+                push @cmd, $path;
+        } while (@$patches && $len < $ARG_SIZE_MAX);
+
+        my $rdr = { 2 => 1 };
+        my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $rdr);
+        $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+                my ($bref) = @_;
+                dbg($self, $$bref);
+                if (my $err = $qsp->{err}) {
+                        ERR($self, "git apply error: $err");
+                }
+                eval { start_ls_files($self, $di) };
+                ERR($self, $@) if $@;
+        });
+}
+
+sub di_url ($$) {
+        my ($self, $di) = @_;
+        # note: we don't pass the PSGI env unconditionally, here,
+        # different inboxes can have different HTTP_HOST on the same instance.
+        my $ibx = $di->{ibx};
+        my $env = $self->{psgi_env} if $ibx eq $self->{inboxes}->[0];
+        my $url = $ibx->base_url($env);
+        my $mid = $di->{smsg}->{mid};
+        defined($url) ? "$url$mid/" : "<$mid>";
+}
+
+sub resolve_patch ($$) {
+        my ($self, $want) = @_;
+
+        if (scalar(@{$self->{patches}}) > $MAX_PATCH) {
+                die "Aborting, too many steps to $self->{oid_want}";
+        }
+
+        # see if we can find the blob in an existing git repo:
+        my $cur_want = $want->{oid_b};
+        if ($self->{seen_oid}->{$cur_want}++) {
+                die "Loop detected solving $cur_want\n";
+        }
+        if (my $existing = solve_existing($self, $want)) {
+                dbg($self, "found $cur_want in " .
+                        join("\n", $existing->[0]->pub_urls));
+
+                if ($cur_want eq $self->{oid_want}) { # all done!
+                        eval { delete($self->{user_cb})->($existing) };
+                        die "E: $@" if $@;
+                        return;
+                }
+                mark_found($self, $cur_want, $existing);
+                return next_step($self); # onto patch application
+        }
+
+        # scan through inboxes to look for emails which results in
+        # the oid we want:
+        my $di;
+        foreach my $ibx (@{$self->{inboxes}}) {
+                $di = find_extract_diff($self, $ibx, $want) or next;
+
+                unshift @{$self->{patches}}, $di;
+                dbg($self, "found $cur_want in ".di_url($self, $di));
+
+                # good, we can find a path to the oid we $want, now
+                # lets see if we need to apply more patches:
+                my $src = $di->{oid_a};
+
+                unless ($src =~ /\A0+\z/) {
+                        # we have to solve it using another oid, fine:
+                        my $job = { oid_b => $src, path_b => $di->{path_a} };
+                        push @{$self->{todo}}, $job;
+                }
+                return next_step($self); # onto the next todo item
+        }
+        dbg($self, "could not find $cur_want");
+        eval { delete($self->{user_cb})->(undef) }; # not found! :<
+        die "E: $@" if $@;
+}
+
+# this API is designed to avoid creating self-referential structures;
+# so user_cb never references the SolverGit object
+sub new {
+        my ($class, $ibx, $user_cb) = @_;
+
+        bless {
+                gits => $ibx->{-repo_objs},
+                user_cb => $user_cb,
+
+                # TODO: config option for searching related inboxes
+                inboxes => [ $ibx ],
+        }, $class;
+}
+
+# recreate $oid_want using $hints
+# Calls {user_cb} with: [ ::Git object, oid_full, type, size, di (diff_info) ]
+# with found object, or undef if nothing was found
+# Calls {user_cb} with a string error on fatal errors
+sub solve ($$$$$) {
+        my ($self, $env, $out, $oid_want, $hints) = @_;
+
+        # should we even get here? Probably not, but somebody
+        # could be manually typing URLs:
+        return (delete $self->{user_cb})->(undef) if $oid_want =~ /\A0+\z/;
+
+        $self->{oid_want} = $oid_want;
+        $self->{out} = $out;
+        $self->{seen_oid} = {};
+        $self->{tot} = 0;
+        $self->{psgi_env} = $env;
+        $self->{todo} = [ { %$hints, oid_b => $oid_want } ];
+        $self->{patches} = []; # [ $di, $di, ... ]
+        $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
+        $self->{tmp} = File::Temp->newdir('solver.tmp-XXXXXXXX', TMPDIR => 1);
+
+        dbg($self, "solving $oid_want ...");
+        my $step_cb = step_cb($self);
+        if (my $async = $env->{'pi-httpd.async'}) {
+                # PublicInbox::HTTPD::Async->new
+                $async->(undef, $step_cb);
+        } else {
+                $step_cb->() while $self->{user_cb};
+        }
+}
+
+1;
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
new file mode 100644
index 00000000..df0429c3
--- /dev/null
+++ b/lib/PublicInbox/UserContent.pm
@@ -0,0 +1,98 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Self-updating module containing a sample CSS for client-side
+# customization by users of public-inbox.  Used by Makefile.PL
+package PublicInbox::UserContent;
+use strict;
+use warnings;
+
+# this sub is updated automatically:
+sub CSS () {
+        <<'_'
+        /*
+         * Dark color scheme using 216 web-safe colors, inspired
+         * somewhat by the default color scheme in mutt.
+         * It reduces eyestrain for me, and energy usage for all:
+         * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+         */
+        * { background:#000; color:#ccc }
+
+        /*
+         * Underlined links add visual noise which make them hard-to-read.
+         * Use colors to make them stand out, instead.
+         */
+        a { color:#69f; text-decoration:none }
+        a:visited { color:#96f }
+
+        /* quoted text gets a different color */
+        *.q { color:#09f }
+
+        /*
+         * these may be used with cgit, too
+         * (cgit uses <div>, public-inbox uses <span>)
+         */
+        *.add { color:#0ff }
+        *.del { color:#f0f }
+        *.head { color:#fff }
+        *.hunk { color:#c93 }
+
+        /*
+         * highlight 3.x colors (tested 3.18)
+         * this doesn't use most of the colors available (I find too many
+         * colors overwhelming).  So the #ccc default is commented out.
+         */
+        .hl.num { color:#f30 } /* number */
+        .hl.esc { color:#f0f } /* escape character */
+        .hl.str { color:#f30 } /* string */
+        .hl.ppc { color:#f0f } /* preprocessor */
+        .hl.pps { color:#f30 } /* preprocessor string */
+        .hl.slc { color:#09f } /* single-line comment */
+        .hl.com { color:#09f }
+        /* .hl.opt { color:#ccc } */
+        /* .hl.ipl { color:#ccc } */
+        /* .hl.lin { color:#ccc } */
+        .hl.kwa { color:#ff0 }
+        .hl.kwb { color:#0f0 }
+        .hl.kwc { color:#ff0 }
+        /* .hl.kwd { color:#ccc } */
+_
+}
+# end of auto-updated sub
+
+# return a sample CSS
+sub sample ($$) {
+        my ($ibx, $env) = @_;
+        my $url_prefix = $ibx->base_url($env);
+        my $preamble = <<"";
+/*
+ * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css
+ * where \$PROFILE_FOLDER is platform-specific
+ *
+ * cf. http://kb.mozillazine.org/UserContent.css
+ *     http://kb.mozillazine.org/Profile_folder_-_Firefox
+ *
+ * Users of dillo can remove the entire lines with "moz-only"
+ * in them and place the resulting file in ~/.dillo/style.css
+ */
+\@-moz-document url-prefix($url_prefix) { /* moz-only */
+
+        $preamble . CSS() . "\n} /* moz-only */\n";
+}
+
+# Auto-update this file based on the contents of a CSS file:
+# usage: perl -I lib __FILE__ contrib/css/216dark.css
+# (See Makefile.PL)
+if (scalar(@ARGV) == 1 && -r __FILE__) {
+        use autodie;
+        open my $ro, '<', $ARGV[0];
+        my $css = do { local $/; <$ro> };
+        $css =~ s/^([ \t]*\S)/\t$1/smg;
+        open my $rw, '+<', __FILE__;
+        my $out = do { local $/; <$rw> };
+        $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm;
+        seek $rw, 0, 0;
+        print $rw $out;
+}
+
+1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4c1c6705..ca9b9550 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -14,6 +14,7 @@ use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
 use PublicInbox::Reply;
+use PublicInbox::ViewDiff qw(flush_diff);
 require POSIX;
 use Time::Local qw(timegm);
 
@@ -28,7 +29,7 @@ sub msg_html {
         my ($ctx, $mime, $more, $smsg) = @_;
         my $hdr = $mime->header_obj;
         my $ibx = $ctx->{-inbox};
-        my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
+        $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
         my $tip = _msg_html_prepare($hdr, $ctx, $more, 0);
         my $end = 2;
         PublicInbox::WwwStream->response($ctx, 200, sub {
@@ -36,7 +37,7 @@ sub msg_html {
                 if ($nr == 1) {
                         # $more cannot be true w/o $smsg being defined:
                         my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : '';
-                        $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) .
+                        $tip . multipart_text_as_html($mime, $upfx, $ibx) .
                                 '</pre><hr>'
                 } elsif ($more && @$more) {
                         ++$end;
@@ -81,15 +82,15 @@ sub msg_html_more {
         my $str = eval {
                 my ($id, $prev, $smsg) = @$more;
                 my $mid = $ctx->{mid};
-                $smsg = $ctx->{-inbox}->smsg_mime($smsg);
+                my $ibx = $ctx->{-inbox};
+                $smsg = $ibx->smsg_mime($smsg);
                 my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
                 @$more = $next ? ($id, $prev, $next) : ();
                 if ($smsg) {
                         my $mime = $smsg->{mime};
                         my $upfx = '../' . mid_escape($smsg->mid) . '/';
                         _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
-                                multipart_text_as_html($mime, $upfx,
-                                                        $ctx->{-obfs_ibx}) .
+                                multipart_text_as_html($mime, $upfx, $ibx) .
                                 '</pre><hr>'
                 } else {
                         '';
@@ -260,7 +261,8 @@ sub index_entry {
         $rv .= "\n";
 
         # scan through all parts, looking for displayable text
-        msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
+        my $ibx = $ctx->{-inbox};
+        msg_iter($mime, sub { $rv .= add_text_body($mhref, $ibx, $_[0]) });
 
         # add the footer
         $rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -488,11 +490,11 @@ sub thread_html {
 }
 
 sub multipart_text_as_html {
-        my ($mime, $upfx, $obfs_ibx) = @_;
+        my ($mime, $upfx, $ibx) = @_;
         my $rv = "";
 
         # scan through all parts, looking for displayable text
-        msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
+        msg_iter($mime, sub { $rv .= add_text_body($upfx, $ibx, $_[0]) });
         $rv;
 }
 
@@ -545,7 +547,8 @@ sub attach_link ($$$$;$) {
 }
 
 sub add_text_body {
-        my ($upfx, $obfs_ibx, $p) = @_;
+        my ($upfx, $ibx, $p) = @_;
+        my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
         # $p - from msg_iter: [ Email::MIME, depth, @idx ]
         my ($part, $depth) = @$p; # attachment @idx is unused
         my $ct = $part->content_type || 'text/plain';
@@ -554,6 +557,25 @@ sub add_text_body {
 
         return attach_link($upfx, $ct, $p, $fn) unless defined $s;
 
+        # makes no difference to browsers, and don't screw up filename
+        # link generation in diffs with the extra '%0D'
+        $s =~ s/\r\n/\n/sg;
+
+        my ($diff, $spfx);
+        if ($s =~ /^(?:diff|---|\+{3}) /ms) {
+                $diff = [];
+                if ($ibx->{-repo_objs}) {
+                        my $n_slash = $upfx =~ tr!/!/!;
+                        if ($n_slash == 0) {
+                                $spfx = '../';
+                        } elsif ($n_slash == 1) {
+                                $spfx = '';
+                        } else { # nslash == 2
+                                $spfx = '../../';
+                        }
+                }
+        };
+
         my @lines = split(/^/m, $s);
         $s = '';
         if (defined($fn) || $depth > 0 || $err) {
@@ -568,19 +590,26 @@ sub add_text_body {
                         # show the previously buffered quote inline
                         flush_quote(\$s, $l, \@quot) if @quot;
 
-                        # regular line, OK
-                        $l->linkify_1($cur);
-                        $s .= $l->linkify_2(ascii_html($cur));
+                        if ($diff) {
+                                push @$diff, $cur;
+                        } else {
+                                # regular line, OK
+                                $l->linkify_1($cur);
+                                $s .= $l->linkify_2(ascii_html($cur));
+                        }
                 } else {
+                        flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                         push @quot, $cur;
                 }
         }
 
         if (@quot) { # ugh, top posted
                 flush_quote(\$s, $l, \@quot);
+                flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                 obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
                 $s;
         } else {
+                flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
                 obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
                 if ($s =~ /\n\z/s) { # common, last line ends with a newline
                         $s;
@@ -1070,7 +1099,7 @@ sub dump_topics {
 
                 my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>);
                 my $atom = qq(<a\nhref="$href/t.atom">Atom</a>);
-                my $s = "<a\nhref=\"$href/T/$anchor\"><b>$top</b></a>\n" .
+                my $s = "<a\nhref=\"$href/T/$anchor\">$top</a>\n" .
                         " $ds UTC $n - $mbox / $atom\n";
                 for (my $i = 0; $i < scalar(@ex); $i += 2) {
                         my $level = $ex[$i];
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
new file mode 100644
index 00000000..a8045687
--- /dev/null
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -0,0 +1,161 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# used by PublicInbox::View
+# This adds CSS spans for diff highlighting.
+# It also generates links for ViewVCS + SolverGit to show
+# (or reconstruct) blobs.
+
+package PublicInbox::ViewDiff;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(flush_diff);
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Git qw(git_unquote);
+
+sub DSTATE_INIT () { 0 }
+sub DSTATE_STAT () { 1 } # TODO
+sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ /
+sub DSTATE_CTX () { 3 } # /^ /
+sub DSTATE_ADD () { 4 } # /^\+/
+sub DSTATE_DEL () { 5 } # /^\-/
+my @state2class = (
+        '', # init
+        '', # stat
+        'head',
+        '', # ctx
+        'add',
+        'del'
+);
+
+sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
+
+my $OID_NULL = '0{7,40}';
+my $OID_BLOB = '[a-f0-9]{7,40}';
+my $PATH_A = '"?a/.+|/dev/null';
+my $PATH_B = '"?b/.+|/dev/null';
+
+sub to_html ($$) {
+        $_[0]->linkify_1($_[1]);
+        $_[0]->linkify_2(ascii_html($_[1]));
+}
+
+# link to line numbers in blobs
+sub diff_hunk ($$$$) {
+        my ($dctx, $spfx, $ca, $cb) = @_;
+        my $oid_a = $dctx->{oid_a};
+        my $oid_b = $dctx->{oid_b};
+
+        (defined($spfx) && defined($oid_a) && defined($oid_b)) or
+                return "@@ $ca $cb @@";
+
+        my ($n) = ($ca =~ /^-(\d+)/);
+        $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+        my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
+
+        ($n) = ($cb =~ /^\+(\d+)/);
+        $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+        $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
+}
+
+sub oid ($$$) {
+        my ($dctx, $spfx, $oid) = @_;
+        defined($spfx) ? qq(<a\nhref="$spfx$oid/s/$dctx->{Q}">$oid</a>) : $oid;
+}
+
+sub to_state ($$$) {
+        my ($dst, $state, $new_state) = @_;
+        $$dst .= '</span>' if $state2class[$state];
+        $_[1] = $new_state;
+        my $class = $state2class[$new_state] or return;
+        $$dst .= qq(<span\nclass="$class">);
+}
+
+sub flush_diff ($$$$) {
+        my ($dst, $spfx, $linkify, $diff) = @_;
+        my $state = DSTATE_INIT;
+        my $dctx = { Q => '' }; # {}, keys: oid_a, oid_b, path_a, path_b
+
+        foreach my $s (@$diff) {
+                if ($s =~ /^ /) {
+                        if ($state2class[$state]) {
+                                to_state($dst, $state, DSTATE_CTX);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^-- $/) { # email signature begins
+                        $state == DSTATE_INIT or
+                                to_state($dst, $state, DSTATE_INIT);
+                        $$dst .= $s;
+                } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
+                        if ($state != DSTATE_HEAD) {
+                                my ($pa, $pb) = ($1, $2);
+                                to_state($dst, $state, DSTATE_HEAD);
+                                $pa = (split('/', git_unquote($pa), 2))[1];
+                                $pb = (split('/', git_unquote($pb), 2))[1];
+                                $dctx = {
+                                        Q => "?b=".uri_escape_utf8($pb, UNSAFE),
+                                };
+                                if ($pa ne $pb) {
+                                        $dctx->{Q} .=
+                                             "&a=".uri_escape_utf8($pa, UNSAFE);
+                                }
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
+                        $$dst .= $1 . oid($dctx, $spfx, $2);
+                        $dctx = { Q => '' };
+                        $$dst .= to_html($linkify, $s) ;
+                } elsif ($s =~ s/^index ($OID_BLOB)(\.\.$OID_NULL)\b//o) {
+                        $$dst .= 'index ' . oid($dctx, $spfx, $1) . $2;
+                        $dctx = { Q => '' };
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
+                        $dctx->{oid_a} = $1;
+                        $dctx->{oid_b} = $2;
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
+                        $$dst .= '</span>' if $state2class[$state];
+                        $$dst .= qq(<span\nclass="hunk">);
+                        $$dst .= diff_hunk($dctx, $spfx, $1, $2);
+                        $$dst .= '</span>';
+                        $state = DSTATE_CTX;
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ m!^--- $PATH_A! ||
+                         $s =~ m!^\+{3} $PATH_B!)  {
+                        # color only (no oid link)
+                        $state == DSTATE_INIT and
+                                to_state($dst, $state, DSTATE_HEAD);
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^\+/) {
+                        if ($state != DSTATE_ADD && $state != DSTATE_INIT) {
+                                to_state($dst, $state, DSTATE_ADD);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                } elsif ($s =~ /^-/) {
+                        if ($state != DSTATE_DEL && $state != DSTATE_INIT) {
+                                to_state($dst, $state, DSTATE_DEL);
+                        }
+                        $$dst .= to_html($linkify, $s);
+                # ignore the following lines in headers:
+                } elsif ($s =~ /^(?:dis)similarity index/ ||
+                         $s =~ /^(?:old|new) mode/ ||
+                         $s =~ /^(?:deleted|new) file mode/ ||
+                         $s =~ /^(?:copy|rename) (?:from|to) / ||
+                         $s =~ /^(?:dis)?similarity index /) {
+                        $$dst .= to_html($linkify, $s);
+                } else {
+                        $state == DSTATE_INIT or
+                                to_state($dst, $state, DSTATE_INIT);
+                        $$dst .= to_html($linkify, $s);
+                }
+        }
+        @$diff = ();
+        $$dst .= '</span>' if $state2class[$state];
+        undef;
+}
+
+1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
new file mode 100644
index 00000000..85edf22f
--- /dev/null
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -0,0 +1,146 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# show any VCS object, similar to "git show"
+# FIXME: we only show blobs for now
+#
+# This can use a "solver" to reconstruct blobs based on git
+# patches (with abbreviated OIDs in the header).  However, the
+# abbreviated OIDs must match exactly what's in the original
+# email (unless a normal code repo already has the blob).
+#
+# In other words, we can only reliably reconstruct blobs based
+# on links generated by ViewDiff (and only if the emailed
+# patches apply 100% cleanly to published blobs).
+
+package PublicInbox::ViewVCS;
+use strict;
+use warnings;
+use Encode qw(find_encoding);
+use PublicInbox::SolverGit;
+use PublicInbox::WwwStream;
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html to_filename src_escape);
+my $hl = eval {
+        require PublicInbox::HlMod;
+        PublicInbox::HlMod->new;
+};
+
+# we need to trigger highlight::CodeGenerator::deleteInstance
+# in HlMod::DESTROY before the rest of Perl shuts down to avoid
+# a segfault at shutdown
+END { $hl = undef };
+
+my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
+my $max_size = 1024 * 1024; # TODO: configurable
+my $enc_utf8 = find_encoding('UTF-8');
+
+sub html_page ($$$) {
+        my ($ctx, $code, $strref) = @_;
+        my $wcb = delete $ctx->{-wcb};
+        $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
+        my $res = PublicInbox::WwwStream->response($ctx, $code, sub {
+                my ($nr, undef) =  @_;
+                $nr == 1 ? $$strref : undef;
+        });
+        $wcb->($res);
+}
+
+sub solve_result {
+        my ($ctx, $res, $log, $hints, $fn) = @_;
+
+        unless (seek($log, 0, 0)) {
+                $ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n");
+                return html_page($ctx, 500, \'seek error');
+        }
+        $log = do { local $/; <$log> };
+
+        my $ref = ref($res);
+        my $l = PublicInbox::Linkify->new;
+        $l->linkify_1($log);
+        $log = '<pre>debug log:</pre><hr /><pre>' .
+                $l->linkify_2(ascii_html($log)) . '</pre>';
+
+        $res or return html_page($ctx, 404, \$log);
+        $ref eq 'ARRAY' or return html_page($ctx, 500, \$log);
+
+        my ($git, $oid, $type, $size, $di) = @$res;
+        if ($size > $max_size) {
+                # TODO: stream the raw file if it's gigantic, at least
+                $log = '<pre><b>Too big to show</b></pre>' . $log;
+                return html_page($ctx, 500, \$log);
+        }
+
+        my $blob = $git->cat_file($oid);
+        if (!$blob) { # WTF?
+                my $e = "Failed to retrieve generated blob ($oid)";
+                $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n");
+                $log = "<pre><b>$e</b></pre>" . $log;
+                return html_page($ctx, 500, \$log);
+        }
+
+        my $binary = index($$blob, "\0") >= 0;
+        if ($fn) {
+                my $h = [ 'Content-Length', $size, 'Content-Type' ];
+                push(@$h, ($binary ? 'application/octet-stream' : 'text/plain'));
+                return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
+        }
+
+        my $path = to_filename($di->{path_b} || $hints->{path_b} || 'blob');
+        my $raw_link = "(<a\nhref=$path>raw</a>)";
+        if ($binary) {
+                $log = "<pre>$oid $type $size bytes (binary)" .
+                        " $raw_link</pre>" . $log;
+                return html_page($ctx, 200, \$log);
+        }
+
+        $$blob = $enc_utf8->decode($$blob);
+        my $nl = ($$blob =~ tr/\n/\n/);
+        my $pad = length($nl);
+
+        $l->linkify_1($$blob);
+        my $ok = $hl->do_hl($blob, $path) if $hl;
+        if ($ok) {
+                $$ok = $enc_utf8->decode($$ok);
+                src_escape($$ok);
+                $blob = $ok;
+        } else {
+                $$blob = ascii_html($$blob);
+        }
+
+        # using some of the same CSS class names and ids as cgit
+        $log = "<pre>$oid $type $size bytes $raw_link</pre>" .
+                "<hr /><table\nclass=blob>".
+                "<tr><td\nclass=linenumbers><pre>" . join('', map {
+                        sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
+                } (1..$nl)) . '</pre></td>' .
+                '<td><pre> </pre></td>'. # pad for non-CSS users
+                "<td\nclass=lines><pre\nstyle='white-space:pre'><code>" .
+                $l->linkify_2($$blob) .
+                '</code></pre></td></tr></table>' . $log;
+
+        html_page($ctx, 200, \$log);
+}
+
+sub show ($$;$) {
+        my ($ctx, $oid_b, $fn) = @_;
+        my $qp = $ctx->{qp};
+        my $hints = {};
+        while (my ($from, $to) = each %QP_MAP) {
+                defined(my $v = $qp->{$from}) or next;
+                $hints->{$to} = $v;
+        }
+
+        open my $log, '+>', undef or die "open: $!";
+        my $solver = PublicInbox::SolverGit->new($ctx->{-inbox}, sub {
+                solve_result($ctx, $_[0], $log, $hints, $fn);
+        });
+
+        # PSGI server will call this and give us a callback
+        sub {
+                $ctx->{-wcb} = $_[0]; # HTTP write callback
+                $solver->solve($ctx->{env}, $log, $oid_b, $hints);
+        };
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 3562e46c..406802a9 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -6,6 +6,7 @@
 # We focus on the lowest common denominators here:
 # - targeted at text-only console browsers (w3m, links, etc..)
 # - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
+#   and diff/syntax-highlighting (optional)
 # - No JavaScript, graphics or icons allowed.
 # - Must not rely on static content
 # - UTF-8 is only for user-content, 7-bit US-ASCII for us
@@ -19,12 +20,14 @@ use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw(mid_escape);
 require PublicInbox::Git;
 use PublicInbox::GitHTTPBackend;
+use PublicInbox::UserContent;
 
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $OID_RE = qr![a-f0-9]{7,40}!;
 
 sub new {
         my ($class, $pi_config) = @_;
@@ -117,7 +120,14 @@ sub call {
                 r301($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
                 get_text($ctx, $1, $2);
-
+        } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+                get_css($ctx, $1, $2);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
+                get_vcs_object($ctx, $1, $2);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
+                get_vcs_object($ctx, $1, $2, $3);
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+                r301($ctx, $1, $2, 's/');
         # convenience redirects order matters
         } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
                 r301($ctx, $1, $2);
@@ -129,6 +139,7 @@ sub call {
 
 # for CoW-friendliness, MOOOOO!
 sub preload {
+        my ($self) = @_;
         require PublicInbox::Feed;
         require PublicInbox::View;
         require PublicInbox::SearchThread;
@@ -141,6 +152,9 @@ sub preload {
                         PublicInbox::NewsWWW)) {
                 eval "require $_;";
         }
+        if (ref($self)) {
+                $self->stylesheets_prepare($_) for ('', '../', '../../');
+        }
 }
 
 # private functions below
@@ -259,6 +273,18 @@ sub get_text {
         PublicInbox::WwwText::get_text($ctx, $key);
 }
 
+# show git objects (blobs and commits)
+# /$INBOX/_/$OBJECT_ID/show
+# /$INBOX/_/${OBJECT_ID}_${FILENAME}
+# KEY may contain slashes
+sub get_vcs_object ($$$;$) {
+        my ($ctx, $inbox, $oid, $filename) = @_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        require PublicInbox::ViewVCS;
+        PublicInbox::ViewVCS::show($ctx, $oid, $filename);
+}
+
 sub ctx_get {
         my ($ctx, $key) = @_;
         my $val = $ctx->{$key};
@@ -446,4 +472,128 @@ sub get_attach {
         PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn);
 }
 
+# User-generated content (UGC) may have excessively long lines
+# and screw up rendering on some browsers, so we use pre-wrap.
+#
+# We also force everything to the same scaled font-size because GUI
+# browsers (tested both Firefox and surf (webkit)) uses a larger font
+# for the Search <form> element than the rest of the page.  Font size
+# uniformity is important to people who rely on gigantic fonts.
+# Finally, we use monospace to ensure the Search field and button
+# has the same size and spacing as everything else which is
+# <pre>-formatted anyways.
+our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
+
+sub stylesheets_prepare ($$) {
+        my ($self, $upfx) = @_;
+        my $mini = eval {
+                require CSS::Minifier;
+                sub { CSS::Minifier::minify(input => $_[0]) };
+        } || eval {
+                require CSS::Minifier::XS;
+                sub { CSS::Minifier::XS::minify($_[0]) };
+        } || sub { $_[0] };
+
+        my $css_map = {};
+        my $stylesheets = $self->{pi_config}->{css} || [];
+        my $links = [];
+        my $inline_ok = 1;
+
+        foreach my $s (@$stylesheets) {
+                my $attr = {};
+                local $_ = $s;
+                foreach my $k (qw(media title href)) {
+                        if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
+                                $attr->{$k} = $1;
+                        }
+                }
+
+                if (defined $attr->{href}) {
+                        $inline_ok = 0;
+                } else {
+                        open(my $fh, '<', $_) or do {
+                                warn "failed to open $_: $!\n";
+                                next;
+                        };
+                        my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+                        my $ctime = 0;
+                        my $local = do { local $/; <$fh> };
+                        if ($local =~ /\S/) {
+                                $ctime = sprintf('%x',(stat($fh))[10]);
+                                $local = $mini->($local);
+                        }
+                        $css_map->{$key} = $local;
+                        $attr->{href} = "$upfx$key.css?$ctime";
+                        if (defined($attr->{title})) {
+                                $inline_ok = 0;
+                        } elsif (($attr->{media}||'screen') eq 'screen') {
+                                $attr->{-inline} = $local;
+                        }
+                }
+                push @$links, $attr;
+        }
+
+        my $buf = "<style>$STYLE";
+        if ($inline_ok) {
+                my @ext; # for media=print and whatnot
+                foreach my $attr (@$links) {
+                        if (defined(my $str = delete $attr->{-inline})) {
+                                $buf .= $str;
+                        } else {
+                                push @ext, $attr;
+                        }
+                }
+                $links = \@ext;
+        }
+        $buf .= '</style>';
+
+        if (@$links) {
+                foreach my $attr (@$links) {
+                        delete $attr->{-inline};
+                        $buf .= "<link\ntype=text/css\nrel=stylesheet";
+                        while (my ($k, $v) = each %$attr) {
+                                $v = qq{"$v"} if $v =~ /[\s=]/;
+                                $buf .= qq{\n$k=$v};
+                        }
+                        $buf .= ' />';
+                }
+                $self->{"-style-$upfx"} = $buf;
+        } else {
+                $self->{-style_inline} = $buf;
+        }
+        $self->{-css_map} = $css_map;
+}
+
+# returns an HTML fragment with <style> or <link> tags in them
+# Called by WwwStream by nearly every HTML page
+sub style {
+        my ($self, $upfx) = @_;
+        $self->{-style_inline} || $self->{"-style-$upfx"} || do {
+                stylesheets_prepare($self, $upfx);
+                $self->{-style_inline} || $self->{"-style-$upfx"}
+        };
+}
+
+# /$INBOX/$KEY.css endpoint
+# CSS is configured globally for all inboxes, but we access them on
+# a per-inbox basis.  This allows administrators to setup per-inbox
+# static routes to intercept the request before it hits PSGI
+sub get_css ($$$) {
+        my ($ctx, $inbox, $key) = @_;
+        my $r404 = invalid_inbox($ctx, $inbox);
+        return $r404 if $r404;
+        my $self = $ctx->{www};
+        my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+        my $css = $css_map->{$key};
+        if (!defined($css) && $key eq 'userContent') {
+                my $env = $ctx->{env};
+                $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+        }
+        defined $css or return r404();
+        my $h = [ 'Content-Length', bytes::length($css),
+                'Content-Type', 'text/css' ];
+        PublicInbox::GitHTTPBackend::cache_one_year($h);
+        [ 200, $h, [ $css ] ];
+}
+
 1;
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
new file mode 100644
index 00000000..09fc48ab
--- /dev/null
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -0,0 +1,74 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Standalone PSGI app to provide syntax highlighting as-a-service
+# via "highlight" Perl module ("libhighlight-perl" in Debian).
+#
+# This allows exposing highlight as a persistent HTTP service for
+# other scripts via HTTP PUT requests.  PATH_INFO will be used
+# as a hint for detecting the language for highlight.
+#
+# The following example using curl(1) will do the right thing
+# regarding the file extension:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/
+#
+# You can also force a file extension by giving a path
+# (in this case, "c") via:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/x.c
+
+package PublicInbox::WwwHighlight;
+use strict;
+use warnings;
+use HTTP::Status qw(status_message);
+use parent qw(PublicInbox::HlMod);
+
+# TODO: support highlight(1) for distros which don't package the
+# SWIG extension.  Also, there may be admins who don't want to
+# have ugly SWIG-generated code in a long-lived Perl process.
+
+sub r ($) {
+        my ($code) = @_;
+        my $msg = status_message($code);
+        my $len = length($msg);
+        [ $code, [qw(Content-Type text/plain Content-Length), $len], [$msg] ]
+}
+
+# another slurp API hogging up all my memory :<
+# This is capped by whatever the PSGI server allows,
+# $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} for PublicInbox::HTTP (10 MB)
+sub read_in_full ($) {
+        my ($env) = @_;
+
+        my $in = $env->{'psgi.input'};
+        my $off = 0;
+        my $buf = '';
+        my $len = $env->{CONTENT_LENGTH} || 8192;
+        while (1) {
+                my $r = $in->read($buf, $len, $off);
+                last unless defined $r;
+                return \$buf if $r == 0;
+                $off += $r;
+        }
+        $env->{'psgi.errors'}->print("input read error: $!\n");
+        undef;
+}
+
+# entry point for PSGI
+sub call {
+        my ($self, $env) = @_;
+        my $req_method = $env->{REQUEST_METHOD};
+
+        return r(405) if $req_method ne 'PUT';
+
+        my $bref = read_in_full($env) or return r(500);
+        $bref = $self->do_hl($bref, $env->{PATH_INFO});
+
+        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+        push @$h, 'Content-Length', bytes::length($$bref);
+
+        [ 200, $h, [ $$bref ] ]
+}
+
+1;
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index e548f00f..8ae35c73 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -38,10 +38,12 @@ sub _html_top ($) {
         my $title = $ctx->{-title_html} || $desc;
         my $upfx = $ctx->{-upfx} || '';
         my $help = $upfx.'_/text/help';
+        my $color = $upfx.'_/text/color';
         my $atom = $ctx->{-atom} || $upfx.'new.atom';
         my $tip = $ctx->{-html_tip} || '';
         my $top = "<b>$desc</b>";
         my $links = "<a\nhref=\"$help\">help</a> / ".
+                        "<a\nhref=\"$color\">color</a> / ".
                         "<a\nhref=\"$atom\">Atom feed</a>";
         if ($obj->search) {
                 my $q_val = $ctx->{-q_value_html};
@@ -65,7 +67,7 @@ sub _html_top ($) {
         "<html><head><title>$title</title>" .
                 "<link\nrel=alternate\ntitle=\"Atom feed\"\n".
                 "href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
-                PublicInbox::Hval::STYLE .
+                $ctx->{www}->style($upfx) .
                 "</head><body>". $top . $tip;
 }
 
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index b5874cf6..d3413ad7 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -88,9 +88,44 @@ sub _srch_prefix ($$) {
         1;
 }
 
+sub _colors_help ($$) {
+        my ($ctx, $txt) = @_;
+        my $ibx = $ctx->{-inbox};
+        my $base_url = $ibx->base_url($ctx->{env});
+        $$txt .= "color customization for $base_url\n";
+        $$txt .= <<EOF;
+
+public-inbox provides a stable set of CSS classes for users to
+customize colors for highlighting diffs and code.
+
+Users of browsers such as dillo, Firefox, or some browser
+extensions may start by downloading the following sample CSS file
+to control the colors they see:
+
+        ${base_url}userContent.css
+
+CSS classes
+-----------
+
+           span.q - quoted text in email messages
+
+For diff highlighting, we try to match class names with those
+used by cgit: https://git.zx2c4.com/cgit/
+
+         span.add - diff post-image lines
+
+         span.del - diff pre-image lines
+
+        span.head - diff header (metainformation)
+
+        span.hunk - diff hunk-header
+
+EOF
+}
 
 sub _default_text ($$$) {
         my ($ctx, $key, $txt) = @_;
+        return _colors_help($ctx, $txt) if $key eq 'color';
         return if $key ne 'help'; # TODO more keys?
 
         my $ibx = $ctx->{-inbox};