about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-02-05 22:24:19 +0000
committer Eric Wong <e@80x24.org> 2019-02-05 22:24:19 +0000
commit 995933c8941ff6b3425483aad8fb0576a25cee1a (patch)
tree eec1e8c706d676d228a4a1d566e02be0e30346b1
parent 242da3fbe771f514dfc5ddabc0d02f8a0f23cd52 (diff)
parent 8b8577f91109e76d12fcfa55a2e7388f54a41be6 (diff)
download public-inbox-995933c8941ff6b3425483aad8fb0576a25cee1a.tar.gz
* origin/help-color:
  wwwtext: inline sample CSS and use highlight
  hlmod: support "```$LANG" blocks in text
  hlmod: do_hl* performs src_escape immediately
  hlmod: make into a singleton
  hlmod: hoist out do_hl_lang sub
  viewvcs: cleanup utf8 handling
-rw-r--r-- contrib/css/216dark.css 30
-rw-r--r-- lib/PublicInbox/HlMod.pm 70
-rw-r--r-- lib/PublicInbox/UserContent.pm 16
-rw-r--r-- lib/PublicInbox/ViewVCS.pm 14
-rw-r--r-- lib/PublicInbox/WwwText.pm 35
-rw-r--r-- t/hl_mod.t 34
6 files changed, 114 insertions, 85 deletions
diff --git a/contrib/css/216dark.css b/contrib/css/216dark.css
index 882fbc40..b18b0576 100644
--- a/contrib/css/216dark.css
+++ b/contrib/css/216dark.css
@@ -13,22 +13,22 @@
 a { color:#69f; text-decoration:none }
 a:visited { color:#96f }
 
-/* quoted text gets a different color */
+/* quoted text in emails gets a different color */
 *.q { color:#09f }
 
 /*
- * these may be used with cgit, too
+ * these may be used with cgit <https://git.zx2c4.com/cgit/>, too.
  * (cgit uses <div>, public-inbox uses <span>)
  */
-*.add { color:#0ff }
-*.del { color:#f0f }
-*.head { color:#fff }
-*.hunk { color:#c93 }
+*.add { color:#0ff } /* diff post-image lines */
+*.del { color:#f0f } /* diff pre-image lines */
+*.head { color:#fff } /* diff header (metainformation) */
+*.hunk { color:#c93 } /* diff hunk-header */
 
 /*
- * highlight 3.x colors (tested 3.18)
- * this doesn't use most of the colors available (I find too many
- * colors overwhelming).  So the #ccc default is commented out.
+ * highlight 3.x colors (tested 3.18) for displaying blobs.
+ * This doesn't use most of the colors available, as I find too
+ * many colors overwhelming, so the default is commented out.
  */
 .hl.num { color:#f30 } /* number */
 .hl.esc { color:#f0f } /* escape character */
@@ -36,11 +36,15 @@ a:visited { color:#96f }
 .hl.ppc { color:#f0f } /* preprocessor */
 .hl.pps { color:#f30 } /* preprocessor string */
 .hl.slc { color:#09f } /* single-line comment */
-.hl.com { color:#09f }
-/* .hl.opt { color:#ccc } */
-/* .hl.ipl { color:#ccc } */
-/* .hl.lin { color:#ccc } */
+.hl.com { color:#09f } /* multi-line comment */
+/* .hl.opt { color:#ccc } */ /* operator */
+/* .hl.ipl { color:#ccc } */ /* interpolation */
+
+/* keyword groups kw[a-z] */
 .hl.kwa { color:#ff0 }
 .hl.kwb { color:#0f0 }
 .hl.kwc { color:#ff0 }
 /* .hl.kwd { color:#ccc } */
+
+/* line-number (unused by public-inbox) */
+/* .hl.lin { color:#ccc } */
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
index 237ffaca..36e31106 100644
--- a/lib/PublicInbox/HlMod.pm
+++ b/lib/PublicInbox/HlMod.pm
@@ -16,6 +16,8 @@ package PublicInbox::HlMod;
 use strict;
 use warnings;
 use highlight; # SWIG-generated stuff
+use PublicInbox::Hval qw(src_escape ascii_html);
+my $hl;
 
 sub _parse_filetypes ($) {
         my $ft_conf = $_[0]->searchFile('filetypes.conf') or
@@ -52,16 +54,20 @@ sub _parse_filetypes ($) {
         (\%ext2lang, \@shebang);
 }
 
+# We only need one instance, so we don't need to do
+# highlight::CodeGenerator::deleteInstance
 sub new {
         my ($class) = @_;
-        my $dir = highlight::DataDir->new;
-        $dir->initSearchDirectories('');
-        my ($ext2lang, $shebang) = _parse_filetypes($dir);
-        bless {
-                -dir => $dir,
-                -ext2lang => $ext2lang,
-                -shebang => $shebang,
-        }, $class;
+        $hl ||= do {
+                my $dir = highlight::DataDir->new;
+                $dir->initSearchDirectories('');
+                my ($ext2lang, $shebang) = _parse_filetypes($dir);
+                bless {
+                        -dir => $dir,
+                        -ext2lang => $ext2lang,
+                        -shebang => $shebang,
+                }, $class;
+        };
 }
 
 sub _shebang2lang ($$) {
@@ -83,19 +89,24 @@ sub _path2lang ($$) {
 sub do_hl {
         my ($self, $str, $path) = @_;
         my $lang = _path2lang($self, $path) if defined $path;
+        do_hl_lang($self, $str, $lang);
+}
+
+sub do_hl_lang {
+        my ($self, $str, $lang) = @_;
+
         my $dir = $self->{-dir};
         my $langpath;
+
         if (defined $lang) {
                 $langpath = $dir->getLangPath("$lang.lang") or return;
-                $langpath = undef unless -f $langpath;
+                $lang = undef unless -f $langpath
         }
-        unless (defined $langpath) {
+        unless (defined $lang) {
                 $lang = _shebang2lang($self, $str) or return;
                 $langpath = $dir->getLangPath("$lang.lang") or return;
-                $langpath = undef unless -f $langpath;
+                return unless -f $langpath
         }
-        return unless defined $langpath;
-
         my $gen = $self->{$langpath} ||= do {
                 my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
                 $g->setFragmentCode(1); # generate html fragment
@@ -107,19 +118,32 @@ sub do_hl {
                 $g->setEncoding('utf-8');
                 $g;
         };
-        \($gen->generateString($$str))
+
+        # we assume $$str is valid UTF-8, but the SWIG binding doesn't
+        # know that, so ensure it's marked as UTF-8 even if it isn't...
+        my $out = $gen->generateString($$str);
+        utf8::decode($out);
+        src_escape($out);
+        \$out;
 }
 
-# SWIG instances aren't reference-counted, but $self is;
-# so we need to delete all the CodeGenerator instances manually
-# at our own destruction
-sub DESTROY {
-        my ($self) = @_;
-        foreach my $gen (values %$self) {
-                if (ref($gen) eq 'highlight::CodeGenerator') {
-                        highlight::CodeGenerator::deleteInstance($gen);
+# Highlight text, but support Markdown "```$LANG" notation
+# while preserving WYSIWYG of plain-text documentation.
+# This is NOT to be enabled by default or encouraged for parsing
+# emails, since it is NOT stable and can lead to standards
+# proliferation of email.
+sub do_hl_text {
+        my ($self, $str) = @_;
+
+        $$str = join('', map {
+                if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) {
+                        my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4);
+                        my $hl = do_hl_lang($self, \$code, $lang) || \$code;
+                        $pfx . $lang . "\n" . $$hl . $post;
+                } else {
+                        ascii_html($_);
                 }
-        }
+        } split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str));
 }
 
 1;
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
index df0429c3..468e6cf1 100644
--- a/lib/PublicInbox/UserContent.pm
+++ b/lib/PublicInbox/UserContent.pm
@@ -38,9 +38,9 @@ sub CSS () {
         *.hunk { color:#c93 }
 
         /*
-         * highlight 3.x colors (tested 3.18)
-         * this doesn't use most of the colors available (I find too many
-         * colors overwhelming).  So the #ccc default is commented out.
+         * highlight 3.x colors (tested 3.18) for displaying blobs.
+         * This doesn't use most of the colors available (I find too many
+         * colors overwhelming), so the #ccc default is commented out.
          */
         .hl.num { color:#f30 } /* number */
         .hl.esc { color:#f0f } /* escape character */
@@ -48,10 +48,12 @@ sub CSS () {
         .hl.ppc { color:#f0f } /* preprocessor */
         .hl.pps { color:#f30 } /* preprocessor string */
         .hl.slc { color:#09f } /* single-line comment */
-        .hl.com { color:#09f }
-        /* .hl.opt { color:#ccc } */
-        /* .hl.ipl { color:#ccc } */
-        /* .hl.lin { color:#ccc } */
+        .hl.com { color:#09f } /* multi-line comment */
+        /* .hl.opt { color:#ccc } */ /* operator */
+        /* .hl.ipl { color:#ccc } */ /* interpolation */
+        /* .hl.lin { color:#ccc } */ /* line-number (unused by public-inbox) */
+
+        /* keyword groups kw[a-z] */
         .hl.kwa { color:#ff0 }
         .hl.kwb { color:#0f0 }
         .hl.kwc { color:#ff0 }
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index d67b5eb4..f6a76942 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -16,24 +16,17 @@
 package PublicInbox::ViewVCS;
 use strict;
 use warnings;
-use Encode qw(find_encoding);
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream;
 use PublicInbox::Linkify;
-use PublicInbox::Hval qw(ascii_html to_filename src_escape);
+use PublicInbox::Hval qw(ascii_html to_filename);
 my $hl = eval {
         require PublicInbox::HlMod;
         PublicInbox::HlMod->new;
 };
 
-# we need to trigger highlight::CodeGenerator::deleteInstance
-# in HlMod::DESTROY before the rest of Perl shuts down to avoid
-# a segfault at shutdown
-END { $hl = undef };
-
 my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
 my $max_size = 1024 * 1024; # TODO: configurable
-my $enc_utf8 = find_encoding('UTF-8');
 my $BIN_DETECT = 8000; # same as git
 
 sub html_page ($$$) {
@@ -122,15 +115,14 @@ sub solve_result {
                 return html_page($ctx, 200, \$log);
         }
 
-        $$blob = $enc_utf8->decode($$blob);
+        # TODO: detect + convert to ensure validity
+        utf8::decode($$blob);
         my $nl = ($$blob =~ tr/\n/\n/);
         my $pad = length($nl);
 
         $l->linkify_1($$blob);
         my $ok = $hl->do_hl($blob, $path) if $hl;
         if ($ok) {
-                $$ok = $enc_utf8->decode($$ok);
-                src_escape($$ok);
                 $blob = $ok;
         } else {
                 $$blob = ascii_html($$blob);
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index d3413ad7..adadc37a 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -10,6 +10,10 @@ use PublicInbox::WwwStream;
 use PublicInbox::Hval qw(ascii_html);
 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
+my $hl = eval {
+        require PublicInbox::HlMod;
+        PublicInbox::HlMod->new
+};
 
 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
 # For now, "help" is the only supported $KEY
@@ -61,7 +65,13 @@ sub get_text {
 
 sub _do_linkify {
         my $l = PublicInbox::Linkify->new;
-        $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0])));
+        $l->linkify_1($_[0]);
+        if ($hl) {
+                $hl->do_hl_text(\($_[0]));
+        } else {
+                $_[0] = ascii_html($_[0]);
+        }
+        $_[0] = $l->linkify_2($_[0]);
 }
 
 sub _srch_prefix ($$) {
@@ -91,7 +101,8 @@ sub _srch_prefix ($$) {
 sub _colors_help ($$) {
         my ($ctx, $txt) = @_;
         my $ibx = $ctx->{-inbox};
-        my $base_url = $ibx->base_url($ctx->{env});
+        my $env = $ctx->{env};
+        my $base_url = $ibx->base_url($env);
         $$txt .= "color customization for $base_url\n";
         $$txt .= <<EOF;
 
@@ -104,23 +115,11 @@ to control the colors they see:
 
         ${base_url}userContent.css
 
-CSS classes
------------
-
-           span.q - quoted text in email messages
-
-For diff highlighting, we try to match class names with those
-used by cgit: https://git.zx2c4.com/cgit/
-
-         span.add - diff post-image lines
-
-         span.del - diff pre-image lines
-
-        span.head - diff header (metainformation)
-
-        span.hunk - diff hunk-header
-
+CSS sample
+----------
+```css
 EOF
+        $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
 }
 
 sub _default_text ($$$) {
diff --git a/t/hl_mod.t b/t/hl_mod.t
index 80f88907..84a4b576 100644
--- a/t/hl_mod.t
+++ b/t/hl_mod.t
@@ -19,8 +19,11 @@ my $orig = $str;
 {
         my $ref = $hls->do_hl(\$str, 'foo.perl');
         is(ref($ref), 'SCALAR', 'got a scalar reference back');
+        ok(utf8::valid($$ref), 'resulting string is utf8::valid');
         like($$ref, qr/I can see you!/, 'we can see ourselves in output');
         like($$ref, qr/&amp;&amp;/, 'escaped');
+        my $lref = $hls->do_hl_lang(\$str, 'perl');
+        is($$ref, $$lref, 'do_hl_lang matches do_hl');
 
         use PublicInbox::Spawn qw(which);
         if (eval { require IPC::Run } && which('w3m')) {
@@ -37,19 +40,24 @@ my $orig = $str;
         }
 }
 
-my $nr = $ENV{TEST_MEMLEAK};
-if ($nr && -r "/proc/$$/status") {
-        my $fh;
-        open $fh, '<', "/proc/$$/status";
-        diag "starting at memtest at ".join('', grep(/VmRSS:/, <$fh>));
-        PublicInbox::HlMod->new->do_hl(\$orig) for (1..$nr);
-        open $fh, '<', "/proc/$$/status";
-        diag "creating $nr instances: ".join('', grep(/VmRSS:/, <$fh>));
-        my $hls = PublicInbox::HlMod->new;
-        $hls->do_hl(\$orig) for (1..$nr);
-        $hls = undef;
-        open $fh, '<', "/proc/$$/status";
-        diag "reused instance $nr times: ".join('', grep(/VmRSS:/, <$fh>));
+if ('experimental, only for help text') {
+        my $tmp = <<'EOF';
+:>
+```perl
+my $foo = 1 & 2;
+```
+:<
+EOF
+        $hls->do_hl_text(\$tmp);
+        my @hl = split(/^/m, $tmp);
+        is($hl[0], ":&gt;\n", 'first line escaped');
+        is($hl[1], "```perl\n", '2nd line preserved');
+        like($hl[2], qr/<span\b/, 'code highlighted');
+        like($hl[2], qr/&amp;/, 'ampersand escaped');
+        is($hl[3], "```\n", '4th line preserved');
+        is($hl[4], ":&lt;\n", '5th line escaped');
+        is(scalar(@hl), 5, 'no extra line');
+
 }
 
 done_testing;