about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-02-05 22:24:19 +0000
committer Eric Wong <e@80x24.org> 2019-02-05 22:24:19 +0000
commit 995933c8941ff6b3425483aad8fb0576a25cee1a (patch)
tree eec1e8c706d676d228a4a1d566e02be0e30346b1 /lib
parent 242da3fbe771f514dfc5ddabc0d02f8a0f23cd52 (diff)
parent 8b8577f91109e76d12fcfa55a2e7388f54a41be6 (diff)
download public-inbox-995933c8941ff6b3425483aad8fb0576a25cee1a.tar.gz
* origin/help-color:
  wwwtext: inline sample CSS and use highlight
  hlmod: support "```$LANG" blocks in text
  hlmod: do_hl* performs src_escape immediately
  hlmod: make into a singleton
  hlmod: hoist out do_hl_lang sub
  viewvcs: cleanup utf8 handling
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/HlMod.pm 70
-rw-r--r-- lib/PublicInbox/UserContent.pm 16
-rw-r--r-- lib/PublicInbox/ViewVCS.pm 14
-rw-r--r-- lib/PublicInbox/WwwText.pm 35
4 files changed, 76 insertions, 59 deletions
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
index 237ffaca..36e31106 100644
--- a/lib/PublicInbox/HlMod.pm
+++ b/lib/PublicInbox/HlMod.pm
@@ -16,6 +16,8 @@ package PublicInbox::HlMod;
 use strict;
 use warnings;
 use highlight; # SWIG-generated stuff
+use PublicInbox::Hval qw(src_escape ascii_html);
+my $hl;
 
 sub _parse_filetypes ($) {
         my $ft_conf = $_[0]->searchFile('filetypes.conf') or
@@ -52,16 +54,20 @@ sub _parse_filetypes ($) {
         (\%ext2lang, \@shebang);
 }
 
+# We only need one instance, so we don't need to do
+# highlight::CodeGenerator::deleteInstance
 sub new {
         my ($class) = @_;
-        my $dir = highlight::DataDir->new;
-        $dir->initSearchDirectories('');
-        my ($ext2lang, $shebang) = _parse_filetypes($dir);
-        bless {
-                -dir => $dir,
-                -ext2lang => $ext2lang,
-                -shebang => $shebang,
-        }, $class;
+        $hl ||= do {
+                my $dir = highlight::DataDir->new;
+                $dir->initSearchDirectories('');
+                my ($ext2lang, $shebang) = _parse_filetypes($dir);
+                bless {
+                        -dir => $dir,
+                        -ext2lang => $ext2lang,
+                        -shebang => $shebang,
+                }, $class;
+        };
 }
 
 sub _shebang2lang ($$) {
@@ -83,19 +89,24 @@ sub _path2lang ($$) {
 sub do_hl {
         my ($self, $str, $path) = @_;
         my $lang = _path2lang($self, $path) if defined $path;
+        do_hl_lang($self, $str, $lang);
+}
+
+sub do_hl_lang {
+        my ($self, $str, $lang) = @_;
+
         my $dir = $self->{-dir};
         my $langpath;
+
         if (defined $lang) {
                 $langpath = $dir->getLangPath("$lang.lang") or return;
-                $langpath = undef unless -f $langpath;
+                $lang = undef unless -f $langpath
         }
-        unless (defined $langpath) {
+        unless (defined $lang) {
                 $lang = _shebang2lang($self, $str) or return;
                 $langpath = $dir->getLangPath("$lang.lang") or return;
-                $langpath = undef unless -f $langpath;
+                return unless -f $langpath
         }
-        return unless defined $langpath;
-
         my $gen = $self->{$langpath} ||= do {
                 my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
                 $g->setFragmentCode(1); # generate html fragment
@@ -107,19 +118,32 @@ sub do_hl {
                 $g->setEncoding('utf-8');
                 $g;
         };
-        \($gen->generateString($$str))
+
+        # we assume $$str is valid UTF-8, but the SWIG binding doesn't
+        # know that, so ensure it's marked as UTF-8 even if it isn't...
+        my $out = $gen->generateString($$str);
+        utf8::decode($out);
+        src_escape($out);
+        \$out;
 }
 
-# SWIG instances aren't reference-counted, but $self is;
-# so we need to delete all the CodeGenerator instances manually
-# at our own destruction
-sub DESTROY {
-        my ($self) = @_;
-        foreach my $gen (values %$self) {
-                if (ref($gen) eq 'highlight::CodeGenerator') {
-                        highlight::CodeGenerator::deleteInstance($gen);
+# Highlight text, but support Markdown "```$LANG" notation
+# while preserving WYSIWYG of plain-text documentation.
+# This is NOT to be enabled by default or encouraged for parsing
+# emails, since it is NOT stable and can lead to standards
+# proliferation of email.
+sub do_hl_text {
+        my ($self, $str) = @_;
+
+        $$str = join('', map {
+                if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) {
+                        my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4);
+                        my $hl = do_hl_lang($self, \$code, $lang) || \$code;
+                        $pfx . $lang . "\n" . $$hl . $post;
+                } else {
+                        ascii_html($_);
                 }
-        }
+        } split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str));
 }
 
 1;
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
index df0429c3..468e6cf1 100644
--- a/lib/PublicInbox/UserContent.pm
+++ b/lib/PublicInbox/UserContent.pm
@@ -38,9 +38,9 @@ sub CSS () {
         *.hunk { color:#c93 }
 
         /*
-         * highlight 3.x colors (tested 3.18)
-         * this doesn't use most of the colors available (I find too many
-         * colors overwhelming).  So the #ccc default is commented out.
+         * highlight 3.x colors (tested 3.18) for displaying blobs.
+         * This doesn't use most of the colors available (I find too many
+         * colors overwhelming), so the #ccc default is commented out.
          */
         .hl.num { color:#f30 } /* number */
         .hl.esc { color:#f0f } /* escape character */
@@ -48,10 +48,12 @@ sub CSS () {
         .hl.ppc { color:#f0f } /* preprocessor */
         .hl.pps { color:#f30 } /* preprocessor string */
         .hl.slc { color:#09f } /* single-line comment */
-        .hl.com { color:#09f }
-        /* .hl.opt { color:#ccc } */
-        /* .hl.ipl { color:#ccc } */
-        /* .hl.lin { color:#ccc } */
+        .hl.com { color:#09f } /* multi-line comment */
+        /* .hl.opt { color:#ccc } */ /* operator */
+        /* .hl.ipl { color:#ccc } */ /* interpolation */
+        /* .hl.lin { color:#ccc } */ /* line-number (unused by public-inbox) */
+
+        /* keyword groups kw[a-z] */
         .hl.kwa { color:#ff0 }
         .hl.kwb { color:#0f0 }
         .hl.kwc { color:#ff0 }
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index d67b5eb4..f6a76942 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -16,24 +16,17 @@
 package PublicInbox::ViewVCS;
 use strict;
 use warnings;
-use Encode qw(find_encoding);
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream;
 use PublicInbox::Linkify;
-use PublicInbox::Hval qw(ascii_html to_filename src_escape);
+use PublicInbox::Hval qw(ascii_html to_filename);
 my $hl = eval {
         require PublicInbox::HlMod;
         PublicInbox::HlMod->new;
 };
 
-# we need to trigger highlight::CodeGenerator::deleteInstance
-# in HlMod::DESTROY before the rest of Perl shuts down to avoid
-# a segfault at shutdown
-END { $hl = undef };
-
 my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
 my $max_size = 1024 * 1024; # TODO: configurable
-my $enc_utf8 = find_encoding('UTF-8');
 my $BIN_DETECT = 8000; # same as git
 
 sub html_page ($$$) {
@@ -122,15 +115,14 @@ sub solve_result {
                 return html_page($ctx, 200, \$log);
         }
 
-        $$blob = $enc_utf8->decode($$blob);
+        # TODO: detect + convert to ensure validity
+        utf8::decode($$blob);
         my $nl = ($$blob =~ tr/\n/\n/);
         my $pad = length($nl);
 
         $l->linkify_1($$blob);
         my $ok = $hl->do_hl($blob, $path) if $hl;
         if ($ok) {
-                $$ok = $enc_utf8->decode($$ok);
-                src_escape($$ok);
                 $blob = $ok;
         } else {
                 $$blob = ascii_html($$blob);
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index d3413ad7..adadc37a 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -10,6 +10,10 @@ use PublicInbox::WwwStream;
 use PublicInbox::Hval qw(ascii_html);
 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
+my $hl = eval {
+        require PublicInbox::HlMod;
+        PublicInbox::HlMod->new
+};
 
 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
 # For now, "help" is the only supported $KEY
@@ -61,7 +65,13 @@ sub get_text {
 
 sub _do_linkify {
         my $l = PublicInbox::Linkify->new;
-        $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0])));
+        $l->linkify_1($_[0]);
+        if ($hl) {
+                $hl->do_hl_text(\($_[0]));
+        } else {
+                $_[0] = ascii_html($_[0]);
+        }
+        $_[0] = $l->linkify_2($_[0]);
 }
 
 sub _srch_prefix ($$) {
@@ -91,7 +101,8 @@ sub _srch_prefix ($$) {
 sub _colors_help ($$) {
         my ($ctx, $txt) = @_;
         my $ibx = $ctx->{-inbox};
-        my $base_url = $ibx->base_url($ctx->{env});
+        my $env = $ctx->{env};
+        my $base_url = $ibx->base_url($env);
         $$txt .= "color customization for $base_url\n";
         $$txt .= <<EOF;
 
@@ -104,23 +115,11 @@ to control the colors they see:
 
         ${base_url}userContent.css
 
-CSS classes
------------
-
-           span.q - quoted text in email messages
-
-For diff highlighting, we try to match class names with those
-used by cgit: https://git.zx2c4.com/cgit/
-
-         span.add - diff post-image lines
-
-         span.del - diff pre-image lines
-
-        span.head - diff header (metainformation)
-
-        span.hunk - diff hunk-header
-
+CSS sample
+----------
+```css
 EOF
+        $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
 }
 
 sub _default_text ($$$) {