about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-02-01 22:12:52 +0000
committerEric Wong <e@80x24.org>2019-02-05 04:35:29 +0000
commit738f4daed7f0555f7ac11dc2f527bc53dddd4e5b (patch)
tree13eae96b43d8f07c7bf1b94ae795384fb6fd7a6c
parent390441cbff937a6048c257df9c91474d63a629bc (diff)
downloadpublic-inbox-738f4daed7f0555f7ac11dc2f527bc53dddd4e5b.tar.gz
Favor in-place utf8::decode since it's a bit faster without
method dispatch overhead; we don't care about validity just
yet.

HlMod->do_hl itself should return "utf8" strings, since other
parts of our code can use it, so it's not the job of ViewVCS to
post-process HlMod output.
-rw-r--r--lib/PublicInbox/HlMod.pm7
-rw-r--r--lib/PublicInbox/ViewVCS.pm6
-rw-r--r--t/hl_mod.t1
3 files changed, 9 insertions, 5 deletions
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
index 237ffaca..decfd714 100644
--- a/lib/PublicInbox/HlMod.pm
+++ b/lib/PublicInbox/HlMod.pm
@@ -107,7 +107,12 @@ sub do_hl {
                 $g->setEncoding('utf-8');
                 $g;
         };
-        \($gen->generateString($$str))
+
+        # we assume $$str is valid UTF-8, but the SWIG binding doesn't
+        # know that, so ensure it's marked as UTF-8 even if it isn't...
+        my $out = $gen->generateString($$str);
+        utf8::decode($out);
+        \$out;
 }
 
 # SWIG instances aren't reference-counted, but $self is;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index d67b5eb4..acdd822d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -16,7 +16,6 @@
 package PublicInbox::ViewVCS;
 use strict;
 use warnings;
-use Encode qw(find_encoding);
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream;
 use PublicInbox::Linkify;
@@ -33,7 +32,6 @@ END { $hl = undef };
 
 my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
 my $max_size = 1024 * 1024; # TODO: configurable
-my $enc_utf8 = find_encoding('UTF-8');
 my $BIN_DETECT = 8000; # same as git
 
 sub html_page ($$$) {
@@ -122,14 +120,14 @@ sub solve_result {
                 return html_page($ctx, 200, \$log);
         }
 
-        $$blob = $enc_utf8->decode($$blob);
+        # TODO: detect + convert to ensure validity
+        utf8::decode($$blob);
         my $nl = ($$blob =~ tr/\n/\n/);
         my $pad = length($nl);
 
         $l->linkify_1($$blob);
         my $ok = $hl->do_hl($blob, $path) if $hl;
         if ($ok) {
-                $$ok = $enc_utf8->decode($$ok);
                 src_escape($$ok);
                 $blob = $ok;
         } else {
diff --git a/t/hl_mod.t b/t/hl_mod.t
index 80f88907..c402f1f7 100644
--- a/t/hl_mod.t
+++ b/t/hl_mod.t
@@ -19,6 +19,7 @@ my $orig = $str;
 {
         my $ref = $hls->do_hl(\$str, 'foo.perl');
         is(ref($ref), 'SCALAR', 'got a scalar reference back');
+        ok(utf8::valid($$ref), 'resulting string is utf8::valid');
         like($$ref, qr/I can see you!/, 'we can see ourselves in output');
         like($$ref, qr/&amp;&amp;/, 'escaped');