From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E9401211B5 for ; Wed, 30 Jan 2019 04:44:30 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 01/11] hval: add src_escape for highlight post-processing Date: Wed, 30 Jan 2019 04:44:20 +0000 Message-Id: <20190130044430.28189-2-e@80x24.org> In-Reply-To: <20190130044430.28189-1-e@80x24.org> References: <20190130044430.28189-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We need to post-process "highlight" output to ensure it doesn't contain odd bytes which cause "wide character" warnings or require odd glyphs in source form. --- lib/PublicInbox/Hval.pm | 8 +++++++- lib/PublicInbox/ViewVCS.pm | 4 +++- t/hval.t | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 4d70d5e..53810b3 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -9,7 +9,7 @@ use warnings; use Encode qw(find_encoding); use PublicInbox::MID qw/mid_clean mid_escape/; use base qw/Exporter/; -our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/; +our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape/; my $enc_ascii = find_encoding('us-ascii'); @@ -63,6 +63,12 @@ my %xhtml_map = ( $xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31); %xhtml_map = (%xhtml_map, %escape_sequence); +sub src_escape ($) { + $_[0] =~ s/\r\n/\n/sg; + $_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge; + $_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF); +} + sub ascii_html { my ($s) = @_; $s =~ s/\r\n/\n/sg; # fixup bad line endings diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index a8aa0b6..63e503d 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -20,7 +20,7 @@ use Encode qw(find_encoding); use PublicInbox::SolverGit; use PublicInbox::WwwStream; use PublicInbox::Linkify; -use PublicInbox::Hval qw(ascii_html to_filename); +use PublicInbox::Hval qw(ascii_html to_filename src_escape); my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new; @@ -96,6 +96,8 @@ sub solve_result { $l->linkify_1($$blob); my $ok = $hl->do_hl($blob, $path) if $hl; if ($ok) { + $$ok = $enc_utf8->decode($$ok); + src_escape($$ok); $blob = $ok; } else { $$blob = ascii_html($$blob); diff --git a/t/hval.t b/t/hval.t index a193c29..bfc9a85 100644 --- a/t/hval.t +++ b/t/hval.t @@ -43,5 +43,8 @@ is('foo-bar', PublicInbox::Hval::to_filename("foo bar\nanother line\n"), is('foo.bar', PublicInbox::Hval::to_filename("foo....bar"), 'to_filename squeezes -'); +my $s = "\0\x07\n"; +PublicInbox::Hval::src_escape($s); +is($s, "\\0\\a\n", 'src_escape works as intended'); done_testing(); -- EW