* [PATCH 1/2] hval: to_attr: support wide characters
2020-01-19 9:40 [PATCH 0/2] hval: handle wide characters properly Eric Wong
@ 2020-01-19 9:40 ` Eric Wong
2020-01-19 9:40 ` [PATCH 2/2] hval: from_attr: move to unit test Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-01-19 9:40 UTC (permalink / raw)
To: meta
We need to escape wide characters when making attribute names from
filename-like strings in diffstats.
---
lib/PublicInbox/Hval.pm | 3 +++
t/solve/0001-simple-mod.patch | 2 ++
t/solver_git.t | 11 ++++++++++-
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 7e007027..39256ee0 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -139,10 +139,12 @@ sub to_attr ($) {
return if index($str, '//') >= 0;
my $first = '';
+ utf8::encode($str); # to octets
if ($str =~ s/\A([^A-Ya-z])//ms) { # start with a letter
$first = sprintf('Z%02x', ord($1));
}
$str =~ s/([^A-Za-z0-9_\.\-])/$ESCAPES{$1}/egms;
+ utf8::decode($str); # allow wide chars
$first . $str;
}
@@ -155,6 +157,7 @@ sub from_attr ($) {
}
$str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms;
$str =~ tr!:!/!;
+ utf8::decode($str);
$first . $str;
}
diff --git a/t/solve/0001-simple-mod.patch b/t/solve/0001-simple-mod.patch
index c6bb1575..c55fe310 100644
--- a/t/solve/0001-simple-mod.patch
+++ b/t/solve/0001-simple-mod.patch
@@ -3,9 +3,11 @@ To: meta@public-inbox.org
Subject: [PATCH] TODO: take expert web design advice
Date: Mon, 1 Apr 2019 08:15:20 +0000
Message-Id: <20190401081523.16213-1-BOFH@YHBT.net>
+Content-Type: text/plain; charset=utf-8
---
TODO | 2 ++
+ Ω | 5 --
1 file changed, 2 insertions(+)
diff --git a/TODO b/TODO
diff --git a/t/solver_git.t b/t/solver_git.t
index 92402c3a..92c07334 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -154,7 +154,16 @@ EOF
my $non_existent = 'ee5e32211bf62ab6531bdf39b84b6920d0b6775a';
my $client = sub {
my ($cb) = @_;
- my $res = $cb->(GET("/$name/3435775/s/"));
+ my $mid = '20190401081523.16213-1-BOFH@YHBT.net';
+ my @warn;
+ my $res = do {
+ local $SIG{__WARN__} = sub { push @warn, @_ };
+ $cb->(GET("/$name/$mid/"));
+ };
+ is_deeply(\@warn, [], 'no warnings from rendering diff');
+ like($res->content, qr!>Ω</a>!, 'omega escaped');
+
+ $res = $cb->(GET("/$name/3435775/s/"));
is($res->code, 200, 'success with existing blob');
$res = $cb->(GET("/$name/".('0'x40).'/s/'));
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] hval: from_attr: move to unit test
2020-01-19 9:40 [PATCH 0/2] hval: handle wide characters properly Eric Wong
2020-01-19 9:40 ` [PATCH 1/2] hval: to_attr: support wide characters Eric Wong
@ 2020-01-19 9:40 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-01-19 9:40 UTC (permalink / raw)
To: meta
We don't call from_attr anywhere outside of tests, so don't
bloat normal processes with it.
---
lib/PublicInbox/Hval.pm | 15 +--------------
t/hval.t | 15 ++++++++++++++-
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 39256ee0..2589c519 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -10,7 +10,7 @@ use Encode qw(find_encoding);
use PublicInbox::MID qw/mid_clean mid_escape/;
use base qw/Exporter/;
our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
- to_attr from_attr prurl/;
+ to_attr prurl/;
my $enc_ascii = find_encoding('us-ascii');
# safe-ish acceptable filename pattern for portability
@@ -148,17 +148,4 @@ sub to_attr ($) {
$first . $str;
}
-# reverse the result of to_attr
-sub from_attr ($) {
- my ($str) = @_;
- my $first = '';
- if ($str =~ s/\AZ([a-f0-9]{2})//ms) {
- $first = chr(hex($1));
- }
- $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms;
- $str =~ tr!:!/!;
- utf8::decode($str);
- $first . $str;
-}
-
1;
diff --git a/t/hval.t b/t/hval.t
index f93d6be6..53f52f07 100644
--- a/t/hval.t
+++ b/t/hval.t
@@ -3,7 +3,20 @@
use strict;
use warnings;
use Test::More;
-use_ok 'PublicInbox::Hval', qw(from_attr to_attr);
+use_ok 'PublicInbox::Hval', qw(to_attr);
+
+# reverse the result of to_attr
+sub from_attr ($) {
+ my ($str) = @_;
+ my $first = '';
+ if ($str =~ s/\AZ([a-f0-9]{2})//ms) {
+ $first = chr(hex($1));
+ }
+ $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms;
+ $str =~ tr!:!/!;
+ utf8::decode($str);
+ $first . $str;
+}
my $ibx = {
-no_obfuscate_re => qr/(?:example\.com)\z/i,
^ permalink raw reply related [flat|nested] 3+ messages in thread