about summary refs log tree commit homepage
path: root/lib/PublicInbox/Hval.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-10-09 17:56:23 +0000
committerEric Wong <e@80x24.org>2023-10-09 18:41:36 +0000
commit5754faeb3fa1c9aaeff8922b449127cfbc86236d (patch)
tree09381346561d119ef30d6465baacc44f8a9d32dd /lib/PublicInbox/Hval.pm
parent02cd38ea042e01f343d52f8401cd56cf8e37dd9d (diff)
downloadpublic-inbox-5754faeb3fa1c9aaeff8922b449127cfbc86236d.tar.gz
We can't assume git output is UTF-8, and we'll always have
legacy data in git coderepos.  So attempt to display some
garbled text rather than nothing at all if Perl croaks
on it.

sox commit c38987e8d20505621b8d872863afa7d233ed1096
(Added raw inverse-bit u-law and A-law support.  Updated *.txt files., 2001-12-13)
is an example of a commit which caused problems for me.
Diffstat (limited to 'lib/PublicInbox/Hval.pm')
-rw-r--r--lib/PublicInbox/Hval.pm9
1 files changed, 7 insertions, 2 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 0677865e..e9b9ae64 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -4,13 +4,13 @@
 # represents a header value in various forms.  Used for HTML generation
 # in our web interface(s)
 package PublicInbox::Hval;
+use v5.10.1; # be careful about unicode_strings in v5.12;
 use strict;
-use warnings;
 use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
-                to_attr prurl mid_href fmt_ts ts2str/;
+                to_attr prurl mid_href fmt_ts ts2str utf8_maybe/;
 use POSIX qw(strftime);
 my $enc_ascii = find_encoding('us-ascii');
 
@@ -137,4 +137,9 @@ sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) };
 # human-friendly format
 sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
+sub utf8_maybe ($) {
+        utf8::decode($_[0]);
+        utf8::valid($_[0]) or utf8::encode($_[0]); # non-UTF-8 data exists
+}
+
 1;