diff options
author | Eric Wong <e@80x24.org> | 2023-10-09 17:56:23 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-10-09 18:41:36 +0000 |
commit | 5754faeb3fa1c9aaeff8922b449127cfbc86236d (patch) | |
tree | 09381346561d119ef30d6465baacc44f8a9d32dd /lib/PublicInbox/Hval.pm | |
parent | 02cd38ea042e01f343d52f8401cd56cf8e37dd9d (diff) | |
download | public-inbox-5754faeb3fa1c9aaeff8922b449127cfbc86236d.tar.gz |
We can't assume git output is UTF-8, and we'll always have legacy data in git coderepos. So attempt to display some some garbled text rather than nothing at all if Perl croaks on it. sox commit c38987e8d20505621b8d872863afa7d233ed1096 (Added raw inverse-bit u-law and A-law support. Updated *.txt files., 2001-12-13) is an example of a commit which caused problems for me.
Diffstat (limited to 'lib/PublicInbox/Hval.pm')
-rw-r--r-- | lib/PublicInbox/Hval.pm | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 0677865e..e9b9ae64 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -4,13 +4,13 @@ # represents a header value in various forms. Used for HTML generation # in our web interface(s) package PublicInbox::Hval; +use v5.10.1; # be careful about unicode_strings in v5.12; use strict; -use warnings; use Encode qw(find_encoding); use PublicInbox::MID qw/mid_clean mid_escape/; use base qw/Exporter/; our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape - to_attr prurl mid_href fmt_ts ts2str/; + to_attr prurl mid_href fmt_ts ts2str utf8_maybe/; use POSIX qw(strftime); my $enc_ascii = find_encoding('us-ascii'); @@ -137,4 +137,9 @@ sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) }; # human-friendly format sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } +sub utf8_maybe ($) { + utf8::decode($_[0]); + utf8::valid($_[0]) or utf8::encode($_[0]); # non-UTF-8 data exists +} + 1; |