diff options
author | Eric Wong <e@80x24.org> | 2023-10-09 17:56:23 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-10-09 18:41:36 +0000 |
commit | 5754faeb3fa1c9aaeff8922b449127cfbc86236d (patch) | |
tree | 09381346561d119ef30d6465baacc44f8a9d32dd /lib/PublicInbox | |
parent | 02cd38ea042e01f343d52f8401cd56cf8e37dd9d (diff) | |
download | public-inbox-5754faeb3fa1c9aaeff8922b449127cfbc86236d.tar.gz |
We can't assume git output is UTF-8, and we'll always have legacy data in git coderepos. So attempt to display some some garbled text rather than nothing at all if Perl croaks on it. sox commit c38987e8d20505621b8d872863afa7d233ed1096 (Added raw inverse-bit u-law and A-law support. Updated *.txt files., 2001-12-13) is an example of a commit which caused problems for me.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Hval.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/RepoAtom.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/RepoTree.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/ViewDiff.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/ViewVCS.pm | 19 | ||||
-rw-r--r-- | lib/PublicInbox/WwwCoderepo.pm | 8 |
6 files changed, 26 insertions, 22 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 0677865e..e9b9ae64 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -4,13 +4,13 @@ # represents a header value in various forms. Used for HTML generation # in our web interface(s) package PublicInbox::Hval; +use v5.10.1; # be careful about unicode_strings in v5.12; use strict; -use warnings; use Encode qw(find_encoding); use PublicInbox::MID qw/mid_clean mid_escape/; use base qw/Exporter/; our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape - to_attr prurl mid_href fmt_ts ts2str/; + to_attr prurl mid_href fmt_ts ts2str utf8_maybe/; use POSIX qw(strftime); my $enc_ascii = find_encoding('us-ascii'); @@ -137,4 +137,9 @@ sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) }; # human-friendly format sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } +sub utf8_maybe ($) { + utf8::decode($_[0]); + utf8::valid($_[0]) or utf8::encode($_[0]); # non-UTF-8 data exists +} + 1; diff --git a/lib/PublicInbox/RepoAtom.pm b/lib/PublicInbox/RepoAtom.pm index c89d4551..79b76c12 100644 --- a/lib/PublicInbox/RepoAtom.pm +++ b/lib/PublicInbox/RepoAtom.pm @@ -8,7 +8,7 @@ use parent qw(PublicInbox::GzipFilter); use POSIX qw(strftime); use URI::Escape qw(uri_escape); use Scalar::Util (); -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html utf8_maybe); # git for-each-ref and log use different format fields :< my $ATOM_FMT = '--pretty=tformat:'.join('%n', @@ -50,7 +50,7 @@ sub translate { my $is_tag = $self->{-is_tag}; my ($H, $ct, $an, $ae, $at, $s, $bdy); while ($lbuf =~ s/\A([^\0]+)\0\n//s) { - utf8::decode($bdy = $1); + utf8_maybe($bdy = $1); if ($is_tag) { my %r; eval "$bdy"; diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm index 9c7b86b3..5c73531a 100644 --- a/lib/PublicInbox/RepoTree.pm +++ b/lib/PublicInbox/RepoTree.pm @@ -8,7 +8,7 @@ use PublicInbox::ViewDiff qw(uri_escape_path); use PublicInbox::WwwStatic qw(r); use PublicInbox::Qspawn; use PublicInbox::WwwStream qw(html_oneshot); -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html utf8_maybe); sub rd_404_log { my ($bref, $ctx) = @_; @@ -26,7 +26,7 @@ sub rd_404_log { $code = 404; } else { my ($H, $h, $s_as) = split(/ /, $$bref, 3); - utf8::decode($s_as); + utf8_maybe($s_as); my $x = uri_escape_path($ctx->{-path}); $s_as = ascii_html($s_as); print $zfh <<EOM; diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 124a723a..d078c5f9 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -11,7 +11,7 @@ use v5.12; use parent qw(Exporter); our @EXPORT_OK = qw(flush_diff uri_escape_path); use URI::Escape qw(uri_escape_utf8); -use PublicInbox::Hval qw(ascii_html to_attr); +use PublicInbox::Hval qw(ascii_html to_attr utf8_maybe); use PublicInbox::Git qw(git_unquote); my $OID_NULL = '0{7,}'; @@ -236,7 +236,7 @@ sub flush_diff ($$) { } } if (!$dctx) { - utf8::decode($after); + utf8_maybe($after); diff_before_or_after($ctx, \$after); } } else { diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index f80fb4cb..8a90c2d6 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -25,7 +25,7 @@ use PublicInbox::ViewDiff qw(flush_diff uri_escape_path); use PublicInbox::View; use PublicInbox::Eml; use Text::Wrap qw(wrap); -use PublicInbox::Hval qw(ascii_html to_filename prurl); +use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe); use POSIX qw(strftime); my $hl = eval { require PublicInbox::HlMod; @@ -115,14 +115,14 @@ sub show_other_result ($$) { # future-proofing "git show error:$qsp_err"); } my $l = PublicInbox::Linkify->new; - utf8::decode($$bref); + utf8_maybe($$bref); html_page($ctx, 200, '<pre>', $l->to_html($$bref), '</pre><hr>', dbg_log($ctx)); } sub cmt_title { # git->cat_async callback my ($bref, $oid, $type, $size, $ctx) = @_; - utf8::decode($$bref); + utf8_maybe($$bref); my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : ''; push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and cmt_finalize($ctx); @@ -160,8 +160,7 @@ sub show_commit_start { # ->psgi_qx callback open my $fh, '<', "$ctx->{-tmp}/h" or die "open $ctx->{-tmp}/h: $!"; chop(my $buf = do { local $/ = "\0"; <$fh> }); - utf8::decode($buf); - utf8::valid($buf) or utf8::encode($buf); # non-UTF-8 commits exist + utf8_maybe($buf); # non-UTF-8 commits exist chomp $buf; my ($P, $p); ($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9); @@ -248,12 +247,12 @@ committer $co EOM print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy); $bdy = ''; - open my $fh, '<:utf8', "$ctx->{-tmp}/p" or - die "open $ctx->{-tmp}/p: $!"; + open my $fh, '<', "$ctx->{-tmp}/p" or die "open $ctx->{-tmp}/p: $!"; if (-s $fh > $MAX_SIZE) { print $zfh "---\n patch is too large to show\n"; } else { # prepare flush_diff: read($fh, $x, -s _); + utf8_maybe($x); $ctx->{-apfx} = $ctx->{-spfx} = $upfx; $x =~ s/\r?\n/\n/gs; $ctx->{-anchors} = {} if $x =~ /^diff --git /sm; @@ -418,7 +417,7 @@ EOM undef $_; ($m, $t, $oid, $sz) = split(/ +/, $x, 4); $m = $GIT_MODE{$m} // '?'; - utf8::decode($f); + utf8_maybe($f); $n = ascii_html($f); if ($m eq 'g') { # gitlink submodule commit $$bref .= "\ng\t\t$n @ <a\nhref=#g>commit</a>$oid"; @@ -480,7 +479,7 @@ sub tz_adj ($) { sub show_tag_result { # git->cat_async callback my ($bref, $oid, $type, $size, $ctx) = @_; - utf8::decode($$bref); + utf8_maybe($$bref); my $l = PublicInbox::Linkify->new; $$bref = $l->to_html($$bref); $$bref =~ s!^object ([a-f0-9]+)!object <a @@ -568,7 +567,7 @@ sub show_blob { # git->cat_async callback " $raw_more</pre>".dbg_log($ctx)); # TODO: detect + convert to ensure validity - utf8::decode($$blob); + utf8_maybe($$blob); my $nl = ($$blob =~ s/\r?\n/\n/sg); my $pad = length($nl); diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index 834145e9..e8c340b5 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -14,7 +14,7 @@ use PublicInbox::ViewVCS; use PublicInbox::WwwStatic qw(r); use PublicInbox::GitHTTPBackend; use PublicInbox::WwwStream; -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html utf8_maybe); use PublicInbox::ViewDiff qw(uri_escape_path); use PublicInbox::RepoSnapshot; use PublicInbox::RepoAtom; @@ -179,7 +179,7 @@ EOM sub capture { # psgi_qx callback to capture git-for-each-ref my ($bref, $arg) = @_; # arg = [ctx, key, OnDestroy(summary_END)] - utf8::decode($$bref); + utf8_maybe($$bref); $arg->[0]->{qx_res}->{$arg->[1]} = $$bref; # summary_END may be called via OnDestroy $arg->[2] } @@ -241,13 +241,13 @@ sub translate { $fbuf .= shift while @_; if ($ctx->{-heads}) { while ($fbuf =~ s/\A([^\n]+)\n//s) { - utf8::decode(my $x = $1); + utf8_maybe(my $x = $1); push @out, _refs_heads_link($x, '../../'); } } else { my ($snap_pfx, @snap_fmt) = _snapshot_link_prep($ctx); while ($fbuf =~ s/\A([^\n]+)\n//s) { - utf8::decode(my $x = $1); + utf8_maybe(my $x = $1); push @out, _refs_tags_link($x, '../../', $snap_pfx, @snap_fmt); } |