about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--lib/PublicInbox/Hval.pm9
-rw-r--r--lib/PublicInbox/RepoAtom.pm4
-rw-r--r--lib/PublicInbox/RepoTree.pm4
-rw-r--r--lib/PublicInbox/ViewDiff.pm4
-rw-r--r--lib/PublicInbox/ViewVCS.pm19
-rw-r--r--lib/PublicInbox/WwwCoderepo.pm8
-rw-r--r--xt/solver.t31
7 files changed, 44 insertions, 35 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 0677865e..e9b9ae64 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -4,13 +4,13 @@
 # represents a header value in various forms.  Used for HTML generation
 # in our web interface(s)
 package PublicInbox::Hval;
+use v5.10.1; # be careful about unicode_strings in v5.12;
 use strict;
-use warnings;
 use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
-                to_attr prurl mid_href fmt_ts ts2str/;
+                to_attr prurl mid_href fmt_ts ts2str utf8_maybe/;
 use POSIX qw(strftime);
 my $enc_ascii = find_encoding('us-ascii');
 
@@ -137,4 +137,9 @@ sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) };
 # human-friendly format
 sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
+sub utf8_maybe ($) {
+        utf8::decode($_[0]);
+        utf8::valid($_[0]) or utf8::encode($_[0]); # non-UTF-8 data exists
+}
+
 1;
diff --git a/lib/PublicInbox/RepoAtom.pm b/lib/PublicInbox/RepoAtom.pm
index c89d4551..79b76c12 100644
--- a/lib/PublicInbox/RepoAtom.pm
+++ b/lib/PublicInbox/RepoAtom.pm
@@ -8,7 +8,7 @@ use parent qw(PublicInbox::GzipFilter);
 use POSIX qw(strftime);
 use URI::Escape qw(uri_escape);
 use Scalar::Util ();
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html utf8_maybe);
 
 # git for-each-ref and log use different format fields :<
 my $ATOM_FMT = '--pretty=tformat:'.join('%n',
@@ -50,7 +50,7 @@ sub translate {
         my $is_tag = $self->{-is_tag};
         my ($H, $ct, $an, $ae, $at, $s, $bdy);
         while ($lbuf =~ s/\A([^\0]+)\0\n//s) {
-                utf8::decode($bdy = $1);
+                utf8_maybe($bdy = $1);
                 if ($is_tag) {
                         my %r;
                         eval "$bdy";
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
index 9c7b86b3..5c73531a 100644
--- a/lib/PublicInbox/RepoTree.pm
+++ b/lib/PublicInbox/RepoTree.pm
@@ -8,7 +8,7 @@ use PublicInbox::ViewDiff qw(uri_escape_path);
 use PublicInbox::WwwStatic qw(r);
 use PublicInbox::Qspawn;
 use PublicInbox::WwwStream qw(html_oneshot);
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html utf8_maybe);
 
 sub rd_404_log {
         my ($bref, $ctx) = @_;
@@ -26,7 +26,7 @@ sub rd_404_log {
                 $code = 404;
         } else {
                 my ($H, $h, $s_as) = split(/ /, $$bref, 3);
-                utf8::decode($s_as);
+                utf8_maybe($s_as);
                 my $x = uri_escape_path($ctx->{-path});
                 $s_as = ascii_html($s_as);
                 print $zfh <<EOM;
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 124a723a..d078c5f9 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -11,7 +11,7 @@ use v5.12;
 use parent qw(Exporter);
 our @EXPORT_OK = qw(flush_diff uri_escape_path);
 use URI::Escape qw(uri_escape_utf8);
-use PublicInbox::Hval qw(ascii_html to_attr);
+use PublicInbox::Hval qw(ascii_html to_attr utf8_maybe);
 use PublicInbox::Git qw(git_unquote);
 
 my $OID_NULL = '0{7,}';
@@ -236,7 +236,7 @@ sub flush_diff ($$) {
                                 }
                         }
                         if (!$dctx) {
-                                utf8::decode($after);
+                                utf8_maybe($after);
                                 diff_before_or_after($ctx, \$after);
                         }
                 } else {
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index f80fb4cb..8a90c2d6 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -25,7 +25,7 @@ use PublicInbox::ViewDiff qw(flush_diff uri_escape_path);
 use PublicInbox::View;
 use PublicInbox::Eml;
 use Text::Wrap qw(wrap);
-use PublicInbox::Hval qw(ascii_html to_filename prurl);
+use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe);
 use POSIX qw(strftime);
 my $hl = eval {
         require PublicInbox::HlMod;
@@ -115,14 +115,14 @@ sub show_other_result ($$) { # future-proofing
                                 "git show error:$qsp_err");
         }
         my $l = PublicInbox::Linkify->new;
-        utf8::decode($$bref);
+        utf8_maybe($$bref);
         html_page($ctx, 200, '<pre>', $l->to_html($$bref), '</pre><hr>',
                 dbg_log($ctx));
 }
 
 sub cmt_title { # git->cat_async callback
         my ($bref, $oid, $type, $size, $ctx) = @_;
-        utf8::decode($$bref);
+        utf8_maybe($$bref);
         my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : '';
         push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and
                 cmt_finalize($ctx);
@@ -160,8 +160,7 @@ sub show_commit_start { # ->psgi_qx callback
         open my $fh, '<', "$ctx->{-tmp}/h" or
                 die "open $ctx->{-tmp}/h: $!";
         chop(my $buf = do { local $/ = "\0"; <$fh> });
-        utf8::decode($buf);
-        utf8::valid($buf) or utf8::encode($buf); # non-UTF-8 commits exist
+        utf8_maybe($buf); # non-UTF-8 commits exist
         chomp $buf;
         my ($P, $p);
         ($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
@@ -248,12 +247,12 @@ committer $co
 EOM
         print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
         $bdy = '';
-        open my $fh, '<:utf8', "$ctx->{-tmp}/p" or
-                die "open $ctx->{-tmp}/p: $!";
+        open my $fh, '<', "$ctx->{-tmp}/p" or die "open $ctx->{-tmp}/p: $!";
         if (-s $fh > $MAX_SIZE) {
                 print $zfh "---\n patch is too large to show\n";
         } else { # prepare flush_diff:
                 read($fh, $x, -s _);
+                utf8_maybe($x);
                 $ctx->{-apfx} = $ctx->{-spfx} = $upfx;
                 $x =~ s/\r?\n/\n/gs;
                 $ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
@@ -418,7 +417,7 @@ EOM
                 undef $_;
                 ($m, $t, $oid, $sz) = split(/ +/, $x, 4);
                 $m = $GIT_MODE{$m} // '?';
-                utf8::decode($f);
+                utf8_maybe($f);
                 $n = ascii_html($f);
                 if ($m eq 'g') { # gitlink submodule commit
                         $$bref .= "\ng\t\t$n @ <a\nhref=#g>commit</a>$oid";
@@ -480,7 +479,7 @@ sub tz_adj ($) {
 
 sub show_tag_result { # git->cat_async callback
         my ($bref, $oid, $type, $size, $ctx) = @_;
-        utf8::decode($$bref);
+        utf8_maybe($$bref);
         my $l = PublicInbox::Linkify->new;
         $$bref = $l->to_html($$bref);
         $$bref =~ s!^object ([a-f0-9]+)!object <a
@@ -568,7 +567,7 @@ sub show_blob { # git->cat_async callback
                                 " $raw_more</pre>".dbg_log($ctx));
 
         # TODO: detect + convert to ensure validity
-        utf8::decode($$blob);
+        utf8_maybe($$blob);
         my $nl = ($$blob =~ s/\r?\n/\n/sg);
         my $pad = length($nl);
 
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 834145e9..e8c340b5 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -14,7 +14,7 @@ use PublicInbox::ViewVCS;
 use PublicInbox::WwwStatic qw(r);
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html utf8_maybe);
 use PublicInbox::ViewDiff qw(uri_escape_path);
 use PublicInbox::RepoSnapshot;
 use PublicInbox::RepoAtom;
@@ -179,7 +179,7 @@ EOM
 
 sub capture { # psgi_qx callback to capture git-for-each-ref
         my ($bref, $arg) = @_; # arg = [ctx, key, OnDestroy(summary_END)]
-        utf8::decode($$bref);
+        utf8_maybe($$bref);
         $arg->[0]->{qx_res}->{$arg->[1]} = $$bref;
         # summary_END may be called via OnDestroy $arg->[2]
 }
@@ -241,13 +241,13 @@ sub translate {
         $fbuf .= shift while @_;
         if ($ctx->{-heads}) {
                 while ($fbuf =~ s/\A([^\n]+)\n//s) {
-                        utf8::decode(my $x = $1);
+                        utf8_maybe(my $x = $1);
                         push @out, _refs_heads_link($x, '../../');
                 }
         } else {
                 my ($snap_pfx, @snap_fmt) = _snapshot_link_prep($ctx);
                 while ($fbuf =~ s/\A([^\n]+)\n//s) {
-                        utf8::decode(my $x = $1);
+                        utf8_maybe(my $x = $1);
                         push @out, _refs_tags_link($x, '../../',
                                                 $snap_pfx, @snap_fmt);
                 }
diff --git a/xt/solver.t b/xt/solver.t
index 06f5a493..51b4144c 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -32,23 +32,30 @@ my $todo = {
                 'c2f3bf071ee90b01f2d629921bb04c4f798f02fa/s/', # tag
                 '7eb93c89651c47c8095d476251f2e4314656b292/s/', # non-UTF-8
         ],
+        'sox-devel' => [
+                'c38987e8d20505621b8d872863afa7d233ed1096/s/', # non-UTF-8
+        ]
 };
 
-my ($ibx_name, $urls, @gone);
+my @gone;
 my $client = sub {
         my ($cb) = @_;
-        for my $u (@$urls) {
-                my $url = "/$ibx_name/$u";
-                my $res = $cb->(GET($url));
-                is($res->code, 200, $url);
-                next if $res->code == 200;
-                diag "$url failed";
-                diag $res->content;
+        for my $ibx_name (sort keys %$todo) {
+                diag "testing $ibx_name";
+                my $urls = $todo->{$ibx_name};
+                for my $u (@$urls) {
+                        my $url = "/$ibx_name/$u";
+                        my $res = $cb->(GET($url));
+                        is($res->code, 200, $url);
+                        next if $res->code == 200;
+                        diag "$url failed";
+                        diag $res->content;
+                }
         }
 };
 
 my $nr = 0;
-while (($ibx_name, $urls) = each %$todo) {
+while (my ($ibx_name, $urls) = each %$todo) {
         SKIP: {
                 my $ibx = $cfg->lookup_name($ibx_name);
                 if (!$ibx) {
@@ -61,15 +68,13 @@ while (($ibx_name, $urls) = each %$todo) {
                         skip(qq{publicinbox.$ibx_name.coderepo not configured},
                                 scalar(@$urls));
                 }
-                test_psgi($app, $client);
                 $nr++;
         }
 }
 
 delete @$todo{@gone};
+test_psgi($app, $client);
 my $env = { PI_CONFIG => PublicInbox::Config->default_file };
-while (($ibx_name, $urls) = each %$todo) {
-        test_httpd($env, $client, $nr);
-}
+test_httpd($env, $client, $nr);
 
 done_testing();