about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-08-26 12:33:31 +0000
committer Eric Wong <e@80x24.org> 2021-08-28 10:36:58 +0000
commit 5c8909925072804901e9c3b45bbf25446d379e7b (patch)
tree 32b55b97dc531395284dc9458634c9f5b8c4d5b8
parent 26c635060dcae35feae836b02a18a6a11e408312 (diff)
download public-inbox-5c8909925072804901e9c3b45bbf25446d379e7b.tar.gz
The only place where we could return wide characters with -httpd
was the raw $INBOX_DIR/description text, which is now converted
to octets.

All daemon (HTTP/NNTP/IMAP) sockets are opened in binary mode,
so length() and bytes::length() are equivalent on reads.  For
socket writes, any non-octet data would warn about wide characters
and we are strict in warnings with test_httpd.

All gzipped buffers are also octets, as is PublicInbox::Eml->body,
and anything from PerlIO objects ("git cat-file --batch" output,
filesystems), so bytes::length was unnecessary in all those places.
-rw-r--r-- lib/PublicInbox/HTTP.pm 17
-rw-r--r-- lib/PublicInbox/ManifestJsGz.pm 3
-rw-r--r-- lib/PublicInbox/NNTP.pm 2
-rw-r--r-- lib/PublicInbox/View.pm 5
-rw-r--r-- lib/PublicInbox/ViewVCS.pm 5
-rw-r--r-- lib/PublicInbox/WWW.pm 10
-rw-r--r-- lib/PublicInbox/WwwAttach.pm 4
-rw-r--r-- lib/PublicInbox/WwwHighlight.pm 5
-rw-r--r-- lib/PublicInbox/WwwListing.pm 4
-rw-r--r-- lib/PublicInbox/WwwStatic.pm 4
-rw-r--r-- lib/PublicInbox/WwwStream.pm 4
-rw-r--r-- lib/PublicInbox/WwwText.pm 5
-rw-r--r-- t/psgi_search.t 1
-rw-r--r-- t/search-thr-index.t 8
-rw-r--r-- t/www_listing.t 19
-rw-r--r-- xt/cmp-msgstr.t 2
16 files changed, 50 insertions, 48 deletions
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index d0708c5b..b2c74cf3 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -21,7 +21,6 @@
 package PublicInbox::HTTP;
 use strict;
 use parent qw(PublicInbox::DS);
-use bytes (); # only for bytes::length
 use Fcntl qw(:seek);
 use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
 use Plack::Util;
@@ -89,7 +88,7 @@ sub event_step { # called by PublicInbox::DS
 
         return read_input($self) if ref($self->{env});
         my $rbuf = $self->{rbuf} // (\(my $x = ''));
-        $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return;
+        $self->do_read($rbuf, 8192, length($$rbuf)) or return;
         rbuf_process($self, $rbuf);
 }
 
@@ -104,7 +103,7 @@ sub rbuf_process {
         # (they are rarely-used and git (as of 2.7.2) does not use them)
         if ($r == -1 || $env{HTTP_TRAILER} ||
                         # this length-check is necessary for PURE_PERL=1:
-                        ($r == -2 && bytes::length($$rbuf) > 0x4000)) {
+                        ($r == -2 && length($$rbuf) > 0x4000)) {
                 return quit($self, 400);
         }
         if ($r < 0) { # incomplete
@@ -121,7 +120,7 @@ sub rbuf_process {
 # IO::Handle::write returns boolean, this returns bytes written:
 sub xwrite ($$$) {
         my ($fh, $rbuf, $max) = @_;
-        my $w = bytes::length($$rbuf);
+        my $w = length($$rbuf);
         $w = $max if $w > $max;
         $fh->write($$rbuf, $w) or return;
         $w;
@@ -236,7 +235,7 @@ sub response_header_write {
 sub chunked_write ($$) {
         my $self = $_[0];
         return if $_[1] eq '';
-        msg_more($self, sprintf("%x\r\n", bytes::length($_[1])));
+        msg_more($self, sprintf("%x\r\n", length($_[1])));
         msg_more($self, $_[1]);
 
         # use $self->write(\"\n\n") if you care about real-time
@@ -411,12 +410,12 @@ sub read_input_chunked { # unlikely...
                         $$rbuf =~ s/\A\r\n//s and
                                 return app_dispatch($self, $input, $rbuf);
 
-                        return quit($self, 400) if bytes::length($$rbuf) > 2;
+                        return quit($self, 400) if length($$rbuf) > 2;
                 }
                 if ($len == CHUNK_END) {
                         if ($$rbuf =~ s/\A\r\n//s) {
                                 $len = CHUNK_START;
-                        } elsif (bytes::length($$rbuf) > 2) {
+                        } elsif (length($$rbuf) > 2) {
                                 return quit($self, 400);
                         }
                 }
@@ -426,14 +425,14 @@ sub read_input_chunked { # unlikely...
                                 if (($len + -s $input) > $MAX_REQUEST_BUFFER) {
                                         return quit($self, 413);
                                 }
-                        } elsif (bytes::length($$rbuf) > CHUNK_MAX_HDR) {
+                        } elsif (length($$rbuf) > CHUNK_MAX_HDR) {
                                 return quit($self, 400);
                         }
                         # will break from loop since $len >= 0
                 }
 
                 if ($len < 0) { # chunk header is trickled, read more
-                        $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or
+                        $self->do_read($rbuf, 8192, length($$rbuf)) or
                                 return recv_err($self, $len);
                         # (implicit) goto chunk_start if $r > 0;
                 }
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 7fee78dd..69d81fa1 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -6,7 +6,6 @@ package PublicInbox::ManifestJsGz;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::WwwListing);
-use bytes (); # length
 use PublicInbox::Config;
 use IO::Compress::Gzip qw(gzip);
 use HTTP::Date qw(time2str);
@@ -108,7 +107,7 @@ sub psgi_triple {
         gzip(\$manifest => \(my $out));
         [ 200, [ qw(Content-Type application/gzip),
                  'Last-Modified', time2str($ctx->{-mtime}),
-                 'Content-Length', bytes::length($out) ], [ $out ] ]
+                 'Content-Length', length($out) ], [ $out ] ]
 }
 
 sub per_inbox {
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 13a68bb8..aea04c05 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -241,7 +241,7 @@ sub parse_time ($$;$) {
                 $gmt = 1;
         }
         my ($YYYY, $MM, $DD);
-        if (bytes::length($date) == 8) { # RFC 3977 allows YYYYMMDD
+        if (length($date) == 8) { # RFC 3977 allows YYYYMMDD
                 ($YYYY, $MM, $DD) = unpack('A4A2A2', $date);
         } else { # legacy clients send YYMMDD
                 my $YY;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 17d38302..94ea6148 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -5,8 +5,7 @@
 # See Documentation/design_www.txt for this.
 package PublicInbox::View;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use List::Util qw(max);
 use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href
@@ -531,7 +530,7 @@ sub attach_link ($$$$;$) {
         return unless $part->{bdy};
 
         my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
-        my $size = bytes::length($part->body);
+        my $size = length($part->body);
 
         # hide attributes normally, unless we want to aid users in
         # spotting MUA problems:
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 702a075d..6365f045 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -15,8 +15,7 @@
 
 package PublicInbox::ViewVCS;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Linkify;
@@ -49,7 +48,7 @@ sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn
         } elsif (index($$bref, "\0") >= 0) {
                 [200, [qw(Content-Type application/octet-stream), @cl] ];
         } else {
-                my $n = bytes::length($$bref);
+                my $n = length($$bref);
                 if ($n >= $BIN_DETECT || $n == $size) {
                         return [200, [ 'Content-Type',
                                 'text/plain; charset=UTF-8', @cl ] ];
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 1afdece0..570e690e 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -11,10 +11,8 @@
 # - Must not rely on static content
 # - UTF-8 is only for user-content, 7-bit US-ASCII for us
 package PublicInbox::WWW;
-use 5.010_001;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::Config;
 use PublicInbox::Hval;
 use URI::Escape qw(uri_unescape);
@@ -646,8 +644,7 @@ sub get_css ($$$) {
                 $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
         }
         defined $css or return r404();
-        my $h = [ 'Content-Length', bytes::length($css),
-                'Content-Type', 'text/css' ];
+        my $h = [ 'Content-Length', length($css), 'Content-Type', 'text/css' ];
         PublicInbox::GitHTTPBackend::cache_one_year($h);
         [ 200, $h, [ $css ] ];
 }
@@ -656,7 +653,8 @@ sub get_description {
         my ($ctx, $inbox) = @_;
         invalid_inbox($ctx, $inbox) || do {
                 my $d = $ctx->{ibx}->description . "\n";
-                [ 200, [ 'Content-Length', bytes::length($d),
+                utf8::encode($d);
+                [ 200, [ 'Content-Length', length($d),
                         'Content-Type', 'text/plain' ], [ $d ] ];
         };
 }
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index a6c68a3f..c17394af 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -4,8 +4,8 @@
 # For retrieving attachments from messages in the WWW interface
 package PublicInbox::WwwAttach; # internal package
 use strict;
+use v5.10.1;
 use parent qw(PublicInbox::GzipFilter);
-use bytes (); # only for bytes::length
 use PublicInbox::Eml;
 
 sub referer_match ($) {
@@ -50,7 +50,7 @@ sub get_attach_i { # ->each_part callback
                         $part = "Deep-linking prevented\n";
                 }
         }
-        push @{$res->[1]}, 'Content-Length', bytes::length($part);
+        push @{$res->[1]}, 'Content-Length', length($part);
         $res->[2]->[0] = $part;
 }
 
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
index 6fed2fed..3593c2d4 100644
--- a/lib/PublicInbox/WwwHighlight.pm
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -20,8 +20,7 @@
 
 package PublicInbox::WwwHighlight;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use parent qw(PublicInbox::HlMod);
 use PublicInbox::Linkify qw();
 use PublicInbox::Hval qw(ascii_html);
@@ -69,7 +68,7 @@ sub call {
         $l->linkify_2($$bref);
 
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
-        push @$h, 'Content-Length', bytes::length($$bref);
+        push @$h, 'Content-Length', length($$bref);
 
         [ 200, $h, [ $$bref ] ]
 }
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index a31aa4ca..8b54d724 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,12 +5,12 @@
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
+use v5.10.1;
 use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
 use PublicInbox::WwwStream;
-use bytes (); # bytes::length
 
 sub ibx_entry {
         my ($ctx, $ibx, $ce) = @_;
@@ -213,7 +213,7 @@ sub psgi_triple {
         my $out = $gzf->zflush('</pre><hr><pre>'.
                         PublicInbox::WwwStream::code_footer($ctx->{env}) .
                         '</pre></body></html>');
-        $h->[3] = bytes::length($out);
+        $h->[3] = length($out);
         [ $code, $h, [ $out ] ];
 }
 
diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm
index 29e4819d..b3476ab8 100644
--- a/lib/PublicInbox/WwwStatic.pm
+++ b/lib/PublicInbox/WwwStatic.pm
@@ -9,8 +9,8 @@
 # functionality of nginx.
 package PublicInbox::WwwStatic;
 use strict;
+use v5.10.1;
 use parent qw(Exporter);
-use bytes ();
 use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
 use POSIX qw(strftime);
 use HTTP::Date qw(time2str);
@@ -318,7 +318,7 @@ sub dir_response ($$$) {
                 "</head><body><pre>Index of $path_info_html</pre><hr><pre>\n");
         $gzf->zmore(join("\n", @entries));
         my $out = $gzf->zflush("</pre><hr></body></html>\n");
-        $h->[3] = bytes::length($out);
+        $h->[3] = length($out);
         [ 200, $h, [ $out ] ]
 }
 
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 2f8212d4..adcb5fe2 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -7,9 +7,9 @@
 # See PublicInbox::GzipFilter parent class for more info.
 package PublicInbox::WwwStream;
 use strict;
+use v5.10.1;
 use parent qw(Exporter PublicInbox::GzipFilter);
 our @EXPORT_OK = qw(html_oneshot);
-use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl ts2str);
 our $TOR_URL = 'https://www.torproject.org/';
 our $CODE_URL = [ qw(http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git
@@ -216,7 +216,7 @@ sub html_oneshot ($$;$) {
         };
         $ctx->zmore($$sref) if $sref;
         my $bdy = $ctx->zflush(_html_end($ctx));
-        $res_hdr->[3] = bytes::length($bdy);
+        $res_hdr->[3] = length($bdy);
         [ $code, $res_hdr, [ $bdy ] ]
 }
 
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 76a95a6b..db5060ea 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -4,8 +4,7 @@
 # used for displaying help texts and other non-mail content
 package PublicInbox::WwwText;
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use PublicInbox::Linkify;
 use PublicInbox::WwwStream;
 use PublicInbox::Hval qw(ascii_html);
@@ -43,7 +42,7 @@ sub get_text {
                         $txt = $gzf->translate($txt);
                         $txt .= $gzf->zflush;
                 }
-                $hdr->[3] = bytes::length($txt);
+                $hdr->[3] = length($txt);
                 return [ $code, $hdr, [ $txt ] ]
         }
 
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 5bdd66ed..3da93eda 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -8,7 +8,6 @@ use IO::Uncompress::Gunzip qw(gunzip);
 use PublicInbox::Eml;
 use PublicInbox::Config;
 use PublicInbox::Inbox;
-use bytes (); # only for bytes::length
 my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
                 URI::Escape Plack::Builder);
 require_mods(@mods);
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index fc1b666a..62745dbc 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -1,8 +1,8 @@
+#!perl -w
 # Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
 use Test::More;
 use PublicInbox::TestCommon;
 use PublicInbox::MID qw(mids);
@@ -45,7 +45,7 @@ foreach (reverse split(/\n\n/, $data)) {
         my $mime = PublicInbox::Eml->new(\$_);
         $mime->header_set('From' => 'bw@g');
         $mime->header_set('To' => 'git@vger.kernel.org');
-        my $bytes = bytes::length($mime->as_string);
+        my $bytes = length($mime->as_string);
         my $mid = mids($mime->header_obj)->[0];
         my $smsg = bless {
                 bytes => $bytes,
@@ -92,7 +92,7 @@ To: git@vger.kernel.org
         my $tid0 = $dbh->selectrow_array(<<'', undef, $num);
 SELECT tid FROM over WHERE num = ? LIMIT 1
 
-        my $bytes = bytes::length($mime->as_string);
+        my $bytes = length($mime->as_string);
         my $mid = mids($mime->header_obj)->[0];
         my $smsg = bless {
                 bytes => $bytes,
diff --git a/t/www_listing.t b/t/www_listing.t
index 6b3b408f..7ea12eea 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -55,7 +55,7 @@ sub tiny_test {
         ok(my $clone = $manifest->{'/alt'}, '/alt in manifest');
         is($clone->{owner}, "lorelei \x{100}", 'owner set');
         is($clone->{reference}, '/bare', 'reference detected');
-        is($clone->{description}, "we're all clones", 'description read');
+        is($clone->{description}, "we're \x{100}ll clones", 'description read');
         ok(my $bare = $manifest->{'/bare'}, '/bare in manifest');
         is($bare->{description}, 'Unnamed repository',
                 'missing $GIT_DIR/description fallback');
@@ -72,6 +72,10 @@ sub tiny_test {
         ok(my $v2epoch1 = $manifest->{'/v2/git/1.git'}, 'v2 epoch 1 appeared');
         like($v2epoch1->{description}, qr/ \[epoch 1\]\z/,
                 'epoch 1 in description');
+
+        $res = $http->get("http://$host:$port/alt/description");
+        is($res->{content}, "we're \xc4\x80ll clones\n", 'UTF-8 description')
+                or diag explain($res);
 }
 
 my $td;
@@ -91,9 +95,9 @@ SKIP: {
                 is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i")
         }
         ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox');
-        open $fh, '>', "$alt/description" or die;
-        print $fh "we're all clones\n" or die;
-        close $fh or die;
+        open $fh, '>', "$alt/description" or xbail "open $alt/description $!";
+        print $fh "we're \xc4\x80ll clones\n" or xbail "print $!";
+        close $fh or xbail "write: $alt/description $!";
         is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner),
                 "lorelei \xc4\x80"), 0,
                 'set gitweb user');
@@ -178,6 +182,13 @@ manifest = \${site}/v2/manifest.js.gz
         for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) {
                 ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_");
         }
+        $td->kill;
+        $td->join;
+        is($?, 0, 'no error in exited process');
+        open $fh, '<', $err or BAIL_OUT("open $err failed: $!");
+        my $eout = do { local $/; <$fh> };
+        unlike($eout, qr/wide/i, 'no Wide character warnings');
+        unlike($eout, qr/uninitialized/i, 'no uninitialized warnings');
 }
 
 done_testing();
diff --git a/xt/cmp-msgstr.t b/xt/cmp-msgstr.t
index e0e8ed5a..900127c7 100644
--- a/xt/cmp-msgstr.t
+++ b/xt/cmp-msgstr.t
@@ -60,7 +60,7 @@ my $cmp = sub {
                                 my $dig = $dig_cls->new;
                                 $dig->add($part);
                                 push @$cmp_arg, "M: ".$dig->hexdigest;
-                                push @$cmp_arg, "B: ".bytes::length($part);
+                                push @$cmp_arg, "B: ".length($part);
                         } else {
                                 $part =~ s/\s+\z//s;
                                 push @$cmp_arg, "X: ".$part;