From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BCD8E1F8C7 for ; Thu, 26 Aug 2021 12:33:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/8] get rid of unnecessary bytes::length usage Date: Thu, 26 Aug 2021 12:33:31 +0000 Message-Id: <20210826123338.694-2-e@80x24.org> In-Reply-To: <20210826123338.694-1-e@80x24.org> References: <20210826123338.694-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The only place where we could return wide characters with -httpd was the raw $INBOX_DIR/description text, which is now converted to octets. All daemon (HTTP/NNTP/IMAP) sockets are opened in binary mode, so length() and bytes::length() are equivalent on reads. For socket writes, any non-octet data would warn about wide characters and we are strict in warnings with test_httpd. All gzipped buffers are also octets, as is PublicInbox::Eml->body, and anything from PerlIO objects ("git cat-file --batch" output, filesystems), so bytes::length was unnecessary in all those places. --- lib/PublicInbox/HTTP.pm | 17 ++++++++--------- lib/PublicInbox/ManifestJsGz.pm | 3 +-- lib/PublicInbox/NNTP.pm | 2 +- lib/PublicInbox/View.pm | 5 ++--- lib/PublicInbox/ViewVCS.pm | 5 ++--- lib/PublicInbox/WWW.pm | 10 ++++------ lib/PublicInbox/WwwAttach.pm | 4 ++-- lib/PublicInbox/WwwHighlight.pm | 5 ++--- lib/PublicInbox/WwwListing.pm | 4 ++-- lib/PublicInbox/WwwStatic.pm | 4 ++-- lib/PublicInbox/WwwStream.pm | 4 ++-- lib/PublicInbox/WwwText.pm | 5 ++--- t/psgi_search.t | 1 - t/search-thr-index.t | 8 ++++---- t/www_listing.t | 19 +++++++++++++++---- xt/cmp-msgstr.t | 2 +- 16 files changed, 50 insertions(+), 48 deletions(-) diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index d0708c5b..b2c74cf3 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -21,7 +21,6 @@ package PublicInbox::HTTP; use strict; use parent qw(PublicInbox::DS); -use bytes (); # only for bytes::length use Fcntl qw(:seek); use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use Plack::Util; @@ -89,7 +88,7 @@ sub event_step { # called by PublicInbox::DS return read_input($self) if ref($self->{env}); my $rbuf = $self->{rbuf} // (\(my $x = '')); - $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return; + $self->do_read($rbuf, 8192, length($$rbuf)) or return; rbuf_process($self, $rbuf); } @@ -104,7 +103,7 @@ sub rbuf_process { # (they are rarely-used and git (as of 2.7.2) does not use them) if ($r == -1 || $env{HTTP_TRAILER} || # this length-check is necessary for PURE_PERL=1: - ($r == -2 && bytes::length($$rbuf) > 0x4000)) { + ($r == -2 && length($$rbuf) > 0x4000)) { return quit($self, 400); } if ($r < 0) { # incomplete @@ -121,7 +120,7 @@ sub rbuf_process { # IO::Handle::write returns boolean, this returns bytes written: sub xwrite ($$$) { my ($fh, $rbuf, $max) = @_; - my $w = bytes::length($$rbuf); + my $w = length($$rbuf); $w = $max if $w > $max; $fh->write($$rbuf, $w) or return; $w; @@ -236,7 +235,7 @@ sub response_header_write { sub chunked_write ($$) { my $self = $_[0]; return if $_[1] eq ''; - msg_more($self, sprintf("%x\r\n", bytes::length($_[1]))); + msg_more($self, sprintf("%x\r\n", length($_[1]))); msg_more($self, $_[1]); # use $self->write(\"\n\n") if you care about real-time @@ -411,12 +410,12 @@ sub read_input_chunked { # unlikely... $$rbuf =~ s/\A\r\n//s and return app_dispatch($self, $input, $rbuf); - return quit($self, 400) if bytes::length($$rbuf) > 2; + return quit($self, 400) if length($$rbuf) > 2; } if ($len == CHUNK_END) { if ($$rbuf =~ s/\A\r\n//s) { $len = CHUNK_START; - } elsif (bytes::length($$rbuf) > 2) { + } elsif (length($$rbuf) > 2) { return quit($self, 400); } } @@ -426,14 +425,14 @@ sub read_input_chunked { # unlikely... if (($len + -s $input) > $MAX_REQUEST_BUFFER) { return quit($self, 413); } - } elsif (bytes::length($$rbuf) > CHUNK_MAX_HDR) { + } elsif (length($$rbuf) > CHUNK_MAX_HDR) { return quit($self, 400); } # will break from loop since $len >= 0 } if ($len < 0) { # chunk header is trickled, read more - $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or + $self->do_read($rbuf, 8192, length($$rbuf)) or return recv_err($self, $len); # (implicit) goto chunk_start if $r > 0; } diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 7fee78dd..69d81fa1 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -6,7 +6,6 @@ package PublicInbox::ManifestJsGz; use strict; use v5.10.1; use parent qw(PublicInbox::WwwListing); -use bytes (); # length use PublicInbox::Config; use IO::Compress::Gzip qw(gzip); use HTTP::Date qw(time2str); @@ -108,7 +107,7 @@ sub psgi_triple { gzip(\$manifest => \(my $out)); [ 200, [ qw(Content-Type application/gzip), 'Last-Modified', time2str($ctx->{-mtime}), - 'Content-Length', bytes::length($out) ], [ $out ] ] + 'Content-Length', length($out) ], [ $out ] ] } sub per_inbox { diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 13a68bb8..aea04c05 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -241,7 +241,7 @@ sub parse_time ($$;$) { $gmt = 1; } my ($YYYY, $MM, $DD); - if (bytes::length($date) == 8) { # RFC 3977 allows YYYYMMDD + if (length($date) == 8) { # RFC 3977 allows YYYYMMDD ($YYYY, $MM, $DD) = unpack('A4A2A2', $date); } else { # legacy clients send YYMMDD my $YY; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 17d38302..94ea6148 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -5,8 +5,7 @@ # See Documentation/design_www.txt for this. package PublicInbox::View; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use List::Util qw(max); use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href @@ -531,7 +530,7 @@ sub attach_link ($$$$;$) { return unless $part->{bdy}; my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...) - my $size = bytes::length($part->body); + my $size = length($part->body); # hide attributes normally, unless we want to aid users in # spotting MUA problems: diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index 702a075d..6365f045 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -15,8 +15,7 @@ package PublicInbox::ViewVCS; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use PublicInbox::SolverGit; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Linkify; @@ -49,7 +48,7 @@ sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn } elsif (index($$bref, "\0") >= 0) { [200, [qw(Content-Type application/octet-stream), @cl] ]; } else { - my $n = bytes::length($$bref); + my $n = length($$bref); if ($n >= $BIN_DETECT || $n == $size) { return [200, [ 'Content-Type', 'text/plain; charset=UTF-8', @cl ] ]; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 1afdece0..570e690e 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -11,10 +11,8 @@ # - Must not rely on static content # - UTF-8 is only for user-content, 7-bit US-ASCII for us package PublicInbox::WWW; -use 5.010_001; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use PublicInbox::Config; use PublicInbox::Hval; use URI::Escape qw(uri_unescape); @@ -646,8 +644,7 @@ sub get_css ($$$) { $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env); } defined $css or return r404(); - my $h = [ 'Content-Length', bytes::length($css), - 'Content-Type', 'text/css' ]; + my $h = [ 'Content-Length', length($css), 'Content-Type', 'text/css' ]; PublicInbox::GitHTTPBackend::cache_one_year($h); [ 200, $h, [ $css ] ]; } @@ -656,7 +653,8 @@ sub get_description { my ($ctx, $inbox) = @_; invalid_inbox($ctx, $inbox) || do { my $d = $ctx->{ibx}->description . "\n"; - [ 200, [ 'Content-Length', bytes::length($d), + utf8::encode($d); + [ 200, [ 'Content-Length', length($d), 'Content-Type', 'text/plain' ], [ $d ] ]; }; } diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index a6c68a3f..c17394af 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -4,8 +4,8 @@ # For retrieving attachments from messages in the WWW interface package PublicInbox::WwwAttach; # internal package use strict; +use v5.10.1; use parent qw(PublicInbox::GzipFilter); -use bytes (); # only for bytes::length use PublicInbox::Eml; sub referer_match ($) { @@ -50,7 +50,7 @@ sub get_attach_i { # ->each_part callback $part = "Deep-linking prevented\n"; } } - push @{$res->[1]}, 'Content-Length', bytes::length($part); + push @{$res->[1]}, 'Content-Length', length($part); $res->[2]->[0] = $part; } diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm index 6fed2fed..3593c2d4 100644 --- a/lib/PublicInbox/WwwHighlight.pm +++ b/lib/PublicInbox/WwwHighlight.pm @@ -20,8 +20,7 @@ package PublicInbox::WwwHighlight; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use parent qw(PublicInbox::HlMod); use PublicInbox::Linkify qw(); use PublicInbox::Hval qw(ascii_html); @@ -69,7 +68,7 @@ sub call { $l->linkify_2($$bref); my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; - push @$h, 'Content-Length', bytes::length($$bref); + push @$h, 'Content-Length', length($$bref); [ 200, $h, [ $$bref ] ] } diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index a31aa4ca..8b54d724 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -5,12 +5,12 @@ # Used by PublicInbox::WWW package PublicInbox::WwwListing; use strict; +use v5.10.1; use PublicInbox::Hval qw(prurl fmt_ts ascii_html); use PublicInbox::Linkify; use PublicInbox::GzipFilter qw(gzf_maybe); use PublicInbox::ConfigIter; use PublicInbox::WwwStream; -use bytes (); # bytes::length sub ibx_entry { my ($ctx, $ibx, $ce) = @_; @@ -213,7 +213,7 @@ sub psgi_triple { my $out = $gzf->zflush('
'.
 			PublicInbox::WwwStream::code_footer($ctx->{env}) .
 			'
'); - $h->[3] = bytes::length($out); + $h->[3] = length($out); [ $code, $h, [ $out ] ]; } diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm index 29e4819d..b3476ab8 100644 --- a/lib/PublicInbox/WwwStatic.pm +++ b/lib/PublicInbox/WwwStatic.pm @@ -9,8 +9,8 @@ # functionality of nginx. package PublicInbox::WwwStatic; use strict; +use v5.10.1; use parent qw(Exporter); -use bytes (); use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK); use POSIX qw(strftime); use HTTP::Date qw(time2str); @@ -318,7 +318,7 @@ sub dir_response ($$$) { "
Index of $path_info_html

\n");
 	$gzf->zmore(join("\n", @entries));
 	my $out = $gzf->zflush("

\n"); - $h->[3] = bytes::length($out); + $h->[3] = length($out); [ 200, $h, [ $out ] ] } diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 2f8212d4..adcb5fe2 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -7,9 +7,9 @@ # See PublicInbox::GzipFilter parent class for more info. package PublicInbox::WwwStream; use strict; +use v5.10.1; use parent qw(Exporter PublicInbox::GzipFilter); our @EXPORT_OK = qw(html_oneshot); -use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl ts2str); our $TOR_URL = 'https://www.torproject.org/'; our $CODE_URL = [ qw(http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git @@ -216,7 +216,7 @@ sub html_oneshot ($$;$) { }; $ctx->zmore($$sref) if $sref; my $bdy = $ctx->zflush(_html_end($ctx)); - $res_hdr->[3] = bytes::length($bdy); + $res_hdr->[3] = length($bdy); [ $code, $res_hdr, [ $bdy ] ] } diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 76a95a6b..db5060ea 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -4,8 +4,7 @@ # used for displaying help texts and other non-mail content package PublicInbox::WwwText; use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use PublicInbox::Linkify; use PublicInbox::WwwStream; use PublicInbox::Hval qw(ascii_html); @@ -43,7 +42,7 @@ sub get_text { $txt = $gzf->translate($txt); $txt .= $gzf->zflush; } - $hdr->[3] = bytes::length($txt); + $hdr->[3] = length($txt); return [ $code, $hdr, [ $txt ] ] } diff --git a/t/psgi_search.t b/t/psgi_search.t index 5bdd66ed..3da93eda 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -8,7 +8,6 @@ use IO::Uncompress::Gunzip qw(gunzip); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::Inbox; -use bytes (); # only for bytes::length my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods); diff --git a/t/search-thr-index.t b/t/search-thr-index.t index fc1b666a..62745dbc 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -1,8 +1,8 @@ +#!perl -w # Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; -use warnings; -use bytes (); # only for bytes::length +use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MID qw(mids); @@ -45,7 +45,7 @@ foreach (reverse split(/\n\n/, $data)) { my $mime = PublicInbox::Eml->new(\$_); $mime->header_set('From' => 'bw@g'); $mime->header_set('To' => 'git@vger.kernel.org'); - my $bytes = bytes::length($mime->as_string); + my $bytes = length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, @@ -92,7 +92,7 @@ To: git@vger.kernel.org my $tid0 = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? LIMIT 1 - my $bytes = bytes::length($mime->as_string); + my $bytes = length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, diff --git a/t/www_listing.t b/t/www_listing.t index 6b3b408f..7ea12eea 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -55,7 +55,7 @@ sub tiny_test { ok(my $clone = $manifest->{'/alt'}, '/alt in manifest'); is($clone->{owner}, "lorelei \x{100}", 'owner set'); is($clone->{reference}, '/bare', 'reference detected'); - is($clone->{description}, "we're all clones", 'description read'); + is($clone->{description}, "we're \x{100}ll clones", 'description read'); ok(my $bare = $manifest->{'/bare'}, '/bare in manifest'); is($bare->{description}, 'Unnamed repository', 'missing $GIT_DIR/description fallback'); @@ -72,6 +72,10 @@ sub tiny_test { ok(my $v2epoch1 = $manifest->{'/v2/git/1.git'}, 'v2 epoch 1 appeared'); like($v2epoch1->{description}, qr/ \[epoch 1\]\z/, 'epoch 1 in description'); + + $res = $http->get("http://$host:$port/alt/description"); + is($res->{content}, "we're \xc4\x80ll clones\n", 'UTF-8 description') + or diag explain($res); } my $td; @@ -91,9 +95,9 @@ SKIP: { is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i") } ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox'); - open $fh, '>', "$alt/description" or die; - print $fh "we're all clones\n" or die; - close $fh or die; + open $fh, '>', "$alt/description" or xbail "open $alt/description $!"; + print $fh "we're \xc4\x80ll clones\n" or xbail "print $!"; + close $fh or xbail "write: $alt/description $!"; is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner), "lorelei \xc4\x80"), 0, 'set gitweb user'); @@ -178,6 +182,13 @@ manifest = \${site}/v2/manifest.js.gz for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_"); } + $td->kill; + $td->join; + is($?, 0, 'no error in exited process'); + open $fh, '<', $err or BAIL_OUT("open $err failed: $!"); + my $eout = do { local $/; <$fh> }; + unlike($eout, qr/wide/i, 'no Wide character warnings'); + unlike($eout, qr/uninitialized/i, 'no uninitialized warnings'); } done_testing(); diff --git a/xt/cmp-msgstr.t b/xt/cmp-msgstr.t index e0e8ed5a..900127c7 100644 --- a/xt/cmp-msgstr.t +++ b/xt/cmp-msgstr.t @@ -60,7 +60,7 @@ my $cmp = sub { my $dig = $dig_cls->new; $dig->add($part); push @$cmp_arg, "M: ".$dig->hexdigest; - push @$cmp_arg, "B: ".bytes::length($part); + push @$cmp_arg, "B: ".length($part); } else { $part =~ s/\s+\z//s; push @$cmp_arg, "X: ".$part;