From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2CBFC1F5B1 for ; Tue, 7 Jul 2020 20:37:39 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/5] hval: to_filename: return `undef' instead of empty string Date: Tue, 7 Jul 2020 20:37:35 +0000 Message-Id: <20200707203738.32677-3-e@yhbt.net> In-Reply-To: <20200707203738.32677-1-e@yhbt.net> References: <20200707203738.32677-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Returning an empty string for a filename makes no sense, so instead return `undef' so the caller can set up a fallback using the "//" operator. This fixes uninitialized variable warnings because split() on an empty string returns an empty list (leaving $s as `undef'), which caused to_filename to warn on s// and tr// ops. --- lib/PublicInbox/Hval.pm | 4 ++-- lib/PublicInbox/Mbox.pm | 4 ++-- lib/PublicInbox/MboxGz.pm | 3 +-- t/hval.t | 8 +++++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 46a839160..e21a64a60 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -94,12 +94,12 @@ sub obfuscate_addrs ($$;$) { # like format_sanitized_subject in git.git pretty.c with '%f' format string sub to_filename ($) { - my ($s, undef) = split(/\n/, $_[0]); + my $s = (split(/\n/, $_[0]))[0] // return; # empty string => undef $s =~ s/[^A-Za-z0-9_\.]+/-/g; $s =~ tr/././s; $s =~ s/[\.\-]+\z//; $s =~ s/\A[\.\-]+//; - $s + $s eq '' ? 
undef : $s; } # convert a filename (or any string) to HTML attribute diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 8726b9f64..115321c61 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -52,9 +52,9 @@ sub async_eml { # ->{async_eml} for async_blob_cb sub res_hdr ($$) { my ($ctx, $subject) = @_; - my $fn = $subject // 'no-subject'; + my $fn = $subject // ''; $fn =~ s/^re:\s+//i; - $fn = $fn eq '' ? 'no-subject' : to_filename($fn); + $fn = to_filename($fn) // 'no-subject'; my @hdr = ('Content-Type'); if ($ctx->{-inbox}->{obfuscate}) { # obfuscation is stupid, but maybe scrapers are, too... diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index fdd16f68e..967af9c68 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -24,8 +24,7 @@ sub mbox_gz { $self->{cb} = $cb; $self->{base_url} = $self->{-inbox}->base_url($self->{env}); $self->{gz} = PublicInbox::GzipFilter::gzip_or_die(); - $fn = to_filename($fn // 'no-subject'); - $fn = 'no-subject' if $fn eq ''; + $fn = to_filename($fn // '') // 'no-subject'; # http://www.iana.org/assignments/media-types/application/gzip bless $self, __PACKAGE__; my $res_hdr = [ 'Content-Type' => 'application/gzip', diff --git a/t/hval.t b/t/hval.t index 38605c6f1..e80a02ff4 100644 --- a/t/hval.t +++ b/t/hval.t @@ -47,15 +47,17 @@ EOF is($html, $exp, 'only obfuscated relevant addresses'); -is('foo-bar', PublicInbox::Hval::to_filename('foo bar '), +is(PublicInbox::Hval::to_filename('foo bar '), 'foo-bar', 'to_filename has no trailing -'); -is('foo-bar', PublicInbox::Hval::to_filename("foo bar\nanother line\n"), +is(PublicInbox::Hval::to_filename("foo bar\nanother line\n"), 'foo-bar', 'to_filename has no repeated -, and nothing past LF'); -is('foo.bar', PublicInbox::Hval::to_filename("foo....bar"), +is(PublicInbox::Hval::to_filename("foo....bar"), 'foo.bar', 'to_filename squeezes -'); +is(PublicInbox::Hval::to_filename(''), undef, 'empty string returns undef'); + my 
$s = "\0\x07\n"; PublicInbox::Hval::src_escape($s); is($s, "\\0\\a\n", 'src_escape works as intended');