about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-10-13 10:16:08 +0000
committerEric Wong <e@80x24.org>2021-10-13 19:52:26 +0000
commit00d5dff2cce9d2c9b5720c0971ae3fd995c22c94 (patch)
treeedf609a8e8588f8012121503868f3f42496bdaf0
parent8d0091bb5bf984b924406a0a55da1c2136227438 (diff)
downloadpublic-inbox-00d5dff2cce9d2c9b5720c0971ae3fd995c22c94.tar.gz
Encode::FB_CROAK leaks memory in old versions of Encode:
<https://rt.cpan.org/Public/Bug/Display.html?id=139622>

Since I expect there's still many users on old systems and old
Perls, we can use "$SIG{__WARN__} = \&croak" here with
Encode::FB_WARN to emulate Encode::FB_CROAK behavior.
-rw-r--r--lib/PublicInbox/Eml.pm25
1 files changed, 16 insertions, 9 deletions
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 0867a016..69c26932 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -28,7 +28,7 @@ package PublicInbox::Eml;
 use strict;
 use v5.10.1;
 use Carp qw(croak);
-use Encode qw(find_encoding decode encode); # stdlib
+use Encode qw(find_encoding); # stdlib
 use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge
 use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2
 use MIME::QuotedPrint 3.05; # ditto
@@ -334,9 +334,14 @@ sub body_set {
 
 sub body_str_set {
         my ($self, $body_str) = @_;
-        my $charset = ct($self)->{attributes}->{charset} or
+        my $cs = ct($self)->{attributes}->{charset} //
                 croak('body_str was given, but no charset is defined');
-        body_set($self, \(encode($charset, $body_str, Encode::FB_CROAK)));
+        my $enc = find_encoding($cs) // croak "unknown encoding `$cs'";
+        $body_str = do {
+                local $SIG{__WARN__} = \&croak;
+                $enc->encode($body_str, Encode::FB_WARN);
+        };
+        body_set($self, \$body_str);
 }
 
 sub content_type { scalar header($_[0], 'Content-Type') }
@@ -452,15 +457,17 @@ sub body {
 sub body_str {
         my ($self) = @_;
         my $ct = ct($self);
-        my $charset = $ct->{attributes}->{charset};
-        if (!$charset) {
-                if ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) {
+        my $cs = $ct->{attributes}->{charset} // do {
+                ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) and
                         return body($self);
-                }
                 croak("can't get body as a string for ",
                         join("\n\t", header_raw($self, 'Content-Type')));
-        }
-        decode($charset, body($self), Encode::FB_CROAK);
+        };
+        my $enc = find_encoding($cs) or croak "unknown encoding `$cs'";
+        my $tmp = body($self);
+        # workaround https://rt.cpan.org/Public/Bug/Display.html?id=139622
+        local $SIG{__WARN__} = \&croak;
+        $enc->decode($tmp, Encode::FB_WARN);
 }
 
 sub as_string {