diff options
-rw-r--r-- | lib/PublicInbox/ContentHash.pm | 7 | ||||
-rw-r--r-- | t/v2writable.t | 16 |
2 files changed, 20 insertions, 3 deletions
diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index bacc9cdd..1afbb413 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Unstable internal API.
@@ -63,8 +63,9 @@ sub content_digest ($;$) {
 	# do NOT consider the Message-ID as part of the content_hash
 	# if we got here, we've already got Message-ID reuse
 	my %seen = map { $_ => 1 } @{mids($eml)};
-	foreach my $mid (@{references($eml)}) {
-		$dig->add("ref\0$mid\0") unless $seen{$mid}++;
+	for (grep { !$seen{$_}++ } @{references($eml)}) {
+		utf8::encode($_);
+		$dig->add("ref\0$_\0");
 	}
 
 	# Only use Sender: if From is not present
diff --git a/t/v2writable.t b/t/v2writable.t
index ad946338..0d102204 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -283,6 +283,22 @@ EOF
 	is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)');
 }
 
+if ('UTF-8 References') {
+	my @w;
+	local $SIG{__WARN__} = sub { push @w, @_ };
+	my $msg = <<EOM;
+From: a\@example.com
+Subject: b
+Message-ID: <horrible\@example>
+References: <\xc4\x80\@example>
+
+EOM
+	ok($im->add(PublicInbox::Eml->new($msg."a\n")), 'UTF-8 References 1');
+	ok($im->add(PublicInbox::Eml->new($msg."b\n")), 'UTF-8 References 2');
+	$im->done;
+	ok(!grep(/Wide character/, @w), 'no wide characters') or xbail(\@w);
+}
+
 my $tmp = {
 	inboxdir => "$inboxdir/non-existent/subdir",
 	name => 'nope',