From c5621af43e9c7cb1ff0565aa61a1d8fced55a23b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 Jun 2019 02:04:28 +0000 Subject: www: only emit ASCII chars in attachment filenames We don't want to emit funky URLs which can be lost in translation or cause problems with non-Unicode-aware clients. Then, don't accept non-ASCII filenames in URLs, since a manually-generated URL/filename in attachment downloads could be used for Unicode homographs to confuse folks who download the attachment. --- lib/PublicInbox/Hval.pm | 3 +++ lib/PublicInbox/View.pm | 2 +- lib/PublicInbox/WWW.pm | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 95a0f709..2b443970 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -13,6 +13,9 @@ our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape to_attr from_attr/; my $enc_ascii = find_encoding('us-ascii'); +# safe-ish acceptable filename pattern for portability +our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor + sub new { my ($class, $raw, $href) = @_; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 09afdaf1..83ae99bc 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -528,7 +528,7 @@ sub attach_link ($$$$;$) { $desc = $fn unless defined $desc; $desc = '' unless defined $desc; my $sfn; - if (defined $fn && $fn =~ /\A[[:alnum:]][\w\.-]+[[:alnum:]]\z/) { + if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) { $sfn = $fn; } elsif ($ct eq 'text/plain') { $sfn = 'a.txt'; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index b6f18f8d..50b6950c 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -28,7 +28,7 @@ use PublicInbox::UserContent; our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; -our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $ATTACH_RE = 
qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!; our $OID_RE = qr![a-f0-9]{7,40}!; sub new { -- cgit v1.2.3-24-ge0c7