From 9d1e5fadd7d18f4c96ab0509d673040e34225a04 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 14 Aug 2016 10:21:09 +0000 Subject: www: do not unecessarily escape some chars in paths Based on reading RFC 3986, it seems '@', ':', '!', '$', '&', "'", '; '(', ')', '*', '+', ',', ';', '=' are all allowed in path-absolute where we have the Message-ID. In any case, it seems '@' is fairly common in path components nowadays and too common in Message-IDs. --- lib/PublicInbox/MID.pm | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/MID.pm') diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index bb40cc76..1c2d75cc 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -6,7 +6,8 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime/; +our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape/; +use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; use constant MID_MAX => 40; # SHA-1 hex length @@ -47,4 +48,8 @@ sub mid2path { sub mid_mime ($) { $_[0]->header_obj->header_raw('Message-ID') } +# RFC3986, section 3.3: +sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@' } +sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) } + 1; -- cgit v1.2.3-24-ge0c7