about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-02-15 00:25:53 +0000
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-02-15 01:08:15 +0000
commit: f46019039da6ac1596a4aef64b7bf394c743c1b1 (patch)
tree: 772f755511937a7b8fb50d80689e58484282abff
parent: 9b7617b25f58a731bc1ffc6087faed46301e2c26 (diff)
download: public-inbox-f46019039da6ac1596a4aef64b7bf394c743c1b1.tar.gz
There are a lot of weird characters which show up in LKML archives
which we did not support before.  Furthermore, allow spaces
before the '>' in the From: line, as at least one non-spam
poster used them.
-rw-r--r--  lib/PublicInbox/Address.pm  3
-rw-r--r--  t/address.t  5
2 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm
index f334adea..548f417c 100644
--- a/lib/PublicInbox/Address.pm
+++ b/lib/PublicInbox/Address.pm
@@ -8,7 +8,8 @@ use warnings;
 # just enough to make thing sanely displayable and pass to git
 
 sub emails {
-        ($_[0] =~ /([\w\.\+=\-]+\@[\w\.\-]+)>?\s*(?:\(.*?\))?(?:,\s*|\z)/g)
+        ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+)
+                (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx)
 }
 
 sub names {
diff --git a/t/address.t b/t/address.t
index e35e4f8b..eced5c46 100644
--- a/t/address.t
+++ b/t/address.t
@@ -9,8 +9,9 @@ is_deeply([qw(e@example.com e@example.org)],
         [PublicInbox::Address::emails('User <e@example.com>, e@example.org')],
         'address extraction works as expected');
 
-is_deeply([PublicInbox::Address::emails('"ex@example.com" <ex@example.com>')],
-        [qw(ex@example.com)]);
+is_deeply(['user@example.com'],
+        [PublicInbox::Address::emails('<user@example.com (Comment)>')],
+        'comment after domain accepted before >');
 
 my @names = PublicInbox::Address::names(
         'User <e@e>, e@e, "John A. Doe" <j@d>, <x@x>');