From f46019039da6ac1596a4aef64b7bf394c743c1b1 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Thu, 15 Feb 2018 00:25:53 +0000 Subject: address: extract more characters from email addresses There's a lot of weird characters which show up in LKML archives which we did not support before. Furthermore, allow spaces before the '>' in the From: line as at least some non-spam poster used it. --- lib/PublicInbox/Address.pm | 3 ++- t/address.t | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm index f334adea..548f417c 100644 --- a/lib/PublicInbox/Address.pm +++ b/lib/PublicInbox/Address.pm @@ -8,7 +8,8 @@ use warnings; # just enough to make thing sanely displayable and pass to git sub emails { - ($_[0] =~ /([\w\.\+=\-]+\@[\w\.\-]+)>?\s*(?:\(.*?\))?(?:,\s*|\z)/g) + ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+) + (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx) } sub names { diff --git a/t/address.t b/t/address.t index e35e4f8b..eced5c46 100644 --- a/t/address.t +++ b/t/address.t @@ -9,8 +9,9 @@ is_deeply([qw(e@example.com e@example.org)], [PublicInbox::Address::emails('User , e@example.org')], 'address extraction works as expected'); -is_deeply([PublicInbox::Address::emails('"ex@example.com" ')], - [qw(ex@example.com)]); +is_deeply(['user@example.com'], + [PublicInbox::Address::emails('')], + 'comment after domain accepted before >'); my @names = PublicInbox::Address::names( 'User , e@e, "John A. Doe" , '); -- cgit v1.2.3-24-ge0c7