user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH inbox] remove Email::Address dependency
Date: Wed, 25 May 2016 02:29:51 +0000	[thread overview]
Message-ID: <20160525022951.GA17598@dcvr.yhbt.net> (raw)
In-Reply-To: <20160525022859.GA7059@dcvr.yhbt.net>

git has stricter requirements for ident names than Email::Address:
git rejects '<' and '>' in names, which Email::Address allows.

Even in 1.908, Email::Address also has an incomplete fix for
CVE-2015-7686 with a DoS-able regexp for comments.  Since we
don't care for or need all the RFC compliance of Email::Address,
avoiding it entirely may be preferable.

Email::Address will still be installed as a requirement for
Email::MIME, but it is only used by
Email::MIME::header_str_set, which we do not use.
---
 INSTALL                       |  1 -
 Makefile.PL                   |  1 -
 lib/PublicInbox/Address.pm    | 25 +++++++++++++++++++++++++
 lib/PublicInbox/Feed.pm       | 10 ++++------
 lib/PublicInbox/Import.pm     | 19 +++++++++----------
 lib/PublicInbox/MDA.pm        |  9 ++++-----
 lib/PublicInbox/SearchMsg.pm  |  6 ++----
 lib/PublicInbox/SearchView.pm |  1 -
 lib/PublicInbox/View.pm       | 15 +++++----------
 script/public-inbox-learn     |  7 ++++---
 script/public-inbox-mda       |  1 -
 t/mda.t                       |  8 ++++----
 12 files changed, 57 insertions(+), 46 deletions(-)
 create mode 100644 lib/PublicInbox/Address.pm

diff --git a/INSTALL b/INSTALL
index e75c4e2..40cd6ca 100644
--- a/INSTALL
+++ b/INSTALL
@@ -28,7 +28,6 @@ Requirements (server MDA)
 * lynx (for converting HTML messages to text)
 * Perl and several modules:    (Debian package name)
   - Date::Parse                libtimedate-perl
-  - Email::Address             libemail-address-perl
   - Email::Filter              libemail-filter-perl
   - Email::MIME                libemail-mime-perl
   - Email::MIME::ContentType   libemail-mime-contenttype-perl
diff --git a/Makefile.PL b/Makefile.PL
index 9e0c6e8..3cffe13 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -18,7 +18,6 @@ WriteMakefile(
 		# We also depend on git.
 		# Keep this sorted and synced to the INSTALL document
 		'Date::Parse' => 0,
-		'Email::Address' => 0,
 		'Email::Filter' => 0,
 		'Email::MIME' => 0,
 		'Email::MIME::ContentType' => 0,
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm
new file mode 100644
index 0000000..ef4cbdc
--- /dev/null
+++ b/lib/PublicInbox/Address.pm
@@ -0,0 +1,25 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Address;
+use strict;
+use warnings;
+
+# very loose regexes, here.  We don't need RFC-compliance,
+# just enough to make things sanely displayable and pass to git
+
+sub emails { ($_[0] =~ /([^<\s]+\@[^>\s]+)/g) }
+
+sub from_name {
+	my ($val) = @_;
+	my $name = $val;
+	$name =~ s/\s*\S+\@\S+\s*\z//;
+	if ($name !~ /\S/ || $name =~ /[<>]/) { # git does not like [<>]
+		($name) = emails($val);
+		$name =~ s/\@.*//;
+	}
+	$name =~ tr/\r\n\t/ /;
+	$name =~ s/\A\s*//;
+	$name;
+}
+
+1;
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 6ed0085..81895db 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -5,13 +5,13 @@
 package PublicInbox::Feed;
 use strict;
 use warnings;
-use Email::Address;
 use Email::MIME;
 use Date::Parse qw(strptime);
 use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::Git;
 use PublicInbox::View;
 use PublicInbox::MID qw/mid_clean mid2path/;
+use PublicInbox::Address;
 use POSIX qw/strftime/;
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # Atom standard
@@ -86,7 +86,6 @@ sub _no_thread {
 
 sub end_feed {
 	my ($fh) = @_;
-	Email::Address->purge_cache;
 	$fh->write('</feed>');
 	$fh->close;
 }
@@ -171,7 +170,6 @@ sub emit_index_nosrch {
 		PublicInbox::View::index_entry($mime, 0, $state);
 		1;
 	});
-	Email::Address->purge_cache;
 	$last;
 }
 
@@ -330,9 +328,9 @@ sub add_to_feed {
 	$title = title_tag($title);
 
 	my $from = $header_obj->header('From') or return 0;
-	my @from = Email::Address->parse($from) or return 0;
-	my $name = ascii_html($from[0]->name);
-	my $email = $from[0]->address;
+	my ($email) = PublicInbox::Address::emails($from);
+	my $name = PublicInbox::Address::from_name($from);
+	$name = ascii_html($name);
 	$email = ascii_html($email);
 
 	if (delete $feed_opts->{emit_header}) {
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 1afcf5d..e3d65f4 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -8,7 +8,6 @@ package PublicInbox::Import;
 use strict;
 use warnings;
 use Fcntl qw(:flock :DEFAULT);
-use Email::Address;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MID qw(mid_mime mid2path);
 
@@ -141,21 +140,21 @@ sub add {
 	my ($self, $mime) = @_; # mime = Email::MIME
 
 	my $from = $mime->header('From');
-	my @from = Email::Address->parse($from);
-	my $name = $from[0]->name;
-	my $email = $from[0]->address;
-	my $date = $mime->header('Date');
-	my $subject = $mime->header('Subject');
-	$subject = '(no subject)' unless defined $subject;
-	my $mid = mid_mime($mime);
-	my $path = mid2path($mid);
-
+	my ($email) = ($from =~ /([^<\s]+\@[^>\s]+)/g);
+	my $name = $from;
+	$name =~ s/\s*\S+\@\S+\s*\z//;
 	# git gets confused with:
 	#  "'A U Thor <u@example.com>' via foo" <foo@example.com>
 	# ref:
 	# <CAD0k6qSUYANxbjjbE4jTW4EeVwOYgBD=bXkSu=akiYC_CB7Ffw@mail.gmail.com>
 	$name =~ tr/<>// and $name = $email;
 
+	my $date = $mime->header('Date');
+	my $subject = $mime->header('Subject');
+	$subject = '(no subject)' unless defined $subject;
+	my $mid = mid_mime($mime);
+	my $path = mid2path($mid);
+
 	my ($r, $w) = $self->gfi_start;
 	my $tip = $self->{tip};
 	if ($tip ne '') {
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index e1207b5..2e6e9ec 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -6,7 +6,6 @@ package PublicInbox::MDA;
 use strict;
 use warnings;
 use Email::Simple;
-use Email::Address;
 use Date::Parse qw(strptime);
 use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable
 use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
@@ -62,13 +61,13 @@ sub alias_specified {
 
 	my @address = ref($address) eq 'ARRAY' ? @$address : ($address);
 	my %ok = map {
-		my @recip = Email::Address->parse($_);
-		lc(__drop_plus($recip[0]->address)) => 1;
+		lc(__drop_plus($_)) => 1;
 	} @address;
 
 	foreach my $line ($filter->cc, $filter->to) {
-		foreach my $addr (Email::Address->parse($line)) {
-			if ($ok{lc(__drop_plus($addr->address))}) {
+		my @addrs = ($line =~ /([^<\s]+\@[^>\s]+)/g);
+		foreach my $addr (@addrs) {
+			if ($ok{lc(__drop_plus($addr))}) {
 				return 1;
 			}
 		}
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 1244aee..0fb2a07 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -7,10 +7,10 @@ package PublicInbox::SearchMsg;
 use strict;
 use warnings;
 use Search::Xapian;
-use Email::Address qw//;
 use POSIX qw//;
 use Date::Parse qw/str2time/;
 use PublicInbox::MID qw/mid_clean/;
+use PublicInbox::Address;
 use Encode qw/find_encoding/;
 my $enc_utf8 = find_encoding('UTF-8');
 our $PFX2TERM_RE = undef;
@@ -87,9 +87,7 @@ sub from ($) {
 	my ($self) = @_;
 	my $from = __hdr($self, 'from');
 	if (defined $from && !defined $self->{from_name}) {
-		$from =~ tr/\t\r\n/ /;
-		my @from = Email::Address->parse($from);
-		$self->{from_name} = $from[0]->name;
+		$self->{from_name} = PublicInbox::Address::from_name($from);
 	}
 	$from;
 }
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e3dc22f..0ae0505 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -176,7 +176,6 @@ sub tdump {
 	$ctx->{searchview} = 1;
 	tdump_ent($git, $state, $_, 0) for @rootset;
 	PublicInbox::View::thread_adj_level($state, 0);
-	Email::Address->purge_cache;
 
 	$fh->write(search_nav_bot($mset, $q). "\n\n" .
 			foot($ctx). '</pre></body></html>');
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4360991..a78ce31 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -14,6 +14,7 @@ use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::Linkify;
 use PublicInbox::MID qw/mid_clean id_compress mid2path mid_mime/;
 use PublicInbox::MsgIter;
+use PublicInbox::Address;
 require POSIX;
 
 use constant INDENT => '  ';
@@ -99,9 +100,7 @@ sub index_entry {
 	$seen->{$id} = "#$id"; # save the anchor for children, later
 
 	my $mid = PublicInbox::Hval->new_msgid($mid_raw);
-	my $from = $hdr->header('From');
-	my @from = Email::Address->parse($from);
-	$from = $from[0]->name;
+	my $from = PublicInbox::Address::from_name($hdr->header('From'));
 
 	my $root_anchor = $state->{root_anchor} || '';
 	my $path = $root_anchor ? '../../' : '';
@@ -191,7 +190,6 @@ sub emit_thread_html {
 				('</ul></li>' x ($max - 1)) . '</ul>');
 		}
 	}
-	Email::Address->purge_cache;
 
 	# there could be a race due to a message being deleted in git
 	# but still being in the Xapian index:
@@ -339,8 +337,7 @@ sub headers_to_html_header {
 		$v = PublicInbox::Hval->new($v);
 
 		if ($h eq 'From') {
-			my @from = Email::Address->parse($v->raw);
-			$title[1] = ascii_html($from[0]->name);
+			$title[1] = PublicInbox::Address::from_name($v->raw);
 		} elsif ($h eq 'Subject') {
 			$title[0] = $v->as_html;
 			if ($srch) {
@@ -449,15 +446,13 @@ sub mailto_arg_link {
 	foreach my $h (qw(From To Cc)) {
 		my $v = $hdr->header($h);
 		defined($v) && ($v ne '') or next;
-		my @addrs = Email::Address->parse($v);
-		foreach my $recip (@addrs) {
-			my $address = $recip->address;
+		my @addrs = PublicInbox::Address::emails($v);
+		foreach my $address (@addrs) {
 			my $dst = lc($address);
 			$cc{$dst} ||= $address;
 			$to ||= $dst;
 		}
 	}
-	Email::Address->purge_cache;
 	my @arg;
 
 	my $subj = $hdr->header('Subject') || '';
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 1c051ec..c79f247 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -11,7 +11,7 @@ use PublicInbox::Config;
 use PublicInbox::Git;
 use PublicInbox::Import;
 use Email::MIME;
-use Email::Address;
+use PublicInbox::Address;
 use IPC::Run qw/run/;
 my $train = shift or die "usage: $usage\n";
 if ($train !~ /\A(?:ham|spam)\z/) {
@@ -29,8 +29,9 @@ my $mime = Email::MIME->new(eval {
 # get all recipients
 my %dests;
 foreach my $h (qw(Cc To)) {
-	foreach my $recipient (Email::Address->parse($mime->header($h))) {
-		$dests{lc($recipient->address)} = 1;
+	my $val = $mime->header($h) or next;
+	foreach my $email (PublicInbox::Address::emails($val)) {
+		$dests{lc($email)} = 1;
 	}
 }
 
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index b606c59..6ac0e8c 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -9,7 +9,6 @@ my $usage = 'public-inbox-mda < rfc2822_message';
 
 use Email::Filter;
 use Email::MIME;
-use Email::Address;
 use File::Path::Expand qw/expand_filename/;
 use IPC::Run qw(run);
 use PublicInbox::MDA;
diff --git a/t/mda.t b/t/mda.t
index ad6a091..fdba967 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -48,6 +48,7 @@ my $mime;
 
 local $ENV{GIT_COMMITTER_NAME} = eval {
 	use PublicInbox::MDA;
+	use PublicInbox::Address;
 	use Encode qw/encode/;
 	my $mbox = 't/utf8.mbox';
 	open(my $fh, '<', $mbox) or die "failed to open mbox: $mbox\n";
@@ -57,10 +58,9 @@ local $ENV{GIT_COMMITTER_NAME} = eval {
 	$msg = Email::MIME->new($msg->simple->as_string);
 
 	my $from = $msg->header('From');
-	my @from = Email::Address->parse($from);
-	my $author = $from[0]->name;
-	my $email = $from[0]->address;
-	my $date = $msg ->header('Date');
+	my $author = PublicInbox::Address::from_name($from);
+	my ($email) = PublicInbox::Address::emails($from);
+	my $date = $msg->header('Date');
 
 	is('El&#233;anor',
 		encode('us-ascii', my $tmp = $author, Encode::HTMLCREF),

      reply	other threads:[~2016-05-25  2:29 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-25  2:28 [PATCH ssoma] remove Email::Address dependency Eric Wong
2016-05-25  2:29 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160525022951.GA17598@dcvr.yhbt.net \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).