user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 06/13] switch read-only Email::Simple users to Eml
Date: Thu,  7 May 2020 21:05:49 +0000	[thread overview]
Message-ID: <20200507210556.22995-7-e@yhbt.net> (raw)
In-Reply-To: <20200507210556.22995-1-e@yhbt.net>

Since PublicInbox::Eml doesn't parse MIME subparts
up front, it can replace most uses of Email::Simple
without a performance penalty.

This will eventually allow us to lower our overall internal
API footprint by no longer having to maintain the
Email::MIME vs Email::Simple distinction.
---
 lib/PublicInbox/Mbox.pm   | 16 +++++-----------
 lib/PublicInbox/MboxGz.pm |  4 ++--
 lib/PublicInbox/NNTP.pm   | 19 ++++++++-----------
 lib/PublicInbox/WWW.pm    |  6 +++---
 4 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..94e61d4d 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -14,19 +14,13 @@ use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Smsg;
 use PublicInbox::WwwStream qw(html_oneshot);
-use Email::Simple;
-use Email::MIME::Encode;
+use PublicInbox::Eml;
 
 sub subject_fn ($) {
 	my ($hdr) = @_;
-	my $fn = $hdr->header('Subject');
+	my $fn = $hdr->header_str('Subject');
 	return 'no-subject' if (!defined($fn) || $fn eq '');
 
-	# no need for full Email::MIME, here
-	if ($fn =~ /=\?/) {
-		eval { $fn = Encode::decode('MIME-Header', $fn) };
-		return 'no-subject' if $@;
-	}
 	$fn =~ s/^re:\s+//i;
 	$fn eq '' ? 'no-subject' : to_filename($fn);
 }
@@ -51,7 +45,7 @@ sub getline {
 	my $ibx = $ctx->{-inbox};
 	$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
 	$mref = $ibx->msg_by_smsg($cur) or return;
-	$hdr = Email::Simple->new($mref)->header_obj;
+	$hdr = PublicInbox::Eml->new($mref)->header_obj;
 	@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
 	msg_hdr($ctx, $hdr) . msg_body($$mref);
 }
@@ -72,7 +66,7 @@ sub emit_raw {
 	} else {
 		$mref = $ibx->msg_by_mid($mid) or return;
 	}
-	my $hdr = Email::Simple->new($mref)->header_obj;
+	my $hdr = PublicInbox::Eml->new($mref)->header_obj;
 	$more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
 	my $fn = subject_fn($hdr);
 	my @hdr = ('Content-Type');
@@ -114,7 +108,7 @@ sub msg_hdr ($$;$) {
 	for (my $i = 0; $i < @append; $i += 2) {
 		my $k = $append[$i];
 		my $v = $append[$i + 1];
-		my @v = $header_obj->header($k);
+		my @v = $header_obj->header_raw($k);
 		foreach (@v) {
 			if ($v eq $_) {
 				$v = undef;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index e506de3d..f7fc4afc 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -3,7 +3,7 @@
 package PublicInbox::MboxGz;
 use strict;
 use warnings;
-use Email::Simple;
+use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Mbox;
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
@@ -41,7 +41,7 @@ sub getline {
 	my $buf = delete($self->{buf});
 	while (my $smsg = $self->{cb}->($ctx)) {
 		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
-		my $h = Email::Simple->new($mref)->header_obj;
+		my $h = PublicInbox::Eml->new($mref)->header_obj;
 
 		my $err = $gz->deflate(
 			PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index e9c66cd1..54207500 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -8,7 +8,7 @@ use warnings;
 use base qw(PublicInbox::DS);
 use fields qw(nntpd article ng long_cb);
 use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
-use Email::Simple;
+use PublicInbox::Eml;
 use POSIX qw(strftime);
 use PublicInbox::DS qw(now);
 use Digest::SHA qw(sha1_hex);
@@ -383,7 +383,7 @@ sub cmd_quit ($) {
 
 sub header_append ($$$) {
 	my ($hdr, $k, $v) = @_;
-	my @v = $hdr->header($k);
+	my @v = $hdr->header_raw($k);
 	foreach (@v) {
 		return if $v eq $_;
 	}
@@ -416,11 +416,11 @@ sub set_nntp_headers ($$$$$) {
 	# leafnode (and maybe other NNTP clients) have trouble dealing
 	# with v2 messages which have multiple Message-IDs (either due
 	# to our own content-based dedupe or buggy git-send-email versions).
-	my @mids = $hdr->header('Message-ID');
+	my @mids = $hdr->header_raw('Message-ID');
 	if (scalar(@mids) > 1) {
 		my $mid0 = "<$mid>";
 		$hdr->header_set('Message-ID', $mid0);
-		my @alt = $hdr->header('X-Alt-Message-ID');
+		my @alt = $hdr->header_raw('X-Alt-Message-ID');
 		my %seen = map { $_ => 1 } (@alt, $mid0);
 		push(@alt, grep { !$seen{$_}++ } @mids);
 		$hdr->header_set('X-Alt-Message-ID', @alt);
@@ -478,10 +478,9 @@ found:
 	my $smsg = $ng->over->get_art($n) or return $err;
 	my $msg = $ng->msg_by_smsg($smsg) or return $err;
 
-	# Email::Simple->new will modify $msg in-place as documented
-	# in its manpage, so what's left is the body and we won't need
-	# to call Email::Simple::body(), later
-	my $hdr = Email::Simple->new($msg)->header_obj;
+	# PublicInbox::Eml->new will modify $msg in-place, so what's
+	# left is the body and we won't need to call ->body(), later
+	my $hdr = PublicInbox::Eml->new($msg)->header_obj;
 	set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers;
 	[ $n, $mid, $msg, $hdr ];
 }
@@ -511,9 +510,7 @@ sub msg_hdr_write ($$$) {
 	$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
 	# for leafnode compatibility, we need to ensure Message-ID headers
-	# are only a single line.  We can't subclass Email::Simple::Header
-	# and override _default_fold_at in here, either; since that won't
-	# affect messages already in the archive.
+	# are only a single line.
 	$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
 	$hdr .= "\r\n" if $body_follows;
 	$self->msg_more($hdr);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 275e509f..6c016b03 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -22,6 +22,7 @@ use PublicInbox::MID qw(mid_escape);
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::UserContent;
 use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
 
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -225,9 +226,8 @@ sub invalid_inbox_mid {
 		my ($x2, $x38) = ($1, $2);
 		# this is horrifically wasteful for legacy URLs:
 		my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
-		require Email::Simple;
-		my $s = Email::Simple->new($str);
-		$mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+		my $s = PublicInbox::Eml->new($str);
+		$mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
 		return r301($ctx, $inbox, mid_escape($mid));
 	}
 	undef;

  parent reply	other threads:[~2020-05-07 21:05 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07 21:05 [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05 ` [PATCH 01/13] msg_iter: make ->each_part method for PublicInbox::MIME Eric Wong
2020-05-07 21:05 ` [PATCH 02/13] msg_iter: pass $idx as a scalar, not array Eric Wong
2020-05-07 21:05 ` [PATCH 03/13] filter/rubylang: avoid recursing subparts to strip trailers Eric Wong
2020-05-07 21:05 ` [PATCH 04/13] smsg: use capitalization for header retrieval Eric Wong
2020-05-07 21:05 ` [PATCH 05/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05 ` Eric Wong [this message]
2020-05-07 21:05 ` [PATCH 07/13] replace most uses of PublicInbox::MIME with Eml Eric Wong
2020-05-07 21:05 ` [PATCH 08/13] EmlContentFoo: Email::MIME::ContentType replacement Eric Wong
2020-05-07 21:05 ` [PATCH 09/13] EmlContentFoo: relax Encode version requirement Eric Wong
2020-05-07 21:05 ` [PATCH 10/13] eml: remove dependency on Email::MIME::Encodings Eric Wong
2020-05-07 21:05 ` [PATCH 11/13] xt: eml comparison tests Eric Wong
2020-05-08  4:47   ` Eric Wong
2020-05-07 21:05 ` [PATCH 12/13] remove most internal Email::MIME usage Eric Wong
2020-05-07 21:05 ` [PATCH 13/13] eml: drop trailing blank line on missing epilogue Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200507210556.22995-7-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).