From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 06/13] switch read-only Email::Simple users to Eml
Date: Thu, 7 May 2020 21:05:49 +0000 [thread overview]
Message-ID: <20200507210556.22995-7-e@yhbt.net> (raw)
In-Reply-To: <20200507210556.22995-1-e@yhbt.net>
Since PublicInbox::Eml doesn't parse MIME subparts
up front, it can replace most uses of Email::Simple
without a performance penalty.
This will eventually allow us to reduce the overall internal
API footprint, since we will no longer have to keep the
MIME vs Simple distinction.
---
lib/PublicInbox/Mbox.pm | 16 +++++-----------
lib/PublicInbox/MboxGz.pm | 4 ++--
lib/PublicInbox/NNTP.pm | 19 ++++++++-----------
lib/PublicInbox/WWW.pm | 6 +++---
4 files changed, 18 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..94e61d4d 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -14,19 +14,13 @@ use PublicInbox::MID qw/mid_escape/;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Smsg;
use PublicInbox::WwwStream qw(html_oneshot);
-use Email::Simple;
-use Email::MIME::Encode;
+use PublicInbox::Eml;
sub subject_fn ($) {
my ($hdr) = @_;
- my $fn = $hdr->header('Subject');
+ my $fn = $hdr->header_str('Subject');
return 'no-subject' if (!defined($fn) || $fn eq '');
- # no need for full Email::MIME, here
- if ($fn =~ /=\?/) {
- eval { $fn = Encode::decode('MIME-Header', $fn) };
- return 'no-subject' if $@;
- }
$fn =~ s/^re:\s+//i;
$fn eq '' ? 'no-subject' : to_filename($fn);
}
@@ -51,7 +45,7 @@ sub getline {
my $ibx = $ctx->{-inbox};
$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
$mref = $ibx->msg_by_smsg($cur) or return;
- $hdr = Email::Simple->new($mref)->header_obj;
+ $hdr = PublicInbox::Eml->new($mref)->header_obj;
@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
msg_hdr($ctx, $hdr) . msg_body($$mref);
}
@@ -72,7 +66,7 @@ sub emit_raw {
} else {
$mref = $ibx->msg_by_mid($mid) or return;
}
- my $hdr = Email::Simple->new($mref)->header_obj;
+ my $hdr = PublicInbox::Eml->new($mref)->header_obj;
$more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
my $fn = subject_fn($hdr);
my @hdr = ('Content-Type');
@@ -114,7 +108,7 @@ sub msg_hdr ($$;$) {
for (my $i = 0; $i < @append; $i += 2) {
my $k = $append[$i];
my $v = $append[$i + 1];
- my @v = $header_obj->header($k);
+ my @v = $header_obj->header_raw($k);
foreach (@v) {
if ($v eq $_) {
$v = undef;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index e506de3d..f7fc4afc 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -3,7 +3,7 @@
package PublicInbox::MboxGz;
use strict;
use warnings;
-use Email::Simple;
+use PublicInbox::Eml;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Mbox;
use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
@@ -41,7 +41,7 @@ sub getline {
my $buf = delete($self->{buf});
while (my $smsg = $self->{cb}->($ctx)) {
my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
- my $h = Email::Simple->new($mref)->header_obj;
+ my $h = PublicInbox::Eml->new($mref)->header_obj;
my $err = $gz->deflate(
PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index e9c66cd1..54207500 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -8,7 +8,7 @@ use warnings;
use base qw(PublicInbox::DS);
use fields qw(nntpd article ng long_cb);
use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
-use Email::Simple;
+use PublicInbox::Eml;
use POSIX qw(strftime);
use PublicInbox::DS qw(now);
use Digest::SHA qw(sha1_hex);
@@ -383,7 +383,7 @@ sub cmd_quit ($) {
sub header_append ($$$) {
my ($hdr, $k, $v) = @_;
- my @v = $hdr->header($k);
+ my @v = $hdr->header_raw($k);
foreach (@v) {
return if $v eq $_;
}
@@ -416,11 +416,11 @@ sub set_nntp_headers ($$$$$) {
# leafnode (and maybe other NNTP clients) have trouble dealing
# with v2 messages which have multiple Message-IDs (either due
# to our own content-based dedupe or buggy git-send-email versions).
- my @mids = $hdr->header('Message-ID');
+ my @mids = $hdr->header_raw('Message-ID');
if (scalar(@mids) > 1) {
my $mid0 = "<$mid>";
$hdr->header_set('Message-ID', $mid0);
- my @alt = $hdr->header('X-Alt-Message-ID');
+ my @alt = $hdr->header_raw('X-Alt-Message-ID');
my %seen = map { $_ => 1 } (@alt, $mid0);
push(@alt, grep { !$seen{$_}++ } @mids);
$hdr->header_set('X-Alt-Message-ID', @alt);
@@ -478,10 +478,9 @@ found:
my $smsg = $ng->over->get_art($n) or return $err;
my $msg = $ng->msg_by_smsg($smsg) or return $err;
- # Email::Simple->new will modify $msg in-place as documented
- # in its manpage, so what's left is the body and we won't need
- # to call Email::Simple::body(), later
- my $hdr = Email::Simple->new($msg)->header_obj;
+ # PublicInbox::Eml->new will modify $msg in-place, so what's
+ # left is the body and we won't need to call ->body(), later
+ my $hdr = PublicInbox::Eml->new($msg)->header_obj;
set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers;
[ $n, $mid, $msg, $hdr ];
}
@@ -511,9 +510,7 @@ sub msg_hdr_write ($$$) {
$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
# for leafnode compatibility, we need to ensure Message-ID headers
- # are only a single line. We can't subclass Email::Simple::Header
- # and override _default_fold_at in here, either; since that won't
- # affect messages already in the archive.
+ # are only a single line.
$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
$hdr .= "\r\n" if $body_follows;
$self->msg_more($hdr);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 275e509f..6c016b03 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -22,6 +22,7 @@ use PublicInbox::MID qw(mid_escape);
use PublicInbox::GitHTTPBackend;
use PublicInbox::UserContent;
use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
# TODO: consider a routing tree now that we have more endpoints:
our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -225,9 +226,8 @@ sub invalid_inbox_mid {
my ($x2, $x38) = ($1, $2);
# this is horrifically wasteful for legacy URLs:
my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
- require Email::Simple;
- my $s = Email::Simple->new($str);
- $mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+ my $s = PublicInbox::Eml->new($str);
+ $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
return r301($ctx, $inbox, mid_escape($mid));
}
undef;
next prev parent reply other threads:[~2020-05-07 21:05 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-07 21:05 [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05 ` [PATCH 01/13] msg_iter: make ->each_part method for PublicInbox::MIME Eric Wong
2020-05-07 21:05 ` [PATCH 02/13] msg_iter: pass $idx as a scalar, not array Eric Wong
2020-05-07 21:05 ` [PATCH 03/13] filter/rubylang: avoid recursing subparts to strip trailers Eric Wong
2020-05-07 21:05 ` [PATCH 04/13] smsg: use capitalization for header retrieval Eric Wong
2020-05-07 21:05 ` [PATCH 05/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05 ` Eric Wong [this message]
2020-05-07 21:05 ` [PATCH 07/13] replace most uses of PublicInbox::MIME with Eml Eric Wong
2020-05-07 21:05 ` [PATCH 08/13] EmlContentFoo: Email::MIME::ContentType replacement Eric Wong
2020-05-07 21:05 ` [PATCH 09/13] EmlContentFoo: relax Encode version requirement Eric Wong
2020-05-07 21:05 ` [PATCH 10/13] eml: remove dependency on Email::MIME::Encodings Eric Wong
2020-05-07 21:05 ` [PATCH 11/13] xt: eml comparison tests Eric Wong
2020-05-08 4:47 ` Eric Wong
2020-05-07 21:05 ` [PATCH 12/13] remove most internal Email::MIME usage Eric Wong
2020-05-07 21:05 ` [PATCH 13/13] eml: drop trailing blank line on missing epilogue Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200507210556.22995-7-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).