* [PATCH 06/13] switch read-only Email::Simple users to Eml
2020-05-07 21:05 6% [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
@ 2020-05-07 21:05 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-05-07 21:05 UTC (permalink / raw)
To: meta
Since PublicInbox::Eml doesn't parse MIME subparts
up front, it can replace most uses of Email::Simple
without a performance penalty.
This will eventually allow us to lower our overall internal
API footprint by no longer having to maintain the
Email::MIME vs Email::Simple distinction.
---
lib/PublicInbox/Mbox.pm | 16 +++++-----------
lib/PublicInbox/MboxGz.pm | 4 ++--
lib/PublicInbox/NNTP.pm | 19 ++++++++-----------
lib/PublicInbox/WWW.pm | 6 +++---
4 files changed, 18 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..94e61d4d 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -14,19 +14,13 @@ use PublicInbox::MID qw/mid_escape/;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Smsg;
use PublicInbox::WwwStream qw(html_oneshot);
-use Email::Simple;
-use Email::MIME::Encode;
+use PublicInbox::Eml;
sub subject_fn ($) {
my ($hdr) = @_;
- my $fn = $hdr->header('Subject');
+ my $fn = $hdr->header_str('Subject');
return 'no-subject' if (!defined($fn) || $fn eq '');
- # no need for full Email::MIME, here
- if ($fn =~ /=\?/) {
- eval { $fn = Encode::decode('MIME-Header', $fn) };
- return 'no-subject' if $@;
- }
$fn =~ s/^re:\s+//i;
$fn eq '' ? 'no-subject' : to_filename($fn);
}
@@ -51,7 +45,7 @@ sub getline {
my $ibx = $ctx->{-inbox};
$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
$mref = $ibx->msg_by_smsg($cur) or return;
- $hdr = Email::Simple->new($mref)->header_obj;
+ $hdr = PublicInbox::Eml->new($mref)->header_obj;
@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
msg_hdr($ctx, $hdr) . msg_body($$mref);
}
@@ -72,7 +66,7 @@ sub emit_raw {
} else {
$mref = $ibx->msg_by_mid($mid) or return;
}
- my $hdr = Email::Simple->new($mref)->header_obj;
+ my $hdr = PublicInbox::Eml->new($mref)->header_obj;
$more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
my $fn = subject_fn($hdr);
my @hdr = ('Content-Type');
@@ -114,7 +108,7 @@ sub msg_hdr ($$;$) {
for (my $i = 0; $i < @append; $i += 2) {
my $k = $append[$i];
my $v = $append[$i + 1];
- my @v = $header_obj->header($k);
+ my @v = $header_obj->header_raw($k);
foreach (@v) {
if ($v eq $_) {
$v = undef;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index e506de3d..f7fc4afc 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -3,7 +3,7 @@
package PublicInbox::MboxGz;
use strict;
use warnings;
-use Email::Simple;
+use PublicInbox::Eml;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Mbox;
use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
@@ -41,7 +41,7 @@ sub getline {
my $buf = delete($self->{buf});
while (my $smsg = $self->{cb}->($ctx)) {
my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
- my $h = Email::Simple->new($mref)->header_obj;
+ my $h = PublicInbox::Eml->new($mref)->header_obj;
my $err = $gz->deflate(
PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index e9c66cd1..54207500 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -8,7 +8,7 @@ use warnings;
use base qw(PublicInbox::DS);
use fields qw(nntpd article ng long_cb);
use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
-use Email::Simple;
+use PublicInbox::Eml;
use POSIX qw(strftime);
use PublicInbox::DS qw(now);
use Digest::SHA qw(sha1_hex);
@@ -383,7 +383,7 @@ sub cmd_quit ($) {
sub header_append ($$$) {
my ($hdr, $k, $v) = @_;
- my @v = $hdr->header($k);
+ my @v = $hdr->header_raw($k);
foreach (@v) {
return if $v eq $_;
}
@@ -416,11 +416,11 @@ sub set_nntp_headers ($$$$$) {
# leafnode (and maybe other NNTP clients) have trouble dealing
# with v2 messages which have multiple Message-IDs (either due
# to our own content-based dedupe or buggy git-send-email versions).
- my @mids = $hdr->header('Message-ID');
+ my @mids = $hdr->header_raw('Message-ID');
if (scalar(@mids) > 1) {
my $mid0 = "<$mid>";
$hdr->header_set('Message-ID', $mid0);
- my @alt = $hdr->header('X-Alt-Message-ID');
+ my @alt = $hdr->header_raw('X-Alt-Message-ID');
my %seen = map { $_ => 1 } (@alt, $mid0);
push(@alt, grep { !$seen{$_}++ } @mids);
$hdr->header_set('X-Alt-Message-ID', @alt);
@@ -478,10 +478,9 @@ found:
my $smsg = $ng->over->get_art($n) or return $err;
my $msg = $ng->msg_by_smsg($smsg) or return $err;
- # Email::Simple->new will modify $msg in-place as documented
- # in its manpage, so what's left is the body and we won't need
- # to call Email::Simple::body(), later
- my $hdr = Email::Simple->new($msg)->header_obj;
+ # PublicInbox::Eml->new will modify $msg in-place, so what's
+ # left is the body and we won't need to call ->body(), later
+ my $hdr = PublicInbox::Eml->new($msg)->header_obj;
set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers;
[ $n, $mid, $msg, $hdr ];
}
@@ -511,9 +510,7 @@ sub msg_hdr_write ($$$) {
$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
# for leafnode compatibility, we need to ensure Message-ID headers
- # are only a single line. We can't subclass Email::Simple::Header
- # and override _default_fold_at in here, either; since that won't
- # affect messages already in the archive.
+ # are only a single line.
$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
$hdr .= "\r\n" if $body_follows;
$self->msg_more($hdr);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 275e509f..6c016b03 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -22,6 +22,7 @@ use PublicInbox::MID qw(mid_escape);
use PublicInbox::GitHTTPBackend;
use PublicInbox::UserContent;
use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
# TODO: consider a routing tree now that we have more endpoints:
our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -225,9 +226,8 @@ sub invalid_inbox_mid {
my ($x2, $x38) = ($1, $2);
# this is horrifically wasteful for legacy URLs:
my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
- require Email::Simple;
- my $s = Email::Simple->new($str);
- $mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+ my $s = PublicInbox::Eml->new($str);
+ $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
return r301($ctx, $inbox, mid_escape($mid));
}
undef;
^ permalink raw reply related [relevance 7%]
* [PATCH 00/13] eml: pure-Perl replacement for Email::MIME
@ 2020-05-07 21:05 6% Eric Wong
2020-05-07 21:05 7% ` [PATCH 06/13] switch read-only Email::Simple users to Eml Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-05-07 21:05 UTC (permalink / raw)
To: meta
Eric Wong (13):
msg_iter: make ->each_part method for PublicInbox::MIME
msg_iter: pass $idx as a scalar, not array
filter/rubylang: avoid recursing subparts to strip trailers
smsg: use capitalization for header retrieval
eml: pure-Perl replacement for Email::MIME
switch read-only Email::Simple users to Eml
replace most uses of PublicInbox::MIME with Eml
EmlContentFoo: Email::MIME::ContentType replacement
EmlContentFoo: relax Encode version requirement
eml: remove dependency on Email::MIME::Encodings
xt: eml comparison tests
remove most internal Email::MIME usage
eml: drop trailing blank line on missing epilogue
Documentation/mknews.perl | 4 +-
INSTALL | 26 +-
MANIFEST | 7 +
Makefile.PL | 7 +-
ci/deps.perl | 3 -
lib/PublicInbox/Admin.pm | 2 +-
lib/PublicInbox/Eml.pm | 421 +++++++++++++++++++++++++++++
lib/PublicInbox/EmlContentFoo.pm | 317 ++++++++++++++++++++++
lib/PublicInbox/Filter/RubyLang.pm | 32 ++-
lib/PublicInbox/Filter/Vger.pm | 4 +-
lib/PublicInbox/Import.pm | 11 +-
lib/PublicInbox/Inbox.pm | 4 +-
lib/PublicInbox/InboxWritable.pm | 4 +-
lib/PublicInbox/MDA.pm | 1 -
lib/PublicInbox/MIME.pm | 6 +
lib/PublicInbox/Mbox.pm | 16 +-
lib/PublicInbox/MboxGz.pm | 4 +-
lib/PublicInbox/MsgIter.pm | 21 +-
lib/PublicInbox/MsgTime.pm | 8 +-
lib/PublicInbox/NNTP.pm | 19 +-
lib/PublicInbox/SearchIdx.pm | 8 +-
lib/PublicInbox/SearchIdxShard.pm | 3 +-
lib/PublicInbox/Smsg.pm | 24 +-
lib/PublicInbox/SolverGit.pm | 4 +-
lib/PublicInbox/TestCommon.pm | 11 +-
lib/PublicInbox/V2Writable.pm | 17 +-
lib/PublicInbox/View.pm | 28 +-
lib/PublicInbox/WWW.pm | 8 +-
lib/PublicInbox/WatchMaildir.pm | 4 +-
lib/PublicInbox/WwwAttach.pm | 15 +-
script/public-inbox-edit | 8 +-
script/public-inbox-learn | 4 +-
script/public-inbox-mda | 16 +-
script/public-inbox-purge | 4 +-
t/altid.t | 4 +-
t/altid_v2.t | 4 +-
t/cgi.t | 8 +-
t/content_id.t | 6 +-
t/convert-compact.t | 4 +-
t/edit.t | 20 +-
t/eml.t | 363 +++++++++++++++++++++++++
t/eml_content_disposition.t | 102 +++++++
t/eml_content_type.t | 289 ++++++++++++++++++++
t/feed.t | 6 +-
t/filter_base.t | 4 +-
t/filter_mirror.t | 2 +-
t/filter_rubylang.t | 8 +-
t/filter_subjecttag.t | 4 +-
t/filter_vger.t | 6 +-
t/html_index.t | 4 +-
t/httpd.t | 4 +-
t/import.t | 6 +-
t/indexlevels-mirror.t | 4 +-
t/mda.t | 4 +-
t/mda_filter_rubylang.t | 2 +-
t/mid.t | 4 +-
t/mime.t | 82 +++---
t/msg_iter.t | 10 +-
t/msgtime.t | 6 +-
t/multi-mid.t | 6 +-
t/nntp.t | 4 +-
t/nntpd-tls.t | 4 +-
t/nntpd.t | 6 +-
t/nulsubject.t | 2 +-
t/plack.t | 10 +-
t/precheck.t | 10 +-
t/psgi_attach.t | 2 +-
t/psgi_bad_mids.t | 4 +-
t/psgi_mount.t | 4 +-
t/psgi_multipart_not.t | 4 +-
t/psgi_scan_all.t | 4 +-
t/psgi_search.t | 8 +-
t/psgi_text.t | 2 +-
t/psgi_v2.t | 6 +-
t/purge.t | 2 +-
t/replace.t | 12 +-
t/reply.t | 4 +-
t/search-thr-index.t | 6 +-
t/search.t | 26 +-
t/solver_git.t | 4 +-
t/spamcheck_spamc.t | 8 +-
t/thread-cycle.t | 3 +-
t/time.t | 4 +-
t/v1-add-remove-add.t | 4 +-
t/v1reindex.t | 4 +-
t/v2-add-remove-add.t | 4 +-
t/v2mda.t | 4 +-
t/v2mirror.t | 4 +-
t/v2reindex.t | 8 +-
t/v2writable.t | 8 +-
t/watch_filter_rubylang.t | 2 +-
t/watch_maildir.t | 2 +-
t/watch_maildir_v2.t | 2 +-
t/www_altid.t | 2 +-
t/xcpdb-reshard.t | 4 +-
xt/cmp-msgstr.t | 108 ++++++++
xt/cmp-msgview.t | 95 +++++++
xt/msgtime_cmp.t | 12 +-
xt/perf-msgview.t | 2 +-
99 files changed, 2084 insertions(+), 353 deletions(-)
create mode 100644 lib/PublicInbox/Eml.pm
create mode 100644 lib/PublicInbox/EmlContentFoo.pm
create mode 100644 t/eml.t
create mode 100644 t/eml_content_disposition.t
create mode 100644 t/eml_content_type.t
create mode 100644 xt/cmp-msgstr.t
create mode 100644 xt/cmp-msgview.t
^ permalink raw reply [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-05-07 21:05 6% [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05 7% ` [PATCH 06/13] switch read-only Email::Simple users to Eml Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).