From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 341331F8C7 for ; Thu, 7 May 2020 21:05:58 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/13] switch read-only Email::Simple users to Eml Date: Thu, 7 May 2020 21:05:49 +0000 Message-Id: <20200507210556.22995-7-e@yhbt.net> In-Reply-To: <20200507210556.22995-1-e@yhbt.net> References: <20200507210556.22995-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since PublicInbox::Eml doesn't parse MIME subparts up front, it can replace most uses of Email::Simple without a performance penalty. This will eventually allow us to lower our overall internal API footprint by not having to keep the MIME vs Simple distinction. --- lib/PublicInbox/Mbox.pm | 16 +++++----------- lib/PublicInbox/MboxGz.pm | 4 ++-- lib/PublicInbox/NNTP.pm | 19 ++++++++----------- lib/PublicInbox/WWW.pm | 6 +++--- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 97bec5e7..94e61d4d 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -14,19 +14,13 @@ use PublicInbox::MID qw/mid_escape/; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Smsg; use PublicInbox::WwwStream qw(html_oneshot); -use Email::Simple; -use Email::MIME::Encode; +use PublicInbox::Eml; sub subject_fn ($) { my ($hdr) = @_; - my $fn = $hdr->header('Subject'); + my $fn = $hdr->header_str('Subject'); return 'no-subject' if (!defined($fn) || $fn eq ''); - # no need for full Email::MIME, here - if ($fn =~ /=\?/) { - eval { $fn = Encode::decode('MIME-Header', $fn) }; - return 'no-subject' if $@; - } $fn =~ s/^re:\s+//i; $fn eq '' ? 
'no-subject' : to_filename($fn); } @@ -51,7 +45,7 @@ sub getline { my $ibx = $ctx->{-inbox}; $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev); $mref = $ibx->msg_by_smsg($cur) or return; - $hdr = Email::Simple->new($mref)->header_obj; + $hdr = PublicInbox::Eml->new($mref)->header_obj; @$more = ($ctx, $id, $prev, $next); # $next may be undef, here msg_hdr($ctx, $hdr) . msg_body($$mref); } @@ -72,7 +66,7 @@ sub emit_raw { } else { $mref = $ibx->msg_by_mid($mid) or return; } - my $hdr = Email::Simple->new($mref)->header_obj; + my $hdr = PublicInbox::Eml->new($mref)->header_obj; $more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline my $fn = subject_fn($hdr); my @hdr = ('Content-Type'); @@ -114,7 +108,7 @@ sub msg_hdr ($$;$) { for (my $i = 0; $i < @append; $i += 2) { my $k = $append[$i]; my $v = $append[$i + 1]; - my @v = $header_obj->header($k); + my @v = $header_obj->header_raw($k); foreach (@v) { if ($v eq $_) { $v = undef; diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index e506de3d..f7fc4afc 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -3,7 +3,7 @@ package PublicInbox::MboxGz; use strict; use warnings; -use Email::Simple; +use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Mbox; use Compress::Raw::Zlib qw(Z_FINISH Z_OK); @@ -41,7 +41,7 @@ sub getline { my $buf = delete($self->{buf}); while (my $smsg = $self->{cb}->($ctx)) { my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; - my $h = Email::Simple->new($mref)->header_obj; + my $h = PublicInbox::Eml->new($mref)->header_obj; my $err = $gz->deflate( PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}), diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index e9c66cd1..54207500 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -8,7 +8,7 @@ use warnings; use base qw(PublicInbox::DS); use fields qw(nntpd article ng long_cb); use PublicInbox::MID qw(mid_escape $MID_EXTRACT); -use 
Email::Simple; +use PublicInbox::Eml; use POSIX qw(strftime); use PublicInbox::DS qw(now); use Digest::SHA qw(sha1_hex); @@ -383,7 +383,7 @@ sub cmd_quit ($) { sub header_append ($$$) { my ($hdr, $k, $v) = @_; - my @v = $hdr->header($k); + my @v = $hdr->header_raw($k); foreach (@v) { return if $v eq $_; } @@ -416,11 +416,11 @@ sub set_nntp_headers ($$$$$) { # leafnode (and maybe other NNTP clients) have trouble dealing # with v2 messages which have multiple Message-IDs (either due # to our own content-based dedupe or buggy git-send-email versions). - my @mids = $hdr->header('Message-ID'); + my @mids = $hdr->header_raw('Message-ID'); if (scalar(@mids) > 1) { my $mid0 = "<$mid>"; $hdr->header_set('Message-ID', $mid0); - my @alt = $hdr->header('X-Alt-Message-ID'); + my @alt = $hdr->header_raw('X-Alt-Message-ID'); my %seen = map { $_ => 1 } (@alt, $mid0); push(@alt, grep { !$seen{$_}++ } @mids); $hdr->header_set('X-Alt-Message-ID', @alt); @@ -478,10 +478,9 @@ found: my $smsg = $ng->over->get_art($n) or return $err; my $msg = $ng->msg_by_smsg($smsg) or return $err; - # Email::Simple->new will modify $msg in-place as documented - # in its manpage, so what's left is the body and we won't need - # to call Email::Simple::body(), later - my $hdr = Email::Simple->new($msg)->header_obj; + # PublicInbox::Eml->new will modify $msg in-place, so what's + # left is the body and we won't need to call ->body(), later + my $hdr = PublicInbox::Eml->new($msg)->header_obj; set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers; [ $n, $mid, $msg, $hdr ]; } @@ -511,9 +510,7 @@ sub msg_hdr_write ($$$) { $hdr =~ s/(?msg_more($hdr); diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 275e509f..6c016b03 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -22,6 +22,7 @@ use PublicInbox::MID qw(mid_escape); use PublicInbox::GitHTTPBackend; use PublicInbox::UserContent; use PublicInbox::WwwStatic qw(r path_info_raw); +use PublicInbox::Eml; # TODO: consider a 
routing tree now that we have more endpoints: our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; @@ -225,9 +226,8 @@ sub invalid_inbox_mid { my ($x2, $x38) = ($1, $2); # this is horrifically wasteful for legacy URLs: my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return; - require Email::Simple; - my $s = Email::Simple->new($str); - $mid = PublicInbox::MID::mid_clean($s->header('Message-ID')); + my $s = PublicInbox::Eml->new($str); + $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID')); return r301($ctx, $inbox, mid_escape($mid)); } undef;