user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 06/13] switch read-only Email::Simple users to Eml
  2020-05-07 21:05  6% [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
@ 2020-05-07 21:05  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-05-07 21:05 UTC (permalink / raw)
  To: meta

Since PublicInbox::Eml doesn't parse MIME subparts
up front, it can replace most uses of Email::Simple
without a performance penalty.

This will eventually allow us to lower the overall internal
API footprint by no longer having to keep the MIME vs Simple
distinction.
---
 lib/PublicInbox/Mbox.pm   | 16 +++++-----------
 lib/PublicInbox/MboxGz.pm |  4 ++--
 lib/PublicInbox/NNTP.pm   | 19 ++++++++-----------
 lib/PublicInbox/WWW.pm    |  6 +++---
 4 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..94e61d4d 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -14,19 +14,13 @@ use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Smsg;
 use PublicInbox::WwwStream qw(html_oneshot);
-use Email::Simple;
-use Email::MIME::Encode;
+use PublicInbox::Eml;
 
 sub subject_fn ($) {
 	my ($hdr) = @_;
-	my $fn = $hdr->header('Subject');
+	my $fn = $hdr->header_str('Subject');
 	return 'no-subject' if (!defined($fn) || $fn eq '');
 
-	# no need for full Email::MIME, here
-	if ($fn =~ /=\?/) {
-		eval { $fn = Encode::decode('MIME-Header', $fn) };
-		return 'no-subject' if $@;
-	}
 	$fn =~ s/^re:\s+//i;
 	$fn eq '' ? 'no-subject' : to_filename($fn);
 }
@@ -51,7 +45,7 @@ sub getline {
 	my $ibx = $ctx->{-inbox};
 	$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
 	$mref = $ibx->msg_by_smsg($cur) or return;
-	$hdr = Email::Simple->new($mref)->header_obj;
+	$hdr = PublicInbox::Eml->new($mref)->header_obj;
 	@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
 	msg_hdr($ctx, $hdr) . msg_body($$mref);
 }
@@ -72,7 +66,7 @@ sub emit_raw {
 	} else {
 		$mref = $ibx->msg_by_mid($mid) or return;
 	}
-	my $hdr = Email::Simple->new($mref)->header_obj;
+	my $hdr = PublicInbox::Eml->new($mref)->header_obj;
 	$more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
 	my $fn = subject_fn($hdr);
 	my @hdr = ('Content-Type');
@@ -114,7 +108,7 @@ sub msg_hdr ($$;$) {
 	for (my $i = 0; $i < @append; $i += 2) {
 		my $k = $append[$i];
 		my $v = $append[$i + 1];
-		my @v = $header_obj->header($k);
+		my @v = $header_obj->header_raw($k);
 		foreach (@v) {
 			if ($v eq $_) {
 				$v = undef;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index e506de3d..f7fc4afc 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -3,7 +3,7 @@
 package PublicInbox::MboxGz;
 use strict;
 use warnings;
-use Email::Simple;
+use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Mbox;
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
@@ -41,7 +41,7 @@ sub getline {
 	my $buf = delete($self->{buf});
 	while (my $smsg = $self->{cb}->($ctx)) {
 		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
-		my $h = Email::Simple->new($mref)->header_obj;
+		my $h = PublicInbox::Eml->new($mref)->header_obj;
 
 		my $err = $gz->deflate(
 			PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index e9c66cd1..54207500 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -8,7 +8,7 @@ use warnings;
 use base qw(PublicInbox::DS);
 use fields qw(nntpd article ng long_cb);
 use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
-use Email::Simple;
+use PublicInbox::Eml;
 use POSIX qw(strftime);
 use PublicInbox::DS qw(now);
 use Digest::SHA qw(sha1_hex);
@@ -383,7 +383,7 @@ sub cmd_quit ($) {
 
 sub header_append ($$$) {
 	my ($hdr, $k, $v) = @_;
-	my @v = $hdr->header($k);
+	my @v = $hdr->header_raw($k);
 	foreach (@v) {
 		return if $v eq $_;
 	}
@@ -416,11 +416,11 @@ sub set_nntp_headers ($$$$$) {
 	# leafnode (and maybe other NNTP clients) have trouble dealing
 	# with v2 messages which have multiple Message-IDs (either due
 	# to our own content-based dedupe or buggy git-send-email versions).
-	my @mids = $hdr->header('Message-ID');
+	my @mids = $hdr->header_raw('Message-ID');
 	if (scalar(@mids) > 1) {
 		my $mid0 = "<$mid>";
 		$hdr->header_set('Message-ID', $mid0);
-		my @alt = $hdr->header('X-Alt-Message-ID');
+		my @alt = $hdr->header_raw('X-Alt-Message-ID');
 		my %seen = map { $_ => 1 } (@alt, $mid0);
 		push(@alt, grep { !$seen{$_}++ } @mids);
 		$hdr->header_set('X-Alt-Message-ID', @alt);
@@ -478,10 +478,9 @@ found:
 	my $smsg = $ng->over->get_art($n) or return $err;
 	my $msg = $ng->msg_by_smsg($smsg) or return $err;
 
-	# Email::Simple->new will modify $msg in-place as documented
-	# in its manpage, so what's left is the body and we won't need
-	# to call Email::Simple::body(), later
-	my $hdr = Email::Simple->new($msg)->header_obj;
+	# PublicInbox::Eml->new will modify $msg in-place, so what's
+	# left is the body and we won't need to call ->body(), later
+	my $hdr = PublicInbox::Eml->new($msg)->header_obj;
 	set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers;
 	[ $n, $mid, $msg, $hdr ];
 }
@@ -511,9 +510,7 @@ sub msg_hdr_write ($$$) {
 	$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
 	# for leafnode compatibility, we need to ensure Message-ID headers
-	# are only a single line.  We can't subclass Email::Simple::Header
-	# and override _default_fold_at in here, either; since that won't
-	# affect messages already in the archive.
+	# are only a single line.
 	$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
 	$hdr .= "\r\n" if $body_follows;
 	$self->msg_more($hdr);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 275e509f..6c016b03 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -22,6 +22,7 @@ use PublicInbox::MID qw(mid_escape);
 use PublicInbox::GitHTTPBackend;
 use PublicInbox::UserContent;
 use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
 
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -225,9 +226,8 @@ sub invalid_inbox_mid {
 		my ($x2, $x38) = ($1, $2);
 		# this is horrifically wasteful for legacy URLs:
 		my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
-		require Email::Simple;
-		my $s = Email::Simple->new($str);
-		$mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+		my $s = PublicInbox::Eml->new($str);
+		$mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
 		return r301($ctx, $inbox, mid_escape($mid));
 	}
 	undef;

^ permalink raw reply related	[relevance 7%]

* [PATCH 00/13] eml: pure-Perl replacement for Email::MIME
@ 2020-05-07 21:05  6% Eric Wong
  2020-05-07 21:05  7% ` [PATCH 06/13] switch read-only Email::Simple users to Eml Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-05-07 21:05 UTC (permalink / raw)
  To: meta

Eric Wong (13):
  msg_iter: make ->each_part method for PublicInbox::MIME
  msg_iter: pass $idx as a scalar, not array
  filter/rubylang: avoid recursing subparts to strip trailers
  smsg: use capitalization for header retrieval
  eml: pure-Perl replacement for Email::MIME
  switch read-only Email::Simple users to Eml
  replace most uses of PublicInbox::MIME with Eml
  EmlContentFoo: Email::MIME::ContentType replacement
  EmlContentFoo: relax Encode version requirement
  eml: remove dependency on Email::MIME::Encodings
  xt: eml comparison tests
  remove most internal Email::MIME usage
  eml: drop trailing blank line on missing epilogue

 Documentation/mknews.perl          |   4 +-
 INSTALL                            |  26 +-
 MANIFEST                           |   7 +
 Makefile.PL                        |   7 +-
 ci/deps.perl                       |   3 -
 lib/PublicInbox/Admin.pm           |   2 +-
 lib/PublicInbox/Eml.pm             | 421 +++++++++++++++++++++++++++++
 lib/PublicInbox/EmlContentFoo.pm   | 317 ++++++++++++++++++++++
 lib/PublicInbox/Filter/RubyLang.pm |  32 ++-
 lib/PublicInbox/Filter/Vger.pm     |   4 +-
 lib/PublicInbox/Import.pm          |  11 +-
 lib/PublicInbox/Inbox.pm           |   4 +-
 lib/PublicInbox/InboxWritable.pm   |   4 +-
 lib/PublicInbox/MDA.pm             |   1 -
 lib/PublicInbox/MIME.pm            |   6 +
 lib/PublicInbox/Mbox.pm            |  16 +-
 lib/PublicInbox/MboxGz.pm          |   4 +-
 lib/PublicInbox/MsgIter.pm         |  21 +-
 lib/PublicInbox/MsgTime.pm         |   8 +-
 lib/PublicInbox/NNTP.pm            |  19 +-
 lib/PublicInbox/SearchIdx.pm       |   8 +-
 lib/PublicInbox/SearchIdxShard.pm  |   3 +-
 lib/PublicInbox/Smsg.pm            |  24 +-
 lib/PublicInbox/SolverGit.pm       |   4 +-
 lib/PublicInbox/TestCommon.pm      |  11 +-
 lib/PublicInbox/V2Writable.pm      |  17 +-
 lib/PublicInbox/View.pm            |  28 +-
 lib/PublicInbox/WWW.pm             |   8 +-
 lib/PublicInbox/WatchMaildir.pm    |   4 +-
 lib/PublicInbox/WwwAttach.pm       |  15 +-
 script/public-inbox-edit           |   8 +-
 script/public-inbox-learn          |   4 +-
 script/public-inbox-mda            |  16 +-
 script/public-inbox-purge          |   4 +-
 t/altid.t                          |   4 +-
 t/altid_v2.t                       |   4 +-
 t/cgi.t                            |   8 +-
 t/content_id.t                     |   6 +-
 t/convert-compact.t                |   4 +-
 t/edit.t                           |  20 +-
 t/eml.t                            | 363 +++++++++++++++++++++++++
 t/eml_content_disposition.t        | 102 +++++++
 t/eml_content_type.t               | 289 ++++++++++++++++++++
 t/feed.t                           |   6 +-
 t/filter_base.t                    |   4 +-
 t/filter_mirror.t                  |   2 +-
 t/filter_rubylang.t                |   8 +-
 t/filter_subjecttag.t              |   4 +-
 t/filter_vger.t                    |   6 +-
 t/html_index.t                     |   4 +-
 t/httpd.t                          |   4 +-
 t/import.t                         |   6 +-
 t/indexlevels-mirror.t             |   4 +-
 t/mda.t                            |   4 +-
 t/mda_filter_rubylang.t            |   2 +-
 t/mid.t                            |   4 +-
 t/mime.t                           |  82 +++---
 t/msg_iter.t                       |  10 +-
 t/msgtime.t                        |   6 +-
 t/multi-mid.t                      |   6 +-
 t/nntp.t                           |   4 +-
 t/nntpd-tls.t                      |   4 +-
 t/nntpd.t                          |   6 +-
 t/nulsubject.t                     |   2 +-
 t/plack.t                          |  10 +-
 t/precheck.t                       |  10 +-
 t/psgi_attach.t                    |   2 +-
 t/psgi_bad_mids.t                  |   4 +-
 t/psgi_mount.t                     |   4 +-
 t/psgi_multipart_not.t             |   4 +-
 t/psgi_scan_all.t                  |   4 +-
 t/psgi_search.t                    |   8 +-
 t/psgi_text.t                      |   2 +-
 t/psgi_v2.t                        |   6 +-
 t/purge.t                          |   2 +-
 t/replace.t                        |  12 +-
 t/reply.t                          |   4 +-
 t/search-thr-index.t               |   6 +-
 t/search.t                         |  26 +-
 t/solver_git.t                     |   4 +-
 t/spamcheck_spamc.t                |   8 +-
 t/thread-cycle.t                   |   3 +-
 t/time.t                           |   4 +-
 t/v1-add-remove-add.t              |   4 +-
 t/v1reindex.t                      |   4 +-
 t/v2-add-remove-add.t              |   4 +-
 t/v2mda.t                          |   4 +-
 t/v2mirror.t                       |   4 +-
 t/v2reindex.t                      |   8 +-
 t/v2writable.t                     |   8 +-
 t/watch_filter_rubylang.t          |   2 +-
 t/watch_maildir.t                  |   2 +-
 t/watch_maildir_v2.t               |   2 +-
 t/www_altid.t                      |   2 +-
 t/xcpdb-reshard.t                  |   4 +-
 xt/cmp-msgstr.t                    | 108 ++++++++
 xt/cmp-msgview.t                   |  95 +++++++
 xt/msgtime_cmp.t                   |  12 +-
 xt/perf-msgview.t                  |   2 +-
 99 files changed, 2084 insertions(+), 353 deletions(-)
 create mode 100644 lib/PublicInbox/Eml.pm
 create mode 100644 lib/PublicInbox/EmlContentFoo.pm
 create mode 100644 t/eml.t
 create mode 100644 t/eml_content_disposition.t
 create mode 100644 t/eml_content_type.t
 create mode 100644 xt/cmp-msgstr.t
 create mode 100644 xt/cmp-msgview.t


^ permalink raw reply	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-05-07 21:05  6% [PATCH 00/13] eml: pure-Perl replacement for Email::MIME Eric Wong
2020-05-07 21:05  7% ` [PATCH 06/13] switch read-only Email::Simple users to Eml Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).