user/dev discussion of public-inbox itself
 help / color / Atom feed
* [RFC/WIP] msg_iter: make ->each_part method for PublicInbox::MIME
@ 2020-05-01  6:59 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2020-05-01  6:59 UTC (permalink / raw)
  To: meta

The reliance on Email::MIME->subparts is a tad inefficient with
a work-in-progress module to replace Email::MIME.  So move
towards using ->each_part as a class-specific iterator which can
take advantage of more class-specific optimizations in the
yet-to-be-revealed PublicInbox::Eml and PublicInbox::Gmime
classes.

The msg_iter() sub remains for compatibility with existing
3rd-party scripts/modules which use our small public Perl API
and Email::MIME.
---
 I'm not completely sure about this one, but strongly leaning
 towards it since ->subparts object creation is a bit annoying
 to do up front.

 lib/PublicInbox/MIME.pm      |  3 +++
 lib/PublicInbox/MsgIter.pm   | 15 +++++++++++++--
 lib/PublicInbox/SolverGit.pm |  4 ++--
 lib/PublicInbox/View.pm      | 10 +++++-----
 lib/PublicInbox/WwwAttach.pm |  4 ++--
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
index 456eed64b8c..b795b93b105 100644
--- a/lib/PublicInbox/MIME.pm
+++ b/lib/PublicInbox/MIME.pm
@@ -24,6 +24,7 @@ use strict;
 use warnings;
 use base qw(Email::MIME);
 use Email::MIME::ContentType;
+use PublicInbox::MsgIter ();
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 
 if ($Email::MIME::VERSION <= 1.937) {
@@ -101,4 +102,6 @@ sub parts_multipart {
 }
 }
 
+no warnings 'once';
+*each_part = \&PublicInbox::MsgIter::em_each_part;
 1;
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index fa25564a5db..cd5a5d99564 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -7,12 +7,12 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT = qw(msg_iter msg_part_text);
-use PublicInbox::MIME;
 
+# This becomes PublicInbox::MIME->each_part:
 # Like Email::MIME::walk_parts, but this is:
 # * non-recursive
 # * passes depth and indices to the iterator callback
-sub msg_iter ($$;$$) {
+sub em_each_part ($$;$$) {
 	my ($mime, $cb, $cb_arg, $do_undef) = @_;
 	my @parts = $mime->subparts;
 	if (@parts) {
@@ -36,6 +36,17 @@ sub msg_iter ($$;$$) {
 	}
 }
 
+# Use this when we may accept Email::MIME from user scripts
+# (not just PublicInbox::MIME)
+sub msg_iter ($$;$$) { # $_[0] = PublicInbox::MIME/Email::MIME-like obj
+	my (undef, $cb, $cb_arg, $once) = @_;
+	if (my $ep = $_[0]->can('each_part')) { # PublicInbox::{MIME,*}
+		$ep->($_[0], $cb, $cb_arg, $once);
+	} else { # for compatibility with existing Email::MIME users:
+		em_each_part($_[0], $cb, $cb_arg, $once);
+	}
+}
+
 sub msg_part_text ($$) {
 	my ($part, $ct) = @_;
 
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index c32a5baecdb..f718e28cbd5 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -14,7 +14,7 @@ use 5.010_001;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use Fcntl qw(SEEK_SET);
 use PublicInbox::Git qw(git_unquote git_quote);
-use PublicInbox::MsgIter qw(msg_iter msg_part_text);
+use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Qspawn;
 use PublicInbox::Tmpfile;
 use URI::Escape qw(uri_escape_utf8);
@@ -234,7 +234,7 @@ sub find_extract_diffs ($$$) {
 	my $diffs = [];
 	foreach my $smsg (@$msgs) {
 		$ibx->smsg_mime($smsg) or next;
-		msg_iter(delete $smsg->{mime}, \&extract_diff,
+		delete($smsg->{mime})->each_part(\&extract_diff,
 				[$self, $diffs, $pre, $post, $ibx, $smsg], 1);
 	}
 	@$diffs ? $diffs : undef;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 5144a130460..4ede83878a3 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -243,7 +243,7 @@ sub index_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{obuf} = \$rv;
-	msg_iter($mime, \&add_text_body, $ctx, 1);
+	$mime->each_part(\&add_text_body, $ctx, 1);
 	delete $ctx->{obuf};
 
 	# add the footer
@@ -474,10 +474,10 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 }
 
 sub multipart_text_as_html {
-	# ($mime, $ctx) = @_; # msg_iter will do "$_[0] = undef"
+	# ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"
 
 	# scan through all parts, looking for displayable text
-	msg_iter($_[0], \&add_text_body, $_[1], 1);
+	$_[0]->each_part(\&add_text_body, $_[1], 1);
 }
 
 sub attach_link ($$$$;$) {
@@ -515,11 +515,11 @@ EOF
 	undef;
 }
 
-sub add_text_body { # callback for msg_iter
+sub add_text_body { # callback for each_part
 	my ($p, $ctx) = @_;
 	my $upfx = $ctx->{mhref};
 	my $ibx = $ctx->{-inbox};
-	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
+	# $p - from each_part: [ Email::MIME-like, depth, @idx ]
 	my ($part, $depth, @idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index f795618ebcf..774b38ae269 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -10,7 +10,7 @@ use Email::MIME::ContentType qw(parse_content_type);
 use PublicInbox::MIME;
 use PublicInbox::MsgIter;
 
-sub get_attach_i { # msg_iter callback
+sub get_attach_i { # ->each_part callback
 	my ($part, $depth, @idx) = @{$_[0]};
 	my $res = $_[1];
 	return if join('.', @idx) ne $res->[3]; # $idx
@@ -40,7 +40,7 @@ sub get_attach ($$$) {
 	my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
 	$mime = PublicInbox::MIME->new($mime);
 	$res->[3] = $idx;
-	msg_iter($mime, \&get_attach_i, $res, 1);
+	$mime->each_part(\&get_attach_i, $res, 1);
 	pop @$res; # cleanup before letting PSGI server see it
 	$res
 }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, back to index

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-01  6:59 [RFC/WIP] msg_iter: make ->each_part method for PublicInbox::MIME Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git