user/dev discussion of public-inbox itself
 help / color / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 1/2] t/psgi_attach: assert message/* parts are downloadable
Date: Sat, 16 May 2020 10:03:21 +0000
Message-ID: <20200516100322.19793-2-e@yhbt.net> (raw)
In-Reply-To: <20200516100322.19793-1-e@yhbt.net>

We'll be adding support for descending into message/rfc822 (and
legacy message/news) attachments.  First, we must ensure that
existing message/rfc822 attachments can be downloaded and remain
downloadable in future commits.
---
 MANIFEST                 |   1 +
 t/data/message_embed.eml | 163 +++++++++++++++++++++++++++++++++++++++
 t/psgi_attach.t          |  18 +++++
 3 files changed, 182 insertions(+)
 create mode 100644 t/data/message_embed.eml

diff --git a/MANIFEST b/MANIFEST
index 7997bc9906c..24f95faa942 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -226,6 +226,7 @@ t/config_limiter.t
 t/content_hash.t
 t/convert-compact.t
 t/data/0001.patch
+t/data/message_embed.eml
 t/ds-kqxs.t
 t/ds-leak.t
 t/ds-poll.t
diff --git a/t/data/message_embed.eml b/t/data/message_embed.eml
new file mode 100644
index 00000000000..a7aa88acee3
--- /dev/null
+++ b/t/data/message_embed.eml
@@ -0,0 +1,163 @@
+Received: from localhost (dcvr.yhbt.net [127.0.0.1])
+	by dcvr.yhbt.net (Postfix) with ESMTP id 977481F45A;
+	Sat, 18 Apr 2020 22:25:08 +0000 (UTC)
+Date: Sat, 18 Apr 2020 22:25:08 +0000
+From: Eric Wong <e@yhbt.net>
+To: test@public-inbox.org
+Subject: Re: embedded message test
+Message-ID: <20200418222508.GA13918@dcvr>
+References: <20200418222020.GA2745@dcvr>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="TB36FDmn/VVEgNH/"
+Content-Disposition: inline
+In-Reply-To: <20200418222020.GA2745@dcvr>
+
+
+--TB36FDmn/VVEgNH/
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+
+testing embedded message harder
+
+--TB36FDmn/VVEgNH/
+Content-Type: message/rfc822
+Content-Disposition: attachment; filename="embed2x.eml"
+
+Date: Sat, 18 Apr 2020 22:20:20 +0000
+From: Eric Wong <e@yhbt.net>
+To: test@public-inbox.org
+Subject: embedded message test
+Message-ID: <20200418222020.GA2745@dcvr>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="/04w6evG8XlLl3ft"
+Content-Disposition: inline
+
+--/04w6evG8XlLl3ft
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+
+testing embedded message
+
+--/04w6evG8XlLl3ft
+Content-Type: message/rfc822
+Content-Disposition: attachment; filename="test.eml"
+
+From: Eric Wong <e@yhbt.net>
+To: spew@80x24.org
+Subject: [PATCH] mail header experiments
+Date: Sat, 18 Apr 2020 21:41:14 +0000
+Message-Id: <20200418214114.7575-1-e@yhbt.net>
+MIME-Version: 1.0
+Content-Transfer-Encoding: 8bit
+
+---
+ lib/PublicInbox/MailHeader.pm | 55 +++++++++++++++++++++++++++++++++++
+ t/mail_header.t               | 31 ++++++++++++++++++++
+ 2 files changed, 86 insertions(+)
+ create mode 100644 lib/PublicInbox/MailHeader.pm
+ create mode 100644 t/mail_header.t
+
+diff --git a/lib/PublicInbox/MailHeader.pm b/lib/PublicInbox/MailHeader.pm
+new file mode 100644
+index 00000000..166baf91
+--- /dev/null
++++ b/lib/PublicInbox/MailHeader.pm
+@@ -0,0 +1,55 @@
++# Copyright (C) 2020 all contributors <meta@public-inbox.org>
++# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
++package PublicInbox::MailHeader;
++use strict;
++use HTTP::Parser::XS qw(parse_http_response HEADERS_AS_ARRAYREF);
++use bytes (); #bytes::length
++my %casemap;
++
++sub _headerx_to_list {
++	my (undef, $head, $crlf) = @_;
++
++	# picohttpparser uses `int' as the return value, so the
++	# actual limit is 2GB on most platforms.  However, headers
++	# exceeding (or even close to) 1MB seems unreasonable
++	die 'headers too big' if bytes::length($$head) > 0x100000;
++	my ($ret, undef, undef, undef, $headers) =
++		parse_http_response('HTTP/1.0 1 X'. $crlf . $$head,
++					HEADERS_AS_ARRAYREF);
++	die 'failed to parse headers' if $ret <= 0;
++	# %casemap = map {; lc($_) => $_ } ($$head =~ m/^([^:]+):/gsm);
++	# my $nr = @$headers;
++	for (my $i = 0; $i < @$headers; $i += 2) {
++		my $key = $headers->[$i]; # = $casemap{$headers->[$i]};
++		my $val = $headers->[$i + 1];
++		(my $trimmed = $val) =~ s/\r?\n\s+/ /;
++		$headers->[$i + 1] = [
++			$trimmed,
++			"$key: $val"
++		]
++	}
++	$headers;
++}
++
++sub _header_to_list {
++	my (undef, $head, $crlf) = @_;
++	my @tmp = ($$head =~ m/^(([^ \t:][^:\n]*):[ \t]*
++			([^\n]*\n(?:[ \t]+[^\n]*\n)*))/gsmx);
++	my @headers;
++	$#headers = scalar @tmp;
++	@headers = ();
++	while (@tmp) {
++		my ($orig, $key, $val) = splice(@tmp, 0, 3);
++		# my $v = $tmp[$i + 2];
++		# $v =~ s/\r?\n[ \t]+/ /sg;
++		# $v =~ s/\r?\n\z//s;
++		$val =~ s/\n[ \t]+/ /sg;
++		chomp($val, $orig);
++		# $val =~ s/\r?\n\z//s;
++		# $orig =~ s/\r?\n\z//s;
++		push @headers, $key, [ $val, $orig ];
++	}
++	\@headers;
++}
++
++1;
+diff --git a/t/mail_header.t b/t/mail_header.t
+new file mode 100644
+index 00000000..4dc62c50
+--- /dev/null
++++ b/t/mail_header.t
+@@ -0,0 +1,31 @@
++# Copyright (C) 2020 all contributors <meta@public-inbox.org>
++# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
++use strict;
++use Test::More;
++use PublicInbox::TestCommon;
++require_mods('PublicInbox::MailHeader');
++
++my $head = <<'EOF';
++From d0147582e289fdd4cdd84e91d8b0f8ae9c230124 Mon Sep 17 00:00:00 2001
++From: Eric Wong <e@yhbt.net>
++Date: Fri, 17 Apr 2020 09:28:49 +0000
++Subject: [PATCH] searchthread: reduce indirection by removing container
++
++EOF
++my $orig = $head;
++use Email::Simple;
++my $xshdr = PublicInbox::MailHeader->_header_to_list(\$head, "\n");
++my $simpl = Email::Simple::Header->_header_to_list(\$head, "\n");
++is_deeply($xshdr, $simpl);
++use Benchmark qw(:all);
++my $res = timethese(100000, {
++	pmh => sub {
++		PublicInbox::MailHeader->_header_to_list(\$head, "\n");
++	},
++	esh =>  sub {
++		PublicInbox::MailHeader->_header_to_list(\$head, "\n");
++	}
++});
++is($head, $orig);
++use Data::Dumper; diag Dumper($res);
++done_testing;
+
+
+--/04w6evG8XlLl3ft--
+
+
+--TB36FDmn/VVEgNH/--
diff --git a/t/psgi_attach.t b/t/psgi_attach.t
index 9a2b241164a..12f9e6eeecd 100644
--- a/t/psgi_attach.t
+++ b/t/psgi_attach.t
@@ -15,6 +15,7 @@ use_ok 'PublicInbox::WWW';
 use PublicInbox::Import;
 use PublicInbox::Git;
 use PublicInbox::Config;
+use PublicInbox::Eml;
 use_ok 'PublicInbox::WwwAttach';
 my $config = PublicInbox::Config->new(\<<EOF);
 $cfgpfx.address=$addr
@@ -30,6 +31,7 @@ $im->init_bare;
 	my $txt = "plain\ntext\npass\nthrough\n";
 	my $dot = "dotfile\n";
 	$im->add(eml_load('t/psgi_attach.eml'));
+	$im->add(eml_load('t/data/message_embed.eml'));
 	$im->done;
 
 	my $www = PublicInbox::WWW->new($config);
@@ -67,6 +69,22 @@ $im->init_bare;
 		ok(length($dot_res) >= length($dot), 'dot almost matches');
 		$res = $cb->(GET('/test/Z%40B/4-any-filename.txt'));
 		is($res->content, $dot_res, 'user-specified filename is OK');
+
+		my $mid = '20200418222508.GA13918@dcvr';
+		my $irt = '20200418222020.GA2745@dcvr';
+		$res = $cb->(GET("/test/$mid/"));
+		like($res->content, qr/\bhref="2-embed2x\.eml"/s,
+			'href to message/rfc822 attachment visible');
+		$res = $cb->(GET("/test/$mid/2-embed2x.eml"));
+		my $eml = PublicInbox::Eml->new(\($res->content));
+		is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ],
+			'got attached eml');
+		my @subs = $eml->subparts;
+		is(scalar(@subs), 2, 'attachment had 2 subparts');
+		like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm,
+			'1st attachment is as expected');
+		is($subs[1]->header('Content-Type'), 'message/rfc822',
+			'2nd attachment is as expected');
 	});
 }
 done_testing();

  reply index

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-16 10:03 [PATCH/RFC 0/2] recurse into message/rfc822 parts Eric Wong
2020-05-16 10:03 ` Eric Wong [this message]
2020-05-16 10:03 ` [PATCH 2/2] descend into message/(rfc822|news|global) parts Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200516100322.19793-2-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git