user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 1/2] eml: enforce a maximum header length
Date: Sun, 10 May 2020 06:21:04 +0000	[thread overview]
Message-ID: <20200510062105.1644-2-e@yhbt.net> (raw)
In-Reply-To: <20200510062105.1644-1-e@yhbt.net>

While our header processing is more efficient than
Email::*::Header, capping the maximum size for a `m//g' match
still limits memory growth on a header we care about.

Use the same limit as postfix (header_size_limit=102400), since
messages fetched via git/HTTP/NNTP/etc can bypass MTA limits.
---
 lib/PublicInbox/Eml.pm | 23 ++++++++++++++++++++++-
 t/eml.t                | 25 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index f022516c12c..2ccbb6597de 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -41,6 +41,7 @@ $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0;
 our $MAXPARTS = 1000; # same as SpamAssassin
 our $MAXDEPTH = 20; # seems enough, Perl sucks, here
 our $MAXBOUNDLEN = 2048; # same as postfix
+our $header_size_limit = 102400; # same as postfix
 
 my %MIME_ENC = (qp => \&enc_qp, base64 => \&encode_base64);
 my %MIME_DEC = (qp => \&dec_qp, base64 => \&decode_base64);
@@ -68,6 +69,22 @@ sub re_memo ($) {
 					/ismx
 }
 
+sub hdr_truncate ($) {
+	my $len = length($_[0]);
+	substr($_[0], $header_size_limit, $len) = '';
+	my $end = rindex($_[0], "\n");
+	if ($end >= 0) {
+		++$end;
+		substr($_[0], $end, $len) = '';
+		warn "header of $len bytes truncated to $end bytes\n";
+	} else {
+		$_[0] = '';
+		warn <<EOF
+header of $len bytes without `\\n' within $header_size_limit ignored
+EOF
+	}
+}
+
 # compatible with our uses of Email::MIME
 sub new {
 	my $ref = ref($_[1]) ? $_[1] : \(my $cpy = $_[1]);
@@ -81,14 +98,18 @@ sub new {
 		# likely on *nix
 		my $hdr = substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref
 		chop($hdr); # lower SvCUR
+		hdr_truncate($hdr) if length($hdr) > $header_size_limit;
 		bless { hdr => \$hdr, crlf => "\n", bdy => $ref }, __PACKAGE__;
 	} elsif ($$ref =~ /\r?\n(\r?\n)/s) {
 		my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref
 		substr($hdr, -(length($1))) = ''; # lower SvCUR
+		hdr_truncate($hdr) if length($hdr) > $header_size_limit;
 		bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__;
 	} elsif ($$ref =~ /^[a-z0-9-]+[ \t]*:/ims && $$ref =~ /(\r?\n)\z/s) {
 		# body is optional :P
-		bless { hdr => \($$ref), crlf => $1 }, __PACKAGE__;
+		my $hdr = substr($$ref, 0, $header_size_limit + 1);
+		hdr_truncate($hdr) if length($hdr) > $header_size_limit;
+		bless { hdr => \$hdr, crlf => $1 }, __PACKAGE__;
 	} else { # nothing useful
 		my $hdr = $$ref = '';
 		bless { hdr => \$hdr, crlf => "\n" }, __PACKAGE__;
diff --git a/t/eml.t b/t/eml.t
index 43c735e76b9..d5e8cbcbbba 100644
--- a/t/eml.t
+++ b/t/eml.t
@@ -252,6 +252,31 @@ EOF
 		'final "\n" preserved on missing epilogue');
 }
 
+if ('header_size_limit stolen from postfix') {
+	local $PublicInbox::Eml::header_size_limit = 4;
+	my @w;
+	local $SIG{__WARN__} = sub { push @w, @_ };
+	my $eml = PublicInbox::Eml->new("a:b\na:d\n\nzz");
+	is_deeply([$eml->header('a')], ['b'], 'no overrun header');
+	is($eml->body_raw, 'zz', 'body not damaged');
+	is($eml->header_obj->as_string, "a:b\n", 'header truncated');
+	is(grep(/truncated/, @w), 1, 'truncation warned');
+
+	$eml = PublicInbox::Eml->new("a:b\na:d\n");
+	is_deeply([$eml->header('a')], ['b'], 'no overrun header w/o body');
+
+	local $PublicInbox::Eml::header_size_limit = 5;
+	$eml = PublicInbox::Eml->new("a:b\r\na:d\r\n\nzz");
+	is_deeply([$eml->header('a')], ['b'], 'no overrun header on CRLF');
+	is($eml->body_raw, 'zz', 'body not damaged');
+
+	@w = ();
+	$eml = PublicInbox::Eml->new("too:long\n");
+	$eml = PublicInbox::Eml->new("too:long\n\n");
+	$eml = PublicInbox::Eml->new("too:long\r\n\r\n");
+	is(grep(/ignored/, @w), 3, 'ignored header warned');
+}
+
 if ('maxparts is a feature unique to us') {
 	my $eml = eml_load 't/psgi_attach.eml';
 	my @orig;

  reply	other threads:[~2020-05-10  6:21 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-10  6:21 [PATCH 0/2] header limit and consistency Eric Wong
2020-05-10  6:21 ` Eric Wong [this message]
2020-05-10  6:21 ` [PATCH 2/2] eml: rename limits to match postfix names Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200510062105.1644-2-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).