git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>
To: git@vger.kernel.org
Cc: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>,
	"Junio C Hamano" <gitster@pobox.com>,
	"brian m. carlson" <sandals@crustytoothpaste.net>
Subject: [PATCH v4 3/6] mailinfo: warn if CRLF found in decoded base64/QP email
Date: Mon, 10 May 2021 00:12:10 +0700	[thread overview]
Message-ID: <50404ffe7425a32fd26e19fc406f80693985e644.1620580178.git.congdanhqx@gmail.com> (raw)
In-Reply-To: <cover.1620580178.git.congdanhqx@gmail.com>

When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of them decide to change said LF to CRLF.

Some mailing list softwares, when receive 8-bit email messages,
decide to encode those messages in base64 or quoted-printable.

If an email is transfered through above mail servers, then distributed
by such mailing list softwares, the recipients will receive an email
contains a patch mungled with CRLF encoded inside another encoding.

Thus, such CR (in CRLF) couldn't be dropped by "mailsplit".
Hence, the mailed patch couldn't be applied cleanly.
Such accidents have been observed in the wild [1].

Instead of silently rejecting those messages, let's give our users
some warnings if such CR (as part of CRLF) is found.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c              | 14 ++++++++++++
 mailinfo.h              |  1 +
 t/t5100-mailinfo.sh     | 30 ++++++++++++++++++++++++++
 t/t5100/quoted-cr-info  |  5 +++++
 t/t5100/quoted-cr-msg   |  2 ++
 t/t5100/quoted-cr-patch | 22 +++++++++++++++++++
 t/t5100/quoted-cr.mbox  | 47 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 121 insertions(+)
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..c8caee4f55 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -994,6 +994,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			mi->have_quoted_cr = 1;
+		}
 		handle_filter(mi, line);
 		return;
 	}
@@ -1033,6 +1038,12 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	handle_filter(mi, line);
 }
 
+static void summarize_quoted_cr(struct mailinfo *mi)
+{
+	if (mi->have_quoted_cr)
+		warning(_("quoted CRLF detected"));
+}
+
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
@@ -1051,6 +1062,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 				handle_filter(mi, &prev);
 				strbuf_reset(&prev);
 			}
+			summarize_quoted_cr(mi);
+			mi->have_quoted_cr = 0;
 			if (!handle_boundary(mi, line))
 				goto handle_body_out;
 		}
@@ -1100,6 +1113,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 
 	if (prev.len)
 		handle_filter(mi, &prev);
+	summarize_quoted_cr(mi);
 
 	flush_inbody_header_accum(mi);
 
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..b394ef9bce 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -24,6 +24,7 @@ struct mailinfo {
 	struct strbuf charset;
 	unsigned int format_flowed:1;
 	unsigned int delsp:1;
+	unsigned int have_quoted_cr:1;
 	char *message_id;
 	enum  {
 		TE_DONTCARE, TE_QP, TE_BASE64
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..ac6fbfe596 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,34 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+check_quoted_cr_mail () {
+	mail="$1" && shift &&
+	git mailinfo -u "$@" "$mail.msg" "$mail.patch" \
+		<"$mail" >"$mail.info" 2>"$mail.err" &&
+	test_cmp "$mail-expected.msg" "$mail.msg" &&
+	test_cmp "$mail-expected.patch" "$mail.patch" &&
+	test_cmp "$DATA/quoted-cr-info" "$mail.info"
+}
+
+test_expect_success 'split base64 email with quoted-cr' '
+	mkdir quoted-cr &&
+	git mailsplit -oquoted-cr "$DATA/quoted-cr.mbox" >quoted-cr/last &&
+	test $(cat quoted-cr/last) = 2
+'
+
+test_expect_success 'mailinfo warn CR in base64 encoded email' '
+	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
+		>quoted-cr/0001-expected.msg &&
+	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
+		>quoted-cr/0002-expected.msg &&
+	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
+		>quoted-cr/0001-expected.patch &&
+	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
+		>quoted-cr/0002-expected.patch &&
+	check_quoted_cr_mail quoted-cr/0001 &&
+	test_must_be_empty quoted-cr/0001.err &&
+	check_quoted_cr_mail quoted-cr/0002 &&
+	grep "quoted CRLF detected" quoted-cr/0002.err
+'
+
 test_done
diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/quoted-cr-info
@@ -0,0 +1,5 @@
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg
new file mode 100644
index 0000000000..89b05a0784
--- /dev/null
+++ b/t/t5100/quoted-cr-msg
@@ -0,0 +1,2 @@
+On different distro, %%pytest is suffixed with different patterns.%%
+%%
diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch
new file mode 100644
index 0000000000..65b13eeef7
--- /dev/null
+++ b/t/t5100/quoted-cr-patch
@@ -0,0 +1,22 @@
+---%%
+ configure | 2 +-%%
+ 1 file changed, 1 insertion(+), 1 deletion(-)%%
+%%
+diff --git a/configure b/configure%%
+index db3538b3..f7c1c095 100755%%
+--- a/configure%%
++++ b/configure%%
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
+     printf "%%Checking for python3 pytest (>= 3.0)... "%%
+     conf=$(mktemp)%%
+     printf "[pytest]\nminversion=3.0\n" > $conf%%
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%%
+         printf "Yes.\n"%%
+         have_python3_pytest=1%%
+     else%%
+-- %%
+2.28.0%%
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox
new file mode 100644
index 0000000000..909021bb7a
--- /dev/null
+++ b/t/t5100/quoted-cr.mbox
@@ -0,0 +1,47 @@
+From nobody Mon Sep 17 00:00:00 2001
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
+YXR0ZXJucy4KCi0tLQogY29uZmlndXJlIHwgMiArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0
+aW9uKCspLCAxIGRlbGV0aW9uKC0pCgpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29uZmlndXJl
+CmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUKLS0tIGEvY29uZmlndXJlCisrKyBiL2Nv
+bmZpZ3VyZQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAtZXEgMSBdOyB0
+aGVuCiAgICAgcHJpbnRmICINQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+PSAzLjApLi4u
+ICIKICAgICBjb25mPSQobWt0ZW1wKQogICAgIHByaW50ZiAiW3B5dGVzdF1cbm1pbnZlcnNpb249
+My4wXG4iID4gJGNvbmYKLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYv
+bnVsbCAyPiYxOyB0aGVuCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVzdCAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuCiAgICAgICAgIHByaW50ZiAiWWVzLlxuIgogICAg
+ICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTEKICAgICBlbHNlCi0tIAoyLjI4LjAKX19fX19fX19f
+X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KZXhhbXBsZSBtYWlsaW5nIGxp
+c3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmliZSBzZW5kIGFuIGVtYWlsIHRvIGxp
+c3QtbGVhdmVAZXhhbXBsZS5vcmcK
+
+From nobody Mon Sep 17 00:00:00 2001
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id2@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
+YXR0ZXJucy4NCg0KLS0tDQogY29uZmlndXJlIHwgMiArLQ0KIDEgZmlsZSBjaGFuZ2VkLCAxIGlu
+c2VydGlvbigrKSwgMSBkZWxldGlvbigtKQ0KDQpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29u
+ZmlndXJlDQppbmRleCBkYjM1MzhiMy4uZjdjMWMwOTUgMTAwNzU1DQotLS0gYS9jb25maWd1cmUN
+CisrKyBiL2NvbmZpZ3VyZQ0KQEAgLTgxNCw3ICs4MTQsNyBAQCBpZiBbICRoYXZlX3B5dGhvbjMg
+LWVxIDEgXTsgdGhlbg0KICAgICBwcmludGYgIg1DaGVja2luZyBmb3IgcHl0aG9uMyBweXRlc3Qg
+KD49IDMuMCkuLi4gIg0KICAgICBjb25mPSQobWt0ZW1wKQ0KICAgICBwcmludGYgIltweXRlc3Rd
+XG5taW52ZXJzaW9uPTMuMFxuIiA+ICRjb25mDQotICAgIGlmIHB5dGVzdC0zIC1jICRjb25mIC0t
+dmVyc2lvbiA+L2Rldi9udWxsIDI+JjE7IHRoZW4NCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVz
+dCAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQogICAgICAgICBwcmlu
+dGYgIlllcy5cbiINCiAgICAgICAgIGhhdmVfcHl0aG9uM19weXRlc3Q9MQ0KICAgICBlbHNlDQot
+LSANCjIuMjguMA0KX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+X18KZXhhbXBsZSBtYWlsaW5nIGxpc3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmli
+ZSBzZW5kIGFuIGVtYWlsIHRvIGxpc3QtbGVhdmVAZXhhbXBsZS5vcmcK
-- 
2.31.1.448.g9c2f8508d1


  parent reply	other threads:[~2021-05-09 17:12 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
2021-04-21  2:09 ` Junio C Hamano
2021-04-21  3:32 ` brian m. carlson
2021-04-21 12:07   ` Đoàn Trần Công Danh
2021-04-22  1:10     ` brian m. carlson
2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
2021-05-04 17:19   ` [PATCH v2 1/5] mailinfo: avoid magic number in option parsing Đoàn Trần Công Danh
2021-05-04 17:19   ` [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email Đoàn Trần Công Danh
2021-05-05  3:41     ` Junio C Hamano
2021-05-04 17:20   ` [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish Đoàn Trần Công Danh
2021-05-05  4:12     ` Junio C Hamano
2021-05-05 15:53       ` Đoàn Trần Công Danh
2021-05-04 17:20   ` [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish Đoàn Trần Công Danh
2021-05-05  4:27     ` Junio C Hamano
2021-05-04 17:20   ` [PATCH v2 5/5] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
2021-05-05  4:31   ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Junio C Hamano
2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
2021-05-08 10:44     ` Junio C Hamano
2021-05-06 15:02   ` [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email Đoàn Trần Công Danh
2021-05-08 10:52     ` Junio C Hamano
2021-05-06 15:02   ` [PATCH v3 4/6] mailinfo: allow squelching quoted CR warning Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
2021-05-08 10:57   ` [PATCH v3 0/6] Teach am/mailinfo to process quoted CR Junio C Hamano
     [not found] ` <cover.1620309355.git.congdanhqx@gmail.com>
2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parse options manually Đoàn Trần Công Danh
2021-05-06 15:19     ` Đoàn Trần Công Danh
2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
2021-05-09 17:12   ` Đoàn Trần Công Danh [this message]
2021-05-09 17:12   ` [PATCH v4 4/6] mailinfo: allow squelching quoted CRLF warning Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50404ffe7425a32fd26e19fc406f80693985e644.1620580178.git.congdanhqx@gmail.com \
    --to=congdanhqx@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sandals@crustytoothpaste.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).