git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH] mailinfo: strip CR from base64/quoted-printable email
@ 2021-04-21  1:34 Đoàn Trần Công Danh
  2021-04-21  2:09 ` Junio C Hamano
                   ` (5 more replies)
  0 siblings, 6 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-04-21  1:34 UTC (permalink / raw)
  To: git; +Cc: Đoàn Trần Công Danh

When an SMTP server receives an 8-bit email message, possibly with only
LF as line ending, some of those servers decide to change said LF to
CRLF.

Some other SMTP servers, when receives an 8-bit email message, decide to
encoding such message in base64 and/or quoted-printable instead.

If an email is transfered through those 2 email servers in order, the
final recipients will receive an email contains a patch mungled with
CRLF encoded inside another encoding. Thus, such CR couldn't be dropped
by mailsplit. Such accidents have been observed in the wild [1].

Let's guess if such CR was added automatically and strip them in
mailinfo.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---

 I'm not sure if guessing the heuristic to strip CR is a good approach.
 I think it's better to pass --keep-cr down from git-am.
 Let's say --keep-cr=<yes|no|auto>

 mailinfo.c             | 20 +++++++++++++++++---
 t/t5100-mailinfo.sh    |  5 +++++
 t/t5100/cr-base64.mbox | 22 ++++++++++++++++++++++
 t/t5100/info1000       |  5 +++++
 t/t5100/msg1000        |  2 ++
 t/t5100/patch1000      | 22 ++++++++++++++++++++++
 6 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 t/t5100/cr-base64.mbox
 create mode 100644 t/t5100/info1000
 create mode 100644 t/t5100/msg1000
 create mode 100644 t/t5100/patch1000

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..dbff867f42 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -988,16 +988,27 @@ static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
 }
 
 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
-				 struct strbuf *prev)
+				 struct strbuf *prev, int *keep_cr)
 {
 	size_t len = line->len;
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (*keep_cr == -1 && len >= 2)
+			*keep_cr = !(line->buf[len - 2] == '\r' &&
+				     line->buf[len - 1] == '\n');
+		if (!*keep_cr && len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			strbuf_setlen(line, len - 2);
+			strbuf_addch(line, '\n');
+			len--;
+		}
 		handle_filter(mi, line);
 		return;
 	}
 
+	*keep_cr = 1;
 	if (line->buf[len - 1] == '\n') {
 		len--;
 		if (len && line->buf[len - 1] == '\r')
@@ -1036,6 +1047,7 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
+	int keep_cr = -1;
 
 	/* Skip up to the first boundary */
 	if (*(mi->content_top)) {
@@ -1081,7 +1093,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 						strbuf_addbuf(&prev, sb);
 						break;
 					}
-				handle_filter_flowed(mi, sb, &prev);
+				handle_filter_flowed(mi, sb, &prev, &keep_cr);
 			}
 			/*
 			 * The partial chunk is saved in "prev" and will be
@@ -1091,7 +1103,9 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 			break;
 		}
 		default:
-			handle_filter_flowed(mi, line, &prev);
+			/* CR in plain message was processed in mailsplit */
+			keep_cr = 1;
+			handle_filter_flowed(mi, line, &prev, &keep_cr);
 		}
 
 		if (mi->input_error)
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..9ccc11d16a 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,9 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+test_expect_success 'mailinfo strip CR after decode base64' '
+	cp $DATA/cr-base64.mbox 1000 &&
+	check_mailinfo 1000 ""
+'
+
 test_done
diff --git a/t/t5100/cr-base64.mbox b/t/t5100/cr-base64.mbox
new file mode 100644
index 0000000000..6ea9806a6b
--- /dev/null
+++ b/t/t5100/cr-base64.mbox
@@ -0,0 +1,22 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgcHl0ZXN0IGlzIHN1ZmZpeGVkIHdpdGggZGlmZmVyZW50IHBh
+dHRlcm5zLg0KDQotLS0NCiBjb25maWd1cmUgfCAyICstDQogMSBmaWxlIGNoYW5nZWQsIDEgaW5z
+ZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS9jb25maWd1cmUgYi9jb25m
+aWd1cmUNCmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUNCi0tLSBhL2NvbmZpZ3VyZQ0K
+KysrIGIvY29uZmlndXJlDQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAt
+ZXEgMSBdOyB0aGVuDQogICAgIHByaW50ZiAiQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+
+PSAzLjApLi4uICINCiAgICAgY29uZj0kKG1rdGVtcCkNCiAgICAgcHJpbnRmICJbcHl0ZXN0XVxu
+bWludmVyc2lvbj0zLjBcbiIgPiAkY29uZg0KLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQorICAgIGlmICIkcHl0aG9uIiAtbSBweXRlc3Qg
+LWMgJGNvbmYgLS12ZXJzaW9uID4vZGV2L251bGwgMj4mMTsgdGhlbg0KICAgICAgICAgcHJpbnRm
+ICJZZXMuXG4iDQogICAgICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTENCiAgICAgZWxzZQ0KLS0g
+DQoyLjI4LjANCl9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+CmV4YW1wbGUgbWFpbGluZyBsaXN0IC0tIGxpc3RAZXhhbXBsZS5vcmcKVG8gdW5zdWJzY3JpYmUg
+c2VuZCBhbiBlbWFpbCB0byBsaXN0LWxlYXZlQGV4YW1wbGUub3JnCg==
diff --git a/t/t5100/info1000 b/t/t5100/info1000
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/info1000
@@ -0,0 +1,5 @@
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/msg1000 b/t/t5100/msg1000
new file mode 100644
index 0000000000..5e8e860aae
--- /dev/null
+++ b/t/t5100/msg1000
@@ -0,0 +1,2 @@
+On different distro, pytest is suffixed with different patterns.
+
diff --git a/t/t5100/patch1000 b/t/t5100/patch1000
new file mode 100644
index 0000000000..51c4fb4cb5
--- /dev/null
+++ b/t/t5100/patch1000
@@ -0,0 +1,22 @@
+---
+ configure | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/configure b/configure
+index db3538b3..f7c1c095 100755
+--- a/configure
++++ b/configure
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then
+     printf "Checking for python3 pytest (>= 3.0)... "
+     conf=$(mktemp)
+     printf "[pytest]\nminversion=3.0\n" > $conf
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then
+         printf "Yes.\n"
+         have_python3_pytest=1
+     else
+-- 
+2.28.0
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
-- 
2.31.1.192.g0881477623


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] mailinfo: strip CR from base64/quoted-printable email
  2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
@ 2021-04-21  2:09 ` Junio C Hamano
  2021-04-21  3:32 ` brian m. carlson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-04-21  2:09 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> When an SMTP server receives an 8-bit email message, possibly with only
> LF as line ending, some of those servers decide to change said LF to
> CRLF.
>
> Some other SMTP servers, when receives an 8-bit email message, decide to
> encoding such message in base64 and/or quoted-printable instead.

encoding -> encode

>
> If an email is transfered through those 2 email servers in order, the
> final recipients will receive an email contains a patch mungled with
> CRLF encoded inside another encoding. Thus, such CR couldn't be dropped
> by mailsplit. Such accidents have been observed in the wild [1].
>
> Let's guess if such CR was added automatically and strip them in
> mailinfo.
>
> [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi
>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>
>  I'm not sure if guessing the heuristic to strip CR is a good approach.
>  I think it's better to pass --keep-cr down from git-am.
>  Let's say --keep-cr=<yes|no|auto>

It matches my instinct to tie this with the existing --keep-cr
option, even though I admit that I haven't thought things through.

.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] mailinfo: strip CR from base64/quoted-printable email
  2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
  2021-04-21  2:09 ` Junio C Hamano
@ 2021-04-21  3:32 ` brian m. carlson
  2021-04-21 12:07   ` Đoàn Trần Công Danh
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 35+ messages in thread
From: brian m. carlson @ 2021-04-21  3:32 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 2282 bytes --]

On 2021-04-21 at 01:34:04, Đoàn Trần Công Danh wrote:
> When an SMTP server receives an 8-bit email message, possibly with only
> LF as line ending, some of those servers decide to change said LF to
> CRLF.
> 
> Some other SMTP servers, when receives an 8-bit email message, decide to
> encoding such message in base64 and/or quoted-printable instead.

This really isn't an SMTP server.  It's mailing list software, namely
mailman, and I would argue it's a bug, even though we may want to work
around it.  For example, re-encoding the message breaks DKIM signatures,
which means that mailman is likely to cause mail to be needlessly
rejected.

8BITMIME is now so common with SMTP that I'd argue that we should just
write off servers that don't support it (especially in the context of
SMTPUTF8 existing), but this isn't the case of an SMTP server being
stuck in the last century.  Can we say more accurately that this is
mailing list software (or just call it out by name)?

> If an email is transfered through those 2 email servers in order, the
> final recipients will receive an email contains a patch mungled with
> CRLF encoded inside another encoding. Thus, such CR couldn't be dropped
> by mailsplit. Such accidents have been observed in the wild [1].
> 
> Let's guess if such CR was added automatically and strip them in
> mailinfo.
>
> [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi
> 
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
> 
>  I'm not sure if guessing the heuristic to strip CR is a good approach.
>  I think it's better to pass --keep-cr down from git-am.
>  Let's say --keep-cr=<yes|no|auto>

I think we may want a separate option here.  When I send a 7bit or 8bit
body, I expect text canonicalization on the line endings.  However, when
I send a base64 or quoted-printable body, I don't expect my data to be
modified at all, and absent a compelling reason, doing so is incorrect.
In most cases, using base64 or quoted-printable is going to mean that
the sender knew that the body shouldn't be modified, not that mailman
modified it, so we should make line munging in this case opt-in.
-- 
brian m. carlson (he/him or they/them)
Houston, Texas, US

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] mailinfo: strip CR from base64/quoted-printable email
  2021-04-21  3:32 ` brian m. carlson
@ 2021-04-21 12:07   ` Đoàn Trần Công Danh
  2021-04-22  1:10     ` brian m. carlson
  0 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-04-21 12:07 UTC (permalink / raw)
  To: brian m. carlson, git

On 2021-04-21 03:32:07+0000, "brian m. carlson" <sandals@crustytoothpaste.net> wrote:
> On 2021-04-21 at 01:34:04, Đoàn Trần Công Danh wrote:
> > When an SMTP server receives an 8-bit email message, possibly with only
> > LF as line ending, some of those servers decide to change said LF to
> > CRLF.
> > 
> > Some other SMTP servers, when receives an 8-bit email message, decide to
> > encoding such message in base64 and/or quoted-printable instead.
> 
> This really isn't an SMTP server.  It's mailing list software, namely
> mailman, and I would argue it's a bug, even though we may want to work
> around it.  For example, re-encoding the message breaks DKIM signatures,
> which means that mailman is likely to cause mail to be needlessly
> rejected.
> 
> 8BITMIME is now so common with SMTP that I'd argue that we should just
> write off servers that don't support it (especially in the context of
> SMTPUTF8 existing), but this isn't the case of an SMTP server being
> stuck in the last century.  Can we say more accurately that this is
> mailing list software (or just call it out by name)?

I think replace "SMTP servers" with "mailing list managers" is
correct. I don't feel comfortable to call it out, since I don't know
if other managers do it that way or not.

> 
> > If an email is transfered through those 2 email servers in order, the
> > final recipients will receive an email contains a patch mungled with
> > CRLF encoded inside another encoding. Thus, such CR couldn't be dropped
> > by mailsplit. Such accidents have been observed in the wild [1].
> > 
> > Let's guess if such CR was added automatically and strip them in
> > mailinfo.
> >
> > [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi
> > 
> > Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> > ---
> > 
> >  I'm not sure if guessing the heuristic to strip CR is a good approach.
> >  I think it's better to pass --keep-cr down from git-am.
> >  Let's say --keep-cr=<yes|no|auto>
> 
> I think we may want a separate option here.  When I send a 7bit or 8bit
> body, I expect text canonicalization on the line endings.  However, when
> I send a base64 or quoted-printable body, I don't expect my data to be
> modified at all, and absent a compelling reason, doing so is incorrect.
> In most cases, using base64 or quoted-printable is going to mean that
> the sender knew that the body shouldn't be modified, not that mailman
> modified it, so we should make line munging in this case opt-in.

Make sense, this patch was sent mostly for some discussion first.
Would you mind suggest something for the option.

I'm thinking about --quoted-cr=<nowarn|warn|fix>, mimicking the
--whitespace option.

-- 
Danh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] mailinfo: strip CR from base64/quoted-printable email
  2021-04-21 12:07   ` Đoàn Trần Công Danh
@ 2021-04-22  1:10     ` brian m. carlson
  0 siblings, 0 replies; 35+ messages in thread
From: brian m. carlson @ 2021-04-22  1:10 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 709 bytes --]

On 2021-04-21 at 12:07:42, Đoàn Trần Công Danh wrote:
> I think replace "SMTP servers" with "mailing list managers" is
> correct. I don't feel comfortable to call it out, since I don't know
> if other managers do it that way or not.

I think that's fair.  I would hope not for the reasons I mentioned, but
it also would not be surprising to me if others did nevertheless.

> Make sense, this patch was sent mostly for some discussion first.
> Would you mind suggest something for the option.
> 
> I'm thinking about --quoted-cr=<nowarn|warn|fix>, mimicking the
> --whitespace option.

I think that sounds like a great name.
-- 
brian m. carlson (he/him or they/them)
Houston, Texas, US

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 0/5] Teach am/mailinfo to process quoted CR
  2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
  2021-04-21  2:09 ` Junio C Hamano
  2021-04-21  3:32 ` brian m. carlson
@ 2021-05-04 17:19 ` Đoàn Trần Công Danh
  2021-05-04 17:19   ` [PATCH v2 1/5] mailinfo: avoid magic number in option parsing Đoàn Trần Công Danh
                     ` (5 more replies)
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
                   ` (2 subsequent siblings)
  5 siblings, 6 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:19 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When an SMTP server receives an 8-bit email message, possibly with only
LF as line ending, some of those servers decide to change said LF to
CRLF.

Some mailing list software, upon receiving an 8-bit email message,
decides to encode the message in base64 or quoted-printable.

This series tries to help users of such software deal with such patches.

This series is a complete rewrite of v1.
Hence, no {inter,range}-diff.

Sorry for the long delay, life carries me away from Git.

[1]:
https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Đoàn Trần Công Danh (5):
  mailinfo: avoid magic number in option parsing
  mailinfo: warn if CR found in base64/quoted-printable email
  mailinfo: skip quoted CR on user's wish
  mailinfo: strip quoted CR on users' wish
  am: learn to process quoted lines that ends with CRLF

 Documentation/git-am.txt       |  4 +++
 Documentation/git-mailinfo.txt | 19 +++++++++++-
 builtin/am.c                   | 56 ++++++++++++++++++++++++++++++++++
 builtin/mailinfo.c             | 13 +++++---
 mailinfo.c                     | 47 ++++++++++++++++++++++++++--
 mailinfo.h                     | 10 ++++++
 t/t4258-am-quoted-cr.sh        | 37 ++++++++++++++++++++++
 t/t4258/mbox                   | 12 ++++++++
 t/t5100-mailinfo.sh            | 21 +++++++++++++
 t/t5100/quoted-cr-info         |  5 +++
 t/t5100/quoted-cr-msg          |  2 ++
 t/t5100/quoted-cr-patch        | 22 +++++++++++++
 t/t5100/quoted-cr.mbox         | 22 +++++++++++++
 13 files changed, 262 insertions(+), 8 deletions(-)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 1/5] mailinfo: avoid magic number in option parsing
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
@ 2021-05-04 17:19   ` Đoàn Trần Công Danh
  2021-05-04 17:19   ` [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email Đoàn Trần Công Danh
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:19 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

While the current magic number is perfectly correct, in a later change
we would like to parse another option, namely "--quoted-cr".
Let's refactor a bit to remove that magic number.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index cfb667a594..b309badce5 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -24,6 +24,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	mi.metainfo_charset = def_charset;
 
 	while (1 < argc && argv[1][0] == '-') {
+		const char *str;
 		if (!strcmp(argv[1], "-k"))
 			mi.keep_subject = 1;
 		else if (!strcmp(argv[1], "-b"))
@@ -34,8 +35,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			mi.metainfo_charset = def_charset;
 		else if (!strcmp(argv[1], "-n"))
 			mi.metainfo_charset = NULL;
-		else if (starts_with(argv[1], "--encoding="))
-			mi.metainfo_charset = argv[1] + 11;
+		else if (skip_prefix(argv[1], "--encoding=", &str))
+			mi.metainfo_charset = str;
 		else if (!strcmp(argv[1], "--scissors"))
 			mi.use_scissors = 1;
 		else if (!strcmp(argv[1], "--no-scissors"))
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
  2021-05-04 17:19   ` [PATCH v2 1/5] mailinfo: avoid magic number in option parsing Đoàn Trần Công Danh
@ 2021-05-04 17:19   ` Đoàn Trần Công Danh
  2021-05-05  3:41     ` Junio C Hamano
  2021-05-04 17:20   ` [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish Đoàn Trần Công Danh
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:19 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When an SMTP server receives an 8-bit email message, possibly with only
LF as line ending, some of those servers decide to change said LF to
CRLF.

Some mailing list software, upon receiving an 8-bit email message,
decides to encode the message in base64 or quoted-printable.

If an email is transferred through the above mail servers and then
distributed by such mailing list software, the recipients will receive
an email containing a patch mangled with CRLF encoded inside another
encoding. Thus, such CR cannot be dropped by mailsplit, and the mailed
patch cannot be applied cleanly. Such accidents have been observed in
the wild [1].

Let's give our users some warnings if such CR is found.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c              | 21 ++++++++++++++++++---
 t/t5100-mailinfo.sh     | 15 +++++++++++++++
 t/t5100/quoted-cr-info  |  5 +++++
 t/t5100/quoted-cr-msg   |  2 ++
 t/t5100/quoted-cr-patch | 22 ++++++++++++++++++++++
 t/t5100/quoted-cr.mbox  | 22 ++++++++++++++++++++++
 6 files changed, 84 insertions(+), 3 deletions(-)
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..713567f84b 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -988,12 +988,17 @@ static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
 }
 
 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
-				 struct strbuf *prev)
+				 struct strbuf *prev, int *have_quoted_cr)
 {
 	size_t len = line->len;
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			*have_quoted_cr = 1;
+		}
 		handle_filter(mi, line);
 		return;
 	}
@@ -1033,9 +1038,16 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	handle_filter(mi, line);
 }
 
+static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
+{
+	if (have_quoted_cr)
+		warning("quoted CR detected");
+}
+
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
+	int have_quoted_cr = 0;
 
 	/* Skip up to the first boundary */
 	if (*(mi->content_top)) {
@@ -1051,6 +1063,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 				handle_filter(mi, &prev);
 				strbuf_reset(&prev);
 			}
+			summarize_quoted_cr(mi, have_quoted_cr);
+			have_quoted_cr = 0;
 			if (!handle_boundary(mi, line))
 				goto handle_body_out;
 		}
@@ -1081,7 +1095,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 						strbuf_addbuf(&prev, sb);
 						break;
 					}
-				handle_filter_flowed(mi, sb, &prev);
+				handle_filter_flowed(mi, sb, &prev, &have_quoted_cr);
 			}
 			/*
 			 * The partial chunk is saved in "prev" and will be
@@ -1091,7 +1105,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 			break;
 		}
 		default:
-			handle_filter_flowed(mi, line, &prev);
+			handle_filter_flowed(mi, line, &prev, &have_quoted_cr);
 		}
 
 		if (mi->input_error)
@@ -1100,6 +1114,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 
 	if (prev.len)
 		handle_filter(mi, &prev);
+	summarize_quoted_cr(mi, have_quoted_cr);
 
 	flush_inbody_header_accum(mi);
 
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..d8fdda6bea 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,19 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+check_quoted_cr_mail() {
+	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
+		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
+	test_cmp "expect-cr-msg" quoted-cr-msg &&
+	test_cmp "expect-cr-patch" quoted-cr-patch &&
+	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
+}
+
+test_expect_success 'mailinfo warn CR in base64 encoded email' '
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
+	check_quoted_cr_mail &&
+	grep "quoted CR detected" quoted-cr-err
+'
+
 test_done
diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/quoted-cr-info
@@ -0,0 +1,5 @@
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg
new file mode 100644
index 0000000000..a148bc7e26
--- /dev/null
+++ b/t/t5100/quoted-cr-msg
@@ -0,0 +1,2 @@
+On different distro, pytest is suffixed with different patterns.%%
+%%
diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch
new file mode 100644
index 0000000000..580e2bddb8
--- /dev/null
+++ b/t/t5100/quoted-cr-patch
@@ -0,0 +1,22 @@
+---%%
+ configure | 2 +-%%
+ 1 file changed, 1 insertion(+), 1 deletion(-)%%
+%%
+diff --git a/configure b/configure%%
+index db3538b3..f7c1c095 100755%%
+--- a/configure%%
++++ b/configure%%
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
+     printf "Checking for python3 pytest (>= 3.0)... "%%
+     conf=$(mktemp)%%
+     printf "[pytest]\nminversion=3.0\n" > $conf%%
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%%
+         printf "Yes.\n"%%
+         have_python3_pytest=1%%
+     else%%
+-- %%
+2.28.0%%
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox
new file mode 100644
index 0000000000..6ea9806a6b
--- /dev/null
+++ b/t/t5100/quoted-cr.mbox
@@ -0,0 +1,22 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgcHl0ZXN0IGlzIHN1ZmZpeGVkIHdpdGggZGlmZmVyZW50IHBh
+dHRlcm5zLg0KDQotLS0NCiBjb25maWd1cmUgfCAyICstDQogMSBmaWxlIGNoYW5nZWQsIDEgaW5z
+ZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS9jb25maWd1cmUgYi9jb25m
+aWd1cmUNCmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUNCi0tLSBhL2NvbmZpZ3VyZQ0K
+KysrIGIvY29uZmlndXJlDQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAt
+ZXEgMSBdOyB0aGVuDQogICAgIHByaW50ZiAiQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+
+PSAzLjApLi4uICINCiAgICAgY29uZj0kKG1rdGVtcCkNCiAgICAgcHJpbnRmICJbcHl0ZXN0XVxu
+bWludmVyc2lvbj0zLjBcbiIgPiAkY29uZg0KLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQorICAgIGlmICIkcHl0aG9uIiAtbSBweXRlc3Qg
+LWMgJGNvbmYgLS12ZXJzaW9uID4vZGV2L251bGwgMj4mMTsgdGhlbg0KICAgICAgICAgcHJpbnRm
+ICJZZXMuXG4iDQogICAgICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTENCiAgICAgZWxzZQ0KLS0g
+DQoyLjI4LjANCl9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+CmV4YW1wbGUgbWFpbGluZyBsaXN0IC0tIGxpc3RAZXhhbXBsZS5vcmcKVG8gdW5zdWJzY3JpYmUg
+c2VuZCBhbiBlbWFpbCB0byBsaXN0LWxlYXZlQGV4YW1wbGUub3JnCg==
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
  2021-05-04 17:19   ` [PATCH v2 1/5] mailinfo: avoid magic number in option parsing Đoàn Trần Công Danh
  2021-05-04 17:19   ` [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email Đoàn Trần Công Danh
@ 2021-05-04 17:20   ` Đoàn Trần Công Danh
  2021-05-05  4:12     ` Junio C Hamano
  2021-05-04 17:20   ` [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish Đoàn Trần Công Danh
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:20 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In the previous change, we turned on a warning for quoted CR in
base64-encoded email. Although those warnings are usually helpful for
our users, some users may expect quoted CR in their emails.

Let's give them an option to turn off the warning completely.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 18 +++++++++++++++++-
 builtin/mailinfo.c             |  8 ++++++--
 mailinfo.c                     | 21 ++++++++++++++++++++-
 mailinfo.h                     |  8 ++++++++
 t/t5100-mailinfo.sh            |  6 ++++--
 5 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index d343f040f5..c776b27515 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -9,7 +9,7 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
 SYNOPSIS
 --------
 [verse]
-'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>
 
 
 DESCRIPTION
@@ -89,6 +89,22 @@ This can be enabled by default with the configuration option mailinfo.scissors.
 --no-scissors::
 	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
 
+--quoted-cr=<action>::
+	Action when processes email messages sent with base64 or
+	quoted-printable encoding, and the decoded lines end with CR-LF
+	instead of a simple LF.
++
+The valid actions are:
++
+--
+*	`nowarn`: Git will do nothing with this action.
+*	`warn`: Git will issue a warning for each message if such CR-LF is
+	found.
+--
++
+The default action could be set by configuration option `mailinfo.quotedCR`.
+If no such configuration option has been set, `warn` will be used.
+
 <msg>::
 	The commit log message extracted from e-mail, usually
 	except the title line which comes from e-mail Subject.
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index b309badce5..1d600263cb 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -9,7 +9,7 @@
 #include "mailinfo.h"
 
 static const char mailinfo_usage[] =
-	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
+	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";
 
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
@@ -43,7 +43,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			mi.use_scissors = 0;
 		else if (!strcmp(argv[1], "--no-inbody-headers"))
 			mi.use_inbody_headers = 0;
-		else
+		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
+			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
+			if (mi.quoted_cr == quoted_cr_invalid_action)
+				usage(mailinfo_usage);
+		} else
 			usage(mailinfo_usage);
 		argc--; argv++;
 	}
diff --git a/mailinfo.c b/mailinfo.c
index 713567f84b..fe7ffd01d0 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 
 static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
 {
-	if (have_quoted_cr)
+	if (have_quoted_cr
+	    && mi->quoted_cr == quoted_cr_warn)
 		warning("quoted CR detected");
 }
 
@@ -1221,9 +1222,19 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
 	return mi->input_error;
 }
 
+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
+{
+	if (!strcmp(action, "nowarn"))
+		return quoted_cr_nowarn;
+	else if (!strcmp(action, "warn"))
+		return quoted_cr_warn;
+	return quoted_cr_invalid_action;
+}
+
 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 {
 	struct mailinfo *mi = mi_;
+	const char *str;
 
 	if (!starts_with(var, "mailinfo."))
 		return git_default_config(var, value, NULL);
@@ -1231,6 +1242,13 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 		mi->use_scissors = git_config_bool(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "mailinfo.quotedcr")) {
+		git_config_string(&str, var, value);
+		mi->quoted_cr = mailinfo_parse_quoted_cr_action(str);
+		if (mi->quoted_cr == quoted_cr_invalid_action)
+			die(_("bad action '%s' for '%s'"), str, var);
+		free((void *)str);
+	}
 	/* perhaps others here */
 	return 0;
 }
@@ -1243,6 +1261,7 @@ void setup_mailinfo(struct mailinfo *mi)
 	strbuf_init(&mi->charset, 0);
 	strbuf_init(&mi->log_message, 0);
 	strbuf_init(&mi->inbody_header_accum, 0);
+	mi->quoted_cr = quoted_cr_warn;
 	mi->header_stage = 1;
 	mi->use_inbody_headers = 1;
 	mi->content_top = mi->content;
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..1bcef5a6f3 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -5,6 +5,12 @@
 
 #define MAX_BOUNDARIES 5
 
+enum quoted_cr_action {
+	quoted_cr_nowarn,
+	quoted_cr_warn,
+	quoted_cr_invalid_action
+};
+
 struct mailinfo {
 	FILE *input;
 	FILE *output;
@@ -14,6 +20,7 @@ struct mailinfo {
 	struct strbuf email;
 	int keep_subject;
 	int keep_non_patch_brackets_in_subject;
+	enum quoted_cr_action quoted_cr;
 	int add_message_id;
 	int use_scissors;
 	int use_inbody_headers;
@@ -39,6 +46,7 @@ struct mailinfo {
 	int input_error;
 };
 
+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action);
 void setup_mailinfo(struct mailinfo *);
 int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 void clear_mailinfo(struct mailinfo *);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index d8fdda6bea..57b8fc8104 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -236,11 +236,13 @@ check_quoted_cr_mail() {
 	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
 }
 
-test_expect_success 'mailinfo warn CR in base64 encoded email' '
+test_expect_success 'mailinfo handle CR in base64 encoded email' '
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
 	check_quoted_cr_mail &&
-	grep "quoted CR detected" quoted-cr-err
+	grep "quoted CR detected" quoted-cr-err &&
+	check_quoted_cr_mail --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr-err
 '
 
 test_done
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (2 preceding siblings ...)
  2021-05-04 17:20   ` [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish Đoàn Trần Công Danh
@ 2021-05-04 17:20   ` Đoàn Trần Công Danh
  2021-05-05  4:27     ` Junio C Hamano
  2021-05-04 17:20   ` [PATCH v2 5/5] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
  2021-05-05  4:31   ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Junio C Hamano
  5 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:20 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, we've turned on warning for quoted CR in base64 or
quoted-printable email messages. Some projects see those quoted CRs a lot
and they know that it happens most of the time.

Those projects in question usually fall back to use other tools to handle
patches when receiving such patches.

Let's help those projects handle those patches by stripping those
excessive CR-s.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 1 +
 mailinfo.c                     | 7 +++++++
 mailinfo.h                     | 1 +
 t/t5100-mailinfo.sh            | 4 ++++
 4 files changed, 13 insertions(+)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index c776b27515..d700929a46 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -100,6 +100,7 @@ The valid actions are:
 *	`nowarn`: Git will do nothing with this action.
 *	`warn`: Git will issue a warning for each message if such CR-LF is
 	found.
+*	`strip`: Git will convert those CR-LF to LF.
 --
 +
 The default action could be set by configuration option `mailinfo.quotedCR`.
diff --git a/mailinfo.c b/mailinfo.c
index fe7ffd01d0..68f4eba72a 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -998,6 +998,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 		    line->buf[len - 2] == '\r' &&
 		    line->buf[len - 1] == '\n') {
 			*have_quoted_cr = 1;
+			if (mi->quoted_cr == quoted_cr_strip) {
+				strbuf_setlen(line, len - 2);
+				strbuf_addch(line, '\n');
+				len--;
+			}
 		}
 		handle_filter(mi, line);
 		return;
@@ -1228,6 +1233,8 @@ enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
 		return quoted_cr_nowarn;
 	else if (!strcmp(action, "warn"))
 		return quoted_cr_warn;
+	else if (!strcmp(action, "strip"))
+		return quoted_cr_strip;
 	return quoted_cr_invalid_action;
 }
 
diff --git a/mailinfo.h b/mailinfo.h
index 1bcef5a6f3..e0e094c311 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -8,6 +8,7 @@
 enum quoted_cr_action {
 	quoted_cr_nowarn,
 	quoted_cr_warn,
+	quoted_cr_strip,
 	quoted_cr_invalid_action
 };
 
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 57b8fc8104..7559c922c6 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -242,6 +242,10 @@ test_expect_success 'mailinfo handle CR in base64 encoded email' '
 	check_quoted_cr_mail &&
 	grep "quoted CR detected" quoted-cr-err &&
 	check_quoted_cr_mail --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr-err &&
+	sed "s/%%//" "$DATA/quoted-cr-msg" >expect-cr-msg &&
+	sed "s/%%//" "$DATA/quoted-cr-patch" >expect-cr-patch &&
+	check_quoted_cr_mail --quoted-cr=strip &&
 	test_must_be_empty quoted-cr-err
 '
 
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 5/5] am: learn to process quoted lines that ends with CRLF
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (3 preceding siblings ...)
  2021-05-04 17:20   ` [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish Đoàn Trần Công Danh
@ 2021-05-04 17:20   ` Đoàn Trần Công Danh
  2021-05-05  4:31   ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Junio C Hamano
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-04 17:20 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, mailinfo has learnt to process lines that are decoded
from base64 or quoted-printable and end with CRLF.

Let's teach "am" that new option, too.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-am.txt |  4 +++
 builtin/am.c             | 56 ++++++++++++++++++++++++++++++++++++++++
 mailinfo.h               |  1 +
 t/t4258-am-quoted-cr.sh  | 37 ++++++++++++++++++++++++++
 t/t4258/mbox             | 12 +++++++++
 5 files changed, 110 insertions(+)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox

diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt
index decd8ae122..8714dfcb76 100644
--- a/Documentation/git-am.txt
+++ b/Documentation/git-am.txt
@@ -15,6 +15,7 @@ SYNOPSIS
 	 [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>]
 	 [--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet]
 	 [--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>]
+	 [--quoted-cr=<action>]
 	 [(<mbox> | <Maildir>)...]
 'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)])
 
@@ -59,6 +60,9 @@ OPTIONS
 --no-scissors::
 	Ignore scissors lines (see linkgit:git-mailinfo[1]).
 
+--quoted-cr=<action>::
+	This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]).
+
 -m::
 --message-id::
 	Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]),
diff --git a/builtin/am.c b/builtin/am.c
index 8355e3566f..ff4c8ee68f 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -116,6 +116,7 @@ struct am_state {
 	int keep; /* enum keep_type */
 	int message_id;
 	int scissors; /* enum scissors_type */
+	int quoted_cr; /* enum quoted_cr_action */
 	struct strvec git_apply_opts;
 	const char *resolvemsg;
 	int committer_date_is_author_date;
@@ -145,6 +146,7 @@ static void am_state_init(struct am_state *state)
 	git_config_get_bool("am.messageid", &state->message_id);
 
 	state->scissors = SCISSORS_UNSET;
+	state->quoted_cr = quoted_cr_unset;
 
 	strvec_init(&state->git_apply_opts);
 
@@ -165,6 +167,19 @@ static void am_state_release(struct am_state *state)
 	strvec_clear(&state->git_apply_opts);
 }
 
+static int am_option_parse_quoted_cr(const struct option *opt,
+				     const char *arg, int unset)
+{
+	int *quoted_cr = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+
+	*quoted_cr = mailinfo_parse_quoted_cr_action(arg);
+	if (*quoted_cr == quoted_cr_invalid_action)
+		return -1;
+	return 0;
+}
+
 /**
  * Returns path relative to the am_state directory.
  */
@@ -397,6 +412,14 @@ static void am_load(struct am_state *state)
 	else
 		state->scissors = SCISSORS_UNSET;
 
+	read_state_file(&sb, state, "quoted-cr", 1);
+	if (!*sb.buf)
+		state->quoted_cr = quoted_cr_unset;
+	else
+		state->quoted_cr = mailinfo_parse_quoted_cr_action(sb.buf);
+	if (state->quoted_cr == quoted_cr_invalid_action)
+		die(_("could not parse %s"), am_path(state, "quoted-cr"));
+
 	read_state_file(&sb, state, "apply-opt", 1);
 	strvec_clear(&state->git_apply_opts);
 	if (sq_dequote_to_strvec(sb.buf, &state->git_apply_opts) < 0)
@@ -1002,6 +1025,24 @@ static void am_setup(struct am_state *state, enum patch_format patch_format,
 	}
 	write_state_text(state, "scissors", str);
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		str = "";
+		break;
+	case quoted_cr_nowarn:
+		str = "nowarn";
+		break;
+	case quoted_cr_warn:
+		str = "warn";
+		break;
+	case quoted_cr_strip:
+		str = "strip";
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+	write_state_text(state, "quoted-cr", str);
+
 	sq_quote_argv(&sb, state->git_apply_opts.v);
 	write_state_text(state, "apply-opt", sb.buf);
 
@@ -1162,6 +1203,18 @@ static int parse_mail(struct am_state *state, const char *mail)
 		BUG("invalid value for state->scissors");
 	}
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		break;
+	case quoted_cr_nowarn:
+	case quoted_cr_warn:
+	case quoted_cr_strip:
+		mi.quoted_cr = state->quoted_cr;
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+
 	mi.input = xfopen(mail, "r");
 	mi.output = xfopen(am_path(state, "info"), "w");
 	if (mailinfo(&mi, am_path(state, "msg"), am_path(state, "patch")))
@@ -2242,6 +2295,9 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 			0, PARSE_OPT_NONEG),
 		OPT_BOOL('c', "scissors", &state.scissors,
 			N_("strip everything before a scissors line")),
+		OPT_CALLBACK_F(0, "quoted-cr", &state.quoted_cr, N_("action"),
+			       N_("pass it through git-mailinfo"),
+			       PARSE_OPT_NONEG, am_option_parse_quoted_cr),
 		OPT_PASSTHRU_ARGV(0, "whitespace", &state.git_apply_opts, N_("action"),
 			N_("pass it through git-apply"),
 			0),
diff --git a/mailinfo.h b/mailinfo.h
index e0e094c311..8c78c72bbe 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -6,6 +6,7 @@
 #define MAX_BOUNDARIES 5
 
 enum quoted_cr_action {
+	quoted_cr_unset = -1,
 	quoted_cr_nowarn,
 	quoted_cr_warn,
 	quoted_cr_strip,
diff --git a/t/t4258-am-quoted-cr.sh b/t/t4258-am-quoted-cr.sh
new file mode 100755
index 0000000000..2029115ecd
--- /dev/null
+++ b/t/t4258-am-quoted-cr.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+test_description='test am --quoted-cr=<action>'
+
+. ./test-lib.sh
+
+DATA="$TEST_DIRECTORY/t4258"
+
+test_expect_success 'setup' '
+	test_write_lines one two three >text &&
+	test_commit one text &&
+	test_write_lines one owt three >text &&
+	test_commit two text
+'
+
+test_expect_success 'am warn if quoted-cr is found' '
+	git reset --hard one &&
+	test_must_fail git am "$DATA/mbox" 2>err &&
+	grep "quoted CR detected" err
+'
+
+test_expect_success 'am strip if quoted-cr is found' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	git am --quoted-cr=strip "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_expect_success 'am strip if quoted-cr is found' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	test_config mailinfo.quotedCr strip &&
+	git am "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_done
diff --git a/t/t4258/mbox b/t/t4258/mbox
new file mode 100644
index 0000000000..c62819f3d2
--- /dev/null
+++ b/t/t4258/mbox
@@ -0,0 +1,12 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+VGhpcyBpcyBjb21taXQgbWVzc2FnZS4NCi0tLQ0KIHRleHQgfCAyICstDQogMSBmaWxlIGNoYW5n
+ZWQsIDEgaW5zZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS90ZXh0IGIv
+dGV4dA0KaW5kZXggNTYyNmFiZi4uZjcxOWVmZCAxMDA2NDQNCi0tLSBhL3RleHQNCisrKyBiL3Rl
+eHQNCkBAIC0xICsxIEBADQotb25lDQordHdvDQotLSANCjIuMzEuMQoK
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email
  2021-05-04 17:19   ` [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email Đoàn Trần Công Danh
@ 2021-05-05  3:41     ` Junio C Hamano
  0 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-05  3:41 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> When an SMTP server receives an 8-bit email message, possibly with only
> LF as line ending, some of those servers decide to change said LF to
> CRLF.

s/an SMTP server receives/SMTP servers receive/
s/those servers/them/

> Some mailing list softwares, when receives an 8-bit email message,
> decide to encoding such message in base64 or quoted-printable.

s/encoding/encode/

So the issue is not about CRLF terminating the lines of base64 or QP
(we should treat CRLF and LF terminated lines when unwrapping base64
or QP the same way).  It is about seeing CRLF in the payload after
unwrapping base64 or QP.  It was unclear which one was at issue from
the subject alone.

> If an email is transfered through above mail servers, then distributed
> by such mailing list softwares, the recipients will receive an email
> contains a patch mungled with CRLF encoded inside another encoding.
> Thus, such CR couldn't be dropped by mailsplit. Hence, the mailed patch
> couldn't be applied cleanly. Such accidents have been observed in the wild [1].
>
> Let's give our users some warnings if such CR is found.

Hmph.  It is unclear which one of the following we want our endgame
to be:

 (1) strip silently and apply
 (2) warn but strip and apply
 (3) warn but do not strip, letting the application fail

but let's keep reading.  I suspect (1) and (2) might be error prone,
as the mailpath that may have caused this kind of breakage may not
be under end-user's control.

> +static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
> +{
> +	if (have_quoted_cr)
> +		warning("quoted CR detected");
> +}

At this step, it is unclear if it is easier to read to make it the
responsibility of the caller to check for have_quoted_cr, but it
will become clear as we add more condition for the warning in later
steps to let callers unconditionally call this helper and decide
when we want to be silent inside this function.

Have you considered adding a new have_quoted_cr member to "struct
mailinfo"?  After all, the mailinfo struct is not only about end
user preference but contains all information we gleaned out of the
incoming message.

>  static void handle_body(struct mailinfo *mi, struct strbuf *line)
>  {
>  	struct strbuf prev = STRBUF_INIT;
> +	int have_quoted_cr = 0;
>  
>  	/* Skip up to the first boundary */
>  	if (*(mi->content_top)) {
> @@ -1051,6 +1063,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
>  				handle_filter(mi, &prev);
>  				strbuf_reset(&prev);
>  			}
> +			summarize_quoted_cr(mi, have_quoted_cr);
> +			have_quoted_cr = 0;
>  			if (!handle_boundary(mi, line))
>  				goto handle_body_out;
>  		}
> @@ -1081,7 +1095,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
>  						strbuf_addbuf(&prev, sb);
>  						break;
>  					}
> -				handle_filter_flowed(mi, sb, &prev);
> +				handle_filter_flowed(mi, sb, &prev, &have_quoted_cr);
>  			}
>  			/*
>  			 * The partial chunk is saved in "prev" and will be
> @@ -1091,7 +1105,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
>  			break;
>  		}
>  		default:
> -			handle_filter_flowed(mi, line, &prev);
> +			handle_filter_flowed(mi, line, &prev, &have_quoted_cr);
>  		}
>  
>  		if (mi->input_error)
> @@ -1100,6 +1114,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
>  
>  	if (prev.len)
>  		handle_filter(mi, &prev);
> +	summarize_quoted_cr(mi, have_quoted_cr);
>  
>  	flush_inbody_header_accum(mi);
>  
> diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
> index 147e616533..d8fdda6bea 100755
> --- a/t/t5100-mailinfo.sh
> +++ b/t/t5100-mailinfo.sh
> @@ -228,4 +228,19 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
>  	test_cmp expect actual
>  '
>  
> +check_quoted_cr_mail() {

SP on both sides of (), i.e.

	check_quoted_cr_mail () {

> +	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
> +		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
> +	test_cmp "expect-cr-msg" quoted-cr-msg &&
> +	test_cmp "expect-cr-patch" quoted-cr-patch &&
> +	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
> +}
> +
> +test_expect_success 'mailinfo warn CR in base64 encoded email' '
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
> +	check_quoted_cr_mail &&
> +	grep "quoted CR detected" quoted-cr-err
> +'
> +
>  test_done

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish
  2021-05-04 17:20   ` [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish Đoàn Trần Công Danh
@ 2021-05-05  4:12     ` Junio C Hamano
  2021-05-05 15:53       ` Đoàn Trần Công Danh
  0 siblings, 1 reply; 35+ messages in thread
From: Junio C Hamano @ 2021-05-05  4:12 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> Subject: Re: [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish

Nothing wrong per-se, but "on user's wish" feels somewhat bizarre.
Perhaps

    mailinfo: allow skipping quoted CR

or something along that line?

> In previous change, we've turned on warning for quoted CR in base64
> encoded email. Despite those warnings are usually helpful for our users,
> they may expect quoted CR in their emails.
>
> Let's give them an option to turn off the warning completely.
>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>  Documentation/git-mailinfo.txt | 18 +++++++++++++++++-
>  builtin/mailinfo.c             |  8 ++++++--
>  mailinfo.c                     | 21 ++++++++++++++++++++-
>  mailinfo.h                     |  8 ++++++++
>  t/t5100-mailinfo.sh            |  6 ++++--
>  5 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
> index d343f040f5..c776b27515 100644
> --- a/Documentation/git-mailinfo.txt
> +++ b/Documentation/git-mailinfo.txt
> @@ -9,7 +9,7 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
>  SYNOPSIS
>  --------
>  [verse]
> -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
> +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>

This line is getting really crowded.  Perhaps it is time to do

	'git mailinfo' [<options>] <msg> <patch>

like other Git subcommands with too many options?  Certainly it can
be done after the dust settles from this entire series as a follow up
clean-up patch.

> @@ -89,6 +89,22 @@ This can be enabled by default with the configuration option mailinfo.scissors.
>  --no-scissors::
>  	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
>  
> +--quoted-cr=<action>::
> +	Action when processes email messages sent with base64 or
> +	quoted-printable encoding, and the decoded lines end with CR-LF

s/with CR-LF/with a CRLF/

> +	instead of a simple LF.
> ++
> +The valid actions are:
> ++
> +--
> +*	`nowarn`: Git will do nothing with this action.

s/with this action./when such a CRLF is seen./ perhaps?

> +*	`warn`: Git will issue a warning for each message if such CR-LF is

s/such CR-LF/such a CRLF/

> diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
> index b309badce5..1d600263cb 100644
> --- a/builtin/mailinfo.c
> +++ b/builtin/mailinfo.c
> @@ -9,7 +9,7 @@
>  #include "mailinfo.h"
>  
>  static const char mailinfo_usage[] =
> -	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
> +	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";

It is surprising that we haven't switched this to parse_options().
It of course is outside the scope of this series, but from a cursory
look of its option parsing loop, it looks like a trivial improvement
to make.

> @@ -43,7 +43,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
>  			mi.use_scissors = 0;
>  		else if (!strcmp(argv[1], "--no-inbody-headers"))
>  			mi.use_inbody_headers = 0;
> -		else
> +		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
> +			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
> +			if (mi.quoted_cr == quoted_cr_invalid_action)
> +				usage(mailinfo_usage);

This is not all that helpful, given that mailinfo_usage[] only says
<action> without saying what the supported values are, and the
message does not make it clear it was issued while looking at the
--quoted-cr option.

At least, something like

			if (mi.quoted_cr == quoted_cr_invalid_action)
                        	die("--quoted-cr=%s: invalid action", str);

would be more palatable, but I wonder if mailinfo_parse_quoted_cr_action()
should have an option to die with the list of actions it knows about
in a message.

> diff --git a/mailinfo.c b/mailinfo.c
> index 713567f84b..fe7ffd01d0 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
>  
>  static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
>  {
> -	if (have_quoted_cr)
> +	if (have_quoted_cr
> +	    && mi->quoted_cr == quoted_cr_warn)

Existing code in this file prefers to split a multi-line statement
after sequence point like &&, ||, etc., not before.

>  		warning("quoted CR detected");
>  }
>  
> @@ -1221,9 +1222,19 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
>  	return mi->input_error;
>  }
>  
> +enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
> +{
> +	if (!strcmp(action, "nowarn"))
> +		return quoted_cr_nowarn;
> +	else if (!strcmp(action, "warn"))
> +		return quoted_cr_warn;
> +	return quoted_cr_invalid_action;
> +}

OK.

>  static int git_mailinfo_config(const char *var, const char *value, void *mi_)
>  {
>  	struct mailinfo *mi = mi_;
> +	const char *str;
>  
>  	if (!starts_with(var, "mailinfo."))
>  		return git_default_config(var, value, NULL);
> @@ -1231,6 +1242,13 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_)
>  		mi->use_scissors = git_config_bool(var, value);
>  		return 0;
>  	}
> +	if (!strcmp(var, "mailinfo.quotedcr")) {
> +		git_config_string(&str, var, value);
> +		mi->quoted_cr = mailinfo_parse_quoted_cr_action(str);
> +		if (mi->quoted_cr == quoted_cr_invalid_action)
> +			die(_("bad action '%s' for '%s'"), str, var);
> +		free((void *)str);
> +	}

Here, it is more reasonable.  It still does not say what actions are
accepted, but at least the user learns where our displeasure comes
from.

>  	/* perhaps others here */
>  	return 0;
>  }

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish
  2021-05-04 17:20   ` [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish Đoàn Trần Công Danh
@ 2021-05-05  4:27     ` Junio C Hamano
  0 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-05  4:27 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> Subject: Re: [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish

Again, perhaps

    mailinfo: allow stripping quoted CR without warning

By the way, the previous one said "skip", but I do not think it was
skipping quoted CR, so its title was misleading.  Perhaps

  [3/5] mailinfo: allow squelching quoted CR warning

or something.

> In previous changes, we've turned on warning for quoted CR in base64 or
> quoted-printable email messages. Some projects sees those quoted CR a lot
> and they know that it happens most of the time.

    ... a lot, they know that it happens most of the time, and they
    know it always is harmless to behave as if these CRs are not
    there.

The last sentence is an important precondition for the use of this
new option to be safe.

> diff --git a/mailinfo.c b/mailinfo.c
> index fe7ffd01d0..68f4eba72a 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -998,6 +998,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
>  		    line->buf[len - 2] == '\r' &&
>  		    line->buf[len - 1] == '\n') {
>  			*have_quoted_cr = 1;
> +			if (mi->quoted_cr == quoted_cr_strip) {
> +				strbuf_setlen(line, len - 2);
> +				strbuf_addch(line, '\n');
> +				len--;

The last one is beating a dead variable immediately before this
function returns, even though it is good for consistency (i.e. there
is an invariant throughout the function that len is the number of
bytes contained in line->buf[]).

I am not sure what to think about this.  I wish there weren't need
for a separate len variable, with the need for this extra invariant.
After all, strbuf already has such an invariant that is well
understood by readers of this code (i.e. line->buf[]'s end is at
line->len).  For now, until we get rid of "len" from this function,
let's leave the final decrement in to make it absolutely clear that
we are aware of this extra invariant.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 0/5] Teach am/mailinfo to process quoted CR
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (4 preceding siblings ...)
  2021-05-04 17:20   ` [PATCH v2 5/5] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
@ 2021-05-05  4:31   ` Junio C Hamano
  5 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-05  4:31 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> When an SMTP server receives an 8-bit email message, possibly with only
> LF as line ending, some of those servers decide to change said LF to
> CRLF.
>
> Some mailing list software, when receives an 8-bit email message,
> decide to encoding such message in base64 or quoted-printable.
>
> This series try to help users of such softwares deal with such patches.
>
> This series is a complete rewrite of v1.
> Hence, no {inter,range}-diff.

Thanks.

I've left a few comments here and there, but like the overall
approach.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish
  2021-05-05  4:12     ` Junio C Hamano
@ 2021-05-05 15:53       ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-05 15:53 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, brian m. carlson

On 2021-05-05 13:12:12+0900, Junio C Hamano <gitster@pobox.com> wrote:
> > diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
> > index d343f040f5..c776b27515 100644
> > --- a/Documentation/git-mailinfo.txt
> > +++ b/Documentation/git-mailinfo.txt
> > @@ -9,7 +9,7 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
> >  SYNOPSIS
> >  --------
> >  [verse]
> > -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
> > +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>
> 
> This line is getting really crowded.  Perhaps it is time to do
> 
> 	'git mailinfo' [<options>] <msg> <patch>
> 
> like other Git subcommands with too many options?  Certainly it can
> be done after the dust settles from this entire series as a follow up
> clean-up patch.

Yes, I think it's time to do that clean-up.

> >  static const char mailinfo_usage[] =
> > -	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
> > +	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";
> 
> It is surprising that we haven't switched this to parse_options().
> It of course is outside the scope of this series, but from a cursory
> look of its option parsing loop, it looks like a trivial improvement
> to make.

And given that we also need 1/5 (otherwise, we need a new declaration
for "const char *str"), I think it would be better to turn 1/5 into the
conversion to parse_options().

> > @@ -43,7 +43,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
> >  			mi.use_scissors = 0;
> >  		else if (!strcmp(argv[1], "--no-inbody-headers"))
> >  			mi.use_inbody_headers = 0;
> > -		else
> > +		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
> > +			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
> > +			if (mi.quoted_cr == quoted_cr_invalid_action)
> > +				usage(mailinfo_usage);
> 
> This is not all that helpful, given that mailinfo_usage[] only says
> <action> without saying what the supported values are, and the
> message does not make it clear it was issued while looking at the
> --quoted-cr option.
> 
> At least, something like
> 
> 			if (mi.quoted_cr == quoted_cr_invalid_action)
>                         	die("--quoted-cr=%s: invalid action", str);
> 
> would be more palatable, but I wonder if mailinfo_parse_quoted_cr_action()
> should have an option to die with the list of actions it knows about
> in a message.

I'm tempted to remove the _invalid_action in the re-roll and always
die when it doesn't understand the action instead.
Let's see how far I can get with that approach.

-- 
Danh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 0/6] Teach am/mailinfo to process quoted CR
  2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
                   ` (2 preceding siblings ...)
  2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
@ 2021-05-06 15:02 ` Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
                     ` (6 more replies)
       [not found] ` <cover.1620309355.git.congdanhqx@gmail.com>
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
  5 siblings, 7 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of those servers decide to change said LF to
CRLF.

Some mailing list software, when it receives an 8-bit email message,
decides to encode such a message in base64 or quoted-printable.

This series tries to help users of such software deal with such patches.

Changes from v2:
* Change preparatory step (in mailinfo) to use parse_options
* move "have_quoted_cr" to "struct mailinfo", thus the patch to warn about
  quoted CR is less weird.
* Change "mailinfo_parse_quoted_cr_action" to follow Git's do-some-work
  function convention: return 0 on success and negative on failure,
  eliminating the usage of _invalid_action
* Better error messages if invalid action was given.
* completion support for git am --quoted-cr
* Some style changes


Đoàn Trần Công Danh (6):
  mailinfo: load default metainfo_charset lazily
  mailinfo: stop parsing options manually
  mailinfo: warn if CR found in decoded base64/QP email
  mailinfo: allow squelching quoted CR warning
  mailinfo: allow stripping quoted CR without warning
  am: learn to process quoted lines that ends with CRLF

 Documentation/git-am.txt               |   4 +
 Documentation/git-mailinfo.txt         |  21 ++++-
 builtin/am.c                           |  51 +++++++++++
 builtin/mailinfo.c                     | 115 ++++++++++++++++++-------
 contrib/completion/git-completion.bash |   5 ++
 mailinfo.c                             |  39 +++++++++
 mailinfo.h                             |  10 +++
 t/t4258-am-quoted-cr.sh                |  37 ++++++++
 t/t4258/mbox                           |  12 +++
 t/t5100-mailinfo.sh                    |  21 +++++
 t/t5100/quoted-cr-info                 |   5 ++
 t/t5100/quoted-cr-msg                  |   2 +
 t/t5100/quoted-cr-patch                |  22 +++++
 t/t5100/quoted-cr.mbox                 |  22 +++++
 14 files changed, 332 insertions(+), 34 deletions(-)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

Range-diff against v2:
1:  5b1cab5f95 < -:  ---------- mailinfo: avoid magic number in option parsing
-:  ---------- > 1:  fac95392df mailinfo: load default metainfo_charset lazily
-:  ---------- > 2:  1fb08bb37d mailinfo: stop parsing options manually
2:  534183c458 ! 3:  5aac2ba38e mailinfo: warn if CR found in base64/quoted-printable email
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    mailinfo: warn if CR found in base64/quoted-printable email
    +    mailinfo: warn if CR found in decoded base64/QP email
     
    -    When an SMTP server receives an 8-bit email message, possibly with only
    -    LF as line ending, some of those servers decide to change said LF to
    -    CRLF.
    +    When SMTP servers receive 8-bit email messages, possibly with only
    +    LF as line ending, some of them decide to change said LF to CRLF.
     
    -    Some mailing list softwares, when receives an 8-bit email message,
    -    decide to encoding such message in base64 or quoted-printable.
    +    Some mailing list softwares, when receive 8-bit email messages,
    +    decide to encode those messages in base64 or quoted-printable.
     
         If an email is transfered through above mail servers, then distributed
         by such mailing list softwares, the recipients will receive an email
         contains a patch mungled with CRLF encoded inside another encoding.
    -    Thus, such CR couldn't be dropped by mailsplit. Hence, the mailed patch
    -    couldn't be applied cleanly. Such accidents have been observed in the wild [1].
     
    -    Let's give our users some warnings if such CR is found.
    +    Thus, such CR couldn't be dropped by "mailsplit".
    +    Hence, the mailed patch couldn't be applied cleanly.
    +    Such accidents have been observed in the wild [1].
    +
    +    Instead of silently rejecting those messages, let's give our users
    +    some warnings if such CR is found.
     
         [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi
     
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## mailinfo.c ##
    -@@ mailinfo.c: static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
    - }
    - 
    - static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
    --				 struct strbuf *prev)
    -+				 struct strbuf *prev, int *have_quoted_cr)
    - {
    - 	size_t len = line->len;
    +@@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
      	const char *rest;
      
      	if (!mi->format_flowed) {
     +		if (len >= 2 &&
     +		    line->buf[len - 2] == '\r' &&
     +		    line->buf[len - 1] == '\n') {
    -+			*have_quoted_cr = 1;
    ++			mi->have_quoted_cr = 1;
     +		}
      		handle_filter(mi, line);
      		return;
    @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf
      	handle_filter(mi, line);
      }
      
    -+static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
    ++static void summarize_quoted_cr(struct mailinfo *mi)
     +{
    -+	if (have_quoted_cr)
    ++	if (mi->have_quoted_cr)
     +		warning("quoted CR detected");
     +}
     +
      static void handle_body(struct mailinfo *mi, struct strbuf *line)
      {
      	struct strbuf prev = STRBUF_INIT;
    -+	int have_quoted_cr = 0;
    - 
    - 	/* Skip up to the first boundary */
    - 	if (*(mi->content_top)) {
     @@ mailinfo.c: static void handle_body(struct mailinfo *mi, struct strbuf *line)
      				handle_filter(mi, &prev);
      				strbuf_reset(&prev);
      			}
    -+			summarize_quoted_cr(mi, have_quoted_cr);
    -+			have_quoted_cr = 0;
    ++			summarize_quoted_cr(mi);
    ++			mi->have_quoted_cr = 0;
      			if (!handle_boundary(mi, line))
      				goto handle_body_out;
      		}
     @@ mailinfo.c: static void handle_body(struct mailinfo *mi, struct strbuf *line)
    - 						strbuf_addbuf(&prev, sb);
    - 						break;
    - 					}
    --				handle_filter_flowed(mi, sb, &prev);
    -+				handle_filter_flowed(mi, sb, &prev, &have_quoted_cr);
    - 			}
    - 			/*
    - 			 * The partial chunk is saved in "prev" and will be
    -@@ mailinfo.c: static void handle_body(struct mailinfo *mi, struct strbuf *line)
    - 			break;
    - 		}
    - 		default:
    --			handle_filter_flowed(mi, line, &prev);
    -+			handle_filter_flowed(mi, line, &prev, &have_quoted_cr);
    - 		}
    - 
    - 		if (mi->input_error)
    -@@ mailinfo.c: static void handle_body(struct mailinfo *mi, struct strbuf *line)
      
      	if (prev.len)
      		handle_filter(mi, &prev);
    -+	summarize_quoted_cr(mi, have_quoted_cr);
    ++	summarize_quoted_cr(mi);
      
      	flush_inbody_header_accum(mi);
      
     
    + ## mailinfo.h ##
    +@@ mailinfo.h: struct mailinfo {
    + 	struct strbuf charset;
    + 	unsigned int format_flowed:1;
    + 	unsigned int delsp:1;
    ++	unsigned int have_quoted_cr:1;
    + 	char *message_id;
    + 	enum  {
    + 		TE_DONTCARE, TE_QP, TE_BASE64
    +
      ## t/t5100-mailinfo.sh ##
     @@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo handles unusual header whitespace' '
      	test_cmp expect actual
      '
      
    -+check_quoted_cr_mail() {
    ++check_quoted_cr_mail () {
     +	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
     +		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
     +	test_cmp "expect-cr-msg" quoted-cr-msg &&
3:  0d115821c3 ! 4:  d5b2da370d mailinfo: skip quoted CR on user's wish
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    mailinfo: skip quoted CR on user's wish
    +    mailinfo: allow squelching quoted CR warning
     
    -    In previous change, we've turned on warning for quoted CR in base64
    -    encoded email. Despite those warnings are usually helpful for our users,
    -    they may expect quoted CR in their emails.
    +    In previous change, Git starts to warn for quoted CR in decoded
    +    base64/QP email. Despite those warnings are usually helpful,
    +    quoted CR could be part of some users' workflow.
     
         Let's give them an option to turn off the warning completely.
     
    @@ Documentation/git-mailinfo.txt: git-mailinfo - Extracts patch and authorship fro
      --------
      [verse]
     -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
    -+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>
    ++'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n]
    ++	       [--[no-]scissors] [--quoted-cr=<action>]
    ++	       <msg> <patch>
      
      
      DESCRIPTION
    @@ Documentation/git-mailinfo.txt: This can be enabled by default with the configur
      
     +--quoted-cr=<action>::
     +	Action when processes email messages sent with base64 or
    -+	quoted-printable encoding, and the decoded lines end with CR-LF
    ++	quoted-printable encoding, and the decoded lines end with a CRLF
     +	instead of a simple LF.
     ++
     +The valid actions are:
     ++
     +--
    -+*	`nowarn`: Git will do nothing with this action.
    -+*	`warn`: Git will issue a warning for each message if such CR-LF is
    ++*	`nowarn`: Git will do nothing when such a CRLF is found.
    ++*	`warn`: Git will issue a warning for each message if such a CRLF is
     +	found.
     +--
     ++
    @@ Documentation/git-mailinfo.txt: This can be enabled by default with the configur
      	except the title line which comes from e-mail Subject.
     
      ## builtin/mailinfo.c ##
    -@@
    - #include "mailinfo.h"
    - 
    - static const char mailinfo_usage[] =
    --	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
    -+	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";
    +@@ builtin/mailinfo.c: static int parse_opt_explicit_encoding(const struct option *opt,
    + 	return 0;
    + }
      
    ++static int parse_opt_quoted_cr(const struct option *opt, const char *arg, int unset)
    ++{
    ++	BUG_ON_OPT_NEG(unset);
    ++
    ++	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
    ++		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
    ++	return 0;
    ++}
    ++
      int cmd_mailinfo(int argc, const char **argv, const char *prefix)
      {
    + 	struct metainfo_charset meta_charset;
     @@ builtin/mailinfo.c: int cmd_mailinfo(int argc, const char **argv, const char *prefix)
    - 			mi.use_scissors = 0;
    - 		else if (!strcmp(argv[1], "--no-inbody-headers"))
    - 			mi.use_inbody_headers = 0;
    --		else
    -+		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
    -+			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
    -+			if (mi.quoted_cr == quoted_cr_invalid_action)
    -+				usage(mailinfo_usage);
    -+		} else
    - 			usage(mailinfo_usage);
    - 		argc--; argv++;
    - 	}
    + 			       N_("re-code metadata to this encoding"),
    + 			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
    + 		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
    ++		OPT_CALLBACK_F(0, "quoted-cr", &mi.quoted_cr, N_("<action>"),
    ++			       N_("action when quoted CR is found"),
    ++			       PARSE_OPT_NONEG, parse_opt_quoted_cr),
    + 		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
    + 			 N_("use headers in message's body")),
    + 		OPT_END()
     
      ## mailinfo.c ##
     @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
      
    - static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
    + static void summarize_quoted_cr(struct mailinfo *mi)
      {
    --	if (have_quoted_cr)
    -+	if (have_quoted_cr
    -+	    && mi->quoted_cr == quoted_cr_warn)
    +-	if (mi->have_quoted_cr)
    ++	if (mi->have_quoted_cr &&
    ++	    mi->quoted_cr == quoted_cr_warn)
      		warning("quoted CR detected");
      }
      
    @@ mailinfo.c: int mailinfo(struct mailinfo *mi, const char *msg, const char *patch
      	return mi->input_error;
      }
      
    -+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
    ++int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
     +{
    -+	if (!strcmp(action, "nowarn"))
    -+		return quoted_cr_nowarn;
    -+	else if (!strcmp(action, "warn"))
    -+		return quoted_cr_warn;
    -+	return quoted_cr_invalid_action;
    ++	if (!strcmp(actionstr, "nowarn"))
    ++		*action = quoted_cr_nowarn;
    ++	else if (!strcmp(actionstr, "warn"))
    ++		*action = quoted_cr_warn;
    ++	else
    ++		return -1;
    ++	return 0;
     +}
     +
      static int git_mailinfo_config(const char *var, const char *value, void *mi_)
      {
      	struct mailinfo *mi = mi_;
    -+	const char *str;
    - 
    - 	if (!starts_with(var, "mailinfo."))
    - 		return git_default_config(var, value, NULL);
     @@ mailinfo.c: static int git_mailinfo_config(const char *var, const char *value, void *mi_)
      		mi->use_scissors = git_config_bool(var, value);
      		return 0;
      	}
     +	if (!strcmp(var, "mailinfo.quotedcr")) {
    -+		git_config_string(&str, var, value);
    -+		mi->quoted_cr = mailinfo_parse_quoted_cr_action(str);
    -+		if (mi->quoted_cr == quoted_cr_invalid_action)
    -+			die(_("bad action '%s' for '%s'"), str, var);
    -+		free((void *)str);
    ++		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
    ++			return error(_("bad action '%s' for '%s'"), value, var);
    ++		return 0;
     +	}
      	/* perhaps others here */
      	return 0;
    @@ mailinfo.h
     +enum quoted_cr_action {
     +	quoted_cr_nowarn,
     +	quoted_cr_warn,
    -+	quoted_cr_invalid_action
     +};
     +
      struct mailinfo {
    @@ mailinfo.h: struct mailinfo {
      	struct strbuf email;
      	int keep_subject;
      	int keep_non_patch_brackets_in_subject;
    -+	enum quoted_cr_action quoted_cr;
    ++	int quoted_cr; /* enum quoted_cr_action */
      	int add_message_id;
      	int use_scissors;
      	int use_inbody_headers;
    @@ mailinfo.h: struct mailinfo {
      	int input_error;
      };
      
    -+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action);
    ++int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action);
      void setup_mailinfo(struct mailinfo *);
      int mailinfo(struct mailinfo *, const char *msg, const char *patch);
      void clear_mailinfo(struct mailinfo *);
     
      ## t/t5100-mailinfo.sh ##
    -@@ t/t5100-mailinfo.sh: check_quoted_cr_mail() {
    +@@ t/t5100-mailinfo.sh: check_quoted_cr_mail () {
      	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
      }
      
4:  d48733805f ! 5:  9e96d4bf5e mailinfo: strip quoted CR on users' wish
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    mailinfo: strip quoted CR on users' wish
    +    mailinfo: allow stripping quoted CR without warning
     
         In previous changes, we've turned on warning for quoted CR in base64 or
    -    quoted-printable email messages. Some projects sees those quoted CR a lot
    -    and they know that it happens most of the time.
    +    quoted-printable email messages. Some projects see those quoted CR a lot,
    +    they know that it happens most of the time, and they find it's desirable
    +    to always strip those CR.
     
         Those projects in question usually fall back to use other tools to handle
    -    patches when receiving such patches.
    +    patches when receive such patches.
     
         Let's help those projects handle those patches by stripping those
    -    excessive CR-s.
    +    excessive CR.
     
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Documentation/git-mailinfo.txt ##
     @@ Documentation/git-mailinfo.txt: The valid actions are:
    - *	`nowarn`: Git will do nothing with this action.
    - *	`warn`: Git will issue a warning for each message if such CR-LF is
    + *	`nowarn`: Git will do nothing when such a CRLF is found.
    + *	`warn`: Git will issue a warning for each message if such a CRLF is
      	found.
    -+*	`strip`: Git will convert those CR-LF to LF.
    ++*	`strip`: Git will convert those CRLF to LF.
      --
      +
      The default action could be set by configuration option `mailinfo.quotedCR`.
    @@ mailinfo.c
     @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
      		    line->buf[len - 2] == '\r' &&
      		    line->buf[len - 1] == '\n') {
    - 			*have_quoted_cr = 1;
    + 			mi->have_quoted_cr = 1;
     +			if (mi->quoted_cr == quoted_cr_strip) {
     +				strbuf_setlen(line, len - 2);
     +				strbuf_addch(line, '\n');
    @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf
      		}
      		handle_filter(mi, line);
      		return;
    -@@ mailinfo.c: enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
    - 		return quoted_cr_nowarn;
    - 	else if (!strcmp(action, "warn"))
    - 		return quoted_cr_warn;
    -+	else if (!strcmp(action, "strip"))
    -+		return quoted_cr_strip;
    - 	return quoted_cr_invalid_action;
    - }
    - 
    +@@ mailinfo.c: int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
    + 		*action = quoted_cr_nowarn;
    + 	else if (!strcmp(actionstr, "warn"))
    + 		*action = quoted_cr_warn;
    ++	else if (!strcmp(actionstr, "strip"))
    ++		*action = quoted_cr_strip;
    + 	else
    + 		return -1;
    + 	return 0;
     
      ## mailinfo.h ##
     @@
    @@ mailinfo.h
      	quoted_cr_nowarn,
      	quoted_cr_warn,
     +	quoted_cr_strip,
    - 	quoted_cr_invalid_action
      };
      
    + struct mailinfo {
     
      ## t/t5100-mailinfo.sh ##
     @@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo handle CR in base64 encoded email' '
5:  95e309cc8b ! 6:  d6aa12acc0 am: learn to process quoted lines that ends with CRLF
    @@ Commit message
         am: learn to process quoted lines that ends with CRLF
     
         In previous changes, mailinfo has learnt to process lines that decoded
    -    from base64 or quoted-printable and ends with CRLF.
    +    from base64 or quoted-printable, and ends with CRLF.
     
    -    Let's teach "am" that new option, too.
    +    Let's teach "am" that new trick, too.
     
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
    @@ builtin/am.c: static void am_state_release(struct am_state *state)
     +static int am_option_parse_quoted_cr(const struct option *opt,
     +				     const char *arg, int unset)
     +{
    -+	int *quoted_cr = opt->value;
    -+
     +	BUG_ON_OPT_NEG(unset);
     +
    -+	*quoted_cr = mailinfo_parse_quoted_cr_action(arg);
    -+	if (*quoted_cr == quoted_cr_invalid_action)
    -+		return -1;
    ++	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
    ++		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
     +	return 0;
     +}
     +
    @@ builtin/am.c: static void am_load(struct am_state *state)
     +	read_state_file(&sb, state, "quoted-cr", 1);
     +	if (!*sb.buf)
     +		state->quoted_cr = quoted_cr_unset;
    -+	else
    -+		state->quoted_cr = mailinfo_parse_quoted_cr_action(sb.buf);
    -+	if (state->quoted_cr == quoted_cr_invalid_action)
    ++	else if (mailinfo_parse_quoted_cr_action(sb.buf, &state->quoted_cr) != 0)
     +		die(_("could not parse %s"), am_path(state, "quoted-cr"));
     +
      	read_state_file(&sb, state, "apply-opt", 1);
    @@ builtin/am.c: int cmd_am(int argc, const char **argv, const char *prefix)
      			N_("pass it through git-apply"),
      			0),
     
    + ## contrib/completion/git-completion.bash ##
    +@@ contrib/completion/git-completion.bash: __git_whitespacelist="nowarn warn error error-all fix"
    + __git_patchformat="mbox stgit stgit-series hg mboxrd"
    + __git_showcurrentpatch="diff raw"
    + __git_am_inprogress_options="--skip --continue --resolved --abort --quit --show-current-patch"
    ++__git_quoted_cr="nowarn warn strip"
    + 
    + _git_am ()
    + {
    +@@ contrib/completion/git-completion.bash: _git_am ()
    + 		__gitcomp "$__git_showcurrentpatch" "" "${cur##--show-current-patch=}"
    + 		return
    + 		;;
    ++	--quoted-cr=*)
    ++		__gitcomp "$__git_quoted_cr" "" "${cur##--quoted-cr=}"
    ++		return
    ++		;;
    + 	--*)
    + 		__gitcomp_builtin am "" \
    + 			"$__git_am_inprogress_options"
    +
      ## mailinfo.h ##
     @@
      #define MAX_BOUNDARIES 5
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In a later change, we will use parse_options to parse mailinfo's options.
In mailinfo, "-u", "-n", and "--encoding" all try to set the same
field, with "-u" resetting that field to some default value from the
configuration variable "i18n.commitEncoding".

Let's delay the setting of that field until we finish processing all
options. By doing that, "i18n.commitEncoding" can be parsed on demand.
More importantly, it clears the way for using parse_options.

This change introduces some inconsistent braces "{}" in the "if/else if"
construct; however, we will rewrite them in the next few changes.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index cfb667a594..77f96177cc 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -11,17 +11,25 @@
 static const char mailinfo_usage[] =
 	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
 
+struct metainfo_charset
+{
+	enum {
+		CHARSET_DEFAULT,
+		CHARSET_NO_REENCODE,
+		CHARSET_EXPLICIT,
+	} policy;
+	const char *charset;
+};
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
-	const char *def_charset;
+	struct metainfo_charset meta_charset;
 	struct mailinfo mi;
 	int status;
 	char *msgfile, *patchfile;
 
 	setup_mailinfo(&mi);
-
-	def_charset = get_commit_output_encoding();
-	mi.metainfo_charset = def_charset;
+	meta_charset.policy = CHARSET_DEFAULT;
 
 	while (1 < argc && argv[1][0] == '-') {
 		if (!strcmp(argv[1], "-k"))
@@ -31,12 +39,13 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
 			mi.add_message_id = 1;
 		else if (!strcmp(argv[1], "-u"))
-			mi.metainfo_charset = def_charset;
+			meta_charset.policy = CHARSET_DEFAULT;
 		else if (!strcmp(argv[1], "-n"))
-			mi.metainfo_charset = NULL;
-		else if (starts_with(argv[1], "--encoding="))
-			mi.metainfo_charset = argv[1] + 11;
-		else if (!strcmp(argv[1], "--scissors"))
+			meta_charset.policy = CHARSET_NO_REENCODE;
+		else if (starts_with(argv[1], "--encoding=")) {
+			meta_charset.policy = CHARSET_EXPLICIT;
+			meta_charset.charset = argv[1] + 11;
+		} else if (!strcmp(argv[1], "--scissors"))
 			mi.use_scissors = 1;
 		else if (!strcmp(argv[1], "--no-scissors"))
 			mi.use_scissors = 0;
@@ -50,6 +59,19 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	if (argc != 3)
 		usage(mailinfo_usage);
 
+	switch (meta_charset.policy) {
+	case CHARSET_DEFAULT:
+		mi.metainfo_charset = get_commit_output_encoding();
+		break;
+	case CHARSET_NO_REENCODE:
+		mi.metainfo_charset = NULL;
+		break;
+	case CHARSET_EXPLICIT:
+		break;
+	default:
+		BUG("invalid meta_charset.policy");
+	}
+
 	mi.input = stdin;
 	mi.output = stdout;
 
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 2/6] mailinfo: stop parse options manually
       [not found] ` <cover.1620309355.git.congdanhqx@gmail.com>
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-06 15:19     ` Đoàn Trần Công Danh
  0 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In a later change, mailinfo will learn more options. Let's switch to our
robust parse_options framework before that step.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 87 +++++++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 71e74bcdcb..a14232a437 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -7,20 +7,37 @@
 #include "utf8.h"
 #include "strbuf.h"
 #include "mailinfo.h"
+#include "parse-options.h"
 
-static const char mailinfo_usage[] =
-	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
+static const char * const mailinfo_usage[] = {
+	/* TRANSLATORS: keep <> in "<" mail ">" info. */
+	N_("git mailinfo [<options>] <msg> <patch> < mail >info"),
+	NULL,
+};
 
 struct metainfo_charset
 {
 	enum {
 		CHARSET_DEFAULT,
-		CHARSET_NONE,
+		CHARSET_NO_REENCODE,
 		CHARSET_EXPLICIT,
-	} from;
+	} origin;
 	const char *charset;
 };
 
+static int parse_opt_explicit_encoding(const struct option *opt,
+				       const char *arg, int unset)
+{
+	struct metainfo_charset *meta_charset = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+
+	meta_charset->origin = CHARSET_EXPLICIT;
+	meta_charset->charset = arg;
+
+	return 0;
+}
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
 	struct metainfo_charset meta_charset;
@@ -28,42 +45,40 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	int status;
 	char *msgfile, *patchfile;
 
+	struct option options[] = {
+		OPT_BOOL('k', NULL, &mi.keep_subject, N_("keep subject")),
+		OPT_BOOL('b', NULL, &mi.keep_non_patch_brackets_in_subject,
+			 N_("keep non patch brackets in subject")),
+		OPT_BOOL('m', "message-id", &mi.add_message_id,
+			 N_("copy Message-ID to the end of commit message")),
+		OPT_SET_INT_F('u', NULL, &meta_charset.origin,
+			      N_("re-code metadata to i18n.commitEncoding"),
+			      CHARSET_DEFAULT, PARSE_OPT_NONEG),
+		OPT_SET_INT_F('n', NULL, &meta_charset.origin,
+			      N_("disable charset re-coding of metadata"),
+			      CHARSET_NO_REENCODE, PARSE_OPT_NONEG),
+		OPT_CALLBACK_F(0, "encoding", &meta_charset, N_("encoding"),
+			       N_("re-code metadata to this encoding"),
+			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
+		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
+		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
+			 N_("use headers in message's body")),
+		OPT_END()
+	};
+
 	setup_mailinfo(&mi);
-	meta_charset.from = CHARSET_DEFAULT;
-
-	while (1 < argc && argv[1][0] == '-') {
-		if (!strcmp(argv[1], "-k"))
-			mi.keep_subject = 1;
-		else if (!strcmp(argv[1], "-b"))
-			mi.keep_non_patch_brackets_in_subject = 1;
-		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
-			mi.add_message_id = 1;
-		else if (!strcmp(argv[1], "-u"))
-			meta_charset.from = CHARSET_DEFAULT;
-		else if (!strcmp(argv[1], "-n"))
-			meta_charset.from = CHARSET_NONE;
-		else if (starts_with(argv[1], "--encoding=")) {
-			meta_charset.from = CHARSET_EXPLICIT;
-			meta_charset.charset = argv[1] + 11;
-		} else if (!strcmp(argv[1], "--scissors"))
-			mi.use_scissors = 1;
-		else if (!strcmp(argv[1], "--no-scissors"))
-			mi.use_scissors = 0;
-		else if (!strcmp(argv[1], "--no-inbody-headers"))
-			mi.use_inbody_headers = 0;
-		else
-			usage(mailinfo_usage);
-		argc--; argv++;
-	}
+	meta_charset.origin = CHARSET_DEFAULT;
+
+	argc = parse_options(argc, argv, prefix, options, mailinfo_usage, 0);
 
-	if (argc != 3)
-		usage(mailinfo_usage);
+	if (argc != 2)
+		usage_with_options(mailinfo_usage, options);
 
-	switch (meta_charset.from) {
+	switch (meta_charset.origin) {
 	case CHARSET_DEFAULT:
 		mi.metainfo_charset = get_commit_output_encoding();
 		break;
-	case CHARSET_NONE:
+	case CHARSET_NO_REENCODE:
 		mi.metainfo_charset = NULL;
 		break;
 	case CHARSET_EXPLICIT:
@@ -75,8 +90,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	mi.input = stdin;
 	mi.output = stdout;
 
-	msgfile = prefix_filename(prefix, argv[1]);
-	patchfile = prefix_filename(prefix, argv[2]);
+	msgfile = prefix_filename(prefix, argv[0]);
+	patchfile = prefix_filename(prefix, argv[1]);
 
 	status = !!mailinfo(&mi, msgfile, patchfile);
 	clear_mailinfo(&mi);
-- 
2.31.1.500.gbc6bbdd36b


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 2/6] mailinfo: stop parsing options manually
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-08 10:44     ` Junio C Hamano
  2021-05-06 15:02   ` [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email Đoàn Trần Công Danh
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In a later change, mailinfo will learn more options. Let's switch to our
robust parse_options framework before that step.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 75 +++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 77f96177cc..f55549a097 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -7,9 +7,13 @@
 #include "utf8.h"
 #include "strbuf.h"
 #include "mailinfo.h"
+#include "parse-options.h"
 
-static const char mailinfo_usage[] =
-	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
+static const char * const mailinfo_usage[] = {
+	/* TRANSLATORS: keep <> in "<" mail ">" info. */
+	N_("git mailinfo [<options>] <msg> <patch> < mail >info"),
+	NULL,
+};
 
 struct metainfo_charset
 {
@@ -21,6 +25,19 @@ struct metainfo_charset
 	const char *charset;
 };
 
+static int parse_opt_explicit_encoding(const struct option *opt,
+				       const char *arg, int unset)
+{
+	struct metainfo_charset *meta_charset = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+
+	meta_charset->policy = CHARSET_EXPLICIT;
+	meta_charset->charset = arg;
+
+	return 0;
+}
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
 	struct metainfo_charset meta_charset;
@@ -28,36 +45,34 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	int status;
 	char *msgfile, *patchfile;
 
+	struct option options[] = {
+		OPT_BOOL('k', NULL, &mi.keep_subject, N_("keep subject")),
+		OPT_BOOL('b', NULL, &mi.keep_non_patch_brackets_in_subject,
+			 N_("keep non patch brackets in subject")),
+		OPT_BOOL('m', "message-id", &mi.add_message_id,
+			 N_("copy Message-ID to the end of commit message")),
+		OPT_SET_INT_F('u', NULL, &meta_charset.policy,
+			      N_("re-code metadata to i18n.commitEncoding"),
+			      CHARSET_DEFAULT, PARSE_OPT_NONEG),
+		OPT_SET_INT_F('n', NULL, &meta_charset.policy,
+			      N_("disable charset re-coding of metadata"),
+			      CHARSET_NO_REENCODE, PARSE_OPT_NONEG),
+		OPT_CALLBACK_F(0, "encoding", &meta_charset, N_("encoding"),
+			       N_("re-code metadata to this encoding"),
+			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
+		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
+		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
+			 N_("use headers in message's body")),
+		OPT_END()
+	};
+
 	setup_mailinfo(&mi);
 	meta_charset.policy = CHARSET_DEFAULT;
 
-	while (1 < argc && argv[1][0] == '-') {
-		if (!strcmp(argv[1], "-k"))
-			mi.keep_subject = 1;
-		else if (!strcmp(argv[1], "-b"))
-			mi.keep_non_patch_brackets_in_subject = 1;
-		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
-			mi.add_message_id = 1;
-		else if (!strcmp(argv[1], "-u"))
-			meta_charset.policy = CHARSET_DEFAULT;
-		else if (!strcmp(argv[1], "-n"))
-			meta_charset.policy = CHARSET_NO_REENCODE;
-		else if (starts_with(argv[1], "--encoding=")) {
-			meta_charset.policy = CHARSET_EXPLICIT;
-			meta_charset.charset = argv[1] + 11;
-		} else if (!strcmp(argv[1], "--scissors"))
-			mi.use_scissors = 1;
-		else if (!strcmp(argv[1], "--no-scissors"))
-			mi.use_scissors = 0;
-		else if (!strcmp(argv[1], "--no-inbody-headers"))
-			mi.use_inbody_headers = 0;
-		else
-			usage(mailinfo_usage);
-		argc--; argv++;
-	}
+	argc = parse_options(argc, argv, prefix, options, mailinfo_usage, 0);
 
-	if (argc != 3)
-		usage(mailinfo_usage);
+	if (argc != 2)
+		usage_with_options(mailinfo_usage, options);
 
 	switch (meta_charset.policy) {
 	case CHARSET_DEFAULT:
@@ -75,8 +90,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	mi.input = stdin;
 	mi.output = stdout;
 
-	msgfile = prefix_filename(prefix, argv[1]);
-	patchfile = prefix_filename(prefix, argv[2]);
+	msgfile = prefix_filename(prefix, argv[0]);
+	patchfile = prefix_filename(prefix, argv[1]);
 
 	status = !!mailinfo(&mi, msgfile, patchfile);
 	clear_mailinfo(&mi);
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-08 10:52     ` Junio C Hamano
  2021-05-06 15:02   ` [PATCH v3 4/6] mailinfo: allow squelching quoted CR warning Đoàn Trần Công Danh
                     ` (3 subsequent siblings)
  6 siblings, 1 reply; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of them decide to change said LF to CRLF.

Some mailing list software, upon receiving 8-bit email messages,
decides to encode those messages in base64 or quoted-printable.

If an email is transferred through the above mail servers, then distributed
by such mailing list software, the recipients will receive an email that
contains a patch mangled with CRLF encoded inside another encoding.

Thus, such CR couldn't be dropped by "mailsplit".
Hence, the mailed patch couldn't be applied cleanly.
Such accidents have been observed in the wild [1].

Instead of silently rejecting those messages, let's give our users
some warnings if such CR is found.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c              | 14 ++++++++++++++
 mailinfo.h              |  1 +
 t/t5100-mailinfo.sh     | 15 +++++++++++++++
 t/t5100/quoted-cr-info  |  5 +++++
 t/t5100/quoted-cr-msg   |  2 ++
 t/t5100/quoted-cr-patch | 22 ++++++++++++++++++++++
 t/t5100/quoted-cr.mbox  | 22 ++++++++++++++++++++++
 7 files changed, 81 insertions(+)
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..dcf579700d 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -994,6 +994,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			mi->have_quoted_cr = 1;
+		}
 		handle_filter(mi, line);
 		return;
 	}
@@ -1033,6 +1038,12 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	handle_filter(mi, line);
 }
 
+static void summarize_quoted_cr(struct mailinfo *mi)
+{
+	if (mi->have_quoted_cr)
+		warning("quoted CR detected");
+}
+
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
@@ -1051,6 +1062,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 				handle_filter(mi, &prev);
 				strbuf_reset(&prev);
 			}
+			summarize_quoted_cr(mi);
+			mi->have_quoted_cr = 0;
 			if (!handle_boundary(mi, line))
 				goto handle_body_out;
 		}
@@ -1100,6 +1113,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 
 	if (prev.len)
 		handle_filter(mi, &prev);
+	summarize_quoted_cr(mi);
 
 	flush_inbody_header_accum(mi);
 
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..b394ef9bce 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -24,6 +24,7 @@ struct mailinfo {
 	struct strbuf charset;
 	unsigned int format_flowed:1;
 	unsigned int delsp:1;
+	unsigned int have_quoted_cr:1;
 	char *message_id;
 	enum  {
 		TE_DONTCARE, TE_QP, TE_BASE64
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..c7ea1b30df 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,19 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+check_quoted_cr_mail () {
+	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
+		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
+	test_cmp "expect-cr-msg" quoted-cr-msg &&
+	test_cmp "expect-cr-patch" quoted-cr-patch &&
+	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
+}
+
+test_expect_success 'mailinfo warn CR in base64 encoded email' '
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
+	check_quoted_cr_mail &&
+	grep "quoted CR detected" quoted-cr-err
+'
+
 test_done
diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/quoted-cr-info
@@ -0,0 +1,5 @@
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg
new file mode 100644
index 0000000000..a148bc7e26
--- /dev/null
+++ b/t/t5100/quoted-cr-msg
@@ -0,0 +1,2 @@
+On different distro, pytest is suffixed with different patterns.%%
+%%
diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch
new file mode 100644
index 0000000000..580e2bddb8
--- /dev/null
+++ b/t/t5100/quoted-cr-patch
@@ -0,0 +1,22 @@
+---%%
+ configure | 2 +-%%
+ 1 file changed, 1 insertion(+), 1 deletion(-)%%
+%%
+diff --git a/configure b/configure%%
+index db3538b3..f7c1c095 100755%%
+--- a/configure%%
++++ b/configure%%
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
+     printf "Checking for python3 pytest (>= 3.0)... "%%
+     conf=$(mktemp)%%
+     printf "[pytest]\nminversion=3.0\n" > $conf%%
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%%
+         printf "Yes.\n"%%
+         have_python3_pytest=1%%
+     else%%
+-- %%
+2.28.0%%
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox
new file mode 100644
index 0000000000..6ea9806a6b
--- /dev/null
+++ b/t/t5100/quoted-cr.mbox
@@ -0,0 +1,22 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgcHl0ZXN0IGlzIHN1ZmZpeGVkIHdpdGggZGlmZmVyZW50IHBh
+dHRlcm5zLg0KDQotLS0NCiBjb25maWd1cmUgfCAyICstDQogMSBmaWxlIGNoYW5nZWQsIDEgaW5z
+ZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS9jb25maWd1cmUgYi9jb25m
+aWd1cmUNCmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUNCi0tLSBhL2NvbmZpZ3VyZQ0K
+KysrIGIvY29uZmlndXJlDQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAt
+ZXEgMSBdOyB0aGVuDQogICAgIHByaW50ZiAiQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+
+PSAzLjApLi4uICINCiAgICAgY29uZj0kKG1rdGVtcCkNCiAgICAgcHJpbnRmICJbcHl0ZXN0XVxu
+bWludmVyc2lvbj0zLjBcbiIgPiAkY29uZg0KLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQorICAgIGlmICIkcHl0aG9uIiAtbSBweXRlc3Qg
+LWMgJGNvbmYgLS12ZXJzaW9uID4vZGV2L251bGwgMj4mMTsgdGhlbg0KICAgICAgICAgcHJpbnRm
+ICJZZXMuXG4iDQogICAgICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTENCiAgICAgZWxzZQ0KLS0g
+DQoyLjI4LjANCl9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+CmV4YW1wbGUgbWFpbGluZyBsaXN0IC0tIGxpc3RAZXhhbXBsZS5vcmcKVG8gdW5zdWJzY3JpYmUg
+c2VuZCBhbiBlbWFpbCB0byBsaXN0LWxlYXZlQGV4YW1wbGUub3JnCg==
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 4/6] mailinfo: allow squelching quoted CR warning
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
                     ` (2 preceding siblings ...)
  2021-05-06 15:02   ` [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In the previous change, Git started to warn about quoted CR in decoded
base64/QP email. Although those warnings are usually helpful,
quoted CR could be part of some users' workflow.

Let's give them an option to turn off the warning completely.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 20 +++++++++++++++++++-
 builtin/mailinfo.c             | 12 ++++++++++++
 mailinfo.c                     | 20 +++++++++++++++++++-
 mailinfo.h                     |  7 +++++++
 t/t5100-mailinfo.sh            |  6 ++++--
 5 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index d343f040f5..824947a070 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
 SYNOPSIS
 --------
 [verse]
-'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n]
+	       [--[no-]scissors] [--quoted-cr=<action>]
+	       <msg> <patch>
 
 
 DESCRIPTION
@@ -89,6 +91,22 @@ This can be enabled by default with the configuration option mailinfo.scissors.
 --no-scissors::
 	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
 
+--quoted-cr=<action>::
+	Action to take when processing email messages sent with base64 or
+	quoted-printable encoding, and the decoded lines end with a CRLF
+	instead of a simple LF.
++
+The valid actions are:
++
+--
+*	`nowarn`: Git will do nothing when such a CRLF is found.
+*	`warn`: Git will issue a warning for each message if such a CRLF is
+	found.
+--
++
+The default action could be set by configuration option `mailinfo.quotedCR`.
+If no such configuration option has been set, `warn` will be used.
+
 <msg>::
 	The commit log message extracted from e-mail, usually
 	except the title line which comes from e-mail Subject.
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index f55549a097..01d16ef9e5 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -38,6 +38,15 @@ static int parse_opt_explicit_encoding(const struct option *opt,
 	return 0;
 }
 
+static int parse_opt_quoted_cr(const struct option *opt, const char *arg, int unset)
+{
+	BUG_ON_OPT_NEG(unset);
+
+	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
+		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
+	return 0;
+}
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
 	struct metainfo_charset meta_charset;
@@ -61,6 +70,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			       N_("re-code metadata to this encoding"),
 			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
 		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
+		OPT_CALLBACK_F(0, "quoted-cr", &mi.quoted_cr, N_("<action>"),
+			       N_("action when quoted CR is found"),
+			       PARSE_OPT_NONEG, parse_opt_quoted_cr),
 		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
 			 N_("use headers in message's body")),
 		OPT_END()
diff --git a/mailinfo.c b/mailinfo.c
index dcf579700d..c57eed25a5 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 
 static void summarize_quoted_cr(struct mailinfo *mi)
 {
-	if (mi->have_quoted_cr)
+	if (mi->have_quoted_cr &&
+	    mi->quoted_cr == quoted_cr_warn)
 		warning("quoted CR detected");
 }
 
@@ -1220,6 +1221,17 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
 	return mi->input_error;
 }
 
+int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
+{
+	if (!strcmp(actionstr, "nowarn"))
+		*action = quoted_cr_nowarn;
+	else if (!strcmp(actionstr, "warn"))
+		*action = quoted_cr_warn;
+	else
+		return -1;
+	return 0;
+}
+
 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 {
 	struct mailinfo *mi = mi_;
@@ -1230,6 +1242,11 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 		mi->use_scissors = git_config_bool(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "mailinfo.quotedcr")) {
+		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
+			return error(_("bad action '%s' for '%s'"), value, var);
+		return 0;
+	}
 	/* perhaps others here */
 	return 0;
 }
@@ -1242,6 +1259,7 @@ void setup_mailinfo(struct mailinfo *mi)
 	strbuf_init(&mi->charset, 0);
 	strbuf_init(&mi->log_message, 0);
 	strbuf_init(&mi->inbody_header_accum, 0);
+	mi->quoted_cr = quoted_cr_warn;
 	mi->header_stage = 1;
 	mi->use_inbody_headers = 1;
 	mi->content_top = mi->content;
diff --git a/mailinfo.h b/mailinfo.h
index b394ef9bce..768d06ac2a 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -5,6 +5,11 @@
 
 #define MAX_BOUNDARIES 5
 
+enum quoted_cr_action {
+	quoted_cr_nowarn,
+	quoted_cr_warn,
+};
+
 struct mailinfo {
 	FILE *input;
 	FILE *output;
@@ -14,6 +19,7 @@ struct mailinfo {
 	struct strbuf email;
 	int keep_subject;
 	int keep_non_patch_brackets_in_subject;
+	int quoted_cr; /* enum quoted_cr_action */
 	int add_message_id;
 	int use_scissors;
 	int use_inbody_headers;
@@ -40,6 +46,7 @@ struct mailinfo {
 	int input_error;
 };
 
+int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action);
 void setup_mailinfo(struct mailinfo *);
 int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 void clear_mailinfo(struct mailinfo *);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index c7ea1b30df..51e8690fbd 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -236,11 +236,13 @@ check_quoted_cr_mail () {
 	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
 }
 
-test_expect_success 'mailinfo warn CR in base64 encoded email' '
+test_expect_success 'mailinfo handle CR in base64 encoded email' '
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
 	check_quoted_cr_mail &&
-	grep "quoted CR detected" quoted-cr-err
+	grep "quoted CR detected" quoted-cr-err &&
+	check_quoted_cr_mail --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr-err
 '
 
 test_done
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 5/6] mailinfo: allow stripping quoted CR without warning
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
                     ` (3 preceding siblings ...)
  2021-05-06 15:02   ` [PATCH v3 4/6] mailinfo: allow squelching quoted CR warning Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-06 15:02   ` [PATCH v3 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
  2021-05-08 10:57   ` [PATCH v3 0/6] Teach am/mailinfo to process quoted CR Junio C Hamano
  6 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, we've turned on warning for quoted CR in base64 or
quoted-printable email messages. Some projects see those quoted CR a lot,
they know that it happens most of the time, and they find it desirable
to always strip those CR.

Those projects usually fall back to using other tools to handle
patches when they receive such patches.

Let's help those projects handle those patches by stripping those
excessive CR.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 1 +
 mailinfo.c                     | 7 +++++++
 mailinfo.h                     | 1 +
 t/t5100-mailinfo.sh            | 4 ++++
 4 files changed, 13 insertions(+)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index 824947a070..3fcfd965fd 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -102,6 +102,7 @@ The valid actions are:
 *	`nowarn`: Git will do nothing when such a CRLF is found.
 *	`warn`: Git will issue a warning for each message if such a CRLF is
 	found.
+*	`strip`: Git will convert those CRLF to LF.
 --
 +
 The default action could be set by configuration option `mailinfo.quotedCR`.
diff --git a/mailinfo.c b/mailinfo.c
index c57eed25a5..5cddcdc8ae 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -998,6 +998,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 		    line->buf[len - 2] == '\r' &&
 		    line->buf[len - 1] == '\n') {
 			mi->have_quoted_cr = 1;
+			if (mi->quoted_cr == quoted_cr_strip) {
+				strbuf_setlen(line, len - 2);
+				strbuf_addch(line, '\n');
+				len--;
+			}
 		}
 		handle_filter(mi, line);
 		return;
@@ -1227,6 +1232,8 @@ int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
 		*action = quoted_cr_nowarn;
 	else if (!strcmp(actionstr, "warn"))
 		*action = quoted_cr_warn;
+	else if (!strcmp(actionstr, "strip"))
+		*action = quoted_cr_strip;
 	else
 		return -1;
 	return 0;
diff --git a/mailinfo.h b/mailinfo.h
index 768d06ac2a..2ddf8be90f 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -8,6 +8,7 @@
 enum quoted_cr_action {
 	quoted_cr_nowarn,
 	quoted_cr_warn,
+	quoted_cr_strip,
 };
 
 struct mailinfo {
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 51e8690fbd..06807f3852 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -242,6 +242,10 @@ test_expect_success 'mailinfo handle CR in base64 encoded email' '
 	check_quoted_cr_mail &&
 	grep "quoted CR detected" quoted-cr-err &&
 	check_quoted_cr_mail --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr-err &&
+	sed "s/%%//" "$DATA/quoted-cr-msg" >expect-cr-msg &&
+	sed "s/%%//" "$DATA/quoted-cr-patch" >expect-cr-patch &&
+	check_quoted_cr_mail --quoted-cr=strip &&
 	test_must_be_empty quoted-cr-err
 '
 
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v3 6/6] am: learn to process quoted lines that ends with CRLF
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
                     ` (4 preceding siblings ...)
  2021-05-06 15:02   ` [PATCH v3 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
@ 2021-05-06 15:02   ` Đoàn Trần Công Danh
  2021-05-08 10:57   ` [PATCH v3 0/6] Teach am/mailinfo to process quoted CR Junio C Hamano
  6 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:02 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, mailinfo has learnt to process lines that are decoded
from base64 or quoted-printable and end with CRLF.

Let's teach "am" that new trick, too.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-am.txt               |  4 ++
 builtin/am.c                           | 51 ++++++++++++++++++++++++++
 contrib/completion/git-completion.bash |  5 +++
 mailinfo.h                             |  1 +
 t/t4258-am-quoted-cr.sh                | 37 +++++++++++++++++++
 t/t4258/mbox                           | 12 ++++++
 6 files changed, 110 insertions(+)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox

diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt
index decd8ae122..8714dfcb76 100644
--- a/Documentation/git-am.txt
+++ b/Documentation/git-am.txt
@@ -15,6 +15,7 @@ SYNOPSIS
 	 [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>]
 	 [--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet]
 	 [--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>]
+	 [--quoted-cr=<action>]
 	 [(<mbox> | <Maildir>)...]
 'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)])
 
@@ -59,6 +60,9 @@ OPTIONS
 --no-scissors::
 	Ignore scissors lines (see linkgit:git-mailinfo[1]).
 
+--quoted-cr=<action>::
+	This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]).
+
 -m::
 --message-id::
 	Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]),
diff --git a/builtin/am.c b/builtin/am.c
index 8355e3566f..0b2d886c81 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -116,6 +116,7 @@ struct am_state {
 	int keep; /* enum keep_type */
 	int message_id;
 	int scissors; /* enum scissors_type */
+	int quoted_cr; /* enum quoted_cr_action */
 	struct strvec git_apply_opts;
 	const char *resolvemsg;
 	int committer_date_is_author_date;
@@ -145,6 +146,7 @@ static void am_state_init(struct am_state *state)
 	git_config_get_bool("am.messageid", &state->message_id);
 
 	state->scissors = SCISSORS_UNSET;
+	state->quoted_cr = quoted_cr_unset;
 
 	strvec_init(&state->git_apply_opts);
 
@@ -165,6 +167,16 @@ static void am_state_release(struct am_state *state)
 	strvec_clear(&state->git_apply_opts);
 }
 
+static int am_option_parse_quoted_cr(const struct option *opt,
+				     const char *arg, int unset)
+{
+	BUG_ON_OPT_NEG(unset);
+
+	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
+		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
+	return 0;
+}
+
 /**
  * Returns path relative to the am_state directory.
  */
@@ -397,6 +409,12 @@ static void am_load(struct am_state *state)
 	else
 		state->scissors = SCISSORS_UNSET;
 
+	read_state_file(&sb, state, "quoted-cr", 1);
+	if (!*sb.buf)
+		state->quoted_cr = quoted_cr_unset;
+	else if (mailinfo_parse_quoted_cr_action(sb.buf, &state->quoted_cr) != 0)
+		die(_("could not parse %s"), am_path(state, "quoted-cr"));
+
 	read_state_file(&sb, state, "apply-opt", 1);
 	strvec_clear(&state->git_apply_opts);
 	if (sq_dequote_to_strvec(sb.buf, &state->git_apply_opts) < 0)
@@ -1002,6 +1020,24 @@ static void am_setup(struct am_state *state, enum patch_format patch_format,
 	}
 	write_state_text(state, "scissors", str);
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		str = "";
+		break;
+	case quoted_cr_nowarn:
+		str = "nowarn";
+		break;
+	case quoted_cr_warn:
+		str = "warn";
+		break;
+	case quoted_cr_strip:
+		str = "strip";
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+	write_state_text(state, "quoted-cr", str);
+
 	sq_quote_argv(&sb, state->git_apply_opts.v);
 	write_state_text(state, "apply-opt", sb.buf);
 
@@ -1162,6 +1198,18 @@ static int parse_mail(struct am_state *state, const char *mail)
 		BUG("invalid value for state->scissors");
 	}
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		break;
+	case quoted_cr_nowarn:
+	case quoted_cr_warn:
+	case quoted_cr_strip:
+		mi.quoted_cr = state->quoted_cr;
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+
 	mi.input = xfopen(mail, "r");
 	mi.output = xfopen(am_path(state, "info"), "w");
 	if (mailinfo(&mi, am_path(state, "msg"), am_path(state, "patch")))
@@ -2242,6 +2290,9 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 			0, PARSE_OPT_NONEG),
 		OPT_BOOL('c', "scissors", &state.scissors,
 			N_("strip everything before a scissors line")),
+		OPT_CALLBACK_F(0, "quoted-cr", &state.quoted_cr, N_("action"),
+			       N_("pass it through git-mailinfo"),
+			       PARSE_OPT_NONEG, am_option_parse_quoted_cr),
 		OPT_PASSTHRU_ARGV(0, "whitespace", &state.git_apply_opts, N_("action"),
 			N_("pass it through git-apply"),
 			0),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 49e76e9d08..edf635095e 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1333,6 +1333,7 @@ __git_whitespacelist="nowarn warn error error-all fix"
 __git_patchformat="mbox stgit stgit-series hg mboxrd"
 __git_showcurrentpatch="diff raw"
 __git_am_inprogress_options="--skip --continue --resolved --abort --quit --show-current-patch"
+__git_quoted_cr="nowarn warn strip"
 
 _git_am ()
 {
@@ -1354,6 +1355,10 @@ _git_am ()
 		__gitcomp "$__git_showcurrentpatch" "" "${cur##--show-current-patch=}"
 		return
 		;;
+	--quoted-cr=*)
+		__gitcomp "$__git_quoted_cr" "" "${cur##--quoted-cr=}"
+		return
+		;;
 	--*)
 		__gitcomp_builtin am "" \
 			"$__git_am_inprogress_options"
diff --git a/mailinfo.h b/mailinfo.h
index 2ddf8be90f..f2ffd0349e 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -6,6 +6,7 @@
 #define MAX_BOUNDARIES 5
 
 enum quoted_cr_action {
+	quoted_cr_unset = -1,
 	quoted_cr_nowarn,
 	quoted_cr_warn,
 	quoted_cr_strip,
diff --git a/t/t4258-am-quoted-cr.sh b/t/t4258-am-quoted-cr.sh
new file mode 100755
index 0000000000..2029115ecd
--- /dev/null
+++ b/t/t4258-am-quoted-cr.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+test_description='test am --quoted-cr=<action>'
+
+. ./test-lib.sh
+
+DATA="$TEST_DIRECTORY/t4258"
+
+test_expect_success 'setup' '
+	test_write_lines one two three >text &&
+	test_commit one text &&
+	test_write_lines one owt three >text &&
+	test_commit two text
+'
+
+test_expect_success 'am warn if quoted-cr is found' '
+	git reset --hard one &&
+	test_must_fail git am "$DATA/mbox" 2>err &&
+	grep "quoted CR detected" err
+'
+
+test_expect_success 'am strip if quoted-cr is found' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	git am --quoted-cr=strip "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_expect_success 'am strip if quoted-cr is found' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	test_config mailinfo.quotedCr strip &&
+	git am "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_done
diff --git a/t/t4258/mbox b/t/t4258/mbox
new file mode 100644
index 0000000000..c62819f3d2
--- /dev/null
+++ b/t/t4258/mbox
@@ -0,0 +1,12 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+VGhpcyBpcyBjb21taXQgbWVzc2FnZS4NCi0tLQ0KIHRleHQgfCAyICstDQogMSBmaWxlIGNoYW5n
+ZWQsIDEgaW5zZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS90ZXh0IGIv
+dGV4dA0KaW5kZXggNTYyNmFiZi4uZjcxOWVmZCAxMDA2NDQNCi0tLSBhL3RleHQNCisrKyBiL3Rl
+eHQNCkBAIC0xICsxIEBADQotb25lDQordHdvDQotLSANCjIuMzEuMQoK
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v3 2/6] mailinfo: stop parse options manually
  2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parse options manually Đoàn Trần Công Danh
@ 2021-05-06 15:19     ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-06 15:19 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, brian m. carlson

On 2021-05-06 22:02:19+0700, Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
> In a later change, mailinfo will learn more options, let's switch to our
> robust parse_options framework before that step.

Please ignore this email. The correct patch is down-thread with
 subject:

* [PATCH v3 2/6] mailinfo: stop parsing options manually

-- 
Danh

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v3 2/6] mailinfo: stop parsing options manually
  2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
@ 2021-05-08 10:44     ` Junio C Hamano
  0 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-08 10:44 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> In a later change, mailinfo will learn more options, let's switch to our
> robust parse_options framework before that step.
>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>  builtin/mailinfo.c | 75 +++++++++++++++++++++++++++-------------------
>  1 file changed, 45 insertions(+), 30 deletions(-)

Looks quite straight-forward, thanks to the previous step.

Nicely done.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email
  2021-05-06 15:02   ` [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email Đoàn Trần Công Danh
@ 2021-05-08 10:52     ` Junio C Hamano
  0 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-08 10:52 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> +test_expect_success 'mailinfo warn CR in base64 encoded email' '
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&

As we are not interested in a lone CR in the middle of the line, I
wonder if we want to anchor the double per-cent to the end of the
line, i.e. "s/%%$/$(printf \\015)/".

On the other hand, we may want to make sure that the new option does
not disturb CR in the middle of the line, so we may be better off
leaving double per-cent unanchored, but allow replacing more than
once on a line, i.e. "s/%%/$(printf \\015)/g".

If we were to go to the latter route, we'd want to try a file with a
CR in the middle of the line (without a CR at the end of any line)
and make sure we won't warn or strip.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v3 0/6] Teach am/mailinfo to process quoted CR
  2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
                     ` (5 preceding siblings ...)
  2021-05-06 15:02   ` [PATCH v3 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
@ 2021-05-08 10:57   ` Junio C Hamano
  6 siblings, 0 replies; 35+ messages in thread
From: Junio C Hamano @ 2021-05-08 10:57 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git, brian m. carlson

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> When SMTP servers receive 8-bit email messages, possibly with only
> LF as line ending, some of those servers decide to change said LF to
> CRLF.
>
> Some mailing list software, when receives an 8-bit email message,
> decide to encode such message in base64 or quoted-printable.
>
> This series try to help users of such softwares deal with such patches.

Thanks.  Use of parse_options() did make the overall series longer,
but the result looks a lot nicer.

I've left a few comments, but overall the series looks almost
perfect ;-)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 0/6] Teach am/mailinfo to process quoted CR
  2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
                   ` (4 preceding siblings ...)
       [not found] ` <cover.1620309355.git.congdanhqx@gmail.com>
@ 2021-05-09 17:12 ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
                     ` (5 more replies)
  5 siblings, 6 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of those servers decide to change said LF to
CRLF.

Some mailing list software, when it receives an 8-bit email message,
decides to encode that message in base64 or quoted-printable.

This series tries to help users of such software deal with such patches.

Change in v4:
* Mark warning message for l10n
* Change all CR references to CRLF
* Add a test case to make sure we do not warn or strip when CR is found in the middle of the line.

Change in v3:
* Change preparatory step (in mailinfo) to use parse_options
* move "have_quoted_cr" to "struct mailinfo", thus the patch to warn about
  quoted CR is less weird.
* Change "mailinfo_parse_quoted_cr" to Git's do-some-work function, return
  0 on success and negative on failure, eliminate the usage of _invalid_action
* Better error messages if invalid action was given.
* completion support for git am --quoted-cr
* Some style changes

Đoàn Trần Công Danh (6):
  mailinfo: load default metainfo_charset lazily
  mailinfo: stop parsing options manually
  mailinfo: warn if CRLF found in decoded base64/QP email
  mailinfo: allow squelching quoted CRLF warning
  mailinfo: allow stripping quoted CR without warning
  am: learn to process quoted lines that ends with CRLF

 Documentation/git-am.txt               |   4 +
 Documentation/git-mailinfo.txt         |  21 ++++-
 builtin/am.c                           |  51 +++++++++++
 builtin/mailinfo.c                     | 115 ++++++++++++++++++-------
 contrib/completion/git-completion.bash |   5 ++
 mailinfo.c                             |  39 +++++++++
 mailinfo.h                             |  10 +++
 t/t4258-am-quoted-cr.sh                |  37 ++++++++
 t/t4258/mbox                           |  12 +++
 t/t5100-mailinfo.sh                    |  40 +++++++++
 t/t5100/quoted-cr-info                 |   5 ++
 t/t5100/quoted-cr-msg                  |   2 +
 t/t5100/quoted-cr-patch                |  22 +++++
 t/t5100/quoted-cr.mbox                 |  47 ++++++++++
 14 files changed, 376 insertions(+), 34 deletions(-)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

Range-diff against v3:
1:  fac95392df = 1:  fac95392df mailinfo: load default metainfo_charset lazily
2:  1fb08bb37d = 2:  1fb08bb37d mailinfo: stop parsing options manually
3:  5aac2ba38e ! 3:  50404ffe74 mailinfo: warn if CR found in decoded base64/QP email
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    mailinfo: warn if CR found in decoded base64/QP email
    +    mailinfo: warn if CRLF found in decoded base64/QP email
     
         When SMTP servers receive 8-bit email messages, possibly with only
         LF as line ending, some of them decide to change said LF to CRLF.
    @@ Commit message
         by such mailing list softwares, the recipients will receive an email
         contains a patch mungled with CRLF encoded inside another encoding.
     
    -    Thus, such CR couldn't be dropped by "mailsplit".
    +    Thus, such CR (in CRLF) couldn't be dropped by "mailsplit".
         Hence, the mailed patch couldn't be applied cleanly.
         Such accidents have been observed in the wild [1].
     
         Instead of silently rejecting those messages, let's give our users
    -    some warnings if such CR is found.
    +    some warnings if such CR (as part of CRLF) is found.
     
         [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi
     
    @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf
     +static void summarize_quoted_cr(struct mailinfo *mi)
     +{
     +	if (mi->have_quoted_cr)
    -+		warning("quoted CR detected");
    ++		warning(_("quoted CRLF detected"));
     +}
     +
      static void handle_body(struct mailinfo *mi, struct strbuf *line)
    @@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo handles unusual header whites
      '
      
     +check_quoted_cr_mail () {
    -+	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
    -+		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
    -+	test_cmp "expect-cr-msg" quoted-cr-msg &&
    -+	test_cmp "expect-cr-patch" quoted-cr-patch &&
    -+	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
    ++	mail="$1" && shift &&
    ++	git mailinfo -u "$@" "$mail.msg" "$mail.patch" \
    ++		<"$mail" >"$mail.info" 2>"$mail.err" &&
    ++	test_cmp "$mail-expected.msg" "$mail.msg" &&
    ++	test_cmp "$mail-expected.patch" "$mail.patch" &&
    ++	test_cmp "$DATA/quoted-cr-info" "$mail.info"
     +}
     +
    ++test_expect_success 'split base64 email with quoted-cr' '
    ++	mkdir quoted-cr &&
    ++	git mailsplit -oquoted-cr "$DATA/quoted-cr.mbox" >quoted-cr/last &&
    ++	test $(cat quoted-cr/last) = 2
    ++'
    ++
     +test_expect_success 'mailinfo warn CR in base64 encoded email' '
    -+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
    -+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
    -+	check_quoted_cr_mail &&
    -+	grep "quoted CR detected" quoted-cr-err
    ++	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
    ++		>quoted-cr/0001-expected.msg &&
    ++	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
    ++		>quoted-cr/0002-expected.msg &&
    ++	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
    ++		>quoted-cr/0001-expected.patch &&
    ++	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
    ++		>quoted-cr/0002-expected.patch &&
    ++	check_quoted_cr_mail quoted-cr/0001 &&
    ++	test_must_be_empty quoted-cr/0001.err &&
    ++	check_quoted_cr_mail quoted-cr/0002 &&
    ++	grep "quoted CRLF detected" quoted-cr/0002.err
     +'
     +
      test_done
    @@ t/t5100/quoted-cr-info (new)
     
      ## t/t5100/quoted-cr-msg (new) ##
     @@
    -+On different distro, pytest is suffixed with different patterns.%%
    ++On different distro, %%pytest is suffixed with different patterns.%%
     +%%
     
      ## t/t5100/quoted-cr-patch (new) ##
    @@ t/t5100/quoted-cr-patch (new)
     +--- a/configure%%
     ++++ b/configure%%
     +@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
    -+     printf "Checking for python3 pytest (>= 3.0)... "%%
    ++     printf "%%Checking for python3 pytest (>= 3.0)... "%%
     +     conf=$(mktemp)%%
     +     printf "[pytest]\nminversion=3.0\n" > $conf%%
     +-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
    @@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
     
      ## t/t5100/quoted-cr.mbox (new) ##
     @@
    ++From nobody Mon Sep 17 00:00:00 2001
     +From: A U Thor <mail@example.com>
     +To: list@example.org
     +Subject: [PATCH v2] sample
    @@ t/t5100/quoted-cr.mbox (new)
     +Content-Type: text/plain; charset="utf-8"
     +Content-Transfer-Encoding: base64
     +
    -+T24gZGlmZmVyZW50IGRpc3RybywgcHl0ZXN0IGlzIHN1ZmZpeGVkIHdpdGggZGlmZmVyZW50IHBh
    -+dHRlcm5zLg0KDQotLS0NCiBjb25maWd1cmUgfCAyICstDQogMSBmaWxlIGNoYW5nZWQsIDEgaW5z
    -+ZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS9jb25maWd1cmUgYi9jb25m
    -+aWd1cmUNCmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUNCi0tLSBhL2NvbmZpZ3VyZQ0K
    -+KysrIGIvY29uZmlndXJlDQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAt
    -+ZXEgMSBdOyB0aGVuDQogICAgIHByaW50ZiAiQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+
    -+PSAzLjApLi4uICINCiAgICAgY29uZj0kKG1rdGVtcCkNCiAgICAgcHJpbnRmICJbcHl0ZXN0XVxu
    -+bWludmVyc2lvbj0zLjBcbiIgPiAkY29uZg0KLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZl
    -+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQorICAgIGlmICIkcHl0aG9uIiAtbSBweXRlc3Qg
    -+LWMgJGNvbmYgLS12ZXJzaW9uID4vZGV2L251bGwgMj4mMTsgdGhlbg0KICAgICAgICAgcHJpbnRm
    -+ICJZZXMuXG4iDQogICAgICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTENCiAgICAgZWxzZQ0KLS0g
    -+DQoyLjI4LjANCl9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
    -+CmV4YW1wbGUgbWFpbGluZyBsaXN0IC0tIGxpc3RAZXhhbXBsZS5vcmcKVG8gdW5zdWJzY3JpYmUg
    -+c2VuZCBhbiBlbWFpbCB0byBsaXN0LWxlYXZlQGV4YW1wbGUub3JnCg==
    ++T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
    ++YXR0ZXJucy4KCi0tLQogY29uZmlndXJlIHwgMiArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0
    ++aW9uKCspLCAxIGRlbGV0aW9uKC0pCgpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29uZmlndXJl
    ++CmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUKLS0tIGEvY29uZmlndXJlCisrKyBiL2Nv
    ++bmZpZ3VyZQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAtZXEgMSBdOyB0
    ++aGVuCiAgICAgcHJpbnRmICINQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+PSAzLjApLi4u
    ++ICIKICAgICBjb25mPSQobWt0ZW1wKQogICAgIHByaW50ZiAiW3B5dGVzdF1cbm1pbnZlcnNpb249
    ++My4wXG4iID4gJGNvbmYKLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYv
    ++bnVsbCAyPiYxOyB0aGVuCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVzdCAtYyAkY29uZiAtLXZl
    ++cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuCiAgICAgICAgIHByaW50ZiAiWWVzLlxuIgogICAg
    ++ICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTEKICAgICBlbHNlCi0tIAoyLjI4LjAKX19fX19fX19f
    ++X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KZXhhbXBsZSBtYWlsaW5nIGxp
    ++c3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmliZSBzZW5kIGFuIGVtYWlsIHRvIGxp
    ++c3QtbGVhdmVAZXhhbXBsZS5vcmcK
    ++
    ++From nobody Mon Sep 17 00:00:00 2001
    ++From: A U Thor <mail@example.com>
    ++To: list@example.org
    ++Subject: [PATCH v2] sample
    ++Date: Mon,  3 Aug 2020 22:40:55 +0700
    ++Message-Id: <msg-id2@example.com>
    ++Content-Type: text/plain; charset="utf-8"
    ++Content-Transfer-Encoding: base64
    ++
    ++T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
    ++YXR0ZXJucy4NCg0KLS0tDQogY29uZmlndXJlIHwgMiArLQ0KIDEgZmlsZSBjaGFuZ2VkLCAxIGlu
    ++c2VydGlvbigrKSwgMSBkZWxldGlvbigtKQ0KDQpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29u
    ++ZmlndXJlDQppbmRleCBkYjM1MzhiMy4uZjdjMWMwOTUgMTAwNzU1DQotLS0gYS9jb25maWd1cmUN
    ++CisrKyBiL2NvbmZpZ3VyZQ0KQEAgLTgxNCw3ICs4MTQsNyBAQCBpZiBbICRoYXZlX3B5dGhvbjMg
    ++LWVxIDEgXTsgdGhlbg0KICAgICBwcmludGYgIg1DaGVja2luZyBmb3IgcHl0aG9uMyBweXRlc3Qg
    ++KD49IDMuMCkuLi4gIg0KICAgICBjb25mPSQobWt0ZW1wKQ0KICAgICBwcmludGYgIltweXRlc3Rd
    ++XG5taW52ZXJzaW9uPTMuMFxuIiA+ICRjb25mDQotICAgIGlmIHB5dGVzdC0zIC1jICRjb25mIC0t
    ++dmVyc2lvbiA+L2Rldi9udWxsIDI+JjE7IHRoZW4NCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVz
    ++dCAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQogICAgICAgICBwcmlu
    ++dGYgIlllcy5cbiINCiAgICAgICAgIGhhdmVfcHl0aG9uM19weXRlc3Q9MQ0KICAgICBlbHNlDQot
    ++LSANCjIuMjguMA0KX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
    ++X18KZXhhbXBsZSBtYWlsaW5nIGxpc3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmli
    ++ZSBzZW5kIGFuIGVtYWlsIHRvIGxpc3QtbGVhdmVAZXhhbXBsZS5vcmcK
4:  d5b2da370d ! 4:  8aeb960dfd mailinfo: allow squelching quoted CR warning
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    mailinfo: allow squelching quoted CR warning
    +    mailinfo: allow squelching quoted CRLF warning
     
    -    In previous change, Git starts to warn for quoted CR in decoded
    +    In previous change, Git starts to warn for quoted CRLF in decoded
         base64/QP email. Despite those warnings are usually helpful,
    -    quoted CR could be part of some users' workflow.
    +    quoted CRLF could be part of some users' workflow.
     
         Let's give them an option to turn off the warning completely.
     
    @@ mailinfo.c: static void handle_filter_flowed(struct mailinfo *mi, struct strbuf
     -	if (mi->have_quoted_cr)
     +	if (mi->have_quoted_cr &&
     +	    mi->quoted_cr == quoted_cr_warn)
    - 		warning("quoted CR detected");
    + 		warning(_("quoted CRLF detected"));
      }
      
     @@ mailinfo.c: int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
    @@ mailinfo.h: struct mailinfo {
      void clear_mailinfo(struct mailinfo *);
     
      ## t/t5100-mailinfo.sh ##
    -@@ t/t5100-mailinfo.sh: check_quoted_cr_mail () {
    - 	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
    - }
    - 
    --test_expect_success 'mailinfo warn CR in base64 encoded email' '
    -+test_expect_success 'mailinfo handle CR in base64 encoded email' '
    - 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
    - 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
    - 	check_quoted_cr_mail &&
    --	grep "quoted CR detected" quoted-cr-err
    -+	grep "quoted CR detected" quoted-cr-err &&
    -+	check_quoted_cr_mail --quoted-cr=nowarn &&
    -+	test_must_be_empty quoted-cr-err
    +@@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo warn CR in base64 encoded email' '
    + 	check_quoted_cr_mail quoted-cr/0001 &&
    + 	test_must_be_empty quoted-cr/0001.err &&
    + 	check_quoted_cr_mail quoted-cr/0002 &&
    +-	grep "quoted CRLF detected" quoted-cr/0002.err
    ++	grep "quoted CRLF detected" quoted-cr/0002.err &&
    ++	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn &&
    ++	test_must_be_empty quoted-cr/0001.err &&
    ++	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn &&
    ++	test_must_be_empty quoted-cr/0002.err
      '
      
      test_done
5:  9e96d4bf5e ! 5:  448daac5b3 mailinfo: allow stripping quoted CR without warning
    @@ mailinfo.h
      struct mailinfo {
     
      ## t/t5100-mailinfo.sh ##
    -@@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo handle CR in base64 encoded email' '
    - 	check_quoted_cr_mail &&
    - 	grep "quoted CR detected" quoted-cr-err &&
    - 	check_quoted_cr_mail --quoted-cr=nowarn &&
    -+	test_must_be_empty quoted-cr-err &&
    -+	sed "s/%%//" "$DATA/quoted-cr-msg" >expect-cr-msg &&
    -+	sed "s/%%//" "$DATA/quoted-cr-patch" >expect-cr-patch &&
    -+	check_quoted_cr_mail --quoted-cr=strip &&
    - 	test_must_be_empty quoted-cr-err
    +@@ t/t5100-mailinfo.sh: test_expect_success 'mailinfo warn CR in base64 encoded email' '
    + 	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn &&
    + 	test_must_be_empty quoted-cr/0001.err &&
    + 	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn &&
    ++	test_must_be_empty quoted-cr/0002.err &&
    ++	cp quoted-cr/0001-expected.msg quoted-cr/0002-expected.msg &&
    ++	cp quoted-cr/0001-expected.patch quoted-cr/0002-expected.patch &&
    ++	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=strip &&
    ++	test_must_be_empty quoted-cr/0001.err &&
    ++	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=strip &&
    + 	test_must_be_empty quoted-cr/0002.err
      '
      
6:  d6aa12acc0 ! 6:  e6dbc9d336 am: learn to process quoted lines that ends with CRLF
    @@ t/t4258-am-quoted-cr.sh (new)
     +test_expect_success 'am warn if quoted-cr is found' '
     +	git reset --hard one &&
     +	test_must_fail git am "$DATA/mbox" 2>err &&
    -+	grep "quoted CR detected" err
    ++	grep "quoted CRLF detected" err
     +'
     +
    -+test_expect_success 'am strip if quoted-cr is found' '
    ++test_expect_success 'am --quoted-cr=strip' '
     +	test_might_fail git am --abort &&
     +	git reset --hard one &&
     +	git am --quoted-cr=strip "$DATA/mbox" &&
     +	git diff --exit-code HEAD two
     +'
     +
    -+test_expect_success 'am strip if quoted-cr is found' '
    ++test_expect_success 'am with config mailinfo.quotecr=strip' '
     +	test_might_fail git am --abort &&
     +	git reset --hard one &&
     +	test_config mailinfo.quotedCr strip &&
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In a later change, we will use parse_options to parse mailinfo's options.
In mailinfo, "-u", "-n", and "--encoding" all try to set the same
field, with "-u" resetting that field to a default value taken from the
configuration variable "i18n.commitEncoding".

Let's delay the setting of that field until we finish processing all
options. By doing that, "i18n.commitEncoding" can be parsed on demand.
More importantly, it clears the way for using parse_options.

This change introduces some inconsistent braces "{}" in the "if/else if"
construct; however, we will rewrite them in the next few changes.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index cfb667a594..77f96177cc 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -11,17 +11,25 @@
 static const char mailinfo_usage[] =
 	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
 
+struct metainfo_charset
+{
+	enum {
+		CHARSET_DEFAULT,
+		CHARSET_NO_REENCODE,
+		CHARSET_EXPLICIT,
+	} policy;
+	const char *charset;
+};
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
-	const char *def_charset;
+	struct metainfo_charset meta_charset;
 	struct mailinfo mi;
 	int status;
 	char *msgfile, *patchfile;
 
 	setup_mailinfo(&mi);
-
-	def_charset = get_commit_output_encoding();
-	mi.metainfo_charset = def_charset;
+	meta_charset.policy = CHARSET_DEFAULT;
 
 	while (1 < argc && argv[1][0] == '-') {
 		if (!strcmp(argv[1], "-k"))
@@ -31,12 +39,13 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
 			mi.add_message_id = 1;
 		else if (!strcmp(argv[1], "-u"))
-			mi.metainfo_charset = def_charset;
+			meta_charset.policy = CHARSET_DEFAULT;
 		else if (!strcmp(argv[1], "-n"))
-			mi.metainfo_charset = NULL;
-		else if (starts_with(argv[1], "--encoding="))
-			mi.metainfo_charset = argv[1] + 11;
-		else if (!strcmp(argv[1], "--scissors"))
+			meta_charset.policy = CHARSET_NO_REENCODE;
+		else if (starts_with(argv[1], "--encoding=")) {
+			meta_charset.policy = CHARSET_EXPLICIT;
+			meta_charset.charset = argv[1] + 11;
+		} else if (!strcmp(argv[1], "--scissors"))
 			mi.use_scissors = 1;
 		else if (!strcmp(argv[1], "--no-scissors"))
 			mi.use_scissors = 0;
@@ -50,6 +59,19 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	if (argc != 3)
 		usage(mailinfo_usage);
 
+	switch (meta_charset.policy) {
+	case CHARSET_DEFAULT:
+		mi.metainfo_charset = get_commit_output_encoding();
+		break;
+	case CHARSET_NO_REENCODE:
+		mi.metainfo_charset = NULL;
+		break;
+	case CHARSET_EXPLICIT:
+		break;
+	default:
+		BUG("invalid meta_charset.policy");
+	}
+
 	mi.input = stdin;
 	mi.output = stdout;
 
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 2/6] mailinfo: stop parsing options manually
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 3/6] mailinfo: warn if CRLF found in decoded base64/QP email Đoàn Trần Công Danh
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In a later change, mailinfo will learn more options. Let's switch to our
robust parse_options framework before that step.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 builtin/mailinfo.c | 75 +++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 77f96177cc..f55549a097 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -7,9 +7,13 @@
 #include "utf8.h"
 #include "strbuf.h"
 #include "mailinfo.h"
+#include "parse-options.h"
 
-static const char mailinfo_usage[] =
-	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
+static const char * const mailinfo_usage[] = {
+	/* TRANSLATORS: keep <> in "<" mail ">" info. */
+	N_("git mailinfo [<options>] <msg> <patch> < mail >info"),
+	NULL,
+};
 
 struct metainfo_charset
 {
@@ -21,6 +25,19 @@ struct metainfo_charset
 	const char *charset;
 };
 
+static int parse_opt_explicit_encoding(const struct option *opt,
+				       const char *arg, int unset)
+{
+	struct metainfo_charset *meta_charset = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+
+	meta_charset->policy = CHARSET_EXPLICIT;
+	meta_charset->charset = arg;
+
+	return 0;
+}
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
 	struct metainfo_charset meta_charset;
@@ -28,36 +45,34 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	int status;
 	char *msgfile, *patchfile;
 
+	struct option options[] = {
+		OPT_BOOL('k', NULL, &mi.keep_subject, N_("keep subject")),
+		OPT_BOOL('b', NULL, &mi.keep_non_patch_brackets_in_subject,
+			 N_("keep non patch brackets in subject")),
+		OPT_BOOL('m', "message-id", &mi.add_message_id,
+			 N_("copy Message-ID to the end of commit message")),
+		OPT_SET_INT_F('u', NULL, &meta_charset.policy,
+			      N_("re-code metadata to i18n.commitEncoding"),
+			      CHARSET_DEFAULT, PARSE_OPT_NONEG),
+		OPT_SET_INT_F('n', NULL, &meta_charset.policy,
+			      N_("disable charset re-coding of metadata"),
+			      CHARSET_NO_REENCODE, PARSE_OPT_NONEG),
+		OPT_CALLBACK_F(0, "encoding", &meta_charset, N_("encoding"),
+			       N_("re-code metadata to this encoding"),
+			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
+		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
+		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
+			 N_("use headers in message's body")),
+		OPT_END()
+	};
+
 	setup_mailinfo(&mi);
 	meta_charset.policy = CHARSET_DEFAULT;
 
-	while (1 < argc && argv[1][0] == '-') {
-		if (!strcmp(argv[1], "-k"))
-			mi.keep_subject = 1;
-		else if (!strcmp(argv[1], "-b"))
-			mi.keep_non_patch_brackets_in_subject = 1;
-		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
-			mi.add_message_id = 1;
-		else if (!strcmp(argv[1], "-u"))
-			meta_charset.policy = CHARSET_DEFAULT;
-		else if (!strcmp(argv[1], "-n"))
-			meta_charset.policy = CHARSET_NO_REENCODE;
-		else if (starts_with(argv[1], "--encoding=")) {
-			meta_charset.policy = CHARSET_EXPLICIT;
-			meta_charset.charset = argv[1] + 11;
-		} else if (!strcmp(argv[1], "--scissors"))
-			mi.use_scissors = 1;
-		else if (!strcmp(argv[1], "--no-scissors"))
-			mi.use_scissors = 0;
-		else if (!strcmp(argv[1], "--no-inbody-headers"))
-			mi.use_inbody_headers = 0;
-		else
-			usage(mailinfo_usage);
-		argc--; argv++;
-	}
+	argc = parse_options(argc, argv, prefix, options, mailinfo_usage, 0);
 
-	if (argc != 3)
-		usage(mailinfo_usage);
+	if (argc != 2)
+		usage_with_options(mailinfo_usage, options);
 
 	switch (meta_charset.policy) {
 	case CHARSET_DEFAULT:
@@ -75,8 +90,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	mi.input = stdin;
 	mi.output = stdout;
 
-	msgfile = prefix_filename(prefix, argv[1]);
-	patchfile = prefix_filename(prefix, argv[2]);
+	msgfile = prefix_filename(prefix, argv[0]);
+	patchfile = prefix_filename(prefix, argv[1]);
 
 	status = !!mailinfo(&mi, msgfile, patchfile);
 	clear_mailinfo(&mi);
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 3/6] mailinfo: warn if CRLF found in decoded base64/QP email
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 4/6] mailinfo: allow squelching quoted CRLF warning Đoàn Trần Công Danh
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of them decide to change said LF to CRLF.

Some mailing list software, when it receives 8-bit email messages,
decides to encode those messages in base64 or quoted-printable.

If an email is transferred through the above mail servers, then distributed
by such mailing list software, the recipients will receive an email
containing a patch mangled with CRLF encoded inside another encoding.

Thus, such CR (in CRLF) couldn't be dropped by "mailsplit".
Hence, the mailed patch couldn't be applied cleanly.
Such accidents have been observed in the wild [1].

Instead of silently rejecting those messages, let's give our users
some warnings if such CR (as part of CRLF) is found.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c              | 14 ++++++++++++
 mailinfo.h              |  1 +
 t/t5100-mailinfo.sh     | 30 ++++++++++++++++++++++++++
 t/t5100/quoted-cr-info  |  5 +++++
 t/t5100/quoted-cr-msg   |  2 ++
 t/t5100/quoted-cr-patch | 22 +++++++++++++++++++
 t/t5100/quoted-cr.mbox  | 47 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 121 insertions(+)
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..c8caee4f55 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -994,6 +994,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			mi->have_quoted_cr = 1;
+		}
 		handle_filter(mi, line);
 		return;
 	}
@@ -1033,6 +1038,12 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	handle_filter(mi, line);
 }
 
+static void summarize_quoted_cr(struct mailinfo *mi)
+{
+	if (mi->have_quoted_cr)
+		warning(_("quoted CRLF detected"));
+}
+
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
@@ -1051,6 +1062,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 				handle_filter(mi, &prev);
 				strbuf_reset(&prev);
 			}
+			summarize_quoted_cr(mi);
+			mi->have_quoted_cr = 0;
 			if (!handle_boundary(mi, line))
 				goto handle_body_out;
 		}
@@ -1100,6 +1113,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line)
 
 	if (prev.len)
 		handle_filter(mi, &prev);
+	summarize_quoted_cr(mi);
 
 	flush_inbody_header_accum(mi);
 
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..b394ef9bce 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -24,6 +24,7 @@ struct mailinfo {
 	struct strbuf charset;
 	unsigned int format_flowed:1;
 	unsigned int delsp:1;
+	unsigned int have_quoted_cr:1;
 	char *message_id;
 	enum  {
 		TE_DONTCARE, TE_QP, TE_BASE64
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..ac6fbfe596 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,34 @@ test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+check_quoted_cr_mail () {
+	mail="$1" && shift &&
+	git mailinfo -u "$@" "$mail.msg" "$mail.patch" \
+		<"$mail" >"$mail.info" 2>"$mail.err" &&
+	test_cmp "$mail-expected.msg" "$mail.msg" &&
+	test_cmp "$mail-expected.patch" "$mail.patch" &&
+	test_cmp "$DATA/quoted-cr-info" "$mail.info"
+}
+
+test_expect_success 'split base64 email with quoted-cr' '
+	mkdir quoted-cr &&
+	git mailsplit -oquoted-cr "$DATA/quoted-cr.mbox" >quoted-cr/last &&
+	test $(cat quoted-cr/last) = 2
+'
+
+test_expect_success 'mailinfo warn CR in base64 encoded email' '
+	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
+		>quoted-cr/0001-expected.msg &&
+	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \
+		>quoted-cr/0002-expected.msg &&
+	sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
+		>quoted-cr/0001-expected.patch &&
+	sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \
+		>quoted-cr/0002-expected.patch &&
+	check_quoted_cr_mail quoted-cr/0001 &&
+	test_must_be_empty quoted-cr/0001.err &&
+	check_quoted_cr_mail quoted-cr/0002 &&
+	grep "quoted CRLF detected" quoted-cr/0002.err
+'
+
 test_done
diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/quoted-cr-info
@@ -0,0 +1,5 @@
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg
new file mode 100644
index 0000000000..89b05a0784
--- /dev/null
+++ b/t/t5100/quoted-cr-msg
@@ -0,0 +1,2 @@
+On different distro, %%pytest is suffixed with different patterns.%%
+%%
diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch
new file mode 100644
index 0000000000..65b13eeef7
--- /dev/null
+++ b/t/t5100/quoted-cr-patch
@@ -0,0 +1,22 @@
+---%%
+ configure | 2 +-%%
+ 1 file changed, 1 insertion(+), 1 deletion(-)%%
+%%
+diff --git a/configure b/configure%%
+index db3538b3..f7c1c095 100755%%
+--- a/configure%%
++++ b/configure%%
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
+     printf "%%Checking for python3 pytest (>= 3.0)... "%%
+     conf=$(mktemp)%%
+     printf "[pytest]\nminversion=3.0\n" > $conf%%
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%%
+         printf "Yes.\n"%%
+         have_python3_pytest=1%%
+     else%%
+-- %%
+2.28.0%%
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox
new file mode 100644
index 0000000000..909021bb7a
--- /dev/null
+++ b/t/t5100/quoted-cr.mbox
@@ -0,0 +1,47 @@
+From nobody Mon Sep 17 00:00:00 2001
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
+YXR0ZXJucy4KCi0tLQogY29uZmlndXJlIHwgMiArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0
+aW9uKCspLCAxIGRlbGV0aW9uKC0pCgpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29uZmlndXJl
+CmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUKLS0tIGEvY29uZmlndXJlCisrKyBiL2Nv
+bmZpZ3VyZQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAtZXEgMSBdOyB0
+aGVuCiAgICAgcHJpbnRmICINQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+PSAzLjApLi4u
+ICIKICAgICBjb25mPSQobWt0ZW1wKQogICAgIHByaW50ZiAiW3B5dGVzdF1cbm1pbnZlcnNpb249
+My4wXG4iID4gJGNvbmYKLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYv
+bnVsbCAyPiYxOyB0aGVuCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVzdCAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuCiAgICAgICAgIHByaW50ZiAiWWVzLlxuIgogICAg
+ICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTEKICAgICBlbHNlCi0tIAoyLjI4LjAKX19fX19fX19f
+X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KZXhhbXBsZSBtYWlsaW5nIGxp
+c3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmliZSBzZW5kIGFuIGVtYWlsIHRvIGxp
+c3QtbGVhdmVAZXhhbXBsZS5vcmcK
+
+From nobody Mon Sep 17 00:00:00 2001
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id2@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw
+YXR0ZXJucy4NCg0KLS0tDQogY29uZmlndXJlIHwgMiArLQ0KIDEgZmlsZSBjaGFuZ2VkLCAxIGlu
+c2VydGlvbigrKSwgMSBkZWxldGlvbigtKQ0KDQpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29u
+ZmlndXJlDQppbmRleCBkYjM1MzhiMy4uZjdjMWMwOTUgMTAwNzU1DQotLS0gYS9jb25maWd1cmUN
+CisrKyBiL2NvbmZpZ3VyZQ0KQEAgLTgxNCw3ICs4MTQsNyBAQCBpZiBbICRoYXZlX3B5dGhvbjMg
+LWVxIDEgXTsgdGhlbg0KICAgICBwcmludGYgIg1DaGVja2luZyBmb3IgcHl0aG9uMyBweXRlc3Qg
+KD49IDMuMCkuLi4gIg0KICAgICBjb25mPSQobWt0ZW1wKQ0KICAgICBwcmludGYgIltweXRlc3Rd
+XG5taW52ZXJzaW9uPTMuMFxuIiA+ICRjb25mDQotICAgIGlmIHB5dGVzdC0zIC1jICRjb25mIC0t
+dmVyc2lvbiA+L2Rldi9udWxsIDI+JjE7IHRoZW4NCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVz
+dCAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQogICAgICAgICBwcmlu
+dGYgIlllcy5cbiINCiAgICAgICAgIGhhdmVfcHl0aG9uM19weXRlc3Q9MQ0KICAgICBlbHNlDQot
+LSANCjIuMjguMA0KX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+X18KZXhhbXBsZSBtYWlsaW5nIGxpc3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmli
+ZSBzZW5kIGFuIGVtYWlsIHRvIGxpc3QtbGVhdmVAZXhhbXBsZS5vcmcK
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 4/6] mailinfo: allow squelching quoted CRLF warning
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (2 preceding siblings ...)
  2021-05-09 17:12   ` [PATCH v4 3/6] mailinfo: warn if CRLF found in decoded base64/QP email Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In the previous change, Git started to warn about quoted CRLF in decoded
base64/QP email. Although those warnings are usually helpful,
quoted CRLF could be part of some users' workflow.

Let's give them an option to turn off the warning completely.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 20 +++++++++++++++++++-
 builtin/mailinfo.c             | 12 ++++++++++++
 mailinfo.c                     | 20 +++++++++++++++++++-
 mailinfo.h                     |  7 +++++++
 t/t5100-mailinfo.sh            |  6 +++++-
 5 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index d343f040f5..824947a070 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
 SYNOPSIS
 --------
 [verse]
-'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n]
+	       [--[no-]scissors] [--quoted-cr=<action>]
+	       <msg> <patch>
 
 
 DESCRIPTION
@@ -89,6 +91,22 @@ This can be enabled by default with the configuration option mailinfo.scissors.
 --no-scissors::
 	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
 
+--quoted-cr=<action>::
+	Action when processes email messages sent with base64 or
+	quoted-printable encoding, and the decoded lines end with a CRLF
+	instead of a simple LF.
++
+The valid actions are:
++
+--
+*	`nowarn`: Git will do nothing when such a CRLF is found.
+*	`warn`: Git will issue a warning for each message if such a CRLF is
+	found.
+--
++
+The default action could be set by configuration option `mailinfo.quotedCR`.
+If no such configuration option has been set, `warn` will be used.
+
 <msg>::
 	The commit log message extracted from e-mail, usually
 	except the title line which comes from e-mail Subject.
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index f55549a097..01d16ef9e5 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -38,6 +38,15 @@ static int parse_opt_explicit_encoding(const struct option *opt,
 	return 0;
 }
 
+static int parse_opt_quoted_cr(const struct option *opt, const char *arg, int unset)
+{
+	BUG_ON_OPT_NEG(unset);
+
+	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
+		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
+	return 0;
+}
+
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
 	struct metainfo_charset meta_charset;
@@ -61,6 +70,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			       N_("re-code metadata to this encoding"),
 			       PARSE_OPT_NONEG, parse_opt_explicit_encoding),
 		OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")),
+		OPT_CALLBACK_F(0, "quoted-cr", &mi.quoted_cr, N_("<action>"),
+			       N_("action when quoted CR is found"),
+			       PARSE_OPT_NONEG, parse_opt_quoted_cr),
 		OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers,
 			 N_("use headers in message's body")),
 		OPT_END()
diff --git a/mailinfo.c b/mailinfo.c
index c8caee4f55..a784552c7b 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 
 static void summarize_quoted_cr(struct mailinfo *mi)
 {
-	if (mi->have_quoted_cr)
+	if (mi->have_quoted_cr &&
+	    mi->quoted_cr == quoted_cr_warn)
 		warning(_("quoted CRLF detected"));
 }
 
@@ -1220,6 +1221,17 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
 	return mi->input_error;
 }
 
+int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
+{
+	if (!strcmp(actionstr, "nowarn"))
+		*action = quoted_cr_nowarn;
+	else if (!strcmp(actionstr, "warn"))
+		*action = quoted_cr_warn;
+	else
+		return -1;
+	return 0;
+}
+
 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 {
 	struct mailinfo *mi = mi_;
@@ -1230,6 +1242,11 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 		mi->use_scissors = git_config_bool(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "mailinfo.quotedcr")) {
+		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
+			return error(_("bad action '%s' for '%s'"), value, var);
+		return 0;
+	}
 	/* perhaps others here */
 	return 0;
 }
@@ -1242,6 +1259,7 @@ void setup_mailinfo(struct mailinfo *mi)
 	strbuf_init(&mi->charset, 0);
 	strbuf_init(&mi->log_message, 0);
 	strbuf_init(&mi->inbody_header_accum, 0);
+	mi->quoted_cr = quoted_cr_warn;
 	mi->header_stage = 1;
 	mi->use_inbody_headers = 1;
 	mi->content_top = mi->content;
diff --git a/mailinfo.h b/mailinfo.h
index b394ef9bce..768d06ac2a 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -5,6 +5,11 @@
 
 #define MAX_BOUNDARIES 5
 
+enum quoted_cr_action {
+	quoted_cr_nowarn,
+	quoted_cr_warn,
+};
+
 struct mailinfo {
 	FILE *input;
 	FILE *output;
@@ -14,6 +19,7 @@ struct mailinfo {
 	struct strbuf email;
 	int keep_subject;
 	int keep_non_patch_brackets_in_subject;
+	int quoted_cr; /* enum quoted_cr_action */
 	int add_message_id;
 	int use_scissors;
 	int use_inbody_headers;
@@ -40,6 +46,7 @@ struct mailinfo {
 	int input_error;
 };
 
+int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action);
 void setup_mailinfo(struct mailinfo *);
 int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 void clear_mailinfo(struct mailinfo *);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index ac6fbfe596..1ecefa381d 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -255,7 +255,11 @@ test_expect_success 'mailinfo warn CR in base64 encoded email' '
 	check_quoted_cr_mail quoted-cr/0001 &&
 	test_must_be_empty quoted-cr/0001.err &&
 	check_quoted_cr_mail quoted-cr/0002 &&
-	grep "quoted CRLF detected" quoted-cr/0002.err
+	grep "quoted CRLF detected" quoted-cr/0002.err &&
+	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr/0001.err &&
+	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr/0002.err
 '
 
 test_done
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 5/6] mailinfo: allow stripping quoted CR without warning
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (3 preceding siblings ...)
  2021-05-09 17:12   ` [PATCH v4 4/6] mailinfo: allow squelching quoted CRLF warning Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  2021-05-09 17:12   ` [PATCH v4 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, we've turned on a warning for quoted CR in base64 or
quoted-printable email messages. Some projects see those quoted CRs a lot;
they know that it happens most of the time, and they find it desirable
to always strip those CRs.

The projects in question usually fall back to using other tools to handle
such patches when they receive them.

Let's help those projects handle such patches by stripping the
excess CRs.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 1 +
 mailinfo.c                     | 7 +++++++
 mailinfo.h                     | 1 +
 t/t5100-mailinfo.sh            | 6 ++++++
 4 files changed, 15 insertions(+)

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index 824947a070..3fcfd965fd 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -102,6 +102,7 @@ The valid actions are:
 *	`nowarn`: Git will do nothing when such a CRLF is found.
 *	`warn`: Git will issue a warning for each message if such a CRLF is
 	found.
+*	`strip`: Git will convert those CRLF to LF.
 --
 +
 The default action could be set by configuration option `mailinfo.quotedCR`.
diff --git a/mailinfo.c b/mailinfo.c
index a784552c7b..ed863c3a95 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -998,6 +998,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 		    line->buf[len - 2] == '\r' &&
 		    line->buf[len - 1] == '\n') {
 			mi->have_quoted_cr = 1;
+			if (mi->quoted_cr == quoted_cr_strip) {
+				strbuf_setlen(line, len - 2);
+				strbuf_addch(line, '\n');
+				len--;
+			}
 		}
 		handle_filter(mi, line);
 		return;
@@ -1227,6 +1232,8 @@ int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
 		*action = quoted_cr_nowarn;
 	else if (!strcmp(actionstr, "warn"))
 		*action = quoted_cr_warn;
+	else if (!strcmp(actionstr, "strip"))
+		*action = quoted_cr_strip;
 	else
 		return -1;
 	return 0;
diff --git a/mailinfo.h b/mailinfo.h
index 768d06ac2a..2ddf8be90f 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -8,6 +8,7 @@
 enum quoted_cr_action {
 	quoted_cr_nowarn,
 	quoted_cr_warn,
+	quoted_cr_strip,
 };
 
 struct mailinfo {
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 1ecefa381d..141b29f031 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -259,6 +259,12 @@ test_expect_success 'mailinfo warn CR in base64 encoded email' '
 	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn &&
 	test_must_be_empty quoted-cr/0001.err &&
 	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr/0002.err &&
+	cp quoted-cr/0001-expected.msg quoted-cr/0002-expected.msg &&
+	cp quoted-cr/0001-expected.patch quoted-cr/0002-expected.patch &&
+	check_quoted_cr_mail quoted-cr/0001 --quoted-cr=strip &&
+	test_must_be_empty quoted-cr/0001.err &&
+	check_quoted_cr_mail quoted-cr/0002 --quoted-cr=strip &&
 	test_must_be_empty quoted-cr/0002.err
 '
 
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4 6/6] am: learn to process quoted lines that ends with CRLF
  2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
                     ` (4 preceding siblings ...)
  2021-05-09 17:12   ` [PATCH v4 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
@ 2021-05-09 17:12   ` Đoàn Trần Công Danh
  5 siblings, 0 replies; 35+ messages in thread
From: Đoàn Trần Công Danh @ 2021-05-09 17:12 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Junio C Hamano,
	brian m. carlson

In previous changes, mailinfo has learnt to process lines that are decoded
from base64 or quoted-printable and end with CRLF.

Let's teach "am" that new trick, too.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-am.txt               |  4 ++
 builtin/am.c                           | 51 ++++++++++++++++++++++++++
 contrib/completion/git-completion.bash |  5 +++
 mailinfo.h                             |  1 +
 t/t4258-am-quoted-cr.sh                | 37 +++++++++++++++++++
 t/t4258/mbox                           | 12 ++++++
 6 files changed, 110 insertions(+)
 create mode 100755 t/t4258-am-quoted-cr.sh
 create mode 100644 t/t4258/mbox

diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt
index decd8ae122..8714dfcb76 100644
--- a/Documentation/git-am.txt
+++ b/Documentation/git-am.txt
@@ -15,6 +15,7 @@ SYNOPSIS
 	 [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>]
 	 [--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet]
 	 [--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>]
+	 [--quoted-cr=<action>]
 	 [(<mbox> | <Maildir>)...]
 'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)])
 
@@ -59,6 +60,9 @@ OPTIONS
 --no-scissors::
 	Ignore scissors lines (see linkgit:git-mailinfo[1]).
 
+--quoted-cr=<action>::
+	This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]).
+
 -m::
 --message-id::
 	Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]),
diff --git a/builtin/am.c b/builtin/am.c
index 8355e3566f..0b2d886c81 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -116,6 +116,7 @@ struct am_state {
 	int keep; /* enum keep_type */
 	int message_id;
 	int scissors; /* enum scissors_type */
+	int quoted_cr; /* enum quoted_cr_action */
 	struct strvec git_apply_opts;
 	const char *resolvemsg;
 	int committer_date_is_author_date;
@@ -145,6 +146,7 @@ static void am_state_init(struct am_state *state)
 	git_config_get_bool("am.messageid", &state->message_id);
 
 	state->scissors = SCISSORS_UNSET;
+	state->quoted_cr = quoted_cr_unset;
 
 	strvec_init(&state->git_apply_opts);
 
@@ -165,6 +167,16 @@ static void am_state_release(struct am_state *state)
 	strvec_clear(&state->git_apply_opts);
 }
 
+static int am_option_parse_quoted_cr(const struct option *opt,
+				     const char *arg, int unset)
+{
+	BUG_ON_OPT_NEG(unset);
+
+	if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0)
+		return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
+	return 0;
+}
+
 /**
  * Returns path relative to the am_state directory.
  */
@@ -397,6 +409,12 @@ static void am_load(struct am_state *state)
 	else
 		state->scissors = SCISSORS_UNSET;
 
+	read_state_file(&sb, state, "quoted-cr", 1);
+	if (!*sb.buf)
+		state->quoted_cr = quoted_cr_unset;
+	else if (mailinfo_parse_quoted_cr_action(sb.buf, &state->quoted_cr) != 0)
+		die(_("could not parse %s"), am_path(state, "quoted-cr"));
+
 	read_state_file(&sb, state, "apply-opt", 1);
 	strvec_clear(&state->git_apply_opts);
 	if (sq_dequote_to_strvec(sb.buf, &state->git_apply_opts) < 0)
@@ -1002,6 +1020,24 @@ static void am_setup(struct am_state *state, enum patch_format patch_format,
 	}
 	write_state_text(state, "scissors", str);
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		str = "";
+		break;
+	case quoted_cr_nowarn:
+		str = "nowarn";
+		break;
+	case quoted_cr_warn:
+		str = "warn";
+		break;
+	case quoted_cr_strip:
+		str = "strip";
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+	write_state_text(state, "quoted-cr", str);
+
 	sq_quote_argv(&sb, state->git_apply_opts.v);
 	write_state_text(state, "apply-opt", sb.buf);
 
@@ -1162,6 +1198,18 @@ static int parse_mail(struct am_state *state, const char *mail)
 		BUG("invalid value for state->scissors");
 	}
 
+	switch (state->quoted_cr) {
+	case quoted_cr_unset:
+		break;
+	case quoted_cr_nowarn:
+	case quoted_cr_warn:
+	case quoted_cr_strip:
+		mi.quoted_cr = state->quoted_cr;
+		break;
+	default:
+		BUG("invalid value for state->quoted_cr");
+	}
+
 	mi.input = xfopen(mail, "r");
 	mi.output = xfopen(am_path(state, "info"), "w");
 	if (mailinfo(&mi, am_path(state, "msg"), am_path(state, "patch")))
@@ -2242,6 +2290,9 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 			0, PARSE_OPT_NONEG),
 		OPT_BOOL('c', "scissors", &state.scissors,
 			N_("strip everything before a scissors line")),
+		OPT_CALLBACK_F(0, "quoted-cr", &state.quoted_cr, N_("action"),
+			       N_("pass it through git-mailinfo"),
+			       PARSE_OPT_NONEG, am_option_parse_quoted_cr),
 		OPT_PASSTHRU_ARGV(0, "whitespace", &state.git_apply_opts, N_("action"),
 			N_("pass it through git-apply"),
 			0),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 49e76e9d08..edf635095e 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1333,6 +1333,7 @@ __git_whitespacelist="nowarn warn error error-all fix"
 __git_patchformat="mbox stgit stgit-series hg mboxrd"
 __git_showcurrentpatch="diff raw"
 __git_am_inprogress_options="--skip --continue --resolved --abort --quit --show-current-patch"
+__git_quoted_cr="nowarn warn strip"
 
 _git_am ()
 {
@@ -1354,6 +1355,10 @@ _git_am ()
 		__gitcomp "$__git_showcurrentpatch" "" "${cur##--show-current-patch=}"
 		return
 		;;
+	--quoted-cr=*)
+		__gitcomp "$__git_quoted_cr" "" "${cur##--quoted-cr=}"
+		return
+		;;
 	--*)
 		__gitcomp_builtin am "" \
 			"$__git_am_inprogress_options"
diff --git a/mailinfo.h b/mailinfo.h
index 2ddf8be90f..f2ffd0349e 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -6,6 +6,7 @@
 #define MAX_BOUNDARIES 5
 
 enum quoted_cr_action {
+	quoted_cr_unset = -1,
 	quoted_cr_nowarn,
 	quoted_cr_warn,
 	quoted_cr_strip,
diff --git a/t/t4258-am-quoted-cr.sh b/t/t4258-am-quoted-cr.sh
new file mode 100755
index 0000000000..fb5071f914
--- /dev/null
+++ b/t/t4258-am-quoted-cr.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+test_description='test am --quoted-cr=<action>'
+
+. ./test-lib.sh
+
+DATA="$TEST_DIRECTORY/t4258"
+
+test_expect_success 'setup' '
+	test_write_lines one two three >text &&
+	test_commit one text &&
+	test_write_lines one owt three >text &&
+	test_commit two text
+'
+
+test_expect_success 'am warn if quoted-cr is found' '
+	git reset --hard one &&
+	test_must_fail git am "$DATA/mbox" 2>err &&
+	grep "quoted CRLF detected" err
+'
+
+test_expect_success 'am --quoted-cr=strip' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	git am --quoted-cr=strip "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_expect_success 'am with config mailinfo.quotecr=strip' '
+	test_might_fail git am --abort &&
+	git reset --hard one &&
+	test_config mailinfo.quotedCr strip &&
+	git am "$DATA/mbox" &&
+	git diff --exit-code HEAD two
+'
+
+test_done
diff --git a/t/t4258/mbox b/t/t4258/mbox
new file mode 100644
index 0000000000..c62819f3d2
--- /dev/null
+++ b/t/t4258/mbox
@@ -0,0 +1,12 @@
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+VGhpcyBpcyBjb21taXQgbWVzc2FnZS4NCi0tLQ0KIHRleHQgfCAyICstDQogMSBmaWxlIGNoYW5n
+ZWQsIDEgaW5zZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS90ZXh0IGIv
+dGV4dA0KaW5kZXggNTYyNmFiZi4uZjcxOWVmZCAxMDA2NDQNCi0tLSBhL3RleHQNCisrKyBiL3Rl
+eHQNCkBAIC0xICsxIEBADQotb25lDQordHdvDQotLSANCjIuMzEuMQoK
-- 
2.31.1.448.g9c2f8508d1


^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2021-05-09 17:12 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-21  1:34 [PATCH] mailinfo: strip CR from base64/quoted-printable email Đoàn Trần Công Danh
2021-04-21  2:09 ` Junio C Hamano
2021-04-21  3:32 ` brian m. carlson
2021-04-21 12:07   ` Đoàn Trần Công Danh
2021-04-22  1:10     ` brian m. carlson
2021-05-04 17:19 ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
2021-05-04 17:19   ` [PATCH v2 1/5] mailinfo: avoid magic number in option parsing Đoàn Trần Công Danh
2021-05-04 17:19   ` [PATCH v2 2/5] mailinfo: warn if CR found in base64/quoted-printable email Đoàn Trần Công Danh
2021-05-05  3:41     ` Junio C Hamano
2021-05-04 17:20   ` [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish Đoàn Trần Công Danh
2021-05-05  4:12     ` Junio C Hamano
2021-05-05 15:53       ` Đoàn Trần Công Danh
2021-05-04 17:20   ` [PATCH v2 4/5] mailinfo: strip quoted CR on users' wish Đoàn Trần Công Danh
2021-05-05  4:27     ` Junio C Hamano
2021-05-04 17:20   ` [PATCH v2 5/5] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
2021-05-05  4:31   ` [PATCH v2 0/5] Teach am/mailinfo to process quoted CR Junio C Hamano
2021-05-06 15:02 ` [PATCH v3 0/6] " Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
2021-05-08 10:44     ` Junio C Hamano
2021-05-06 15:02   ` [PATCH v3 3/6] mailinfo: warn if CR found in decoded base64/QP email Đoàn Trần Công Danh
2021-05-08 10:52     ` Junio C Hamano
2021-05-06 15:02   ` [PATCH v3 4/6] mailinfo: allow squelching quoted CR warning Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
2021-05-06 15:02   ` [PATCH v3 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh
2021-05-08 10:57   ` [PATCH v3 0/6] Teach am/mailinfo to process quoted CR Junio C Hamano
     [not found] ` <cover.1620309355.git.congdanhqx@gmail.com>
2021-05-06 15:02   ` [PATCH v3 2/6] mailinfo: stop parse options manually Đoàn Trần Công Danh
2021-05-06 15:19     ` Đoàn Trần Công Danh
2021-05-09 17:12 ` [PATCH v4 0/6] Teach am/mailinfo to process quoted CR Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 1/6] mailinfo: load default metainfo_charset lazily Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 2/6] mailinfo: stop parsing options manually Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 3/6] mailinfo: warn if CRLF found in decoded base64/QP email Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 4/6] mailinfo: allow squelching quoted CRLF warning Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 5/6] mailinfo: allow stripping quoted CR without warning Đoàn Trần Công Danh
2021-05-09 17:12   ` [PATCH v4 6/6] am: learn to process quoted lines that ends with CRLF Đoàn Trần Công Danh

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).