git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] format-patch: escape "From " lines recognized by mailsplit
@ 2016-07-22 22:47 Eric Wong
  2016-07-23  8:59 ` Johannes Schindelin
  2016-07-24  7:11 ` [PATCH] " Junio C Hamano
  0 siblings, 2 replies; 17+ messages in thread
From: Eric Wong @ 2016-07-22 22:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine

Users have mistakenly copied "From " lines into commit messages
in the past, and will certainly make the same mistakes in the
future.  Since not everyone uses mboxrd, yet, we should at least
prevent miss-split mails by always escaping "From " lines based
on the check used by mailsplit.

mailsplit will not perform unescaping by default, yet, as it
could cause further invocations of format-patch from old
versions of git to generate bad output.  Propagating the mboxo
escaping is preferable to miss-split patches.  Unescaping may
still be performed via "--mboxrd".

ref: https://public-inbox.org/git/20160606230248.GA15906@dcvr.yhbt.net/T/#u

Signed-off-by: Eric Wong <e@80x24.org>
---
 builtin/mailsplit.c     | 32 +-------------------------------
 mailinfo.c              | 31 +++++++++++++++++++++++++++++++
 mailinfo.h              |  1 +
 pretty.c                | 16 +++++++++++++---
 t/t4014-format-patch.sh | 14 ++++++++++++++
 5 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
index 3068168..bb8f9c9 100644
--- a/builtin/mailsplit.c
+++ b/builtin/mailsplit.c
@@ -8,41 +8,11 @@
 #include "builtin.h"
 #include "string-list.h"
 #include "strbuf.h"
+#include "mailinfo.h"
 
 static const char git_mailsplit_usage[] =
 "git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] -o<directory> [(<mbox>|<Maildir>)...]";
 
-static int is_from_line(const char *line, int len)
-{
-	const char *colon;
-
-	if (len < 20 || memcmp("From ", line, 5))
-		return 0;
-
-	colon = line + len - 2;
-	line += 5;
-	for (;;) {
-		if (colon < line)
-			return 0;
-		if (*--colon == ':')
-			break;
-	}
-
-	if (!isdigit(colon[-4]) ||
-	    !isdigit(colon[-2]) ||
-	    !isdigit(colon[-1]) ||
-	    !isdigit(colon[ 1]) ||
-	    !isdigit(colon[ 2]))
-		return 0;
-
-	/* year */
-	if (strtol(colon+3, NULL, 10) <= 90)
-		return 0;
-
-	/* Ok, close enough */
-	return 1;
-}
-
 static struct strbuf buf = STRBUF_INIT;
 static int keep_cr;
 static int mboxrd;
diff --git a/mailinfo.c b/mailinfo.c
index 9f19ca1..0ebd953 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
 
 	strbuf_release(&mi->log_message);
 }
+
+int is_from_line(const char *line, int len)
+{
+	const char *colon;
+
+	if (len < 20 || memcmp("From ", line, 5))
+		return 0;
+
+	colon = line + len - 2;
+	line += 5;
+	for (;;) {
+		if (colon < line)
+			return 0;
+		if (*--colon == ':')
+			break;
+	}
+
+	if (!isdigit(colon[-4]) ||
+	    !isdigit(colon[-2]) ||
+	    !isdigit(colon[-1]) ||
+	    !isdigit(colon[ 1]) ||
+	    !isdigit(colon[ 2]))
+		return 0;
+
+	/* year */
+	if (strtol(colon+3, NULL, 10) <= 90)
+		return 0;
+
+	/* Ok, close enough */
+	return 1;
+}
diff --git a/mailinfo.h b/mailinfo.h
index 93776a7..c1430a0 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -37,5 +37,6 @@ struct mailinfo {
 extern void setup_mailinfo(struct mailinfo *);
 extern int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 extern void clear_mailinfo(struct mailinfo *);
+int is_from_line(const char *line, int len);
 
 #endif /* MAILINFO_H */
diff --git a/pretty.c b/pretty.c
index 9fa42c2..898c0a3 100644
--- a/pretty.c
+++ b/pretty.c
@@ -10,6 +10,7 @@
 #include "color.h"
 #include "reflog-walk.h"
 #include "gpg-interface.h"
+#include "mailinfo.h"
 
 static char *user_format;
 static struct cmt_fmt_map {
@@ -1745,9 +1746,18 @@ void pp_remainder(struct pretty_print_context *pp,
 			strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
 					     line, linelen);
 		else {
-			if (pp->fmt == CMIT_FMT_MBOXRD &&
-					is_mboxrd_from(line, linelen))
-				strbuf_addch(sb, '>');
+			switch (pp->fmt) {
+			case CMIT_FMT_EMAIL:
+				if (is_from_line(line, linelen))
+					strbuf_addch(sb, '>');
+				break;
+			case CMIT_FMT_MBOXRD:
+				if (is_mboxrd_from(line, linelen))
+					strbuf_addch(sb, '>');
+				break;
+			default:
+				break;
+			}
 
 			strbuf_add(sb, line, linelen);
 		}
diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index 1206c48..8fa3982 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -1606,4 +1606,18 @@ test_expect_success 'format-patch --pretty=mboxrd' '
 	test_cmp expect actual
 '
 
+test_expect_success 'format-patch From escaping' '
+	cat >msg <<-INPUT_END &&
+	somebody pasted format-patch output into a body
+
+	From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+	INPUT_END
+
+	C=$(git commit-tree HEAD^^{tree} -p HEAD <msg) &&
+	git format-patch --stdout -1 $C~1..$C >patch &&
+	git grep -h --no-index \
+		">From 0000000000000000000000000000000000000000 " \
+		patch
+'
+
 test_done
-- 
EW

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-22 22:47 [PATCH] format-patch: escape "From " lines recognized by mailsplit Eric Wong
@ 2016-07-23  8:59 ` Johannes Schindelin
  2016-07-24  3:14   ` Eric Wong
  2016-07-24  7:11 ` [PATCH] " Junio C Hamano
  1 sibling, 1 reply; 17+ messages in thread
From: Johannes Schindelin @ 2016-07-23  8:59 UTC (permalink / raw)
  To: Eric Wong; +Cc: Junio C Hamano, git, Eric Sunshine

Hi Eric,

I think that this change:

On Fri, 22 Jul 2016, Eric Wong wrote:

> diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
> index 3068168..bb8f9c9 100644
> --- a/builtin/mailsplit.c
> +++ b/builtin/mailsplit.c
> @@ -8,41 +8,11 @@
>  #include "builtin.h"
>  #include "string-list.h"
>  #include "strbuf.h"
> +#include "mailinfo.h"
>  
>  static const char git_mailsplit_usage[] =
>  "git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] -o<directory> [(<mbox>|<Maildir>)...]";
>  
> -static int is_from_line(const char *line, int len)
> -{
> -	const char *colon;
> -
> -	if (len < 20 || memcmp("From ", line, 5))
> -		return 0;
> -
> -	colon = line + len - 2;
> -	line += 5;
> -	for (;;) {
> -		if (colon < line)
> -			return 0;
> -		if (*--colon == ':')
> -			break;
> -	}
> -
> -	if (!isdigit(colon[-4]) ||
> -	    !isdigit(colon[-2]) ||
> -	    !isdigit(colon[-1]) ||
> -	    !isdigit(colon[ 1]) ||
> -	    !isdigit(colon[ 2]))
> -		return 0;
> -
> -	/* year */
> -	if (strtol(colon+3, NULL, 10) <= 90)
> -		return 0;
> -
> -	/* Ok, close enough */
> -	return 1;
> -}
> -
>  static struct strbuf buf = STRBUF_INIT;
>  static int keep_cr;
>  static int mboxrd;
> diff --git a/mailinfo.c b/mailinfo.c
> index 9f19ca1..0ebd953 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
>  
>  	strbuf_release(&mi->log_message);
>  }
> +
> +int is_from_line(const char *line, int len)
> +{
> +	const char *colon;
> +
> +	if (len < 20 || memcmp("From ", line, 5))
> +		return 0;
> +
> +	colon = line + len - 2;
> +	line += 5;
> +	for (;;) {
> +		if (colon < line)
> +			return 0;
> +		if (*--colon == ':')
> +			break;
> +	}
> +
> +	if (!isdigit(colon[-4]) ||
> +	    !isdigit(colon[-2]) ||
> +	    !isdigit(colon[-1]) ||
> +	    !isdigit(colon[ 1]) ||
> +	    !isdigit(colon[ 2]))
> +		return 0;
> +
> +	/* year */
> +	if (strtol(colon+3, NULL, 10) <= 90)
> +		return 0;
> +
> +	/* Ok, close enough */
> +	return 1;
> +}
> diff --git a/mailinfo.h b/mailinfo.h
> index 93776a7..c1430a0 100644
> --- a/mailinfo.h
> +++ b/mailinfo.h
> @@ -37,5 +37,6 @@ struct mailinfo {
>  extern void setup_mailinfo(struct mailinfo *);
>  extern int mailinfo(struct mailinfo *, const char *msg, const char *patch);
>  extern void clear_mailinfo(struct mailinfo *);
> +int is_from_line(const char *line, int len);
>  
>  #endif /* MAILINFO_H */

deserves to live in a separate patch... It would make the real change
stick out better.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-23  8:59 ` Johannes Schindelin
@ 2016-07-24  3:14   ` Eric Wong
  2016-07-24  3:15     ` [PATCH 1/2] mailinfo: extract is_from_line from mailsplit Eric Wong
  2016-07-24  3:15     ` [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit Eric Wong
  0 siblings, 2 replies; 17+ messages in thread
From: Eric Wong @ 2016-07-24  3:14 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git, Eric Sunshine

Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> I think that this change:

<snip>

> deserves to live in a separate patch... It would make the real change
> stick out better.

Good point, I actually wrote a looser check based on
is_mboxrd_from before realizing the stricter check from
mailsplit would probably be more appropriate to use by default.


The following changes since commit 08bb3500a2a718c3c78b0547c68601cafa7a8fd9:

  Sixth batch of topics for 2.10 (2016-07-19 13:26:16 -0700)

are available in the git repository at:

  git://bogomips.org/git-svn.git mboxo

for you to fetch changes up to 4f769f1799db11f135948bf517f2d8d864fa778f:

  format-patch: escape "From " lines recognized by mailsplit (2016-07-23 21:38:40 +0000)

----------------------------------------------------------------
Eric Wong (2):
      mailinfo: extract is_from_line from mailsplit
      format-patch: escape "From " lines recognized by mailsplit

 builtin/mailsplit.c     | 32 +-------------------------------
 mailinfo.c              | 31 +++++++++++++++++++++++++++++++
 mailinfo.h              |  1 +
 pretty.c                | 16 +++++++++++++---
 t/t4014-format-patch.sh | 14 ++++++++++++++
 5 files changed, 60 insertions(+), 34 deletions(-)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/2] mailinfo: extract is_from_line from mailsplit
  2016-07-24  3:14   ` Eric Wong
@ 2016-07-24  3:15     ` Eric Wong
  2016-07-24  7:37       ` Andreas Schwab
  2016-07-24  3:15     ` [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit Eric Wong
  1 sibling, 1 reply; 17+ messages in thread
From: Eric Wong @ 2016-07-24  3:15 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine, Johannes Schindelin

We will be reusing is_from_line for quoting format-patch output
in the next commit.

Suggested-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Eric Wong <e@80x24.org>
---
 builtin/mailsplit.c | 32 +-------------------------------
 mailinfo.c          | 31 +++++++++++++++++++++++++++++++
 mailinfo.h          |  1 +
 3 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
index 3068168..bb8f9c9 100644
--- a/builtin/mailsplit.c
+++ b/builtin/mailsplit.c
@@ -8,41 +8,11 @@
 #include "builtin.h"
 #include "string-list.h"
 #include "strbuf.h"
+#include "mailinfo.h"
 
 static const char git_mailsplit_usage[] =
 "git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] -o<directory> [(<mbox>|<Maildir>)...]";
 
-static int is_from_line(const char *line, int len)
-{
-	const char *colon;
-
-	if (len < 20 || memcmp("From ", line, 5))
-		return 0;
-
-	colon = line + len - 2;
-	line += 5;
-	for (;;) {
-		if (colon < line)
-			return 0;
-		if (*--colon == ':')
-			break;
-	}
-
-	if (!isdigit(colon[-4]) ||
-	    !isdigit(colon[-2]) ||
-	    !isdigit(colon[-1]) ||
-	    !isdigit(colon[ 1]) ||
-	    !isdigit(colon[ 2]))
-		return 0;
-
-	/* year */
-	if (strtol(colon+3, NULL, 10) <= 90)
-		return 0;
-
-	/* Ok, close enough */
-	return 1;
-}
-
 static struct strbuf buf = STRBUF_INIT;
 static int keep_cr;
 static int mboxrd;
diff --git a/mailinfo.c b/mailinfo.c
index 9f19ca1..0ebd953 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
 
 	strbuf_release(&mi->log_message);
 }
+
+int is_from_line(const char *line, int len)
+{
+	const char *colon;
+
+	if (len < 20 || memcmp("From ", line, 5))
+		return 0;
+
+	colon = line + len - 2;
+	line += 5;
+	for (;;) {
+		if (colon < line)
+			return 0;
+		if (*--colon == ':')
+			break;
+	}
+
+	if (!isdigit(colon[-4]) ||
+	    !isdigit(colon[-2]) ||
+	    !isdigit(colon[-1]) ||
+	    !isdigit(colon[ 1]) ||
+	    !isdigit(colon[ 2]))
+		return 0;
+
+	/* year */
+	if (strtol(colon+3, NULL, 10) <= 90)
+		return 0;
+
+	/* Ok, close enough */
+	return 1;
+}
diff --git a/mailinfo.h b/mailinfo.h
index 93776a7..c1430a0 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -37,5 +37,6 @@ struct mailinfo {
 extern void setup_mailinfo(struct mailinfo *);
 extern int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 extern void clear_mailinfo(struct mailinfo *);
+int is_from_line(const char *line, int len);
 
 #endif /* MAILINFO_H */
-- 
EW

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24  3:14   ` Eric Wong
  2016-07-24  3:15     ` [PATCH 1/2] mailinfo: extract is_from_line from mailsplit Eric Wong
@ 2016-07-24  3:15     ` Eric Wong
  2016-07-24  7:37       ` Johannes Schindelin
  1 sibling, 1 reply; 17+ messages in thread
From: Eric Wong @ 2016-07-24  3:15 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine, Johannes Schindelin

Users have mistakenly copied "From " lines into commit messages
in the past, and will certainly make the same mistakes in the
future.  Since not everyone uses mboxrd, yet, we should at least
prevent miss-split mails by always escaping "From " lines based
on the check used by mailsplit.

mailsplit will not perform unescaping by default, yet, as it
could cause further invocations of format-patch from old
versions of git to generate bad output.  Propagating the mboxo
escaping is preferable to miss-split patches.  Unescaping may
still be performed via "--mboxrd".

ref: https://public-inbox.org/git/20160606230248.GA15906@dcvr.yhbt.net/T/#u

Signed-off-by: Eric Wong <e@80x24.org>
---
 pretty.c                | 16 +++++++++++++---
 t/t4014-format-patch.sh | 14 ++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/pretty.c b/pretty.c
index 9fa42c2..898c0a3 100644
--- a/pretty.c
+++ b/pretty.c
@@ -10,6 +10,7 @@
 #include "color.h"
 #include "reflog-walk.h"
 #include "gpg-interface.h"
+#include "mailinfo.h"
 
 static char *user_format;
 static struct cmt_fmt_map {
@@ -1745,9 +1746,18 @@ void pp_remainder(struct pretty_print_context *pp,
 			strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
 					     line, linelen);
 		else {
-			if (pp->fmt == CMIT_FMT_MBOXRD &&
-					is_mboxrd_from(line, linelen))
-				strbuf_addch(sb, '>');
+			switch (pp->fmt) {
+			case CMIT_FMT_EMAIL:
+				if (is_from_line(line, linelen))
+					strbuf_addch(sb, '>');
+				break;
+			case CMIT_FMT_MBOXRD:
+				if (is_mboxrd_from(line, linelen))
+					strbuf_addch(sb, '>');
+				break;
+			default:
+				break;
+			}
 
 			strbuf_add(sb, line, linelen);
 		}
diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index 1206c48..8fa3982 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -1606,4 +1606,18 @@ test_expect_success 'format-patch --pretty=mboxrd' '
 	test_cmp expect actual
 '
 
+test_expect_success 'format-patch From escaping' '
+	cat >msg <<-INPUT_END &&
+	somebody pasted format-patch output into a body
+
+	From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+	INPUT_END
+
+	C=$(git commit-tree HEAD^^{tree} -p HEAD <msg) &&
+	git format-patch --stdout -1 $C~1..$C >patch &&
+	git grep -h --no-index \
+		">From 0000000000000000000000000000000000000000 " \
+		patch
+'
+
 test_done
-- 
EW

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-22 22:47 [PATCH] format-patch: escape "From " lines recognized by mailsplit Eric Wong
  2016-07-23  8:59 ` Johannes Schindelin
@ 2016-07-24  7:11 ` Junio C Hamano
  2016-07-24 10:58   ` Eric Wong
  2016-07-24 15:51   ` Junio C Hamano
  1 sibling, 2 replies; 17+ messages in thread
From: Junio C Hamano @ 2016-07-24  7:11 UTC (permalink / raw)
  To: Eric Wong; +Cc: git, Eric Sunshine

Eric Wong <e@80x24.org> writes:

> Users have mistakenly copied "From " lines into commit messages
> in the past, and will certainly make the same mistakes in the
> future.  Since not everyone uses mboxrd, yet, we should at least
> prevent miss-split mails by always escaping "From " lines based
> on the check used by mailsplit.
>
> mailsplit will not perform unescaping by default, yet, as it
> could cause further invocations of format-patch from old
> versions of git to generate bad output.  Propagating the mboxo
> escaping is preferable to miss-split patches.  Unescaping may
> still be performed via "--mboxrd".

As a tool to produce mbox file, quoting like this in format-patch
output may make sense, I would think, but shouldn't send-email undo
this when sending individual patches?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] mailinfo: extract is_from_line from mailsplit
  2016-07-24  3:15     ` [PATCH 1/2] mailinfo: extract is_from_line from mailsplit Eric Wong
@ 2016-07-24  7:37       ` Andreas Schwab
  2016-07-24  8:05         ` Johannes Schindelin
  2016-07-25 18:03         ` Junio C Hamano
  0 siblings, 2 replies; 17+ messages in thread
From: Andreas Schwab @ 2016-07-24  7:37 UTC (permalink / raw)
  To: Eric Wong; +Cc: Junio C Hamano, git, Eric Sunshine, Johannes Schindelin

Eric Wong <e@80x24.org> writes:

> diff --git a/mailinfo.c b/mailinfo.c
> index 9f19ca1..0ebd953 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
>  
>  	strbuf_release(&mi->log_message);
>  }
> +
> +int is_from_line(const char *line, int len)
> +{
> +	const char *colon;
> +
> +	if (len < 20 || memcmp("From ", line, 5))
> +		return 0;
> +
> +	colon = line + len - 2;
> +	line += 5;
> +	for (;;) {
> +		if (colon < line)
> +			return 0;
> +		if (*--colon == ':')
> +			break;
> +	}
> +
> +	if (!isdigit(colon[-4]) ||
> +	    !isdigit(colon[-2]) ||
> +	    !isdigit(colon[-1]) ||
> +	    !isdigit(colon[ 1]) ||
> +	    !isdigit(colon[ 2]))
> +		return 0;
> +
> +	/* year */
> +	if (strtol(colon+3, NULL, 10) <= 90)
> +		return 0;
> +
> +	/* Ok, close enough */
> +	return 1;
> +}

Should this be made more strict, like by checking for a space before the
year?

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24  3:15     ` [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit Eric Wong
@ 2016-07-24  7:37       ` Johannes Schindelin
  2016-07-24 15:30         ` Jeff King
  0 siblings, 1 reply; 17+ messages in thread
From: Johannes Schindelin @ 2016-07-24  7:37 UTC (permalink / raw)
  To: Eric Wong; +Cc: Junio C Hamano, git, Eric Sunshine

Hi Eric,

On Sun, 24 Jul 2016, Eric Wong wrote:

> @@ -1745,9 +1746,18 @@ void pp_remainder(struct pretty_print_context *pp,
>  			strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
>  					     line, linelen);
>  		else {
> -			if (pp->fmt == CMIT_FMT_MBOXRD &&
> -					is_mboxrd_from(line, linelen))
> -				strbuf_addch(sb, '>');
> +			switch (pp->fmt) {
> +			case CMIT_FMT_EMAIL:
> +				if (is_from_line(line, linelen))
> +					strbuf_addch(sb, '>');
> +				break;
> +			case CMIT_FMT_MBOXRD:
> +				if (is_mboxrd_from(line, linelen))
> +					strbuf_addch(sb, '>');
> +				break;
> +			default:
> +				break;
> +			}

Sorry to be nitpicking once again; I think this would be conciser (and
easier to read at least for me) as:

-			if (pp->fmt == CMIT_FMT_MBOXRD &&
-					is_mboxrd_from(line, linelen))
+			if ((pp->fmt == CMIT_FMT_MBOXRD &&
+			     is_mboxrd_from(line, linelen)) ||
+			    (pp->fmt == CMIT_FMT_EMAIL &&
+			     is_from_line(line, linelen)))
 				strbuf_addch(sb, '>');

> diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
> index 1206c48..8fa3982 100755
> --- a/t/t4014-format-patch.sh
> +++ b/t/t4014-format-patch.sh
> @@ -1606,4 +1606,18 @@ test_expect_success 'format-patch --pretty=mboxrd' '
>  	test_cmp expect actual
>  '
>  
> +test_expect_success 'format-patch From escaping' '
> +	cat >msg <<-INPUT_END &&
> +	somebody pasted format-patch output into a body
> +
> +	From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +	INPUT_END
> +
> +	C=$(git commit-tree HEAD^^{tree} -p HEAD <msg) &&

The double caret makes this a bit hard to read. Maybe this instead?

+	C=$(git commit-tree HEAD: -p HEAD^ <msg) &&

> +	git format-patch --stdout -1 $C~1..$C >patch &&

Either "-1 $C" or "$C~1..$C", not both...

> +	git grep -h --no-index \
> +		">From 0000000000000000000000000000000000000000 " \
> +		patch
> +'
> +
>  test_done
> -- 
> EW

Heh, that's a nice Git version ;-)

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] mailinfo: extract is_from_line from mailsplit
  2016-07-24  7:37       ` Andreas Schwab
@ 2016-07-24  8:05         ` Johannes Schindelin
  2016-07-24  8:13           ` Andreas Schwab
  2016-07-25 18:03         ` Junio C Hamano
  1 sibling, 1 reply; 17+ messages in thread
From: Johannes Schindelin @ 2016-07-24  8:05 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Eric Wong, Junio C Hamano, git, Eric Sunshine

Hi Andreas,

On Sun, 24 Jul 2016, Andreas Schwab wrote:

> Eric Wong <e@80x24.org> writes:
> 
> > diff --git a/mailinfo.c b/mailinfo.c
> > index 9f19ca1..0ebd953 100644
> > --- a/mailinfo.c
> > +++ b/mailinfo.c
> > @@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
> >  
> >  	strbuf_release(&mi->log_message);
> >  }
> > +
> > +int is_from_line(const char *line, int len)
> > +{
> > +	const char *colon;
> > +
> > +	if (len < 20 || memcmp("From ", line, 5))
> > +		return 0;
> > +
> > +	colon = line + len - 2;
> > +	line += 5;
> > +	for (;;) {
> > +		if (colon < line)
> > +			return 0;
> > +		if (*--colon == ':')
> > +			break;
> > +	}
> > +
> > +	if (!isdigit(colon[-4]) ||
> > +	    !isdigit(colon[-2]) ||
> > +	    !isdigit(colon[-1]) ||
> > +	    !isdigit(colon[ 1]) ||
> > +	    !isdigit(colon[ 2]))
> > +		return 0;
> > +
> > +	/* year */
> > +	if (strtol(colon+3, NULL, 10) <= 90)
> > +		return 0;
> > +
> > +	/* Ok, close enough */
> > +	return 1;
> > +}
> 
> Should this be made more strict, like by checking for a space before the
> year?

This patch only moves the function, so it would be inappropriate to change
it.

If you want to make it stricter, you will have to submit a separate patch.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] mailinfo: extract is_from_line from mailsplit
  2016-07-24  8:05         ` Johannes Schindelin
@ 2016-07-24  8:13           ` Andreas Schwab
  0 siblings, 0 replies; 17+ messages in thread
From: Andreas Schwab @ 2016-07-24  8:13 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Eric Wong, Junio C Hamano, git, Eric Sunshine

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Hi Andreas,
>
> On Sun, 24 Jul 2016, Andreas Schwab wrote:
>
>> Eric Wong <e@80x24.org> writes:
>> 
>> > diff --git a/mailinfo.c b/mailinfo.c
>> > index 9f19ca1..0ebd953 100644
>> > --- a/mailinfo.c
>> > +++ b/mailinfo.c
>> > @@ -1035,3 +1035,34 @@ void clear_mailinfo(struct mailinfo *mi)
>> >  
>> >  	strbuf_release(&mi->log_message);
>> >  }
>> > +
>> > +int is_from_line(const char *line, int len)
>> > +{
>> > +	const char *colon;
>> > +
>> > +	if (len < 20 || memcmp("From ", line, 5))
>> > +		return 0;
>> > +
>> > +	colon = line + len - 2;
>> > +	line += 5;
>> > +	for (;;) {
>> > +		if (colon < line)
>> > +			return 0;
>> > +		if (*--colon == ':')
>> > +			break;
>> > +	}
>> > +
>> > +	if (!isdigit(colon[-4]) ||
>> > +	    !isdigit(colon[-2]) ||
>> > +	    !isdigit(colon[-1]) ||
>> > +	    !isdigit(colon[ 1]) ||
>> > +	    !isdigit(colon[ 2]))
>> > +		return 0;
>> > +
>> > +	/* year */
>> > +	if (strtol(colon+3, NULL, 10) <= 90)
>> > +		return 0;
>> > +
>> > +	/* Ok, close enough */
>> > +	return 1;
>> > +}
>> 
>> Should this be made more strict, like by checking for a space before the
>> year?
>
> This patch only moves the function, so it would be inappropriate to change
> it.
>
> If you want to make it stricter, you will have to submit a separate patch.

You didn't answer my question.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24  7:11 ` [PATCH] " Junio C Hamano
@ 2016-07-24 10:58   ` Eric Wong
  2016-07-24 15:51   ` Junio C Hamano
  1 sibling, 0 replies; 17+ messages in thread
From: Eric Wong @ 2016-07-24 10:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine

Junio C Hamano <gitster@pobox.com> wrote:
> As a tool to produce mbox file, quoting like this in format-patch
> output may make sense, I would think, but shouldn't send-email undo
> this when sending individual patches?

Yes, that sounds like a good idea.  Thanks.
Will followup in a day or two.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24  7:37       ` Johannes Schindelin
@ 2016-07-24 15:30         ` Jeff King
  0 siblings, 0 replies; 17+ messages in thread
From: Jeff King @ 2016-07-24 15:30 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Eric Wong, Junio C Hamano, git, Eric Sunshine

On Sun, Jul 24, 2016 at 09:37:57AM +0200, Johannes Schindelin wrote:

> On Sun, 24 Jul 2016, Eric Wong wrote:
> 
> > @@ -1745,9 +1746,18 @@ void pp_remainder(struct pretty_print_context *pp,
> >  			strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
> >  					     line, linelen);
> >  		else {
> > -			if (pp->fmt == CMIT_FMT_MBOXRD &&
> > -					is_mboxrd_from(line, linelen))
> > -				strbuf_addch(sb, '>');
> > +			switch (pp->fmt) {
> > +			case CMIT_FMT_EMAIL:
> > +				if (is_from_line(line, linelen))
> > +					strbuf_addch(sb, '>');
> > +				break;
> > +			case CMIT_FMT_MBOXRD:
> > +				if (is_mboxrd_from(line, linelen))
> > +					strbuf_addch(sb, '>');
> > +				break;
> > +			default:
> > +				break;
> > +			}
> 
> Sorry to be nitpicking once again; I think this would be conciser (and
> easier to read at least for me) as:
> 
> -			if (pp->fmt == CMIT_FMT_MBOXRD &&
> -					is_mboxrd_from(line, linelen))
> +			if ((pp->fmt == CMIT_FMT_MBOXRD &&
> +			     is_mboxrd_from(line, linelen)) ||
> +			    (pp->fmt == CMIT_FMT_EMAIL &&
> +			     is_from_line(line, linelen)))
>  				strbuf_addch(sb, '>');

Since we are nitpicking, I think:

  static int needs_from_quoting(enum cmit_fmt fmt,
                                const char *line, size_t len)
  {
	if (fmt == CMIT_FMT_MBOXRD && is_mboxrd_from(line, linelen))
		return 1;
	if (fmt == CMIT_FMT_EMAIL && is_from_line(line, linelen))
		return 1;
	return 0;
  }

  ...
  if (needs_from_quoting(pp->fmt, line, linelen))
	strbuf_addch(sb, '>');

is even nicer. There are lots of alternatives to write the helper
function, and I do not care much which one is chosen. But splitting it
out into a concise "do we need to do this?" query function makes the
flow in pp_remainder much easier to follow.

-Peff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24  7:11 ` [PATCH] " Junio C Hamano
  2016-07-24 10:58   ` Eric Wong
@ 2016-07-24 15:51   ` Junio C Hamano
  2016-07-25  8:43     ` Eric Wong
  1 sibling, 1 reply; 17+ messages in thread
From: Junio C Hamano @ 2016-07-24 15:51 UTC (permalink / raw)
  To: Eric Wong; +Cc: git, Eric Sunshine

Junio C Hamano <gitster@pobox.com> writes:

> Eric Wong <e@80x24.org> writes:
>
>> Users have mistakenly copied "From " lines into commit messages
>> in the past, and will certainly make the same mistakes in the
>> future.  Since not everyone uses mboxrd, yet, we should at least
>> prevent miss-split mails by always escaping "From " lines based
>> on the check used by mailsplit.
>>
>> mailsplit will not perform unescaping by default, yet, as it
>> could cause further invocations of format-patch from old
>> versions of git to generate bad output.  Propagating the mboxo
>> escaping is preferable to miss-split patches.  Unescaping may
>> still be performed via "--mboxrd".
>
> As a tool to produce mbox file, quoting like this in format-patch
> output may make sense, I would think, but shouldn't send-email undo
> this when sending individual patches?

Also, doesn't it break "git rebase" (non-interactive), or anything
that internally runs format-patch to individual files and then runs
am on each of them, anything that knows that each output file from
format-patch corresponds to a single change and there is no need to
split, badly if we do this unconditionally?

IOW, shouldn't this be an optional feature to format-patch that is
triggered by passing a new command line option that currently nobody
is passing?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-24 15:51   ` Junio C Hamano
@ 2016-07-25  8:43     ` Eric Wong
  2016-07-25 17:33       ` Junio C Hamano
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Wong @ 2016-07-25  8:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine

Junio C Hamano <gitster@pobox.com> wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> > Eric Wong <e@80x24.org> writes:
> >
> >> Users have mistakenly copied "From " lines into commit messages
> >> in the past, and will certainly make the same mistakes in the
> >> future.  Since not everyone uses mboxrd, yet, we should at least
> >> prevent miss-split mails by always escaping "From " lines based
> >> on the check used by mailsplit.
> >>
> >> mailsplit will not perform unescaping by default, yet, as it
> >> could cause further invocations of format-patch from old
> >> versions of git to generate bad output.  Propagating the mboxo
> >> escaping is preferable to miss-split patches.  Unescaping may
> >> still be performed via "--mboxrd".
> >
> > As a tool to produce mbox file, quoting like this in format-patch
> > output may make sense, I would think, but shouldn't send-email undo
> > this when sending individual patches?
> 
> Also, doesn't it break "git rebase" (non-interactive), or anything
> that internally runs format-patch to individual files and then runs
> am on each of them, anything that knows that each output file from
> format-patch corresponds to a single change and there is no need to
> split, badly if we do this unconditionally?

Yes, rebase should probably unescape is_from_line matches.

Anything which spawns an editor should probably warn/reprompt
users on is_from_line() matches, too, to prevent user errors
from sneaking in.

> IOW, shouldn't this be an optional feature to format-patch that is
> triggered by passing a new command line option that currently nobody
> is passing?

I added --pretty=mboxrd as the optional feature for this reason.
It'll take a while for people to start using it (or perhaps make
it the default in git 3.0).
In the meantime, I would prefer extra ">" being injected rather
than breaking mailsplit completely.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-25  8:43     ` Eric Wong
@ 2016-07-25 17:33       ` Junio C Hamano
  2016-07-25 20:49         ` Eric Wong
  0 siblings, 1 reply; 17+ messages in thread
From: Junio C Hamano @ 2016-07-25 17:33 UTC (permalink / raw)
  To: Eric Wong; +Cc: git, Eric Sunshine

Eric Wong <e@80x24.org> writes:

>> Also, doesn't it break "git rebase" (non-interactive), or anything
>> that internally runs format-patch to individual files and then runs
>> am on each of them, anything that knows that each output file from
>> format-patch corresponds to a single change and there is no need to
>> split, badly if we do this unconditionally?
>
> Yes, rebase should probably unescape is_from_line matches.

This shouldn't matter for "git rebase", as it only reads from the
mbox "From <commit object name> <datestamp>" line to learn the
original commit and extract the log message directly from there.

But a third-party script that wants to read format-patch output
would be forced to upgrade, which is a pain if we make this change
unconditionally.

>> IOW, shouldn't this be an optional feature to format-patch that
>> is triggered by passing a new command line option that currently
>> nobody is passing?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] mailinfo: extract is_from_line from mailsplit
  2016-07-24  7:37       ` Andreas Schwab
  2016-07-24  8:05         ` Johannes Schindelin
@ 2016-07-25 18:03         ` Junio C Hamano
  1 sibling, 0 replies; 17+ messages in thread
From: Junio C Hamano @ 2016-07-25 18:03 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Eric Wong, git, Eric Sunshine, Johannes Schindelin

Andreas Schwab <schwab@linux-m68k.org> writes:

>> +	colon = line + len - 2;
>> +	line += 5;
>> +	for (;;) {
>> +		if (colon < line)
>> +			return 0;
>> +		if (*--colon == ':')
>> +			break;
>> +	}
>> +
>> +	if (!isdigit(colon[-4]) ||
>> +	    !isdigit(colon[-2]) ||
>> +	    !isdigit(colon[-1]) ||
>> +	    !isdigit(colon[ 1]) ||
>> +	    !isdigit(colon[ 2]))
>> +		return 0;
>> +
>> +	/* year */
>> +	if (strtol(colon+3, NULL, 10) <= 90)
>> +		return 0;
>> +
>> +	/* Ok, close enough */
>> +	return 1;
>> +}
>
> Should this be made more strict, like by checking for a space before the
> year?

The function seems to judge the line to be "close enough" by
checking these places (please view with fixed-width font ;-):

    From me Mon Jul 25 01:23:45 2016
    xxxxx               x xxxxx ....

We only see if the year part is more than 90 but we do not even
ensure that it is at the end of the line without any further
garbage, which I think is probably OK for the purpose of "close
enough".

We also do not bother rejecting a single-digit hour, or there is a
colon between the hour and minute part.

I would say requiring SP between the year and the second, and
requiring colon between hour and minute, would probably be a good
change, i.e. something like this:

	if (!isdigit(colon[-4]) ||
	    colon[-3] != ':' ||
	    !isdigit(colon[-2]) ||
	    !isdigit(colon[-1]) ||
	    !isdigit(colon[ 1]) ||
	    !isdigit(colon[ 2]) ||
	    colon[3] != ' ')

would be safe enough without increasing the false negatives too
much.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] format-patch: escape "From " lines recognized by mailsplit
  2016-07-25 17:33       ` Junio C Hamano
@ 2016-07-25 20:49         ` Eric Wong
  0 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2016-07-25 20:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Eric Sunshine

Junio C Hamano <gitster@pobox.com> wrote:
> Eric Wong <e@80x24.org> writes:
> 
> >> Also, doesn't it break "git rebase" (non-interactive), or anything
> >> that internally runs format-patch to individual files and then runs
> >> am on each of them, anything that knows that each output file from
> >> format-patch corresponds to a single change and there is no need to
> >> split, badly if we do this unconditionally?
> >
> > Yes, rebase should probably unescape is_from_line matches.
> 
> This shouldn't matter for "git rebase", as it only reads from the
> mbox "From <commit object name> <datestamp>" line to learn the
> original commit and extract the log message directly from there.

OK.

> But a third-party script that wants to read format-patch output
> would be forced to upgrade, which is a pain if we make this change
> unconditionally.

I suppose unconditionally having mailsplit unescape
">From ...  $DATE" lines might be acceptable?

It'll still propagate these mistakes to older versions of git,
but those installations will eventually become fewer.

> >> IOW, shouldn't this be an optional feature to format-patch that
> >> is triggered by passing a new command line option that currently
> >> nobody is passing?

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2016-07-25 20:49 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-22 22:47 [PATCH] format-patch: escape "From " lines recognized by mailsplit Eric Wong
2016-07-23  8:59 ` Johannes Schindelin
2016-07-24  3:14   ` Eric Wong
2016-07-24  3:15     ` [PATCH 1/2] mailinfo: extract is_from_line from mailsplit Eric Wong
2016-07-24  7:37       ` Andreas Schwab
2016-07-24  8:05         ` Johannes Schindelin
2016-07-24  8:13           ` Andreas Schwab
2016-07-25 18:03         ` Junio C Hamano
2016-07-24  3:15     ` [PATCH 2/2] format-patch: escape "From " lines recognized by mailsplit Eric Wong
2016-07-24  7:37       ` Johannes Schindelin
2016-07-24 15:30         ` Jeff King
2016-07-24  7:11 ` [PATCH] " Junio C Hamano
2016-07-24 10:58   ` Eric Wong
2016-07-24 15:51   ` Junio C Hamano
2016-07-25  8:43     ` Eric Wong
2016-07-25 17:33       ` Junio C Hamano
2016-07-25 20:49         ` Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).