git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/2] mailinfo: Add support for keep_cr
@ 2017-01-12  9:20 Matthew Wilcox
  2017-01-12  9:20 ` [PATCH 2/2] mailinfo: Understand forwarded patches Matthew Wilcox
  2017-01-12 18:04 ` [PATCH 1/2] mailinfo: Add support for keep_cr Jonathan Tan
  0 siblings, 2 replies; 7+ messages in thread
From: Matthew Wilcox @ 2017-01-12  9:20 UTC (permalink / raw)
  To: git; +Cc: Matthew Wilcox, Jonathan Tan

From: Matthew Wilcox <mawilcox@microsoft.com>

If you have a base-64 encoded patch with CRLF endings (as produced
by forwarding a patch from Outlook to a Linux machine, for example),
the keep_cr setting is not honoured because keep_cr is only passed
to mailsplit, which does not look through the encoding.  The keep_cr
logic needs to be applied after the base64 decode.  I copied that
logic to handle_filter(), and rather than add a new keep_cr parameter
to handle_filter, I opted to add keep_cr to struct mailinfo; it seemed
appropriate given use_scissors was already there.

Then I needed to initialise keep_cr in the struct mailinfo passed from
git-am, and rather than thread a 'keep_cr' parameter all the way through
to parse_mail(), I decided to add keep_cr to struct am_state, which let
it be removed as a parameter from five other functions.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 builtin/am.c | 49 ++++++++++++++++++++++++-------------------------
 mailinfo.c   |  6 ++++++
 mailinfo.h   |  1 +
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 31fb60578..6cb6e6ca8 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -124,6 +124,7 @@ struct am_state {
 	int keep; /* enum keep_type */
 	int message_id;
 	int scissors; /* enum scissors_type */
+	int keep_cr;
 	struct argv_array git_apply_opts;
 	const char *resolvemsg;
 	int committer_date_is_author_date;
@@ -143,6 +144,7 @@ static void am_state_init(struct am_state *state, const char *dir)
 
 	memset(state, 0, sizeof(*state));
 
+	state->keep_cr = -1;
 	assert(dir);
 	state->dir = xstrdup(dir);
 
@@ -697,7 +699,7 @@ static int detect_patch_format(const char **paths)
  * a mbox file or a Maildir. Returns 0 on success, -1 on failure.
  */
 static int split_mail_mbox(struct am_state *state, const char **paths,
-				int keep_cr, int mboxrd)
+				int mboxrd)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
 	struct strbuf last = STRBUF_INIT;
@@ -707,7 +709,7 @@ static int split_mail_mbox(struct am_state *state, const char **paths,
 	argv_array_pushf(&cp.args, "-d%d", state->prec);
 	argv_array_pushf(&cp.args, "-o%s", state->dir);
 	argv_array_push(&cp.args, "-b");
-	if (keep_cr)
+	if (state->keep_cr)
 		argv_array_push(&cp.args, "--keep-cr");
 	if (mboxrd)
 		argv_array_push(&cp.args, "--mboxrd");
@@ -737,7 +739,7 @@ typedef int (*mail_conv_fn)(FILE *out, FILE *in, int keep_cr);
  * Returns 0 on success, -1 on failure.
  */
 static int split_mail_conv(mail_conv_fn fn, struct am_state *state,
-			const char **paths, int keep_cr)
+			const char **paths)
 {
 	static const char *stdin_only[] = {"-", NULL};
 	int i;
@@ -766,7 +768,7 @@ static int split_mail_conv(mail_conv_fn fn, struct am_state *state,
 			return error_errno(_("could not open '%s' for writing"),
 					   mail);
 
-		ret = fn(out, in, keep_cr);
+		ret = fn(out, in, state->keep_cr);
 
 		fclose(out);
 		fclose(in);
@@ -826,8 +828,7 @@ static int stgit_patch_to_mail(FILE *out, FILE *in, int keep_cr)
  *
  * Returns 0 on success, -1 on failure.
  */
-static int split_mail_stgit_series(struct am_state *state, const char **paths,
-					int keep_cr)
+static int split_mail_stgit_series(struct am_state *state, const char **paths)
 {
 	const char *series_dir;
 	char *series_dir_buf;
@@ -857,7 +858,7 @@ static int split_mail_stgit_series(struct am_state *state, const char **paths,
 	strbuf_release(&sb);
 	free(series_dir_buf);
 
-	ret = split_mail_conv(stgit_patch_to_mail, state, patches.argv, keep_cr);
+	ret = split_mail_conv(stgit_patch_to_mail, state, patches.argv);
 
 	argv_array_clear(&patches);
 	return ret;
@@ -937,30 +938,27 @@ static int hg_patch_to_mail(FILE *out, FILE *in, int keep_cr)
  * state->cur will be set to the index of the first mail, and state->last will
  * be set to the index of the last mail.
  *
- * Set keep_cr to 0 to convert all lines ending with \r\n to end with \n, 1
- * to disable this behavior, -1 to use the default configured setting.
- *
  * Returns 0 on success, -1 on failure.
  */
 static int split_mail(struct am_state *state, enum patch_format patch_format,
-			const char **paths, int keep_cr)
+			const char **paths)
 {
-	if (keep_cr < 0) {
-		keep_cr = 0;
-		git_config_get_bool("am.keepcr", &keep_cr);
+	if (state->keep_cr < 0) {
+		state->keep_cr = 0;
+		git_config_get_bool("am.keepcr", &state->keep_cr);
 	}
 
 	switch (patch_format) {
 	case PATCH_FORMAT_MBOX:
-		return split_mail_mbox(state, paths, keep_cr, 0);
+		return split_mail_mbox(state, paths, 0);
 	case PATCH_FORMAT_STGIT:
-		return split_mail_conv(stgit_patch_to_mail, state, paths, keep_cr);
+		return split_mail_conv(stgit_patch_to_mail, state, paths);
 	case PATCH_FORMAT_STGIT_SERIES:
-		return split_mail_stgit_series(state, paths, keep_cr);
+		return split_mail_stgit_series(state, paths);
 	case PATCH_FORMAT_HG:
-		return split_mail_conv(hg_patch_to_mail, state, paths, keep_cr);
+		return split_mail_conv(hg_patch_to_mail, state, paths);
 	case PATCH_FORMAT_MBOXRD:
-		return split_mail_mbox(state, paths, keep_cr, 1);
+		return split_mail_mbox(state, paths, 1);
 	default:
 		die("BUG: invalid patch_format");
 	}
@@ -971,7 +969,7 @@ static int split_mail(struct am_state *state, enum patch_format patch_format,
  * Setup a new am session for applying patches
  */
 static void am_setup(struct am_state *state, enum patch_format patch_format,
-			const char **paths, int keep_cr)
+			const char **paths)
 {
 	struct object_id curr_head;
 	const char *str;
@@ -988,7 +986,7 @@ static void am_setup(struct am_state *state, enum patch_format patch_format,
 	if (mkdir(state->dir, 0777) < 0 && errno != EEXIST)
 		die_errno(_("failed to create directory '%s'"), state->dir);
 
-	if (split_mail(state, patch_format, paths, keep_cr) < 0) {
+	if (split_mail(state, patch_format, paths) < 0) {
 		am_destroy(state);
 		die(_("Failed to split patches."));
 	}
@@ -1276,6 +1274,8 @@ static int parse_mail(struct am_state *state, const char *mail)
 		die("BUG: invalid value for state->scissors");
 	}
 
+	mi.keep_cr = state->keep_cr;
+
 	mi.input = fopen(mail, "r");
 	if (!mi.input)
 		die("could not open input");
@@ -2224,7 +2224,6 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 {
 	struct am_state state;
 	int binary = -1;
-	int keep_cr = -1;
 	int patch_format = PATCH_FORMAT_UNKNOWN;
 	enum resume_mode resume = RESUME_FALSE;
 	int in_progress;
@@ -2254,10 +2253,10 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 			N_("pass -b flag to git-mailinfo"), KEEP_NON_PATCH),
 		OPT_BOOL('m', "message-id", &state.message_id,
 			N_("pass -m flag to git-mailinfo")),
-		{ OPTION_SET_INT, 0, "keep-cr", &keep_cr, NULL,
+		{ OPTION_SET_INT, 0, "keep-cr", &state.keep_cr, NULL,
 		  N_("pass --keep-cr flag to git-mailsplit for mbox format"),
 		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1},
-		{ OPTION_SET_INT, 0, "no-keep-cr", &keep_cr, NULL,
+		{ OPTION_SET_INT, 0, "no-keep-cr", &state.keep_cr, NULL,
 		  N_("do not pass --keep-cr flag to git-mailsplit independent of am.keepcr"),
 		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 0},
 		OPT_BOOL('c', "scissors", &state.scissors,
@@ -2392,7 +2391,7 @@ int cmd_am(int argc, const char **argv, const char *prefix)
 				argv_array_push(&paths, mkpath("%s/%s", prefix, argv[i]));
 		}
 
-		am_setup(&state, patch_format, paths.argv, keep_cr);
+		am_setup(&state, patch_format, paths.argv);
 
 		argv_array_clear(&paths);
 	}
diff --git a/mailinfo.c b/mailinfo.c
index a489d9d0f..2059704a8 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -812,6 +812,12 @@ static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
 
 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
 {
+	if (!mi->keep_cr && line->len > 1 &&
+			line->buf[line->len - 1] == '\n' &&
+			line->buf[line->len - 2] == '\r') {
+		strbuf_setlen(line, line->len - 2);
+		strbuf_addch(line, '\n');
+	}
 	switch (mi->filter_stage) {
 	case 0:
 		if (!handle_commit_msg(mi, line))
diff --git a/mailinfo.h b/mailinfo.h
index 04a25351d..9fddcf684 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -12,6 +12,7 @@ struct mailinfo {
 	struct strbuf email;
 	int keep_subject;
 	int keep_non_patch_brackets_in_subject;
+	int keep_cr;
 	int add_message_id;
 	int use_scissors;
 	int use_inbody_headers;
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] mailinfo: Understand forwarded patches
  2017-01-12  9:20 [PATCH 1/2] mailinfo: Add support for keep_cr Matthew Wilcox
@ 2017-01-12  9:20 ` Matthew Wilcox
  2017-01-12 18:17   ` Jonathan Tan
  2017-01-12 20:18   ` Junio C Hamano
  2017-01-12 18:04 ` [PATCH 1/2] mailinfo: Add support for keep_cr Jonathan Tan
  1 sibling, 2 replies; 7+ messages in thread
From: Matthew Wilcox @ 2017-01-12  9:20 UTC (permalink / raw)
  To: git; +Cc: Matthew Wilcox, Jonathan Tan

From: Matthew Wilcox <mawilcox@microsoft.com>

Extend the --scissors mechanism to strip off the preamble created by
forwarding a patch.  There are a couple of extra headers ("Sent" and
"To") added by forwarding, but other than that, the --scissors option
will now remove patches forwarded from Microsoft Outlook to a Linux
email account.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 mailinfo.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mailinfo.c b/mailinfo.c
index 2059704a8..fc1275532 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -332,7 +332,7 @@ static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
 
 #define MAX_HDR_PARSED 10
 static const char *header[MAX_HDR_PARSED] = {
-	"From","Subject","Date",
+	"From","Subject","Date","Sent","To",
 };
 
 static inline int cmp_header(const struct strbuf *line, const char *hdr)
@@ -685,6 +685,13 @@ static int is_scissors_line(const char *line)
 			c++;
 			continue;
 		}
+		if (!memcmp(c, "Original Message", 16)) {
+			in_perforation = 1;
+			perforation += 16;
+			scissors += 16;
+			c += 15;
+			continue;
+		}
 		in_perforation = 0;
 	}
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] mailinfo: Add support for keep_cr
  2017-01-12  9:20 [PATCH 1/2] mailinfo: Add support for keep_cr Matthew Wilcox
  2017-01-12  9:20 ` [PATCH 2/2] mailinfo: Understand forwarded patches Matthew Wilcox
@ 2017-01-12 18:04 ` Jonathan Tan
  2017-01-12 19:06   ` Matthew Wilcox
  1 sibling, 1 reply; 7+ messages in thread
From: Jonathan Tan @ 2017-01-12 18:04 UTC (permalink / raw)
  To: Matthew Wilcox, git; +Cc: Matthew Wilcox

On 01/12/2017 01:20 AM, Matthew Wilcox wrote:
> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> If you have a base-64 encoded patch with CRLF endings (as produced
> by forwarding a patch from Outlook to a Linux machine, for example),
> the keep_cr setting is not honoured because keep_cr is only passed
> to mailsplit, which does not look through the encoding.  The keep_cr
> logic needs to be applied after the base64 decode.  I copied that
> logic to handle_filter(), and rather than add a new keep_cr parameter
> to handle_filter, I opted to add keep_cr to struct mailinfo; it seemed
> appropriate given use_scissors was already there.
>
> Then I needed to initialise keep_cr in the struct mailinfo passed from
> git-am, and rather than thread a 'keep_cr' parameter all the way through
> to parse_mail(), I decided to add keep_cr to struct am_state, which let
> it be removed as a parameter from five other functions.
>
> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>

A test exercising the new functionality would be nice.

Also, maybe a more descriptive title like "mailinfo: also respect 
keep_cr after base64 decode" (50 characters) is better.

> @@ -143,6 +144,7 @@ static void am_state_init(struct am_state *state, const char *dir)
>
>  	memset(state, 0, sizeof(*state));
>
> +	state->keep_cr = -1;

Maybe query the git config here (instead of later) so that we never have 
to worry about state->keep_cr being neither 0 nor 1? This function 
already queries the git config anyway.

> diff --git a/mailinfo.h b/mailinfo.h
> index 04a25351d..9fddcf684 100644
> --- a/mailinfo.h
> +++ b/mailinfo.h
> @@ -12,6 +12,7 @@ struct mailinfo {
>  	struct strbuf email;
>  	int keep_subject;
>  	int keep_non_patch_brackets_in_subject;
> +	int keep_cr;
>  	int add_message_id;
>  	int use_scissors;
>  	int use_inbody_headers;

I personally would write "unsigned keep_cr : 1" to further emphasize 
that this can only be 0 or 1, but I don't know if it's better to keep 
with the style existing in the file (that is, using int).

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] mailinfo: Understand forwarded patches
  2017-01-12  9:20 ` [PATCH 2/2] mailinfo: Understand forwarded patches Matthew Wilcox
@ 2017-01-12 18:17   ` Jonathan Tan
  2017-01-12 19:35     ` Matthew Wilcox
  2017-01-12 20:18   ` Junio C Hamano
  1 sibling, 1 reply; 7+ messages in thread
From: Jonathan Tan @ 2017-01-12 18:17 UTC (permalink / raw)
  To: Matthew Wilcox, git; +Cc: Matthew Wilcox

On 01/12/2017 01:20 AM, Matthew Wilcox wrote:
> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> Extend the --scissors mechanism to strip off the preamble created by
> forwarding a patch.  There are a couple of extra headers ("Sent" and
> "To") added by forwarding, but other than that, the --scissors option
> will now remove patches forwarded from Microsoft Outlook to a Linux
> email account.
>
> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>

Also add a test showing the kind of message that the current code 
doesn't handle, and that this commit addresses.

> ---
>  mailinfo.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/mailinfo.c b/mailinfo.c
> index 2059704a8..fc1275532 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -332,7 +332,7 @@ static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
>
>  #define MAX_HDR_PARSED 10
>  static const char *header[MAX_HDR_PARSED] = {
> -	"From","Subject","Date",
> +	"From","Subject","Date","Sent","To",

Are these extra headers used in both the "real" e-mail headers and the 
in-body headers, or only one of them? (If the latter, they should 
probably be handled only in the relevant function - my previous patches 
to this file were in that direction too, if I remember correctly.) Also, 
I suspect that these will have to be handled differently to the other 3, 
but that will be clearer when you add the test with an example message.

>  };
>
>  static inline int cmp_header(const struct strbuf *line, const char *hdr)
> @@ -685,6 +685,13 @@ static int is_scissors_line(const char *line)
>  			c++;
>  			continue;
>  		}
> +		if (!memcmp(c, "Original Message", 16)) {

1) You can use starts_with or skip_prefix.
2) This seems vulnerable to false positives. If "Original Message" 
always follows a certain kind of line, it might be better to check for 
that. (Again, it will be clearer when we have an example message.)

> +			in_perforation = 1;
> +			perforation += 16;
> +			scissors += 16;
> +			c += 15;

Why 15? Also, can skip_prefix avoid these magic numbers?

> +			continue;
> +		}
>  		in_perforation = 0;
>  	}
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 1/2] mailinfo: Add support for keep_cr
  2017-01-12 18:04 ` [PATCH 1/2] mailinfo: Add support for keep_cr Jonathan Tan
@ 2017-01-12 19:06   ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2017-01-12 19:06 UTC (permalink / raw)
  To: Jonathan Tan, Matthew Wilcox, git@vger.kernel.org

From: Jonathan Tan [mailto:jonathantanmy@google.com]
> On 01/12/2017 01:20 AM, Matthew Wilcox wrote:
> A test exercising the new functionality would be nice.

Roger.

> Also, maybe a more descriptive title like "mailinfo: also respect
> keep_cr after base64 decode" (50 characters) is better.

No problem.

> > @@ -143,6 +144,7 @@ static void am_state_init(struct am_state *state,
> const char *dir)
> >
> >  	memset(state, 0, sizeof(*state));
> >
> > +	state->keep_cr = -1;
> 
> Maybe query the git config here (instead of later) so that we never have
> to worry about state->keep_cr being neither 0 nor 1? This function
> already queries the git config anyway.

I wondered why the existing code didn't do that, but I wanted to make a minimal change rather than clean up an older mistake.  I'm happy to do it that way.

> > diff --git a/mailinfo.h b/mailinfo.h
> > index 04a25351d..9fddcf684 100644
> > --- a/mailinfo.h
> > +++ b/mailinfo.h
> > @@ -12,6 +12,7 @@ struct mailinfo {
> >  	struct strbuf email;
> >  	int keep_subject;
> >  	int keep_non_patch_brackets_in_subject;
> > +	int keep_cr;
> >  	int add_message_id;
> >  	int use_scissors;
> >  	int use_inbody_headers;
> 
> I personally would write "unsigned keep_cr : 1" to further emphasize
> that this can only be 0 or 1, but I don't know if it's better to keep
> with the style existing in the file (that is, using int).

Probably best to stick to the existing file ... someone can always do a cleanup patch later, and having this match the others will make that easier, not harder.

Thanks for the review.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 2/2] mailinfo: Understand forwarded patches
  2017-01-12 18:17   ` Jonathan Tan
@ 2017-01-12 19:35     ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2017-01-12 19:35 UTC (permalink / raw)
  To: Jonathan Tan, Matthew Wilcox, git@vger.kernel.org

From: Jonathan Tan [mailto:jonathantanmy@google.com]
> On 01/12/2017 01:20 AM, Matthew Wilcox wrote:
> > From: Matthew Wilcox <mawilcox@microsoft.com>
> >
> > Extend the --scissors mechanism to strip off the preamble created by
> > forwarding a patch.  There are a couple of extra headers ("Sent" and
> > "To") added by forwarding, but other than that, the --scissors option
> > will now remove patches forwarded from Microsoft Outlook to a Linux
> > email account.
> >
> > Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
> 
> Also add a test showing the kind of message that the current code
> doesn't handle, and that this commit addresses.

OK.  For the sake of discussion, here's what the base64-decoded message looks like:

--- 8< ---

-----Original Message-----
From: Rehas Sachdeva [mailto:aquannie@gmail.com]
Sent: Wednesday, January 4, 2017 11:55 AM
To: Matthew Wilcox <mawilcox@microsoft.com>; riel@surriel.com
Subject: [PATCH v3] radix tree test suite: Dial down verbosity with -v

Make the output of radix tree test suite less verbose by default and add
-v and -vv command line options for increasing level of verbosity.

--- >8 ---

> > ---
> >  mailinfo.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/mailinfo.c b/mailinfo.c
> > index 2059704a8..fc1275532 100644
> > --- a/mailinfo.c
> > +++ b/mailinfo.c
> > @@ -332,7 +332,7 @@ static void cleanup_subject(struct mailinfo *mi,
> struct strbuf *subject)
> >
> >  #define MAX_HDR_PARSED 10
> >  static const char *header[MAX_HDR_PARSED] = {
> > -	"From","Subject","Date",
> > +	"From","Subject","Date","Sent","To",
> 
> Are these extra headers used in both the "real" e-mail headers and the
> in-body headers, or only one of them? (If the latter, they should
> probably be handled only in the relevant function - my previous patches
> to this file were in that direction too, if I remember correctly.) Also,
> I suspect that these will have to be handled differently to the other 3,
> but that will be clearer when you add the test with an example message.

Without this part of the patch, using --scissors means it stops parsing headers when it hits the 'Sent' line.  So all I'm looking for is to have 'is_inbody_header()' return true.  If there's a better way to achieve that, I'm all for it.  Without this hunk of the patch, the commit looks like this:

Without any of this patch, the commit looks like this:

Author: Matthew Wilcox <mawilcox@microsoft.com>
Date:   Sat Jan 7 18:33:43 2017 +0000

    FW: [PATCH v3] radix tree test suite: Dial down verbosity with -v

    -----Original Message-----
    From: Rehas Sachdeva [mailto:aquannie@gmail.com]
    Sent: Wednesday, January 4, 2017 11:55 AM
    To: Matthew Wilcox <mawilcox@microsoft.com>; riel@surriel.com
    Subject: [PATCH v3] radix tree test suite: Dial down verbosity with -v

    Make the output of radix tree test suite less verbose by default and add
    -v and -vv command line options for increasing level of verbosity.

Without this hunk of the patch, the commit looks like this:

Author: Rehas Sachdeva <[mailto:aquannie@gmail.com]>
Date:   Sat Jan 7 18:33:43 2017 +0000

    FW: [PATCH v3] radix tree test suite: Dial down verbosity with -v

    Sent: Wednesday, January 4, 2017 11:55 AM
    To: Matthew Wilcox <mawilcox@microsoft.com>; riel@surriel.com
    Subject: [PATCH v3] radix tree test suite: Dial down verbosity with -v

    Make the output of radix tree test suite less verbose by default and add
    -v and -vv command line options for increasing level of verbosity.

And with this hunk, it turns into this:

Author: Rehas Sachdeva <[mailto:aquannie@gmail.com]>
Date:   Sat Jan 7 18:33:43 2017 +0000

    radix tree test suite: Dial down verbosity with -v

    Make the output of radix tree test suite less verbose by default and add
    -v and -vv command line options for increasing level of verbosity.


There's more work to do here, turning that silly <[mailto:]> into a proper email address, for example, and parsing Sent: like we parse Date:, but I thought it worth sending out patches 1 & 2 before writing patch 3.

> > @@ -685,6 +685,13 @@ static int is_scissors_line(const char *line)
> >  			c++;
> >  			continue;
> >  		}
> > +		if (!memcmp(c, "Original Message", 16)) {
> 
> 1) You can use starts_with or skip_prefix.

I was following the existing logic in this function that looks for 8< or >8 or ...

> 2) This seems vulnerable to false positives. If "Original Message"
> always follows a certain kind of line, it might be better to check for
> that. (Again, it will be clearer when we have an example message.)

I don't think it's terribly vulnerable to false positives.  The logic at the end of the function tries to make sure that the scissor marks make up a substantial component of the line.

We could do this differently; I'm not sure if there's a regex library built into git, but we could even custom-write a separate matcher that looks only for ^-{3,}Original Message-{3,}$

Or we could use a different option; eg --forwarded that will trim off the extra gunk associated with forwarding messages, instead of overloading --scissors.

> > +			in_perforation = 1;
> > +			perforation += 16;
> > +			scissors += 16;
> > +			c += 15;
> 
> Why 15? Also, can skip_prefix avoid these magic numbers?

Again, following the rest of the function.  c has already been advanced by 1, now it needs to be advanced to the end of the 16 character string "Original Message".


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] mailinfo: Understand forwarded patches
  2017-01-12  9:20 ` [PATCH 2/2] mailinfo: Understand forwarded patches Matthew Wilcox
  2017-01-12 18:17   ` Jonathan Tan
@ 2017-01-12 20:18   ` Junio C Hamano
  1 sibling, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2017-01-12 20:18 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: git, Matthew Wilcox, Jonathan Tan

Matthew Wilcox <mawilcox@linuxonhyperv.com> writes:

> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> Extend the --scissors mechanism to strip off the preamble created by
> forwarding a patch.  There are a couple of extra headers ("Sent" and
> "To") added by forwarding, but other than that, the --scissors option
> will now remove patches forwarded from Microsoft Outlook to a Linux
> email account.
>
> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
> ---
>  mailinfo.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)

To be quite honest, I am not enthused to even having to think about
this kind of change.  There seems to be no standard way MUAs produce
"forwarded" messages, and this adds support for only one specific
MUA, that happens to say "Original Message".  Why should such a thing
hardcoded in this codepath?

I think I am OK with a patch that lets users customize
is_scissors_line(), perhaps accepting a regexp that ought to match a
line to consider a scissors line.  When such a regexp is given,
check for a match before doing the "do we have a line filled with
'-' with the scissors marker and is the mark long enough?" check.

Then you can do

	mailinfo --scissors='^-{5}Original Message-{5}$'

or something like that.  Perhaps allow multiple regexps, or even
allow them to come from a multi-valued configuration variable.

> diff --git a/mailinfo.c b/mailinfo.c
> index 2059704a8..fc1275532 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -332,7 +332,7 @@ static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
>  
>  #define MAX_HDR_PARSED 10
>  static const char *header[MAX_HDR_PARSED] = {
> -	"From","Subject","Date",
> +	"From","Subject","Date","Sent","To",
>  };

This array lists fields whose value we _care_ about.  Do not put
random garbage whose value we do not use in it.

Even though I am not enthused to see support for messages forwarded
by Outlook bolted onto this codepath, I think it may make sense to
allow random garbage that looks like an e-mail header to appear
immediately after a scissors line (and ignore them).  For that,
perhaps you would instead want to extend is_inbody_header() so that
after it decides that the given line is *NOT* one of the header
field we care about, return a value that is not 0 or 1.  Its caller
currently expects to see 1 if it is a relevant in-body header line,
or 0 if the line signals the end of the in-body header block.  You'd
be adding another class of lines that are not a header line with a
meaning but do not terminate the in-body header block.


>  static inline int cmp_header(const struct strbuf *line, const char *hdr)
> @@ -685,6 +685,13 @@ static int is_scissors_line(const char *line)
>  			c++;
>  			continue;
>  		}
> +		if (!memcmp(c, "Original Message", 16)) {
> +			in_perforation = 1;
> +			perforation += 16;
> +			scissors += 16;
> +			c += 15;
> +			continue;
> +		}
>  		in_perforation = 0;
>  	}

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-01-12 20:18 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-12  9:20 [PATCH 1/2] mailinfo: Add support for keep_cr Matthew Wilcox
2017-01-12  9:20 ` [PATCH 2/2] mailinfo: Understand forwarded patches Matthew Wilcox
2017-01-12 18:17   ` Jonathan Tan
2017-01-12 19:35     ` Matthew Wilcox
2017-01-12 20:18   ` Junio C Hamano
2017-01-12 18:04 ` [PATCH 1/2] mailinfo: Add support for keep_cr Jonathan Tan
2017-01-12 19:06   ` Matthew Wilcox

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).