git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] strbuf: add and use strbuf_insertstr()
@ 2020-02-08 19:56 René Scharfe
  2020-02-08 23:08 ` Taylor Blau
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-08 19:56 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano

Add a function for inserting a C string into a strbuf.  Use it
throughout the source to get rid of magic string length constants and
explicit strlen() calls.

Like strbuf_addstr(), implement it as an inline function to avoid the
implicit strlen() calls to cause runtime overhead.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 builtin/checkout.c        |  2 +-
 builtin/notes.c           |  4 ++--
 builtin/sparse-checkout.c |  2 +-
 commit.c                  |  2 +-
 config.c                  |  2 +-
 http.c                    |  4 ++--
 mailinfo.c                |  2 +-
 notes-utils.c             |  2 +-
 notes.c                   |  4 ++--
 pretty.c                  |  4 ++--
 strbuf.h                  | 12 ++++++++++++
 11 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index fc2eb1befc..d6773818b8 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -865,7 +865,7 @@ static void update_refs_for_switch(const struct checkout_opts *opts,
 		strbuf_addf(&msg, "checkout: moving from %s to %s",
 			old_desc ? old_desc : "(invalid)", new_branch_info->name);
 	else
-		strbuf_insert(&msg, 0, reflog_msg, strlen(reflog_msg));
+		strbuf_insertstr(&msg, 0, reflog_msg);

 	if (!strcmp(new_branch_info->name, "HEAD") && !new_branch_info->path && !opts->force_detach) {
 		/* Nothing to do. */
diff --git a/builtin/notes.c b/builtin/notes.c
index 95456f3165..35e468ea2d 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -622,7 +622,7 @@ static int append_edit(int argc, const char **argv, const char *prefix)

 		strbuf_grow(&d.buf, size + 1);
 		if (d.buf.len && prev_buf && size)
-			strbuf_insert(&d.buf, 0, "\n", 1);
+			strbuf_insertstr(&d.buf, 0, "\n");
 		if (prev_buf && size)
 			strbuf_insert(&d.buf, 0, prev_buf, size);
 		free(prev_buf);
@@ -745,7 +745,7 @@ static int merge_commit(struct notes_merge_options *o)
 	memset(&pretty_ctx, 0, sizeof(pretty_ctx));
 	format_commit_message(partial, "%s", &msg, &pretty_ctx);
 	strbuf_trim(&msg);
-	strbuf_insert(&msg, 0, "notes: ", 7);
+	strbuf_insertstr(&msg, 0, "notes: ");
 	update_ref(msg.buf, o->local_ref, &oid,
 		   is_null_oid(&parent_oid) ? NULL : &parent_oid,
 		   0, UPDATE_REFS_DIE_ON_ERR);
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index b3bed891cb..38d0d944b3 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -373,7 +373,7 @@ static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl)
 		return;

 	if (line->buf[0] != '/')
-		strbuf_insert(line, 0, "/", 1);
+		strbuf_insertstr(line, 0, "/");

 	insert_recursive_pattern(pl, line);
 }
diff --git a/commit.c b/commit.c
index 3f91d3efc5..a6cfa41a4e 100644
--- a/commit.c
+++ b/commit.c
@@ -993,7 +993,7 @@ static int do_sign_commit(struct strbuf *buf, const char *keyid)
 			strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len);
 			inspos += gpg_sig_header_len;
 		}
-		strbuf_insert(buf, inspos++, " ", 1);
+		strbuf_insertstr(buf, inspos++, " ");
 		strbuf_insert(buf, inspos, bol, len);
 		inspos += len;
 		copypos += len;
diff --git a/config.c b/config.c
index d75f88ca0c..b386c0c628 100644
--- a/config.c
+++ b/config.c
@@ -204,7 +204,7 @@ static int prepare_include_condition_pattern(struct strbuf *pat)
 		strbuf_splice(pat, 0, 1, path.buf, slash - path.buf);
 		prefix = slash - path.buf + 1 /* slash */;
 	} else if (!is_absolute_path(pat->buf))
-		strbuf_insert(pat, 0, "**/", 3);
+		strbuf_insertstr(pat, 0, "**/");

 	add_trailing_starstar_for_dir(pat);

diff --git a/http.c b/http.c
index 5f348169c3..00a0e50763 100644
--- a/http.c
+++ b/http.c
@@ -680,8 +680,8 @@ static void curl_dump_header(const char *text, unsigned char *ptr, size_t size,
 	for (header = headers; *header; header++) {
 		if (hide_sensitive_header)
 			redact_sensitive_header(*header);
-		strbuf_insert((*header), 0, text, strlen(text));
-		strbuf_insert((*header), strlen(text), ": ", 2);
+		strbuf_insertstr((*header), 0, text);
+		strbuf_insertstr((*header), strlen(text), ": ");
 		strbuf_rtrim((*header));
 		strbuf_addch((*header), '\n');
 		trace_strbuf(&trace_curl, (*header));
diff --git a/mailinfo.c b/mailinfo.c
index b395adbdf2..c535dec2e9 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -254,7 +254,7 @@ static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
 	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");

 	if (slurp_attr(line->buf, "boundary=", boundary)) {
-		strbuf_insert(boundary, 0, "--", 2);
+		strbuf_insertstr(boundary, 0, "--");
 		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
 			error("Too many boundaries to handle");
 			mi->input_error = -1;
diff --git a/notes-utils.c b/notes-utils.c
index a819410698..5f94c513d4 100644
--- a/notes-utils.c
+++ b/notes-utils.c
@@ -52,7 +52,7 @@ void commit_notes(struct repository *r, struct notes_tree *t, const char *msg)
 	strbuf_complete_line(&buf);

 	create_notes_commit(r, t, NULL, buf.buf, buf.len, &commit_oid);
-	strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
+	strbuf_insertstr(&buf, 0, "notes: "); /* commit message starts at index 7 */
 	update_ref(buf.buf, t->update_ref, &commit_oid, NULL, 0,
 		   UPDATE_REFS_DIE_ON_ERR);

diff --git a/notes.c b/notes.c
index 0c79964c26..a24af53de6 100644
--- a/notes.c
+++ b/notes.c
@@ -1332,9 +1332,9 @@ void expand_notes_ref(struct strbuf *sb)
 	if (starts_with(sb->buf, "refs/notes/"))
 		return; /* we're happy */
 	else if (starts_with(sb->buf, "notes/"))
-		strbuf_insert(sb, 0, "refs/", 5);
+		strbuf_insertstr(sb, 0, "refs/");
 	else
-		strbuf_insert(sb, 0, "refs/notes/", 11);
+		strbuf_insertstr(sb, 0, "refs/notes/");
 }

 void expand_loose_notes_ref(struct strbuf *sb)
diff --git a/pretty.c b/pretty.c
index f5fbbc5ffb..28afc701b6 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1609,9 +1609,9 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 			strbuf_setlen(sb, sb->len - 1);
 	} else if (orig_len != sb->len) {
 		if (magic == ADD_LF_BEFORE_NON_EMPTY)
-			strbuf_insert(sb, orig_len, "\n", 1);
+			strbuf_insertstr(sb, orig_len, "\n");
 		else if (magic == ADD_SP_BEFORE_NON_EMPTY)
-			strbuf_insert(sb, orig_len, " ", 1);
+			strbuf_insertstr(sb, orig_len, " ");
 	}
 	return consumed + 1;
 }
diff --git a/strbuf.h b/strbuf.h
index bfa66569a4..aae7ac3a82 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -244,6 +244,18 @@ void strbuf_addchars(struct strbuf *sb, int c, size_t n);
  */
 void strbuf_insert(struct strbuf *sb, size_t pos, const void *, size_t);

+/**
+ * Insert a NUL-terminated string to the given position of the buffer.
+ * The remaining contents will be shifted, not overwritten.  It's an
+ * inline function to allow the compiler to resolve strlen() calls on
+ * constants at compile time.
+ */
+static inline void strbuf_insertstr(struct strbuf *sb, size_t pos,
+				    const char *s)
+{
+	strbuf_insert(sb, pos, s, strlen(s));
+}
+
 /**
  * Insert data to the given position of the buffer giving a printf format
  * string. The contents will be shifted, not overwritten.
--
2.25.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] strbuf: add and use strbuf_insertstr()
  2020-02-08 19:56 [PATCH] strbuf: add and use strbuf_insertstr() René Scharfe
@ 2020-02-08 23:08 ` Taylor Blau
  2020-02-09 10:23   ` René Scharfe
  2020-02-09  0:53 ` Eric Sunshine
  2020-02-09 13:44 ` [PATCH v2] " René Scharfe
  2 siblings, 1 reply; 23+ messages in thread
From: Taylor Blau @ 2020-02-08 23:08 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano

Hi René,

On Sat, Feb 08, 2020 at 08:56:43PM +0100, René Scharfe wrote:
> Add a function for inserting a C string into a strbuf.  Use it
> throughout the source to get rid of magic string length constants and
> explicit strlen() calls.
>
> Like strbuf_addstr(), implement it as an inline function to avoid the
> implicit strlen() calls to cause runtime overhead.

This all looks quite reasonable to me. Did you have a specific
motivation in mind when writing this patch, other than to get rid of
having to call strlen on the same argument like:

  strbuf_insert(&buf, pos, x, strlen(x))

? Not that this needs more motivation than that, I'm just curious.

I looked through 'git grep strbuf_insert(', and only noticed one spot
that I could be updated with 'strbuf_insertstr' in mailinfo.c:

diff --git a/mailinfo.c b/mailinfo.c
index c535dec2e9..543962d40c 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
    len = strlen("Content-Type: ");
    strbuf_add(&sb, line->buf + len, line->len - len);
    decode_header(mi, &sb);
-		strbuf_insert(&sb, 0, "Content-Type: ", len);
+		strbuf_insertstr(&sb, 0, "Content-Type: ");
    handle_content_type(mi, &sb);
    ret = 1;
    goto check_header_out;

Otherwise this looks quite good to me.

>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  builtin/checkout.c        |  2 +-
>  builtin/notes.c           |  4 ++--
>  builtin/sparse-checkout.c |  2 +-
>  commit.c                  |  2 +-
>  config.c                  |  2 +-
>  http.c                    |  4 ++--
>  mailinfo.c                |  2 +-
>  notes-utils.c             |  2 +-
>  notes.c                   |  4 ++--
>  pretty.c                  |  4 ++--
>  strbuf.h                  | 12 ++++++++++++
>  11 files changed, 26 insertions(+), 14 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index fc2eb1befc..d6773818b8 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -865,7 +865,7 @@ static void update_refs_for_switch(const struct checkout_opts *opts,
>  		strbuf_addf(&msg, "checkout: moving from %s to %s",
>  			old_desc ? old_desc : "(invalid)", new_branch_info->name);
>  	else
> -		strbuf_insert(&msg, 0, reflog_msg, strlen(reflog_msg));
> +		strbuf_insertstr(&msg, 0, reflog_msg);
>
>  	if (!strcmp(new_branch_info->name, "HEAD") && !new_branch_info->path && !opts->force_detach) {
>  		/* Nothing to do. */
> diff --git a/builtin/notes.c b/builtin/notes.c
> index 95456f3165..35e468ea2d 100644
> --- a/builtin/notes.c
> +++ b/builtin/notes.c
> @@ -622,7 +622,7 @@ static int append_edit(int argc, const char **argv, const char *prefix)
>
>  		strbuf_grow(&d.buf, size + 1);
>  		if (d.buf.len && prev_buf && size)
> -			strbuf_insert(&d.buf, 0, "\n", 1);
> +			strbuf_insertstr(&d.buf, 0, "\n");
>  		if (prev_buf && size)
>  			strbuf_insert(&d.buf, 0, prev_buf, size);
>  		free(prev_buf);
> @@ -745,7 +745,7 @@ static int merge_commit(struct notes_merge_options *o)
>  	memset(&pretty_ctx, 0, sizeof(pretty_ctx));
>  	format_commit_message(partial, "%s", &msg, &pretty_ctx);
>  	strbuf_trim(&msg);
> -	strbuf_insert(&msg, 0, "notes: ", 7);
> +	strbuf_insertstr(&msg, 0, "notes: ");
>  	update_ref(msg.buf, o->local_ref, &oid,
>  		   is_null_oid(&parent_oid) ? NULL : &parent_oid,
>  		   0, UPDATE_REFS_DIE_ON_ERR);
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index b3bed891cb..38d0d944b3 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -373,7 +373,7 @@ static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl)
>  		return;
>
>  	if (line->buf[0] != '/')
> -		strbuf_insert(line, 0, "/", 1);
> +		strbuf_insertstr(line, 0, "/");
>
>  	insert_recursive_pattern(pl, line);
>  }
> diff --git a/commit.c b/commit.c
> index 3f91d3efc5..a6cfa41a4e 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -993,7 +993,7 @@ static int do_sign_commit(struct strbuf *buf, const char *keyid)
>  			strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len);
>  			inspos += gpg_sig_header_len;
>  		}
> -		strbuf_insert(buf, inspos++, " ", 1);
> +		strbuf_insertstr(buf, inspos++, " ");
>  		strbuf_insert(buf, inspos, bol, len);
>  		inspos += len;
>  		copypos += len;
> diff --git a/config.c b/config.c
> index d75f88ca0c..b386c0c628 100644
> --- a/config.c
> +++ b/config.c
> @@ -204,7 +204,7 @@ static int prepare_include_condition_pattern(struct strbuf *pat)
>  		strbuf_splice(pat, 0, 1, path.buf, slash - path.buf);
>  		prefix = slash - path.buf + 1 /* slash */;
>  	} else if (!is_absolute_path(pat->buf))
> -		strbuf_insert(pat, 0, "**/", 3);
> +		strbuf_insertstr(pat, 0, "**/");
>
>  	add_trailing_starstar_for_dir(pat);
>
> diff --git a/http.c b/http.c
> index 5f348169c3..00a0e50763 100644
> --- a/http.c
> +++ b/http.c
> @@ -680,8 +680,8 @@ static void curl_dump_header(const char *text, unsigned char *ptr, size_t size,
>  	for (header = headers; *header; header++) {
>  		if (hide_sensitive_header)
>  			redact_sensitive_header(*header);
> -		strbuf_insert((*header), 0, text, strlen(text));
> -		strbuf_insert((*header), strlen(text), ": ", 2);
> +		strbuf_insertstr((*header), 0, text);
> +		strbuf_insertstr((*header), strlen(text), ": ");
>  		strbuf_rtrim((*header));
>  		strbuf_addch((*header), '\n');
>  		trace_strbuf(&trace_curl, (*header));
> diff --git a/mailinfo.c b/mailinfo.c
> index b395adbdf2..c535dec2e9 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -254,7 +254,7 @@ static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
>  	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
>
>  	if (slurp_attr(line->buf, "boundary=", boundary)) {
> -		strbuf_insert(boundary, 0, "--", 2);
> +		strbuf_insertstr(boundary, 0, "--");
>  		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
>  			error("Too many boundaries to handle");
>  			mi->input_error = -1;
> diff --git a/notes-utils.c b/notes-utils.c
> index a819410698..5f94c513d4 100644
> --- a/notes-utils.c
> +++ b/notes-utils.c
> @@ -52,7 +52,7 @@ void commit_notes(struct repository *r, struct notes_tree *t, const char *msg)
>  	strbuf_complete_line(&buf);
>
>  	create_notes_commit(r, t, NULL, buf.buf, buf.len, &commit_oid);
> -	strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
> +	strbuf_insertstr(&buf, 0, "notes: "); /* commit message starts at index 7 */
>  	update_ref(buf.buf, t->update_ref, &commit_oid, NULL, 0,
>  		   UPDATE_REFS_DIE_ON_ERR);
>
> diff --git a/notes.c b/notes.c
> index 0c79964c26..a24af53de6 100644
> --- a/notes.c
> +++ b/notes.c
> @@ -1332,9 +1332,9 @@ void expand_notes_ref(struct strbuf *sb)
>  	if (starts_with(sb->buf, "refs/notes/"))
>  		return; /* we're happy */
>  	else if (starts_with(sb->buf, "notes/"))
> -		strbuf_insert(sb, 0, "refs/", 5);
> +		strbuf_insertstr(sb, 0, "refs/");
>  	else
> -		strbuf_insert(sb, 0, "refs/notes/", 11);
> +		strbuf_insertstr(sb, 0, "refs/notes/");
>  }
>
>  void expand_loose_notes_ref(struct strbuf *sb)
> diff --git a/pretty.c b/pretty.c
> index f5fbbc5ffb..28afc701b6 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -1609,9 +1609,9 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
>  			strbuf_setlen(sb, sb->len - 1);
>  	} else if (orig_len != sb->len) {
>  		if (magic == ADD_LF_BEFORE_NON_EMPTY)
> -			strbuf_insert(sb, orig_len, "\n", 1);
> +			strbuf_insertstr(sb, orig_len, "\n");
>  		else if (magic == ADD_SP_BEFORE_NON_EMPTY)
> -			strbuf_insert(sb, orig_len, " ", 1);
> +			strbuf_insertstr(sb, orig_len, " ");
>  	}
>  	return consumed + 1;
>  }
> diff --git a/strbuf.h b/strbuf.h
> index bfa66569a4..aae7ac3a82 100644
> --- a/strbuf.h
> +++ b/strbuf.h
> @@ -244,6 +244,18 @@ void strbuf_addchars(struct strbuf *sb, int c, size_t n);
>   */
>  void strbuf_insert(struct strbuf *sb, size_t pos, const void *, size_t);
>
> +/**
> + * Insert a NUL-terminated string to the given position of the buffer.
> + * The remaining contents will be shifted, not overwritten.  It's an
> + * inline function to allow the compiler to resolve strlen() calls on
> + * constants at compile time.
> + */
> +static inline void strbuf_insertstr(struct strbuf *sb, size_t pos,
> +				    const char *s)
> +{
> +	strbuf_insert(sb, pos, s, strlen(s));
> +}
> +
>  /**
>   * Insert data to the given position of the buffer giving a printf format
>   * string. The contents will be shifted, not overwritten.
> --
> 2.25.0
>

Thanks,
Taylor

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] strbuf: add and use strbuf_insertstr()
  2020-02-08 19:56 [PATCH] strbuf: add and use strbuf_insertstr() René Scharfe
  2020-02-08 23:08 ` Taylor Blau
@ 2020-02-09  0:53 ` Eric Sunshine
  2020-02-09 10:23   ` René Scharfe
  2020-02-09 13:44 ` [PATCH v2] " René Scharfe
  2 siblings, 1 reply; 23+ messages in thread
From: Eric Sunshine @ 2020-02-09  0:53 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano

On Sat, Feb 8, 2020 at 2:57 PM René Scharfe <l.s.r@web.de> wrote:
> Add a function for inserting a C string into a strbuf.  Use it
> throughout the source to get rid of magic string length constants and
> explicit strlen() calls.
>
> Like strbuf_addstr(), implement it as an inline function to avoid the
> implicit strlen() calls to cause runtime overhead.
>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
> diff --git a/notes-utils.c b/notes-utils.c
> @@ -52,7 +52,7 @@ void commit_notes(struct repository *r, struct notes_tree *t, const char *msg)
> -       strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
> +       strbuf_insertstr(&buf, 0, "notes: "); /* commit message starts at index 7 */

Is there a reason to retain the comment which talks about magic number 7?

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] strbuf: add and use strbuf_insertstr()
  2020-02-08 23:08 ` Taylor Blau
@ 2020-02-09 10:23   ` René Scharfe
  0 siblings, 0 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-09 10:23 UTC (permalink / raw)
  To: Taylor Blau; +Cc: Git Mailing List, Junio C Hamano

Am 09.02.20 um 00:08 schrieb Taylor Blau:
> Hi René,
>
> On Sat, Feb 08, 2020 at 08:56:43PM +0100, René Scharfe wrote:
>> Add a function for inserting a C string into a strbuf.  Use it
>> throughout the source to get rid of magic string length constants and
>> explicit strlen() calls.
>>
>> Like strbuf_addstr(), implement it as an inline function to avoid the
>> implicit strlen() calls to cause runtime overhead.
>
> This all looks quite reasonable to me. Did you have a specific
> motivation in mind when writing this patch, other than to get rid of
> having to call strlen on the same argument like:
>
>   strbuf_insert(&buf, pos, x, strlen(x))
>
> ? Not that this needs more motivation than that, I'm just curious.

No, that's it.

> I looked through 'git grep strbuf_insert(', and only noticed one spot
> that I could be updated with 'strbuf_insertstr' in mailinfo.c:
>
> diff --git a/mailinfo.c b/mailinfo.c
> index c535dec2e9..543962d40c 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
>     len = strlen("Content-Type: ");
>     strbuf_add(&sb, line->buf + len, line->len - len);
>     decode_header(mi, &sb);
> -		strbuf_insert(&sb, 0, "Content-Type: ", len);
> +		strbuf_insertstr(&sb, 0, "Content-Type: ");
>     handle_content_type(mi, &sb);
>     ret = 1;
>     goto check_header_out;

Right.  Missed that one because the strlen() call is not in side the
strbuf_insert() invocation.

René

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] strbuf: add and use strbuf_insertstr()
  2020-02-09  0:53 ` Eric Sunshine
@ 2020-02-09 10:23   ` René Scharfe
  0 siblings, 0 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-09 10:23 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git Mailing List, Junio C Hamano

Am 09.02.20 um 01:53 schrieb Eric Sunshine:
> On Sat, Feb 8, 2020 at 2:57 PM René Scharfe <l.s.r@web.de> wrote:
>> Add a function for inserting a C string into a strbuf.  Use it
>> throughout the source to get rid of magic string length constants and
>> explicit strlen() calls.
>>
>> Like strbuf_addstr(), implement it as an inline function to avoid the
>> implicit strlen() calls to cause runtime overhead.
>>
>> Signed-off-by: René Scharfe <l.s.r@web.de>
>> ---
>> diff --git a/notes-utils.c b/notes-utils.c
>> @@ -52,7 +52,7 @@ void commit_notes(struct repository *r, struct notes_tree *t, const char *msg)
>> -       strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
>> +       strbuf_insertstr(&buf, 0, "notes: "); /* commit message starts at index 7 */
>
> Is there a reason to retain the comment which talks about magic number 7?

I didn't understand its usefulness, so I didn't dare touch it.  Looking
deeper, I think it already became obsolete with 13f8b72d8c (Convert
commit_tree() to take strbuf as message, 2011-12-15), which included:

diff --git a/builtin/notes.c b/builtin/notes.c
index f8e437db01..5e32548cdd 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -301,12 +301,12 @@ void commit_notes(struct notes_tree *t, const char *msg)
 		return; /* don't have to commit an unchanged tree */

 	/* Prepare commit message and reflog message */
-	strbuf_addstr(&buf, "notes: "); /* commit message starts at index 7 */
 	strbuf_addstr(&buf, msg);
 	if (buf.buf[buf.len - 1] != '\n')
 		strbuf_addch(&buf, '\n'); /* Make sure msg ends with newline */

-	create_notes_commit(t, NULL, buf.buf + 7, commit_sha1);
+	create_notes_commit(t, NULL, &buf, commit_sha1);
+	strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
 	update_ref(buf.buf, t->ref, commit_sha1, NULL, 0, DIE_ON_ERR);

 	strbuf_release(&buf);

René

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-08 19:56 [PATCH] strbuf: add and use strbuf_insertstr() René Scharfe
  2020-02-08 23:08 ` Taylor Blau
  2020-02-09  0:53 ` Eric Sunshine
@ 2020-02-09 13:44 ` René Scharfe
  2020-02-09 17:36   ` Eric Sunshine
  2020-02-10  7:15   ` [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type() René Scharfe
  2 siblings, 2 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-09 13:44 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano, Taylor Blau, Eric Sunshine

Add a function for inserting a C string into a strbuf.  Use it
throughout the source to get rid of magic string length constants and
explicit strlen() calls.

Like strbuf_addstr(), implement it as an inline function to avoid the
implicit strlen() calls to cause runtime overhead.

Helped-by: Taylor Blau <me@ttaylorr.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
---
Changes:
- Added second hunk in mailinfo.c.
- Removed outdated comment in notes-util.c.

 builtin/checkout.c        |  2 +-
 builtin/notes.c           |  4 ++--
 builtin/sparse-checkout.c |  2 +-
 commit.c                  |  2 +-
 config.c                  |  2 +-
 http.c                    |  4 ++--
 mailinfo.c                |  4 ++--
 notes-utils.c             |  2 +-
 notes.c                   |  4 ++--
 pretty.c                  |  4 ++--
 strbuf.h                  | 12 ++++++++++++
 11 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index fc2eb1befc..d6773818b8 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -865,7 +865,7 @@ static void update_refs_for_switch(const struct checkout_opts *opts,
 		strbuf_addf(&msg, "checkout: moving from %s to %s",
 			old_desc ? old_desc : "(invalid)", new_branch_info->name);
 	else
-		strbuf_insert(&msg, 0, reflog_msg, strlen(reflog_msg));
+		strbuf_insertstr(&msg, 0, reflog_msg);

 	if (!strcmp(new_branch_info->name, "HEAD") && !new_branch_info->path && !opts->force_detach) {
 		/* Nothing to do. */
diff --git a/builtin/notes.c b/builtin/notes.c
index 95456f3165..35e468ea2d 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -622,7 +622,7 @@ static int append_edit(int argc, const char **argv, const char *prefix)

 		strbuf_grow(&d.buf, size + 1);
 		if (d.buf.len && prev_buf && size)
-			strbuf_insert(&d.buf, 0, "\n", 1);
+			strbuf_insertstr(&d.buf, 0, "\n");
 		if (prev_buf && size)
 			strbuf_insert(&d.buf, 0, prev_buf, size);
 		free(prev_buf);
@@ -745,7 +745,7 @@ static int merge_commit(struct notes_merge_options *o)
 	memset(&pretty_ctx, 0, sizeof(pretty_ctx));
 	format_commit_message(partial, "%s", &msg, &pretty_ctx);
 	strbuf_trim(&msg);
-	strbuf_insert(&msg, 0, "notes: ", 7);
+	strbuf_insertstr(&msg, 0, "notes: ");
 	update_ref(msg.buf, o->local_ref, &oid,
 		   is_null_oid(&parent_oid) ? NULL : &parent_oid,
 		   0, UPDATE_REFS_DIE_ON_ERR);
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index b3bed891cb..38d0d944b3 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -373,7 +373,7 @@ static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl)
 		return;

 	if (line->buf[0] != '/')
-		strbuf_insert(line, 0, "/", 1);
+		strbuf_insertstr(line, 0, "/");

 	insert_recursive_pattern(pl, line);
 }
diff --git a/commit.c b/commit.c
index 3f91d3efc5..a6cfa41a4e 100644
--- a/commit.c
+++ b/commit.c
@@ -993,7 +993,7 @@ static int do_sign_commit(struct strbuf *buf, const char *keyid)
 			strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len);
 			inspos += gpg_sig_header_len;
 		}
-		strbuf_insert(buf, inspos++, " ", 1);
+		strbuf_insertstr(buf, inspos++, " ");
 		strbuf_insert(buf, inspos, bol, len);
 		inspos += len;
 		copypos += len;
diff --git a/config.c b/config.c
index d75f88ca0c..b386c0c628 100644
--- a/config.c
+++ b/config.c
@@ -204,7 +204,7 @@ static int prepare_include_condition_pattern(struct strbuf *pat)
 		strbuf_splice(pat, 0, 1, path.buf, slash - path.buf);
 		prefix = slash - path.buf + 1 /* slash */;
 	} else if (!is_absolute_path(pat->buf))
-		strbuf_insert(pat, 0, "**/", 3);
+		strbuf_insertstr(pat, 0, "**/");

 	add_trailing_starstar_for_dir(pat);

diff --git a/http.c b/http.c
index 5f348169c3..00a0e50763 100644
--- a/http.c
+++ b/http.c
@@ -680,8 +680,8 @@ static void curl_dump_header(const char *text, unsigned char *ptr, size_t size,
 	for (header = headers; *header; header++) {
 		if (hide_sensitive_header)
 			redact_sensitive_header(*header);
-		strbuf_insert((*header), 0, text, strlen(text));
-		strbuf_insert((*header), strlen(text), ": ", 2);
+		strbuf_insertstr((*header), 0, text);
+		strbuf_insertstr((*header), strlen(text), ": ");
 		strbuf_rtrim((*header));
 		strbuf_addch((*header), '\n');
 		trace_strbuf(&trace_curl, (*header));
diff --git a/mailinfo.c b/mailinfo.c
index b395adbdf2..543962d40c 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -254,7 +254,7 @@ static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
 	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");

 	if (slurp_attr(line->buf, "boundary=", boundary)) {
-		strbuf_insert(boundary, 0, "--", 2);
+		strbuf_insertstr(boundary, 0, "--");
 		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
 			error("Too many boundaries to handle");
 			mi->input_error = -1;
@@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
 		len = strlen("Content-Type: ");
 		strbuf_add(&sb, line->buf + len, line->len - len);
 		decode_header(mi, &sb);
-		strbuf_insert(&sb, 0, "Content-Type: ", len);
+		strbuf_insertstr(&sb, 0, "Content-Type: ");
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
diff --git a/notes-utils.c b/notes-utils.c
index a819410698..4bf4888d8c 100644
--- a/notes-utils.c
+++ b/notes-utils.c
@@ -52,7 +52,7 @@ void commit_notes(struct repository *r, struct notes_tree *t, const char *msg)
 	strbuf_complete_line(&buf);

 	create_notes_commit(r, t, NULL, buf.buf, buf.len, &commit_oid);
-	strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */
+	strbuf_insertstr(&buf, 0, "notes: ");
 	update_ref(buf.buf, t->update_ref, &commit_oid, NULL, 0,
 		   UPDATE_REFS_DIE_ON_ERR);

diff --git a/notes.c b/notes.c
index 0c79964c26..a24af53de6 100644
--- a/notes.c
+++ b/notes.c
@@ -1332,9 +1332,9 @@ void expand_notes_ref(struct strbuf *sb)
 	if (starts_with(sb->buf, "refs/notes/"))
 		return; /* we're happy */
 	else if (starts_with(sb->buf, "notes/"))
-		strbuf_insert(sb, 0, "refs/", 5);
+		strbuf_insertstr(sb, 0, "refs/");
 	else
-		strbuf_insert(sb, 0, "refs/notes/", 11);
+		strbuf_insertstr(sb, 0, "refs/notes/");
 }

 void expand_loose_notes_ref(struct strbuf *sb)
diff --git a/pretty.c b/pretty.c
index f5fbbc5ffb..28afc701b6 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1609,9 +1609,9 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 			strbuf_setlen(sb, sb->len - 1);
 	} else if (orig_len != sb->len) {
 		if (magic == ADD_LF_BEFORE_NON_EMPTY)
-			strbuf_insert(sb, orig_len, "\n", 1);
+			strbuf_insertstr(sb, orig_len, "\n");
 		else if (magic == ADD_SP_BEFORE_NON_EMPTY)
-			strbuf_insert(sb, orig_len, " ", 1);
+			strbuf_insertstr(sb, orig_len, " ");
 	}
 	return consumed + 1;
 }
diff --git a/strbuf.h b/strbuf.h
index bfa66569a4..aae7ac3a82 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -244,6 +244,18 @@ void strbuf_addchars(struct strbuf *sb, int c, size_t n);
  */
 void strbuf_insert(struct strbuf *sb, size_t pos, const void *, size_t);

+/**
+ * Insert a NUL-terminated string to the given position of the buffer.
+ * The remaining contents will be shifted, not overwritten.  It's an
+ * inline function to allow the compiler to resolve strlen() calls on
+ * constants at compile time.
+ */
+static inline void strbuf_insertstr(struct strbuf *sb, size_t pos,
+				    const char *s)
+{
+	strbuf_insert(sb, pos, s, strlen(s));
+}
+
 /**
  * Insert data to the given position of the buffer giving a printf format
  * string. The contents will be shifted, not overwritten.
--
2.25.0

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-09 13:44 ` [PATCH v2] " René Scharfe
@ 2020-02-09 17:36   ` Eric Sunshine
  2020-02-09 18:28     ` René Scharfe
  2020-02-10 23:44     ` Jeff King
  2020-02-10  7:15   ` [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type() René Scharfe
  1 sibling, 2 replies; 23+ messages in thread
From: Eric Sunshine @ 2020-02-09 17:36 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano, Taylor Blau

On Sun, Feb 9, 2020 at 8:45 AM René Scharfe <l.s.r@web.de> wrote:
> Add a function for inserting a C string into a strbuf.  Use it
> throughout the source to get rid of magic string length constants and
> explicit strlen() calls.
>
> Like strbuf_addstr(), implement it as an inline function to avoid the
> implicit strlen() calls to cause runtime overhead.
>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
> diff --git a/mailinfo.c b/mailinfo.c
> @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
>                 len = strlen("Content-Type: ");
>                 strbuf_add(&sb, line->buf + len, line->len - len);
>                 decode_header(mi, &sb);
> -               strbuf_insert(&sb, 0, "Content-Type: ", len);
> +               strbuf_insertstr(&sb, 0, "Content-Type: ");
>                 handle_content_type(mi, &sb);

Meh. We've already computed the length of "Content-Type: " a few lines
earlier, so taking advantage of that value when inserting the string
literal is perfectly sensible. Thus, I'm not convinced that this
change is an improvement.

Digging deeper, though, I have to wonder why this bothers inserting
"Content-Type: " at all. None of the other cases handled by
check_header() bother re-inserting the header, so why this one? I
thought it might be because handle_content_type() depends upon the
header being present, but from my reading, this does not appear to be
the case. handle_content_type() calls has_attr_value() and
slurp_attr() to examine the incoming line, but neither of those seem
to expect any sort of "<Header>: " either. Thus, it appears that the
insertion of "Content-Type: " is superfluous. If this is indeed the
case, then rather than converting this to strbuf_insertstr(), I could
see it being pulled out into a separate patch which merely removes the
strbuf_insert() call.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-09 17:36   ` Eric Sunshine
@ 2020-02-09 18:28     ` René Scharfe
  2020-02-09 21:09       ` Eric Sunshine
  2020-02-09 23:10       ` Taylor Blau
  2020-02-10 23:44     ` Jeff King
  1 sibling, 2 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-09 18:28 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git Mailing List, Junio C Hamano, Taylor Blau

Am 09.02.20 um 18:36 schrieb Eric Sunshine:
> On Sun, Feb 9, 2020 at 8:45 AM René Scharfe <l.s.r@web.de> wrote:
>> Add a function for inserting a C string into a strbuf.  Use it
>> throughout the source to get rid of magic string length constants and
>> explicit strlen() calls.
>>
>> Like strbuf_addstr(), implement it as an inline function to avoid the
>> implicit strlen() calls to cause runtime overhead.
>>
>> Signed-off-by: René Scharfe <l.s.r@web.de>
>> ---
>> diff --git a/mailinfo.c b/mailinfo.c
>> @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
>>                 len = strlen("Content-Type: ");
>>                 strbuf_add(&sb, line->buf + len, line->len - len);
>>                 decode_header(mi, &sb);
>> -               strbuf_insert(&sb, 0, "Content-Type: ", len);
>> +               strbuf_insertstr(&sb, 0, "Content-Type: ");
>>                 handle_content_type(mi, &sb);
>
> Meh. We've already computed the length of "Content-Type: " a few lines
> earlier, so taking advantage of that value when inserting the string
> literal is perfectly sensible.

Well, yes, but it would be more sensible if we'd have only a single
string here.  At the source code level we have two string constants that
happen to have the same contents.  Handling them separately is
reasonable, I think.

The compiler is merging those two, and resolves the other strlen() call
at compile time, so the generated code is the same.

> Thus, I'm not convinced that this change is an improvement.

The improvement is to untangle the handling of those two string
constants and to use a C string without having to pass along its
length.  That doesn't make the code clean, yet, admittedly.

> Digging deeper, though, I have to wonder why this bothers inserting
> "Content-Type: " at all. None of the other cases handled by
> check_header() bother re-inserting the header, so why this one? I
> thought it might be because handle_content_type() depends upon the
> header being present, but from my reading, this does not appear to be
> the case. handle_content_type() calls has_attr_value() and
> slurp_attr() to examine the incoming line, but neither of those seem
> to expect any sort of "<Header>: " either. Thus, it appears that the
> insertion of "Content-Type: " is superfluous. If this is indeed the
> case, then rather than converting this to strbuf_insertstr(), I could
> see it being pulled out into a separate patch which merely removes the
> strbuf_insert() call.

Interesting.  It makes sense that handle_content_type() wouldn't need
such a header prefix -- it's only called if its existence in the line
has been confirmed.  And I also don't see a hint in the code that
would justify the insertion.

Do you care to send a follow-up patch (or one against master if you're
not convinced by my reasoning given above)?

Thanks,
René

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-09 18:28     ` René Scharfe
@ 2020-02-09 21:09       ` Eric Sunshine
  2020-02-09 23:10       ` Taylor Blau
  1 sibling, 0 replies; 23+ messages in thread
From: Eric Sunshine @ 2020-02-09 21:09 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano, Taylor Blau

On Sun, Feb 9, 2020 at 1:28 PM René Scharfe <l.s.r@web.de> wrote:
> Am 09.02.20 um 18:36 schrieb Eric Sunshine:
> > Digging deeper, though, I have to wonder why this bothers inserting
> > "Content-Type: " at all. [...]
>
> Do you care to send a follow-up patch (or one against master if you're
> not convinced by my reasoning given above)?

It's unlikely I will get to it any time soon. I'm still sitting on
unfinished patches I started 1.5 years ago and have a queue of "git
worktree"-related tasks on my To-Do list.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-09 18:28     ` René Scharfe
  2020-02-09 21:09       ` Eric Sunshine
@ 2020-02-09 23:10       ` Taylor Blau
  1 sibling, 0 replies; 23+ messages in thread
From: Taylor Blau @ 2020-02-09 23:10 UTC (permalink / raw)
  To: René Scharfe
  Cc: Eric Sunshine, Git Mailing List, Junio C Hamano, Taylor Blau

On Sun, Feb 09, 2020 at 07:28:31PM +0100, René Scharfe wrote:
> Am 09.02.20 um 18:36 schrieb Eric Sunshine:
> > On Sun, Feb 9, 2020 at 8:45 AM René Scharfe <l.s.r@web.de> wrote:
> >> Add a function for inserting a C string into a strbuf.  Use it
> >> throughout the source to get rid of magic string length constants and
> >> explicit strlen() calls.
> >>
> >> Like strbuf_addstr(), implement it as an inline function to avoid the
> >> implicit strlen() calls to cause runtime overhead.
> >>
> >> Signed-off-by: René Scharfe <l.s.r@web.de>
> >> ---
> >> diff --git a/mailinfo.c b/mailinfo.c
> >> @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
> >>                 len = strlen("Content-Type: ");
> >>                 strbuf_add(&sb, line->buf + len, line->len - len);
> >>                 decode_header(mi, &sb);
> >> -               strbuf_insert(&sb, 0, "Content-Type: ", len);
> >> +               strbuf_insertstr(&sb, 0, "Content-Type: ");
> >>                 handle_content_type(mi, &sb);
> >
> > Meh. We've already computed the length of "Content-Type: " a few lines
> > earlier, so taking advantage of that value when inserting the string
> > literal is perfectly sensible.
>
> Well, yes, but it would be more sensible if we'd have only a single
> string here.  At the source code level we have two string constants that
> happen to have the same contents.  Handling them separately is
> reasonable, I think.
>
> The compiler is merging those two, and resolves the other strlen() call
> at compile time, so the generated code is the same.

Yes, if 'strbuf_insertstr' weren't inlined, I'd be less eager to make
this suggestion, but since it *is* inlined, I don't think that the
compiler will generate substantially different instructions whether we
use one or the other here.

> > Thus, I'm not convinced that this change is an improvement.
>
> The improvement is to untangle the handling of those two string
> constants and to use a C string without having to pass along its
> length.  That doesn't make the code clean, yet, admittedly.

Agreed.

> > Digging deeper, though, I have to wonder why this bothers inserting
> > "Content-Type: " at all. None of the other cases handled by
> > check_header() bother re-inserting the header, so why this one? I
> > thought it might be because handle_content_type() depends upon the
> > header being present, but from my reading, this does not appear to be
> > the case. handle_content_type() calls has_attr_value() and
> > slurp_attr() to examine the incoming line, but neither of those seem
> > to expect any sort of "<Header>: " either. Thus, it appears that the
> > insertion of "Content-Type: " is superfluous. If this is indeed the
> > case, then rather than converting this to strbuf_insertstr(), I could
> > see it being pulled out into a separate patch which merely removes the
> > strbuf_insert() call.
>
> Interesting.  It makes sense that handle_content_type() wouldn't need
> such a header prefix -- it's only called if its existence in the line
> has been confirmed.  And I also don't see a hint in the code that
> would justify the insertion.
>
> Do you care to send a follow-up patch (or one against master if you're
> not convinced by my reasoning given above)?

I certainly can't speak for Eric, but for my $.02 I don't think that
it's worth holding this series up. This seems like a separate issue to
me, and I'd rather it not get get in the way of a perfectly good patch
in the meantime.

For now, this increases the churn a little bit, but that is the price
we have to pay for the new 'strbuf_insertstr' to be applied/used
consistently.

I'd be happy to see this go further, but I'd be just as happy to stop
where we're at.

> Thanks,
> René

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type()
  2020-02-09 13:44 ` [PATCH v2] " René Scharfe
  2020-02-09 17:36   ` Eric Sunshine
@ 2020-02-10  7:15   ` René Scharfe
  2020-02-10 17:27     ` Junio C Hamano
  2020-02-10 19:55     ` Taylor Blau
  1 sibling, 2 replies; 23+ messages in thread
From: René Scharfe @ 2020-02-10  7:15 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano, Taylor Blau, Eric Sunshine

handle_content_type() only cares about the value after "Content-Type: ";
there is no need to insert that string for it.

Suggested-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
---
 mailinfo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mailinfo.c b/mailinfo.c
index 543962d40c..402ef04dd1 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -570,7 +570,6 @@ static int check_header(struct mailinfo *mi,
 		len = strlen("Content-Type: ");
 		strbuf_add(&sb, line->buf + len, line->len - len);
 		decode_header(mi, &sb);
-		strbuf_insertstr(&sb, 0, "Content-Type: ");
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
--
2.25.0

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type()
  2020-02-10  7:15   ` [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type() René Scharfe
@ 2020-02-10 17:27     ` Junio C Hamano
  2020-02-10 19:55     ` Taylor Blau
  1 sibling, 0 replies; 23+ messages in thread
From: Junio C Hamano @ 2020-02-10 17:27 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git Mailing List, Taylor Blau, Eric Sunshine

René Scharfe <l.s.r@web.de> writes:

> handle_content_type() only cares about the value after "Content-Type: ";
> there is no need to insert that string for it.
>
> Suggested-by: Eric Sunshine <sunshine@sunshineco.com>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  mailinfo.c | 1 -
>  1 file changed, 1 deletion(-)

OK.  There is only one caller that just saw that the line it is
looking at begins with "Content-Type:", and the only thing the
helper function does is to look for a handful of attr=value pairs
(without even looking at the primary thing the header wants to
convey---the type itself, like text/plain etc.).  The helper may
want to learn a bit more careful than the current implementation
in doing what it does, but it does not need to see the header name
to do so.  It's not like we will be accepting more than one kind of
content-type like header with this helper and make it capable of
tailoring its behaviour based on which exact header type it is.

Will queue.  Thanks.

> diff --git a/mailinfo.c b/mailinfo.c
> index 543962d40c..402ef04dd1 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -570,7 +570,6 @@ static int check_header(struct mailinfo *mi,
>  		len = strlen("Content-Type: ");
>  		strbuf_add(&sb, line->buf + len, line->len - len);
>  		decode_header(mi, &sb);
> -		strbuf_insertstr(&sb, 0, "Content-Type: ");
>  		handle_content_type(mi, &sb);
>  		ret = 1;
>  		goto check_header_out;
> --
> 2.25.0

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type()
  2020-02-10  7:15   ` [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type() René Scharfe
  2020-02-10 17:27     ` Junio C Hamano
@ 2020-02-10 19:55     ` Taylor Blau
  1 sibling, 0 replies; 23+ messages in thread
From: Taylor Blau @ 2020-02-10 19:55 UTC (permalink / raw)
  To: René Scharfe
  Cc: Git Mailing List, Junio C Hamano, Taylor Blau, Eric Sunshine

Hi René,

On Mon, Feb 10, 2020 at 08:15:19AM +0100, René Scharfe wrote:
> handle_content_type() only cares about the value after "Content-Type: ";
> there is no need to insert that string for it.
>
> Suggested-by: Eric Sunshine <sunshine@sunshineco.com>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  mailinfo.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/mailinfo.c b/mailinfo.c
> index 543962d40c..402ef04dd1 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -570,7 +570,6 @@ static int check_header(struct mailinfo *mi,
>  		len = strlen("Content-Type: ");
>  		strbuf_add(&sb, line->buf + len, line->len - len);
>  		decode_header(mi, &sb);
> -		strbuf_insertstr(&sb, 0, "Content-Type: ");

This looks quite good to me. When I first looked, I was wondering if we
should have used 'skip_prefix' and passed that result into
'strbuf_insertstr', but I think that's a bigger change than we need for
now.

This looks all right to me.

>  		handle_content_type(mi, &sb);
>  		ret = 1;
>  		goto check_header_out;
> --
> 2.25.0

Here is my:

  Reviewed-by: Taylor Blau <me@ttaylorr.com>

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-09 17:36   ` Eric Sunshine
  2020-02-09 18:28     ` René Scharfe
@ 2020-02-10 23:44     ` Jeff King
  2020-02-11 16:17       ` Junio C Hamano
  2020-02-11 16:18       ` [PATCH v2] strbuf: add and use strbuf_insertstr() René Scharfe
  1 sibling, 2 replies; 23+ messages in thread
From: Jeff King @ 2020-02-10 23:44 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: René Scharfe, Git Mailing List, Junio C Hamano, Taylor Blau

On Sun, Feb 09, 2020 at 12:36:22PM -0500, Eric Sunshine wrote:

> On Sun, Feb 9, 2020 at 8:45 AM René Scharfe <l.s.r@web.de> wrote:
> > Add a function for inserting a C string into a strbuf.  Use it
> > throughout the source to get rid of magic string length constants and
> > explicit strlen() calls.
> >
> > Like strbuf_addstr(), implement it as an inline function to avoid the
> > implicit strlen() calls to cause runtime overhead.
> >
> > Signed-off-by: René Scharfe <l.s.r@web.de>
> > ---
> > diff --git a/mailinfo.c b/mailinfo.c
> > @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
> >                 len = strlen("Content-Type: ");
> >                 strbuf_add(&sb, line->buf + len, line->len - len);
> >                 decode_header(mi, &sb);
> > -               strbuf_insert(&sb, 0, "Content-Type: ", len);
> > +               strbuf_insertstr(&sb, 0, "Content-Type: ");
> >                 handle_content_type(mi, &sb);
> 
> Meh. We've already computed the length of "Content-Type: " a few lines
> earlier, so taking advantage of that value when inserting the string
> literal is perfectly sensible. Thus, I'm not convinced that this
> change is an improvement.

I had a similar thought. I kind of wonder if all of these "len" bits
(and their repeated strings) could go away if cmp_header() just used
skip_iprefix() under the hood and had a pointer out-parameter.

Something like the (largely untested) patch below. That would also make
it easy to support arbitrary amounts of whitespace after the header,
which I think are allowed by the standard (whereas now we'd parse
"Content-type:    text/plain" as "    text/plain", which is silly).

Worth doing?

---
diff --git a/mailinfo.c b/mailinfo.c
index b395adbdf2..bbb5b429f8 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -346,11 +346,16 @@ static const char *header[MAX_HDR_PARSED] = {
 	"From","Subject","Date",
 };
 
-static inline int cmp_header(const struct strbuf *line, const char *hdr)
+static inline int cmp_header(const struct strbuf *line, const char *hdr,
+			     const char **outval)
 {
-	int len = strlen(hdr);
-	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
-			line->buf[len] == ':' && isspace(line->buf[len + 1]);
+	const char *val;
+	if (!skip_iprefix(line->buf, hdr, &val) ||
+	    *val++ != ':' ||
+	    !isspace(*val++))
+		return 0;
+	*outval = val;
+	return 1;
 }
 
 static int is_format_patch_separator(const char *line, int len)
@@ -547,17 +552,17 @@ static int check_header(struct mailinfo *mi,
 			const struct strbuf *line,
 			struct strbuf *hdr_data[], int overwrite)
 {
-	int i, ret = 0, len;
+	int i, ret = 0;
 	struct strbuf sb = STRBUF_INIT;
+	const char *val;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
-		int len = strlen(header[i]);
-		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
+		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i], &val)) {
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
+			strbuf_addstr(&sb, val);
 			decode_header(mi, &sb);
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
@@ -566,26 +571,22 @@ static int check_header(struct mailinfo *mi,
 	}
 
 	/* Content stuff */
-	if (cmp_header(line, "Content-Type")) {
-		len = strlen("Content-Type: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+	if (cmp_header(line, "Content-Type", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
-		strbuf_insert(&sb, 0, "Content-Type: ", len);
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Content-Transfer-Encoding")) {
-		len = strlen("Content-Transfer-Encoding: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+	if (cmp_header(line, "Content-Transfer-Encoding", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Message-Id")) {
-		len = strlen("Message-Id: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+	if (cmp_header(line, "Message-Id", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
@@ -607,8 +608,9 @@ static int is_inbody_header(const struct mailinfo *mi,
 			    const struct strbuf *line)
 {
 	int i;
+	const char *val;
 	for (i = 0; header[i]; i++)
-		if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
+		if (!mi->s_hdr_data[i] && cmp_header(line, header[i], &val))
 			return 1;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-10 23:44     ` Jeff King
@ 2020-02-11 16:17       ` Junio C Hamano
  2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
  2020-02-11 16:18       ` [PATCH v2] strbuf: add and use strbuf_insertstr() René Scharfe
  1 sibling, 1 reply; 23+ messages in thread
From: Junio C Hamano @ 2020-02-11 16:17 UTC (permalink / raw)
  To: Jeff King; +Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

Jeff King <peff@peff.net> writes:

> Something like the (largely untested) patch below. That would also make
> it easy to support arbitrary amounts of whitespace after the header,
> which I think are allowed by the standard (whereas now we'd parse
> "Content-type:    text/plain" as "    text/plain", which is silly).
>
> Worth doing?

The result does look cleaner.  I do not think we've seen much
polishing in this area of the code for quite a long time, which
might indicate that what we have may be good enough, but at the same
time it would mean it is quiescent time for the code and it is safe
to clean it up.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-10 23:44     ` Jeff King
  2020-02-11 16:17       ` Junio C Hamano
@ 2020-02-11 16:18       ` René Scharfe
  2020-02-11 17:13         ` Jeff King
  1 sibling, 1 reply; 23+ messages in thread
From: René Scharfe @ 2020-02-11 16:18 UTC (permalink / raw)
  To: Jeff King, Eric Sunshine; +Cc: Git Mailing List, Junio C Hamano, Taylor Blau

Am 11.02.20 um 00:44 schrieb Jeff King:
> On Sun, Feb 09, 2020 at 12:36:22PM -0500, Eric Sunshine wrote:
>
>> On Sun, Feb 9, 2020 at 8:45 AM René Scharfe <l.s.r@web.de> wrote:
>>> Add a function for inserting a C string into a strbuf.  Use it
>>> throughout the source to get rid of magic string length constants and
>>> explicit strlen() calls.
>>>
>>> Like strbuf_addstr(), implement it as an inline function to avoid the
>>> implicit strlen() calls to cause runtime overhead.
>>>
>>> Signed-off-by: René Scharfe <l.s.r@web.de>
>>> ---
>>> diff --git a/mailinfo.c b/mailinfo.c
>>> @@ -570,7 +570,7 @@ static int check_header(struct mailinfo *mi,
>>>                 len = strlen("Content-Type: ");
>>>                 strbuf_add(&sb, line->buf + len, line->len - len);
>>>                 decode_header(mi, &sb);
>>> -               strbuf_insert(&sb, 0, "Content-Type: ", len);
>>> +               strbuf_insertstr(&sb, 0, "Content-Type: ");
>>>                 handle_content_type(mi, &sb);
>>
>> Meh. We've already computed the length of "Content-Type: " a few lines
>> earlier, so taking advantage of that value when inserting the string
>> literal is perfectly sensible. Thus, I'm not convinced that this
>> change is an improvement.
>
> I had a similar thought. I kind of wonder if all of these "len" bits
> (and their repeated strings) could go away if cmp_header() just used
> skip_iprefix() under the hood and had a pointer out-parameter.
>
> Something like the (largely untested) patch below. That would also make
> it easy to support arbitrary amounts of whitespace after the header,
> which I think are allowed by the standard (whereas now we'd parse
> "Content-type:    text/plain" as "    text/plain", which is silly).
>
> Worth doing?

Sure.

> ---
> diff --git a/mailinfo.c b/mailinfo.c
> index b395adbdf2..bbb5b429f8 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -346,11 +346,16 @@ static const char *header[MAX_HDR_PARSED] = {
>  	"From","Subject","Date",
>  };
>
> -static inline int cmp_header(const struct strbuf *line, const char *hdr)
> +static inline int cmp_header(const struct strbuf *line, const char *hdr,
> +			     const char **outval)
>  {
> -	int len = strlen(hdr);
> -	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
> -			line->buf[len] == ':' && isspace(line->buf[len + 1]);
> +	const char *val;
> +	if (!skip_iprefix(line->buf, hdr, &val) ||
> +	    *val++ != ':' ||
> +	    !isspace(*val++))
> +		return 0;
> +	*outval = val;
> +	return 1;
>  }

And you could rename it to skip_header() to fix the issue that its name
starts with cmp but its return value is the inverse of a cmp-style
function.

And it could take a char pointer instead of a strbuf, to reduce its
dependencies and make it more widely useful -- but that might also be
a case of YAGNI.

>
>  static int is_format_patch_separator(const char *line, int len)
> @@ -547,17 +552,17 @@ static int check_header(struct mailinfo *mi,
>  			const struct strbuf *line,
>  			struct strbuf *hdr_data[], int overwrite)
>  {
> -	int i, ret = 0, len;
> +	int i, ret = 0;
>  	struct strbuf sb = STRBUF_INIT;
> +	const char *val;
>
>  	/* search for the interesting parts */
>  	for (i = 0; header[i]; i++) {
> -		int len = strlen(header[i]);
> -		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
> +		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i], &val)) {
>  			/* Unwrap inline B and Q encoding, and optionally
>  			 * normalize the meta information to utf8.
>  			 */
> -			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
> +			strbuf_addstr(&sb, val);

That assumes the header value never contains NULs.  Valid?

>  			decode_header(mi, &sb);
>  			handle_header(&hdr_data[i], &sb);
>  			ret = 1;
> @@ -566,26 +571,22 @@ static int check_header(struct mailinfo *mi,
>  	}
>
>  	/* Content stuff */
> -	if (cmp_header(line, "Content-Type")) {
> -		len = strlen("Content-Type: ");
> -		strbuf_add(&sb, line->buf + len, line->len - len);
> +	if (cmp_header(line, "Content-Type", &val)) {
> +		strbuf_addstr(&sb, val);
>  		decode_header(mi, &sb);
> -		strbuf_insert(&sb, 0, "Content-Type: ", len);
>  		handle_content_type(mi, &sb);
>  		ret = 1;
>  		goto check_header_out;
>  	}
> -	if (cmp_header(line, "Content-Transfer-Encoding")) {
> -		len = strlen("Content-Transfer-Encoding: ");
> -		strbuf_add(&sb, line->buf + len, line->len - len);
> +	if (cmp_header(line, "Content-Transfer-Encoding", &val)) {
> +		strbuf_addstr(&sb, val);
>  		decode_header(mi, &sb);
>  		handle_content_transfer_encoding(mi, &sb);
>  		ret = 1;
>  		goto check_header_out;
>  	}
> -	if (cmp_header(line, "Message-Id")) {
> -		len = strlen("Message-Id: ");
> -		strbuf_add(&sb, line->buf + len, line->len - len);
> +	if (cmp_header(line, "Message-Id", &val)) {
> +		strbuf_addstr(&sb, val);
>  		decode_header(mi, &sb);
>  		if (mi->add_message_id)
>  			mi->message_id = strbuf_detach(&sb, NULL);

The repeated sequence cmp_header()+strbuf_add{,str}()+decode_header()
makes me itchy.

> @@ -607,8 +608,9 @@ static int is_inbody_header(const struct mailinfo *mi,
>  			    const struct strbuf *line)
>  {
>  	int i;
> +	const char *val;
>  	for (i = 0; header[i]; i++)
> -		if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
> +		if (!mi->s_hdr_data[i] && cmp_header(line, header[i], &val))
>  			return 1;
>  	return 0;
>  }
>


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] strbuf: add and use strbuf_insertstr()
  2020-02-11 16:18       ` [PATCH v2] strbuf: add and use strbuf_insertstr() René Scharfe
@ 2020-02-11 17:13         ` Jeff King
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:13 UTC (permalink / raw)
  To: René Scharfe
  Cc: Eric Sunshine, Git Mailing List, Junio C Hamano, Taylor Blau

On Tue, Feb 11, 2020 at 05:18:02PM +0100, René Scharfe wrote:

> And you could rename it to skip_header() to fix the issue that its name
> starts with cmp but its return value is the inverse of a cmp-style
> function.

Good suggestion.

> And it could take a char pointer instead of a strbuf, to reduce its
> dependencies and make it more widely useful -- but that might also be
> a case of YAGNI.

I didn't take this one because all of the callers have strbuf, and it
saves them from dereferencing.

> > -			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
> > +			strbuf_addstr(&sb, val);
> 
> That assumes the header value never contains NULs.  Valid?

I think so, but I pulled it out to a separate patch so that it could be
argued for explicitly.

> The repeated sequence cmp_header()+strbuf_add{,str}()+decode_header()
> makes me itchy.

Yeah, it's ugly and I factored it out in a new patch. But the
boilerplate and docstring ends up longer than the savings. ;)

-Peff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 0/4] some more mailinfo cleanups
  2020-02-11 16:17       ` Junio C Hamano
@ 2020-02-11 17:16         ` Jeff King
  2020-02-11 17:18           ` [PATCH 1/4] mailinfo: treat header values as C strings Jeff King
                             ` (3 more replies)
  0 siblings, 4 replies; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:16 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

On Tue, Feb 11, 2020 at 08:17:30AM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > Something like the (largely untested) patch below. That would also make
> > it easy to support arbitrary amounts of whitespace after the header,
> > which I think are allowed by the standard (whereas now we'd parse
> > "Content-type:    text/plain" as "    text/plain", which is silly).
> >
> > Worth doing?
> 
> The result does look cleaner.  I do not think we've seen much
> polishing in this area of the code for quite a long time, which
> might indicate that what we have may be good enough, but at the same
> time it would mean it is quiescent time for the code and it is safe
> to clean it up.

Yeah, it feels a little like code churn. It's not like we ever add new
headers to match. But I doodled the series below while I was in a
meeting, and it does fix one tiny user-visible bug (that I don't think
anybody has ever noticed or complained about), so maybe it's worth
doing. ;)

This is on top of rs/strbuf-insertstr, since it relies on 517b60564e
(mailinfo: don't insert header prefix for handle_content_type(),
2020-02-10).

  [1/4]: mailinfo: treat header values as C strings
  [2/4]: mailinfo: simplify parsing of header values
  [3/4]: mailinfo: be more liberal with header whitespace
  [4/4]: mailinfo: factor out some repeated header handling

 mailinfo.c          | 62 +++++++++++++++++++++++++++------------------
 t/t5100-mailinfo.sh | 15 +++++++++++
 2 files changed, 52 insertions(+), 25 deletions(-)

-Peff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/4] mailinfo: treat header values as C strings
  2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
@ 2020-02-11 17:18           ` Jeff King
  2020-02-11 17:26             ` Eric Sunshine
  2020-02-11 17:19           ` [PATCH 2/4] mailinfo: simplify parsing of header values Jeff King
                             ` (2 subsequent siblings)
  3 siblings, 1 reply; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:18 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

We read each header line into a strbuf, which means that we could
in theory handle header values with embedded NUL bytes. But in practice,
the values we parse out are passed to decode_header(), which uses
strstr(), strchr(), etc. And we would not expect such bytes anyway; they
are forbidden by RFC822, etc and any non-ASCII characters should be
encoded with RFC2047 encoding.

So let's switch to using strbuf_addstr(), which saves us some length
computations (and will enable further cleanups in this code).

Signed-off-by: Jeff King <peff@peff.net>
---
We _could_ skip this and compute the length later as:

  line->len - (val - line->buf)

but I like the simplification.

 mailinfo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 402ef04dd1..59d5a8b8f3 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -557,7 +557,7 @@ static int check_header(struct mailinfo *mi,
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
+			strbuf_addstr(&sb, line->buf + len + 2);
 			decode_header(mi, &sb);
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
@@ -568,23 +568,23 @@ static int check_header(struct mailinfo *mi,
 	/* Content stuff */
 	if (cmp_header(line, "Content-Type")) {
 		len = strlen("Content-Type: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
 	if (cmp_header(line, "Content-Transfer-Encoding")) {
 		len = strlen("Content-Transfer-Encoding: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
 	if (cmp_header(line, "Message-Id")) {
 		len = strlen("Message-Id: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
-- 
2.25.0.708.g4c6f45973e


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/4] mailinfo: simplify parsing of header values
  2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
  2020-02-11 17:18           ` [PATCH 1/4] mailinfo: treat header values as C strings Jeff King
@ 2020-02-11 17:19           ` Jeff King
  2020-02-11 17:19           ` [PATCH 3/4] mailinfo: be more liberal with header whitespace Jeff King
  2020-02-11 17:20           ` [PATCH 4/4] mailinfo: factor out some repeated header handling Jeff King
  3 siblings, 0 replies; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:19 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

Our code to parse header values first checks to see if a line starts
with a header, and then manually skips past the matched string to find
the value. We can do this all in one step by modeling after
skip_prefix(), which returns a pointer into the string after the
parsing.

This lets us remove some repeated strings, and will also enable us to
parse more flexibly in a future patch.

Signed-off-by: Jeff King <peff@peff.net>
---
 mailinfo.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 59d5a8b8f3..ee8d05e239 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -346,11 +346,16 @@ static const char *header[MAX_HDR_PARSED] = {
 	"From","Subject","Date",
 };
 
-static inline int cmp_header(const struct strbuf *line, const char *hdr)
+static inline int skip_header(const struct strbuf *line, const char *hdr,
+			      const char **outval)
 {
-	int len = strlen(hdr);
-	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
-			line->buf[len] == ':' && isspace(line->buf[len + 1]);
+	const char *val;
+	if (!skip_iprefix(line->buf, hdr, &val) ||
+	    *val++ != ':' ||
+	    !isspace(*val++))
+		return 0;
+	*outval = val;
+	return 1;
 }
 
 static int is_format_patch_separator(const char *line, int len)
@@ -547,17 +552,18 @@ static int check_header(struct mailinfo *mi,
 			const struct strbuf *line,
 			struct strbuf *hdr_data[], int overwrite)
 {
-	int i, ret = 0, len;
+	int i, ret = 0;
 	struct strbuf sb = STRBUF_INIT;
+	const char *val;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
-		int len = strlen(header[i]);
-		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
+		if ((!hdr_data[i] || overwrite) &&
+		    skip_header(line, header[i], &val)) {
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			strbuf_addstr(&sb, line->buf + len + 2);
+			strbuf_addstr(&sb, val);
 			decode_header(mi, &sb);
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
@@ -566,25 +572,22 @@ static int check_header(struct mailinfo *mi,
 	}
 
 	/* Content stuff */
-	if (cmp_header(line, "Content-Type")) {
-		len = strlen("Content-Type: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Content-Type", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Content-Transfer-Encoding")) {
-		len = strlen("Content-Transfer-Encoding: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Content-Transfer-Encoding", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Message-Id")) {
-		len = strlen("Message-Id: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Message-Id", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
@@ -606,8 +609,9 @@ static int is_inbody_header(const struct mailinfo *mi,
 			    const struct strbuf *line)
 {
 	int i;
+	const char *val;
 	for (i = 0; header[i]; i++)
-		if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
+		if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
 			return 1;
 	return 0;
 }
-- 
2.25.0.708.g4c6f45973e


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/4] mailinfo: be more liberal with header whitespace
  2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
  2020-02-11 17:18           ` [PATCH 1/4] mailinfo: treat header values as C strings Jeff King
  2020-02-11 17:19           ` [PATCH 2/4] mailinfo: simplify parsing of header values Jeff King
@ 2020-02-11 17:19           ` Jeff King
  2020-02-11 17:20           ` [PATCH 4/4] mailinfo: factor out some repeated header handling Jeff King
  3 siblings, 0 replies; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:19 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

RFC822 and friends allow arbitrary whitespace after the colon of a
header and before the values. I.e.:

  Subject:foo
  Subject: foo
  Subject:  foo

all have the subject "foo". But mailinfo requires exactly one space.
This doesn't seem to be bothering anybody, but it is pickier than the
standard specifies. And we can easily just soak up arbitrary whitespace
there in our parser, so let's do so.

Note that the test covers both too little and too much whitespace, but
the "too much" case already works fine (because we later eat leading and
trailing whitespace from the values).

Signed-off-by: Jeff King <peff@peff.net>
---
 mailinfo.c          |  5 +++--
 t/t5100-mailinfo.sh | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index ee8d05e239..78f06da524 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -351,9 +351,10 @@ static inline int skip_header(const struct strbuf *line, const char *hdr,
 {
 	const char *val;
 	if (!skip_iprefix(line->buf, hdr, &val) ||
-	    *val++ != ':' ||
-	    !isspace(*val++))
+	    *val++ != ':')
 		return 0;
+	while (isspace(*val))
+		val++;
 	*outval = val;
 	return 1;
 }
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 9690dcad4f..147e616533 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -213,4 +213,19 @@ test_expect_failure 'mailinfo -b separated double [PATCH]' '
 	test z"$subj" = z"Subject: [other] message"
 '
 
+test_expect_success 'mailinfo handles unusual header whitespace' '
+	git mailinfo /dev/null /dev/null >actual <<-\EOF &&
+	From:Real Name <user@example.com>
+	Subject:    extra spaces
+	EOF
+
+	cat >expect <<-\EOF &&
+	Author: Real Name
+	Email: user@example.com
+	Subject: extra spaces
+
+	EOF
+	test_cmp expect actual
+'
+
 test_done
-- 
2.25.0.708.g4c6f45973e


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/4] mailinfo: factor out some repeated header handling
  2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
                             ` (2 preceding siblings ...)
  2020-02-11 17:19           ` [PATCH 3/4] mailinfo: be more liberal with header whitespace Jeff King
@ 2020-02-11 17:20           ` Jeff King
  3 siblings, 0 replies; 23+ messages in thread
From: Jeff King @ 2020-02-11 17:20 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, René Scharfe, Git Mailing List, Taylor Blau

We do the same thing for each header: match it, copy it to a strbuf, and
decode it. Let's put that in a helper function to avoid repetition.

Signed-off-by: Jeff King <peff@peff.net>
---
 mailinfo.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 78f06da524..cf92255515 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -549,47 +549,54 @@ static void decode_header(struct mailinfo *mi, struct strbuf *it)
 		mi->input_error = -1;
 }
 
+/*
+ * Returns true if "line" contains a header matching "hdr", in which case "val"
+ * will contain the value of the header with any RFC2047 B and Q encoding
+ * unwrapped, and optionally normalize the meta information to utf8.
+ */
+static int parse_header(const struct strbuf *line,
+			const char *hdr,
+			struct mailinfo *mi,
+			struct strbuf *val)
+{
+	const char *val_str;
+
+	if (!skip_header(line, hdr, &val_str))
+		return 0;
+	strbuf_addstr(val, val_str);
+	decode_header(mi, val);
+	return 1;
+}
+
 static int check_header(struct mailinfo *mi,
 			const struct strbuf *line,
 			struct strbuf *hdr_data[], int overwrite)
 {
 	int i, ret = 0;
 	struct strbuf sb = STRBUF_INIT;
-	const char *val;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
 		if ((!hdr_data[i] || overwrite) &&
-		    skip_header(line, header[i], &val)) {
-			/* Unwrap inline B and Q encoding, and optionally
-			 * normalize the meta information to utf8.
-			 */
-			strbuf_addstr(&sb, val);
-			decode_header(mi, &sb);
+		    parse_header(line, header[i], mi, &sb)) {
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
 			goto check_header_out;
 		}
 	}
 
 	/* Content stuff */
-	if (skip_header(line, "Content-Type", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Content-Type", mi, &sb)) {
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (skip_header(line, "Content-Transfer-Encoding", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (skip_header(line, "Message-Id", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Message-Id", mi, &sb)) {
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
 		ret = 1;
-- 
2.25.0.708.g4c6f45973e

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] mailinfo: treat header values as C strings
  2020-02-11 17:18           ` [PATCH 1/4] mailinfo: treat header values as C strings Jeff King
@ 2020-02-11 17:26             ` Eric Sunshine
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Sunshine @ 2020-02-11 17:26 UTC (permalink / raw)
  To: Jeff King
  Cc: Junio C Hamano, René Scharfe, Git Mailing List, Taylor Blau

On Tue, Feb 11, 2020 at 12:18 PM Jeff King <peff@peff.net> wrote:
> We read each header line into a strbuf, which means that we could
> in theory handle header values with embedded NUL bytes. But in practice,
> the values we parse out are passed to decode_header(), which uses
> strstr(), strchr(), etc. And we would not expect such bytes anyway; they
> are forbidden by RFC822, etc and any non-ASCII characters should be

s/etc/etc./

> encoded with RFC2047 encoding.
>
> So let's switch to using strbuf_addstr(), which saves us some length
> computations (and will enable further cleanups in this code).
>
> Signed-off-by: Jeff King <peff@peff.net>

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-02-11 17:27 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-08 19:56 [PATCH] strbuf: add and use strbuf_insertstr() René Scharfe
2020-02-08 23:08 ` Taylor Blau
2020-02-09 10:23   ` René Scharfe
2020-02-09  0:53 ` Eric Sunshine
2020-02-09 10:23   ` René Scharfe
2020-02-09 13:44 ` [PATCH v2] " René Scharfe
2020-02-09 17:36   ` Eric Sunshine
2020-02-09 18:28     ` René Scharfe
2020-02-09 21:09       ` Eric Sunshine
2020-02-09 23:10       ` Taylor Blau
2020-02-10 23:44     ` Jeff King
2020-02-11 16:17       ` Junio C Hamano
2020-02-11 17:16         ` [PATCH 0/4] some more mailinfo cleanups Jeff King
2020-02-11 17:18           ` [PATCH 1/4] mailinfo: treat header values as C strings Jeff King
2020-02-11 17:26             ` Eric Sunshine
2020-02-11 17:19           ` [PATCH 2/4] mailinfo: simplify parsing of header values Jeff King
2020-02-11 17:19           ` [PATCH 3/4] mailinfo: be more liberal with header whitespace Jeff King
2020-02-11 17:20           ` [PATCH 4/4] mailinfo: factor out some repeated header handling Jeff King
2020-02-11 16:18       ` [PATCH v2] strbuf: add and use strbuf_insertstr() René Scharfe
2020-02-11 17:13         ` Jeff King
2020-02-10  7:15   ` [PATCH 2/1] mailinfo: don't insert header prefix for handle_content_type() René Scharfe
2020-02-10 17:27     ` Junio C Hamano
2020-02-10 19:55     ` Taylor Blau

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).