[PATCH] general style: replaces memcmp() with proper starts

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [PATCH] general style: replaces memcmp() with proper starts_with()
@ 2014-03-12 14:43 Quint Guvernator
  2014-03-12 17:56 ` Jeff King
  0 siblings, 1 reply; 22+ messages in thread
From: Quint Guvernator @ 2014-03-12 14:43 UTC (permalink / raw)
  To: git; +Cc: Quint Guvernator

memcmp() is replaced with negated starts_with() when comparing strings
from the beginning. starts_with() looks nicer and it saves the extra
argument of the length of the comparing string.

note: this commit properly handles negation in starts_with()

Signed-off-by: Quint Guvernator <quintus.public@gmail.com>
---
 builtin/apply.c                           | 10 +++++-----
 builtin/cat-file.c                        |  2 +-
 builtin/commit-tree.c                     |  2 +-
 builtin/for-each-ref.c                    |  2 +-
 builtin/get-tar-commit-id.c               |  2 +-
 builtin/mailinfo.c                        | 10 +++++-----
 builtin/mktag.c                           |  8 ++++----
 builtin/patch-id.c                        | 18 +++++++++---------
 commit.c                                  | 18 +++++++++---------
 connect.c                                 |  8 ++++----
 contrib/convert-objects/convert-objects.c |  6 +++---
 convert.c                                 |  2 +-
 credential-cache.c                        |  2 +-
 fetch-pack.c                              |  2 +-
 fsck.c                                    |  8 ++++----
 http-walker.c                             |  4 ++--
 imap-send.c                               |  6 +++---
 pack-write.c                              |  2 +-
 path.c                                    |  2 +-
 refs.c                                    |  2 +-
 remote.c                                  |  2 +-
 submodule.c                               |  2 +-
 transport.c                               |  2 +-
 23 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index a7e72d5..16c20af 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -846,8 +846,8 @@ static int has_epoch_timestamp(const char *nameline)
 	 * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31
 	 * (west of GMT) or 1970-01-01 (east of GMT)
 	 */
-	if ((zoneoffset < 0 && memcmp(timestamp, "1969-12-31", 10)) ||
-	    (0 <= zoneoffset && memcmp(timestamp, "1970-01-01", 10)))
+	if ((zoneoffset < 0 && !starts_with(timestamp, "1969-12-31")) ||
+	    (0 <= zoneoffset && !starts_with(timestamp, "1970-01-01")))
 		return 0;
 
 	hourminute = (strtol(timestamp + 11, NULL, 10) * 60 +
@@ -1631,7 +1631,7 @@ static int parse_fragment(const char *line, unsigned long size,
 		 * l10n of "\ No newline..." is at least that long.
 		 */
 		case '\\':
-			if (len < 12 || memcmp(line, "\\ ", 2))
+			if (len < 12 || !starts_with(line, "\\ "))
 				return -1;
 			break;
 		}
@@ -1646,7 +1646,7 @@ static int parse_fragment(const char *line, unsigned long size,
 	 * it in the above loop because we hit oldlines == newlines == 0
 	 * before seeing it.
 	 */
-	if (12 < size && !memcmp(line, "\\ ", 2))
+	if (12 < size && starts_with(line, "\\ "))
 		offset += linelen(line, size);
 
 	patch->lines_added += added;
@@ -1673,7 +1673,7 @@ static int parse_single_patch(const char *line, unsigned long size, struct patch
 	unsigned long oldlines = 0, newlines = 0, context = 0;
 	struct fragment **fragp = &patch->fragments;
 
-	while (size > 4 && !memcmp(line, "@@ -", 4)) {
+	while (size > 4 && starts_with(line, "@@ -")) {
 		struct fragment *fragment;
 		int len;
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index d5a93e0..6266bbb 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -82,7 +82,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 				enum object_type type;
 				unsigned long size;
 				char *buffer = read_sha1_file(sha1, &type, &size);
-				if (memcmp(buffer, "object ", 7) ||
+				if (!starts_with(buffer, "object ") ||
 				    get_sha1_hex(buffer + 7, blob_sha1))
 					die("%s not a valid tag", sha1_to_hex(sha1));
 				free(buffer);
diff --git a/builtin/commit-tree.c b/builtin/commit-tree.c
index 987a4c3..2777519 100644
--- a/builtin/commit-tree.c
+++ b/builtin/commit-tree.c
@@ -66,7 +66,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
 			continue;
 		}
 
-		if (!memcmp(arg, "-S", 2)) {
+		if (starts_with(arg, "-S")) {
 			sign_commit = arg + 2;
 			continue;
 		}
diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index 51798b4..fe198fd 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -193,7 +193,7 @@ static int verify_format(const char *format)
 		at = parse_atom(sp + 2, ep);
 		cp = ep + 1;
 
-		if (!memcmp(used_atom[at], "color:", 6))
+		if (starts_with(used_atom[at], "color:"))
 			need_color_reset_at_eol = !!strcmp(used_atom[at], color_reset);
 	}
 	return 0;
diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c
index aa72596..6409c26 100644
--- a/builtin/get-tar-commit-id.c
+++ b/builtin/get-tar-commit-id.c
@@ -29,7 +29,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
 		die("git get-tar-commit-id: read error");
 	if (header->typeflag[0] != 'g')
 		return 1;
-	if (memcmp(content, "52 comment=", 11))
+	if (!starts_with(content, "52 comment="))
 		return 1;
 
 	n = write_in_full(1, content + 11, 41);
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 2c3cd8e..1e76dac 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -626,7 +626,7 @@ static int handle_boundary(void)
 	strbuf_addch(&newline, '\n');
 again:
 	if (line.len >= (*content_top)->len + 2 &&
-	    !memcmp(line.buf + (*content_top)->len, "--", 2)) {
+	    starts_with(line.buf + (*content_top)->len, "--")) {
 		/* we hit an end boundary */
 		/* pop the current boundary off the stack */
 		strbuf_release(*content_top);
@@ -727,8 +727,8 @@ static int is_scissors_line(const struct strbuf *line)
 			continue;
 		}
 		if (i + 1 < len &&
-		    (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) ||
-		     !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) {
+		    (starts_with(buf + i, ">8") || starts_with(buf + i, "8<") ||
+		     starts_with(buf + i, ">%") || starts_with(buf + i, "%<"))) {
 			in_perforation = 1;
 			perforation += 2;
 			scissors += 2;
@@ -929,13 +929,13 @@ static void handle_info(void)
 		else
 			continue;
 
-		if (!memcmp(header[i], "Subject", 7)) {
+		if (starts_with(header[i], "Subject")) {
 			if (!keep_subject) {
 				cleanup_subject(hdr);
 				cleanup_space(hdr);
 			}
 			output_header_lines(fout, "Subject", hdr);
-		} else if (!memcmp(header[i], "From", 4)) {
+		} else if (starts_with(header[i], "From")) {
 			cleanup_space(hdr);
 			handle_from(hdr);
 			fprintf(fout, "Author: %s\n", name.buf);
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 640ab64..70385ac 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -49,7 +49,7 @@ static int verify_tag(char *buffer, unsigned long size)
 
 	/* Verify object line */
 	object = buffer;
-	if (memcmp(object, "object ", 7))
+	if (!starts_with(object, "object "))
 		return error("char%d: does not start with \"object \"", 0);
 
 	if (get_sha1_hex(object + 7, sha1))
@@ -57,7 +57,7 @@ static int verify_tag(char *buffer, unsigned long size)
 
 	/* Verify type line */
 	type_line = object + 48;
-	if (memcmp(type_line - 1, "\ntype ", 6))
+	if (!starts_with(type_line - 1, "\ntype "))
 		return error("char%d: could not find \"\\ntype \"", 47);
 
 	/* Verify tag-line */
@@ -66,7 +66,7 @@ static int verify_tag(char *buffer, unsigned long size)
 		return error("char%"PRIuMAX": could not find next \"\\n\"",
 				(uintmax_t) (type_line - buffer));
 	tag_line++;
-	if (memcmp(tag_line, "tag ", 4) || tag_line[4] == '\n')
+	if (!starts_with(tag_line, "tag ") || tag_line[4] == '\n')
 		return error("char%"PRIuMAX": no \"tag \" found",
 				(uintmax_t) (tag_line - buffer));
 
@@ -98,7 +98,7 @@ static int verify_tag(char *buffer, unsigned long size)
 	/* Verify the tagger line */
 	tagger_line = tag_line;
 
-	if (memcmp(tagger_line, "tagger ", 7))
+	if (!starts_with(tagger_line, "tagger "))
 		return error("char%"PRIuMAX": could not find \"tagger \"",
 			(uintmax_t) (tagger_line - buffer));
 
diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3cfe02d..7e02f2c 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -66,13 +66,13 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 		char *p = line;
 		int len;
 
-		if (!memcmp(line, "diff-tree ", 10))
+		if (starts_with(line, "diff-tree "))
 			p += 10;
-		else if (!memcmp(line, "commit ", 7))
+		else if (starts_with(line, "commit "))
 			p += 7;
-		else if (!memcmp(line, "From ", 5))
+		else if (starts_with(line, "From "))
 			p += 5;
-		else if (!memcmp(line, "\\ ", 2) && 12 < strlen(line))
+		else if (starts_with(line, "\\ ") && 12 < strlen(line))
 			continue;
 
 		if (!get_sha1_hex(p, next_sha1)) {
@@ -81,14 +81,14 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 		}
 
 		/* Ignore commit comments */
-		if (!patchlen && memcmp(line, "diff ", 5))
+		if (!patchlen && !starts_with(line, "diff "))
 			continue;
 
 		/* Parsing diff header?  */
 		if (before == -1) {
-			if (!memcmp(line, "index ", 6))
+			if (starts_with(line, "index "))
 				continue;
-			else if (!memcmp(line, "--- ", 4))
+			else if (starts_with(line, "--- "))
 				before = after = 1;
 			else if (!isalpha(line[0]))
 				break;
@@ -96,14 +96,14 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 
 		/* Looking for a valid hunk header?  */
 		if (before == 0 && after == 0) {
-			if (!memcmp(line, "@@ -", 4)) {
+			if (starts_with(line, "@@ -")) {
 				/* Parse next hunk, but ignore line numbers.  */
 				scan_hunk_header(line, &before, &after);
 				continue;
 			}
 
 			/* Split at the end of the patch.  */
-			if (memcmp(line, "diff ", 5))
+			if (!starts_with(line, "diff "))
 				break;
 
 			/* Else we're parsing another header.  */
diff --git a/commit.c b/commit.c
index 6bf4fe0..0a6ce49 100644
--- a/commit.c
+++ b/commit.c
@@ -90,13 +90,13 @@ static unsigned long parse_commit_date(const char *buf, const char *tail)
 
 	if (buf + 6 >= tail)
 		return 0;
-	if (memcmp(buf, "author", 6))
+	if (!starts_with(buf, "author"))
 		return 0;
 	while (buf < tail && *buf++ != '\n')
 		/* nada */;
 	if (buf + 9 >= tail)
 		return 0;
-	if (memcmp(buf, "committer", 9))
+	if (!starts_with(buf, "committer"))
 		return 0;
 	while (buf < tail && *buf++ != '>')
 		/* nada */;
@@ -269,7 +269,7 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
 		return 0;
 	item->object.parsed = 1;
 	tail += size;
-	if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
+	if (tail <= bufptr + 46 || !starts_with(bufptr, "tree ") || bufptr[45] != '\n')
 		return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
 	if (get_sha1_hex(bufptr + 5, parent) < 0)
 		return error("bad tree pointer in commit %s",
@@ -279,7 +279,7 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
 	pptr = &item->parents;
 
 	graft = lookup_commit_graft(item->object.sha1);
-	while (bufptr + 48 < tail && !memcmp(bufptr, "parent ", 7)) {
+	while (bufptr + 48 < tail && starts_with(bufptr, "parent ")) {
 		struct commit *new_parent;
 
 		if (tail <= bufptr + 48 ||
@@ -1279,11 +1279,11 @@ struct commit_extra_header *read_commit_extra_headers(struct commit *commit,
 
 static inline int standard_header_field(const char *field, size_t len)
 {
-	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
-		(len == 6 && !memcmp(field, "parent ", 7)) ||
-		(len == 6 && !memcmp(field, "author ", 7)) ||
-		(len == 9 && !memcmp(field, "committer ", 10)) ||
-		(len == 8 && !memcmp(field, "encoding ", 9)));
+	return ((len == 4 && starts_with(field, "tree ")) ||
+		(len == 6 && starts_with(field, "parent ")) ||
+		(len == 6 && starts_with(field, "author ")) ||
+		(len == 9 && starts_with(field, "committer ")) ||
+		(len == 8 && starts_with(field, "encoding ")));
 }
 
 static int excluded_header_field(const char *field, size_t len, const char **exclude)
diff --git a/connect.c b/connect.c
index 4150412..ce47631 100644
--- a/connect.c
+++ b/connect.c
@@ -18,7 +18,7 @@ static int check_ref(const char *name, int len, unsigned int flags)
 	if (!flags)
 		return 1;
 
-	if (len < 5 || memcmp(name, "refs/", 5))
+	if (len < 5 || !starts_with(name, "refs/"))
 		return 0;
 
 	/* Skip the "refs/" part */
@@ -30,11 +30,11 @@ static int check_ref(const char *name, int len, unsigned int flags)
 		return 0;
 
 	/* REF_HEADS means that we want regular branch heads */
-	if ((flags & REF_HEADS) && !memcmp(name, "heads/", 6))
+	if ((flags & REF_HEADS) && starts_with(name, "heads/"))
 		return 1;
 
 	/* REF_TAGS means that we want tags */
-	if ((flags & REF_TAGS) && !memcmp(name, "tags/", 5))
+	if ((flags & REF_TAGS) && starts_with(name, "tags/"))
 		return 1;
 
 	/* All type bits clear means that we are ok with anything */
@@ -514,7 +514,7 @@ static int git_proxy_command_options(const char *var, const char *value,
 		if (0 <= matchlen) {
 			/* core.gitproxy = none for kernel.org */
 			if (matchlen == 4 &&
-			    !memcmp(value, "none", 4))
+			    starts_with(value, "none"))
 				matchlen = 0;
 			git_proxy_command = xmemdupz(value, matchlen);
 		}
diff --git a/contrib/convert-objects/convert-objects.c b/contrib/convert-objects/convert-objects.c
index f3b57bf..9fdc730 100644
--- a/contrib/convert-objects/convert-objects.c
+++ b/contrib/convert-objects/convert-objects.c
@@ -245,7 +245,7 @@ static void convert_date(void *buffer, unsigned long size, unsigned char *result
 	size -= 46;
 
 	/* "parent <sha1>\n" */
-	while (!memcmp(buffer, "parent ", 7)) {
+	while (starts_with(buffer, "parent ")) {
 		memcpy(new + newlen, buffer, 48);
 		newlen += 48;
 		buffer = (char *) buffer + 48;
@@ -270,11 +270,11 @@ static void convert_commit(void *buffer, unsigned long size, unsigned char *resu
 	void *orig_buffer = buffer;
 	unsigned long orig_size = size;
 
-	if (memcmp(buffer, "tree ", 5))
+	if (!starts_with(buffer, "tree "))
 		die("Bad commit '%s'", (char *) buffer);
 	convert_ascii_sha1((char *) buffer + 5);
 	buffer = (char *) buffer + 46;    /* "tree " + "hex sha1" + "\n" */
-	while (!memcmp(buffer, "parent ", 7)) {
+	while (starts_with(buffer, "parent ")) {
 		convert_ascii_sha1((char *) buffer + 7);
 		buffer = (char *) buffer + 48;
 	}
diff --git a/convert.c b/convert.c
index ab80b72..30882e2 100644
--- a/convert.c
+++ b/convert.c
@@ -543,7 +543,7 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 		len -= dollar + 1 - src;
 		src  = dollar + 1;
 
-		if (len > 3 && !memcmp(src, "Id:", 3)) {
+		if (len > 3 && starts_with(src, "Id:")) {
 			dollar = memchr(src + 3, '$', len - 3);
 			if (!dollar)
 				break;
diff --git a/credential-cache.c b/credential-cache.c
index 9a03792..23b153c 100644
--- a/credential-cache.c
+++ b/credential-cache.c
@@ -54,7 +54,7 @@ static void spawn_daemon(const char *socket)
 	r = read_in_full(daemon.out, buf, sizeof(buf));
 	if (r < 0)
 		die_errno("unable to read result code from cache daemon");
-	if (r != 3 || memcmp(buf, "ok\n", 3))
+	if (r != 3 || !starts_with(buf, "ok\n"))
 		die("cache daemon did not start: %.*s", r, buf);
 	close(daemon.out);
 }
diff --git a/fetch-pack.c b/fetch-pack.c
index f061f1f..17823ab 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -506,7 +506,7 @@ static void filter_refs(struct fetch_pack_args *args,
 		int keep = 0;
 		next = ref->next;
 
-		if (!memcmp(ref->name, "refs/", 5) &&
+		if (starts_with(ref->name, "refs/") &&
 		    check_refname_format(ref->name, 0))
 			; /* trash */
 		else {
diff --git a/fsck.c b/fsck.c
index 99c0497..3bd69da 100644
--- a/fsck.c
+++ b/fsck.c
@@ -290,12 +290,12 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
 	if (commit->date == ULONG_MAX)
 		return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line");
 
-	if (memcmp(buffer, "tree ", 5))
+	if (!starts_with(buffer, "tree "))
 		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'tree' line");
 	if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
 		return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' line format - bad sha1");
 	buffer += 46;
-	while (!memcmp(buffer, "parent ", 7)) {
+	while (starts_with(buffer, "parent ")) {
 		if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 			return error_func(&commit->object, FSCK_ERROR, "invalid 'parent' line format - bad sha1");
 		buffer += 48;
@@ -322,13 +322,13 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
 		if (p || parents)
 			return error_func(&commit->object, FSCK_ERROR, "parent objects missing");
 	}
-	if (memcmp(buffer, "author ", 7))
+	if (!starts_with(buffer, "author "))
 		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'author' line");
 	buffer += 7;
 	err = fsck_ident(&buffer, &commit->object, error_func);
 	if (err)
 		return err;
-	if (memcmp(buffer, "committer ", strlen("committer ")))
+	if (!starts_with(buffer, "committer "))
 		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'committer' line");
 	buffer += strlen("committer ");
 	err = fsck_ident(&buffer, &commit->object, error_func);
diff --git a/http-walker.c b/http-walker.c
index 1516c5e..9a8d412 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -245,7 +245,7 @@ static void process_alternates_response(void *callback_data)
 						     - base);
 					okay = 1;
 				}
-			} else if (!memcmp(data + i, "../", 3)) {
+			} else if (starts_with(data + i, "../")) {
 				/*
 				 * Relative URL; chop the corresponding
 				 * number of subpath from base (and ../
@@ -267,7 +267,7 @@ static void process_alternates_response(void *callback_data)
 				i += 3;
 				serverlen = strlen(base);
 				while (i + 2 < posn &&
-				       !memcmp(data + i, "../", 3)) {
+				       starts_with(data + i, "../")) {
 					do {
 						serverlen--;
 					} while (serverlen &&
diff --git a/imap-send.c b/imap-send.c
index 0bc6f7f..019de18 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -524,7 +524,7 @@ static struct imap_cmd *v_issue_imap_cmd(struct imap_store *ctx,
 	if (Verbose) {
 		if (imap->num_in_progress)
 			printf("(%d in progress) ", imap->num_in_progress);
-		if (memcmp(cmd->cmd, "LOGIN", 5))
+		if (!starts_with(cmd->cmd, "LOGIN"))
 			printf(">>> %s", buf);
 		else
 			printf(">>> %d LOGIN <user> <pass>\n", cmd->tag);
@@ -802,7 +802,7 @@ static int get_cmd_result(struct imap_store *ctx, struct imap_cmd *tcmd)
 				resp = DRV_OK;
 			else {
 				if (!strcmp("NO", arg)) {
-					if (cmdp->cb.create && cmd && (cmdp->cb.trycreate || !memcmp(cmd, "[TRYCREATE]", 11))) { /* SELECT, APPEND or UID COPY */
+					if (cmdp->cb.create && cmd && (cmdp->cb.trycreate || starts_with(cmd, "[TRYCREATE]"))) { /* SELECT, APPEND or UID COPY */
 						p = strchr(cmdp->cmd, '"');
 						if (!issue_imap_cmd(ctx, NULL, "CREATE \"%.*s\"", (int)(strchr(p + 1, '"') - p + 1), p)) {
 							resp = RESP_BAD;
@@ -827,7 +827,7 @@ static int get_cmd_result(struct imap_store *ctx, struct imap_cmd *tcmd)
 				} else /*if (!strcmp("BAD", arg))*/
 					resp = RESP_BAD;
 				fprintf(stderr, "IMAP command '%s' returned response (%s) - %s\n",
-					 memcmp(cmdp->cmd, "LOGIN", 5) ?
+					 !starts_with(cmdp->cmd, "LOGIN") ?
 							cmdp->cmd : "LOGIN <user> <pass>",
 							arg, cmd ? cmd : "");
 			}
diff --git a/pack-write.c b/pack-write.c
index 9b8308b..c3bfa16 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -289,7 +289,7 @@ char *index_pack_lockfile(int ip_out)
 	 * later on.  If we don't get that then tough luck with it.
 	 */
 	if (read_in_full(ip_out, packname, 46) == 46 && packname[45] == '\n' &&
-	    memcmp(packname, "keep\t", 5) == 0) {
+	    !starts_with(packname, "keep\t") == 0) {
 		char path[PATH_MAX];
 		packname[45] = 0;
 		snprintf(path, sizeof(path), "%s/pack/pack-%s.keep",
diff --git a/path.c b/path.c
index f9c5062..b4b07d6 100644
--- a/path.c
+++ b/path.c
@@ -26,7 +26,7 @@ static char *get_pathname(void)
 static char *cleanup_path(char *path)
 {
 	/* Clean it up */
-	if (!memcmp(path, "./", 2)) {
+	if (starts_with(path, "./")) {
 		path += 2;
 		while (*path == '/')
 			path++;
diff --git a/refs.c b/refs.c
index 89228e2..d167dfd 100644
--- a/refs.c
+++ b/refs.c
@@ -63,7 +63,7 @@ static int check_refname_component(const char *refname, int flags)
 		if (refname[1] == '\0')
 			return -1; /* Component equals ".". */
 	}
-	if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+	if (cp - refname >= 5 && starts_with(cp - 5, ".lock"))
 		return -1; /* Refname ends with ".lock". */
 	return cp - refname;
 }
diff --git a/remote.c b/remote.c
index e41251e..8a03b4a 100644
--- a/remote.c
+++ b/remote.c
@@ -1144,7 +1144,7 @@ static int match_explicit(struct ref *src, struct ref *dst,
 	case 1:
 		break;
 	case 0:
-		if (!memcmp(dst_value, "refs/", 5))
+		if (starts_with(dst_value, "refs/"))
 			matched_dst = make_linked_ref(dst_value, dst_tail);
 		else if (is_null_sha1(matched_src->new_sha1))
 			error("unable to delete '%s': remote ref does not exist",
diff --git a/submodule.c b/submodule.c
index b80ecac..3de9ae5 100644
--- a/submodule.c
+++ b/submodule.c
@@ -203,7 +203,7 @@ void gitmodules_config(void)
 			if (active_nr > pos) {  /* there is a .gitmodules */
 				const struct cache_entry *ce = active_cache[pos];
 				if (ce_namelen(ce) == 11 &&
-				    !memcmp(ce->name, ".gitmodules", 11))
+				    starts_with(ce->name, ".gitmodules"))
 					gitmodules_is_unmerged = 1;
 			}
 		} else if (pos < active_nr) {
diff --git a/transport.c b/transport.c
index ca7bb44..a267822 100644
--- a/transport.c
+++ b/transport.c
@@ -1364,7 +1364,7 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 
 	while (other[len-1] == '/')
 		other[--len] = '\0';
-	if (len < 8 || memcmp(other + len - 8, "/objects", 8))
+	if (len < 8 || !starts_with(other + len - 8, "/objects"))
 		return 0;
 	/* Is this a git repository with refs? */
 	memcpy(other + len - 8, "/refs", 6);
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 14:43 [PATCH] general style: replaces memcmp() with proper starts_with() Quint Guvernator
@ 2014-03-12 17:56 ` Jeff King
  2014-03-12 19:39   ` Junio C Hamano
  2014-03-12 22:37   ` Junio C Hamano
  0 siblings, 2 replies; 22+ messages in thread
From: Jeff King @ 2014-03-12 17:56 UTC (permalink / raw)
  To: Quint Guvernator; +Cc: git

On Wed, Mar 12, 2014 at 10:43:54AM -0400, Quint Guvernator wrote:

> memcmp() is replaced with negated starts_with() when comparing strings
> from the beginning. starts_with() looks nicer and it saves the extra
> argument of the length of the comparing string.

Thanks, I think this is a real readability improvement in most cases.

One of the things to do when reviewing a patch like this is make sure
that there aren't any silly arithmetic mistakes. Checking that can be
tedious, so I thought about how _I_ would do it programatically, and
then compared our results.

I tried:

  perl -i -lpe '
    s/memcmp\(([^,]+), "(.*?)", (\d+)\)/
     length($2) == $3 ?
       qq{!starts_with($1, "$2")} :
       $&
    /ge
  ' $(git ls-files '*.c')

That comes up with almost the same results as yours, but misses a few
cases that you caught (in addition to the need to simplify
!!starts_with()):

  - memcmp(foo, "bar", strlen("bar"))

  - strings with interpolation, like "foo\n", which is actually 4
    characters in length, not 5.

But there were only a few of those, and I hand-verified them. So I feel
pretty good that the changes are all correct transformations.

That leaves the question of whether they are all improvements in
readability (more on that below).

> note: this commit properly handles negation in starts_with()
> 
> Signed-off-by: Quint Guvernator <quintus.public@gmail.com>
> ---

This note should go after the "---" line so that it is not part of the
commit message (it is only interesting to people seeing v2 and wondering
what changed from v1 earlier on the list, not people reading the history
much later).

> diff --git a/builtin/commit-tree.c b/builtin/commit-tree.c
> index 987a4c3..2777519 100644
> --- a/builtin/commit-tree.c
> +++ b/builtin/commit-tree.c
> @@ -66,7 +66,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
>  			continue;
>  		}
>  
> -		if (!memcmp(arg, "-S", 2)) {
> +		if (starts_with(arg, "-S")) {
>  			sign_commit = arg + 2;
>  			continue;
>  		}

This is an improvement, but we still have the magic "2" below. Would
skip_prefix be a better match here, like:

  if ((val = skip_prefix(arg, "-S"))) {
          sign_commit = val;
          continue;
  }

We've also talked about refactoring skip_prefix to return a boolean, and
put the value in an out-parameter. Which would make this even more
readable:

  if (skip_prefix(arg, "-S", &sign_commit))
          continue;

Maybe the time has come to do that.

> --- a/builtin/mailinfo.c
> +++ b/builtin/mailinfo.c
> @@ -626,7 +626,7 @@ static int handle_boundary(void)
>  	strbuf_addch(&newline, '\n');
>  again:
>  	if (line.len >= (*content_top)->len + 2 &&
> -	    !memcmp(line.buf + (*content_top)->len, "--", 2)) {
> +	    starts_with(line.buf + (*content_top)->len, "--")) {

I'm not sure if this improves readability or not. It's certainly nice to
get rid of the magic "2", but starts_with is a bit of a misnomer, since
we are indexing deep into the buffer anyway. And we still have the magic
"2" above anyway.

I'm on the fence.

> @@ -727,8 +727,8 @@ static int is_scissors_line(const struct strbuf *line)
>  			continue;
>  		}
>  		if (i + 1 < len &&
> -		    (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) ||
> -		     !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) {
> +		    (starts_with(buf + i, ">8") || starts_with(buf + i, "8<") ||
> +		     starts_with(buf + i, ">%") || starts_with(buf + i, "%<"))) {

Same as above.

> @@ -66,7 +66,7 @@ static int verify_tag(char *buffer, unsigned long size)
>  		return error("char%"PRIuMAX": could not find next \"\\n\"",
>  				(uintmax_t) (type_line - buffer));
>  	tag_line++;
> -	if (memcmp(tag_line, "tag ", 4) || tag_line[4] == '\n')
> +	if (!starts_with(tag_line, "tag ") || tag_line[4] == '\n')
>  		return error("char%"PRIuMAX": no \"tag \" found",
>  				(uintmax_t) (tag_line - buffer));

I think this is another that could benefit from an enhanced skip_prefix:

  if (!skip_prefix(tag_line, "tag ", &tag_name) || *tag_name == '\n')
     ...

and then we can get rid of the "tag_line += 4" that is used much later
(in fact, I suspect that whole function could be improved in this
respect).

> @@ -269,7 +269,7 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
>  		return 0;
>  	item->object.parsed = 1;
>  	tail += size;
> -	if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
> +	if (tail <= bufptr + 46 || !starts_with(bufptr, "tree ") || bufptr[45] != '\n')
>  		return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
>  	if (get_sha1_hex(bufptr + 5, parent) < 0)

Again, we just use "bufptr + 5" a bit later here. So a candidate for
skip_prefix.

>  	graft = lookup_commit_graft(item->object.sha1);
> -	while (bufptr + 48 < tail && !memcmp(bufptr, "parent ", 7)) {
> +	while (bufptr + 48 < tail && starts_with(bufptr, "parent ")) {

Ditto here.

>  static inline int standard_header_field(const char *field, size_t len)
>  {
> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
> -		(len == 6 && !memcmp(field, "author ", 7)) ||
> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
> -		(len == 8 && !memcmp(field, "encoding ", 9)));
> +	return ((len == 4 && starts_with(field, "tree ")) ||
> +		(len == 6 && starts_with(field, "parent ")) ||
> +		(len == 6 && starts_with(field, "author ")) ||
> +		(len == 9 && starts_with(field, "committer ")) ||
> +		(len == 8 && starts_with(field, "encoding ")));

These extra "len" checks are interesting.  They look like an attempt to
optimize lookup, since the caller will already have scanned forward to
the space. I wonder if that makes a measurable difference, or is simply
premature optimization.

> @@ -514,7 +514,7 @@ static int git_proxy_command_options(const char *var, const char *value,
>  		if (0 <= matchlen) {
>  			/* core.gitproxy = none for kernel.org */
>  			if (matchlen == 4 &&
> -			    !memcmp(value, "none", 4))
> +			    starts_with(value, "none"))

While technically correct, I'm not sure starts_with really expresses
what's going on here. We're matching a full string, but we cannot use
strcmp because the buffer isn't terminated. I'd think a more readable
alternative would be something like:

  int mem_equals(const char *buf, size_t len, const char *match)
  {
          int match_len = strlen(match);
          return len == match_len && !memcmp(buf, match, len);
  }

  ...

  if (mem_equals(value, matchlen, "none"))
          ...

That might also apply to some other cases (e.g., the case above with
standard_header_field). The name mem_equals might not be descriptive
enough, but hopefully you get the idea.

> --- a/credential-cache.c
> +++ b/credential-cache.c
> @@ -54,7 +54,7 @@ static void spawn_daemon(const char *socket)
>  	r = read_in_full(daemon.out, buf, sizeof(buf));
>  	if (r < 0)
>  		die_errno("unable to read result code from cache daemon");
> -	if (r != 3 || memcmp(buf, "ok\n", 3))
> +	if (r != 3 || !starts_with(buf, "ok\n"))
>  		die("cache daemon did not start: %.*s", r, buf);

Another mem_equals case?

>  	close(daemon.out);
>  }
> diff --git a/fetch-pack.c b/fetch-pack.c
> index f061f1f..17823ab 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -506,7 +506,7 @@ static void filter_refs(struct fetch_pack_args *args,
>  		int keep = 0;
>  		next = ref->next;
>  
> -		if (!memcmp(ref->name, "refs/", 5) &&
> +		if (starts_with(ref->name, "refs/") &&
>  		    check_refname_format(ref->name, 0))
>  			; /* trash */
>  		else {
> diff --git a/fsck.c b/fsck.c
> index 99c0497..3bd69da 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -290,12 +290,12 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
>  	if (commit->date == ULONG_MAX)
>  		return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line");
>  
> -	if (memcmp(buffer, "tree ", 5))
> +	if (!starts_with(buffer, "tree "))
>  		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'tree' line");
>  	if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')

Another skip_prefix case.

> -	while (!memcmp(buffer, "parent ", 7)) {
> +	while (starts_with(buffer, "parent ")) {
>  		if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')

Ditto.

> -	if (memcmp(buffer, "author ", 7))
> +	if (!starts_with(buffer, "author "))
>  		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'author' line");
>  	buffer += 7;

Ditto.

> -	if (memcmp(buffer, "committer ", strlen("committer ")))
> +	if (!starts_with(buffer, "committer "))
>  		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'committer' line");
>  	buffer += strlen("committer ");

Ditto.

> diff --git a/http-walker.c b/http-walker.c
> index 1516c5e..9a8d412 100644
> --- a/http-walker.c
> +++ b/http-walker.c
> @@ -245,7 +245,7 @@ static void process_alternates_response(void *callback_data)
>  						     - base);
>  					okay = 1;
>  				}
> -			} else if (!memcmp(data + i, "../", 3)) {
> +			} else if (starts_with(data + i, "../")) {
>  				/*
>  				 * Relative URL; chop the corresponding
>  				 * number of subpath from base (and ../
> @@ -267,7 +267,7 @@ static void process_alternates_response(void *callback_data)
>  				i += 3;

Another skip_prefix?

> --- a/path.c
> +++ b/path.c
> @@ -26,7 +26,7 @@ static char *get_pathname(void)
>  static char *cleanup_path(char *path)
>  {
>  	/* Clean it up */
> -	if (!memcmp(path, "./", 2)) {
> +	if (starts_with(path, "./")) {
>  		path += 2;

Another skip_prefix.

> @@ -63,7 +63,7 @@ static int check_refname_component(const char *refname, int flags)
>  		if (refname[1] == '\0')
>  			return -1; /* Component equals ".". */
>  	}
> -	if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
> +	if (cp - refname >= 5 && starts_with(cp - 5, ".lock"))
>  		return -1; /* Refname ends with ".lock". */

Should this actually become ends_with? I guess it needs refname to be
NUL-terminated, though, which it is not. I don't know if it is worth
having a mem_ends_with variant, like:

  if (mem_ends_with(refname, cp - refname, ".lock"))
          ...

> diff --git a/submodule.c b/submodule.c
> index b80ecac..3de9ae5 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -203,7 +203,7 @@ void gitmodules_config(void)
>  			if (active_nr > pos) {  /* there is a .gitmodules */
>  				const struct cache_entry *ce = active_cache[pos];
>  				if (ce_namelen(ce) == 11 &&
> -				    !memcmp(ce->name, ".gitmodules", 11))
> +				    starts_with(ce->name, ".gitmodules"))
>  					gitmodules_is_unmerged = 1;

Jens singled this one out earlier, and I agree. I think this is a
"mem_equals".

> diff --git a/transport.c b/transport.c
> index ca7bb44..a267822 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -1364,7 +1364,7 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
>  
>  	while (other[len-1] == '/')
>  		other[--len] = '\0';
> -	if (len < 8 || memcmp(other + len - 8, "/objects", 8))
> +	if (len < 8 || !starts_with(other + len - 8, "/objects"))
>  		return 0;

This is another ends_with candidate (maybe mem_ends_with? I didn't
check).

Phew. That's a lot of changes. The ones I didn't mention are all
hands-down readability improvements, though I suspect some of them may
also be skip_prefix candidates (I didn't investigate each one, but only
mentioned the ones where I saw the obvious magic number in the diff
context).

I think we can take most of them as-is, though, and perhaps even some of
the ones I mentioned (even if skip_prefix can be used, switching to
starts_with may be an intermediate improvement).

If you don't feel like going further on this topic, that's OK, but I
think with a few more helpers we could really further clean up some of
these callsites.

-Peff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 17:56 ` Jeff King
@ 2014-03-12 19:39   ` Junio C Hamano
  2014-03-12 19:49     ` Jeff King
                       ` (2 more replies)
  2014-03-12 22:37   ` Junio C Hamano
  1 sibling, 3 replies; 22+ messages in thread
From: Junio C Hamano @ 2014-03-12 19:39 UTC (permalink / raw)
  To: Jeff King; +Cc: Quint Guvernator, git

Jeff King <peff@peff.net> writes:

>>  static inline int standard_header_field(const char *field, size_t len)
>>  {
>> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
>> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
>> -		(len == 6 && !memcmp(field, "author ", 7)) ||
>> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
>> -		(len == 8 && !memcmp(field, "encoding ", 9)));
>> +	return ((len == 4 && starts_with(field, "tree ")) ||
>> +		(len == 6 && starts_with(field, "parent ")) ||
>> +		(len == 6 && starts_with(field, "author ")) ||
>> +		(len == 9 && starts_with(field, "committer ")) ||
>> +		(len == 8 && starts_with(field, "encoding ")));
>
> These extra "len" checks are interesting.  They look like an attempt to
> optimize lookup, since the caller will already have scanned forward to
> the space.

If one really wants to remove the magic constants from this, then
one must take advantage of the pattern

	len == strlen(S) - 1 && !memcmp(field, S, strlen(S))

that appears here, and come up with a simple abstraction to express
that we are only using the string S (e.g. "tree "), length len and
location field of the counted string.

Blindly replacing starts_with() with !memcmp() in the above part is
a readability regression otherwise.

> ... I
> think with a few more helpers we could really further clean up some of
> these callsites.

Yes.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 19:39   ` Junio C Hamano
@ 2014-03-12 19:49     ` Jeff King
  2014-03-12 20:08       ` Junio C Hamano
  2014-03-12 20:51     ` René Scharfe
  2014-03-12 20:52     ` David Kastrup
  2 siblings, 1 reply; 22+ messages in thread
From: Jeff King @ 2014-03-12 19:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Quint Guvernator, git

On Wed, Mar 12, 2014 at 12:39:01PM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> >>  static inline int standard_header_field(const char *field, size_t len)
> >>  {
> >> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
> >> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
> >> -		(len == 6 && !memcmp(field, "author ", 7)) ||
> >> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
> >> -		(len == 8 && !memcmp(field, "encoding ", 9)));
> >> +	return ((len == 4 && starts_with(field, "tree ")) ||
> >> +		(len == 6 && starts_with(field, "parent ")) ||
> >> +		(len == 6 && starts_with(field, "author ")) ||
> >> +		(len == 9 && starts_with(field, "committer ")) ||
> >> +		(len == 8 && starts_with(field, "encoding ")));
> >
> > These extra "len" checks are interesting.  They look like an attempt to
> > optimize lookup, since the caller will already have scanned forward to
> > the space.
> 
> If one really wants to remove the magic constants from this, then
> one must take advantage of the pattern
> 
> 	len == strlen(S) - 1 && !memcmp(field, S, strlen(S))
> 
> that appears here, and come up with a simple abstraction to express
> that we are only using the string S (e.g. "tree "), length len and
> location field of the counted string.
> 
> Blindly replacing starts_with() with !memcmp() in the above part is
> a readability regression otherwise.

I actually think the right solution is:

  static inline int standard_header_field(const char *field, size_t len)
  {
          return mem_equals(field, len, "tree ") ||
                 mem_equals(field, len, "parent ") ||
                 ...;
  }

and the caller should tell us it's OK to look at field[len]:

  standard_header_field(line, eof - line + 1)

We could also omit the space from the standard_header_field. The caller
just ran strchr() looking for the space, so we know that either it is
there, or we are at the end of the line/buffer. Arguably a string like
"parent\n" should be "a parent header with no data" (but right now it is
not matched by this function). I'm not aware of an implementation that
writes such a thing, but it seems to fall in the "be liberal in what you
accept" category.

-Peff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 19:49     ` Jeff King
@ 2014-03-12 20:08       ` Junio C Hamano
  2014-03-12 21:14         ` Jeff King
  0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2014-03-12 20:08 UTC (permalink / raw)
  To: Jeff King; +Cc: Quint Guvernator, git

Jeff King <peff@peff.net> writes:

>> Blindly replacing starts_with() with !memcmp() in the above part is
>> a readability regression otherwise.
>
> I actually think the right solution is:
>
>   static inline int standard_header_field(const char *field, size_t len)
>   {
>           return mem_equals(field, len, "tree ") ||
>                  mem_equals(field, len, "parent ") ||
>                  ...;
>   }
>
> and the caller should tell us it's OK to look at field[len]:
>
>   standard_header_field(line, eof - line + 1)
>
> We could also omit the space from the standard_header_field.

Yes, that was what I had in mind.  The only reason why the callee
(over-)optimizes the "SP must follow these know keywords" part by
using the extra "len" parameter is because the caller has to do a
single strchr() to skip an arbitrary field name anyway so computing
"len" is essentially free.

> The caller
> just ran strchr() looking for the space, so we know that either it is
> there, or we are at the end of the line/buffer. Arguably a string like
> "parent\n" should be "a parent header with no data" (but right now it is
> not matched by this function). I'm not aware of an implementation that
> writes such a thing, but it seems to fall in the "be liberal in what you
> accept" category.

It is _not_ a standard header field, so it will be read by the logic
in the caller as a non-standard header field without getting lost.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 19:39   ` Junio C Hamano
  2014-03-12 19:49     ` Jeff King
@ 2014-03-12 20:51     ` René Scharfe
  2014-03-12 21:16       ` David Kastrup
  2014-03-12 20:52     ` David Kastrup
  2 siblings, 1 reply; 22+ messages in thread
From: René Scharfe @ 2014-03-12 20:51 UTC (permalink / raw)
  To: Junio C Hamano, Jeff King; +Cc: Quint Guvernator, git

Am 12.03.2014 20:39, schrieb Junio C Hamano:
> Jeff King <peff@peff.net> writes:
>
>>>   static inline int standard_header_field(const char *field, size_t len)
>>>   {
>>> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
>>> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
>>> -		(len == 6 && !memcmp(field, "author ", 7)) ||
>>> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
>>> -		(len == 8 && !memcmp(field, "encoding ", 9)));
>>> +	return ((len == 4 && starts_with(field, "tree ")) ||
>>> +		(len == 6 && starts_with(field, "parent ")) ||
>>> +		(len == 6 && starts_with(field, "author ")) ||
>>> +		(len == 9 && starts_with(field, "committer ")) ||
>>> +		(len == 8 && starts_with(field, "encoding ")));
>>
>> These extra "len" checks are interesting.  They look like an attempt to
>> optimize lookup, since the caller will already have scanned forward to
>> the space.

I wonder what the performance impact might be.  The length checks are 
also needed for correctness, however, to avoid running over the end of 
the buffer.

> If one really wants to remove the magic constants from this, then
> one must take advantage of the pattern
>
> 	len == strlen(S) - 1 && !memcmp(field, S, strlen(S))
>
> that appears here, and come up with a simple abstraction to express
> that we are only using the string S (e.g. "tree "), length len and
> location field of the counted string.

This might be a job for kwset.

René

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 19:39   ` Junio C Hamano
  2014-03-12 19:49     ` Jeff King
  2014-03-12 20:51     ` René Scharfe
@ 2014-03-12 20:52     ` David Kastrup
  2 siblings, 0 replies; 22+ messages in thread
From: David Kastrup @ 2014-03-12 20:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Quint Guvernator, git

Junio C Hamano <gitster@pobox.com> writes:

> Jeff King <peff@peff.net> writes:
>
>>>  static inline int standard_header_field(const char *field, size_t len)
>>>  {
>>> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
>>> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
>>> -		(len == 6 && !memcmp(field, "author ", 7)) ||
>>> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
>>> -		(len == 8 && !memcmp(field, "encoding ", 9)));
>>> +	return ((len == 4 && starts_with(field, "tree ")) ||
>>> +		(len == 6 && starts_with(field, "parent ")) ||
>>> +		(len == 6 && starts_with(field, "author ")) ||
>>> +		(len == 9 && starts_with(field, "committer ")) ||
>>> +		(len == 8 && starts_with(field, "encoding ")));
>>
>> These extra "len" checks are interesting.  They look like an attempt to
>> optimize lookup, since the caller will already have scanned forward to
>> the space.
>
> If one really wants to remove the magic constants from this, then
> one must take advantage of the pattern
>
> 	len == strlen(S) - 1 && !memcmp(field, S, strlen(S))

If the caller has already scanned forward to the space, then there is no
actual need to let the comparison compare the space again after checking
len, is there?  That makes for a more consistent look in the memcmp
case.

One could do this in a switch on len instead.  Not that it seems
terribly more efficient.

> that appears here, and come up with a simple abstraction to express
> that we are only using the string S (e.g. "tree "), length len and
> location field of the counted string.
>
> Blindly replacing starts_with() with !memcmp() in the above part is
> a readability regression otherwise.

Don't really think so: if the len at the front and the back is the same
and the space is not explicitly compared any more, both look pretty much
the same to me.

>> ... I think with a few more helpers we could really further clean up
>> some of these callsites.
>
> Yes.

Possibly.  But it does seem like overkill.

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 20:08       ` Junio C Hamano
@ 2014-03-12 21:14         ` Jeff King
  2014-03-12 21:39           ` Jeff King
  2014-03-12 22:06           ` Jeff King
  0 siblings, 2 replies; 22+ messages in thread
From: Jeff King @ 2014-03-12 21:14 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Quint Guvernator, git

On Wed, Mar 12, 2014 at 01:08:03PM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> >> Blindly replacing starts_with() with !memcmp() in the above part is
> >> a readability regression otherwise.
> >
> > I actually think the right solution is:
> >
> >   static inline int standard_header_field(const char *field, size_t len)
> >   {
> >           return mem_equals(field, len, "tree ") ||
> >                  mem_equals(field, len, "parent ") ||
> >                  ...;
> >   }
> >
> > and the caller should tell us it's OK to look at field[len]:
> >
> >   standard_header_field(line, eof - line + 1)
> >
> > We could also omit the space from the standard_header_field.
> 
> Yes, that was what I had in mind.  The only reason why the callee
> (over-)optimizes the "SP must follow these know keywords" part by
> using the extra "len" parameter is because the caller has to do a
> single strchr() to skip an arbitrary field name anyway so computing
> "len" is essentially free.

One thing that bugs me about the current code is that the sub-function
looks one past the end of the length given to it by the caller.
Switching it to pass "eof - line + 1" resolves that, but is it right?

The character pointed at by "eof" is either:

  1. space, if our strchr(line, ' ') found something

  2. the first character of the next line, if our
     memchr(line, '\n', eob - line) found something

  3. Whatever is at eob (end-of-buffer)

There are two questionable things here. In (1), we use strchr on a
sized buffer.  And in (3), we look one past the size that was passed in.

In both cases, we are saved by the fact that the buffer is actually NUL
terminated at the end of "size" (because it comes from read_sha1_file).
But we may find a space much further than the line ending which is
supposed to be our boundary, and end up having to do a comparison to
cover this case.

So I think the current code is correct, but we could make it more
obvious by:

  1. Restricting our search for the field separator to the current line.

  2. Explicitly avoid looking for headers when we did not find a space,
     since we cannot match anything anyway.

Like:

diff --git a/commit.c b/commit.c
index 6bf4fe0..9383cc1 100644
--- a/commit.c
+++ b/commit.c
@@ -1325,14 +1325,14 @@ static struct commit_extra_header *read_commit_extra_header_lines(
 		strbuf_reset(&buf);
 		it = NULL;

-		eof = strchr(line, ' ');
-		if (next <= eof)
+		eof = memchr(line, ' ', next - line);
+		if (eof) {
+			if (standard_header_field(line, eof - line + 1) ||
+			    excluded_header_field(line, eof - line, exclude))
+				continue;
+		} else
 			eof = next;

-		if (standard_header_field(line, eof - line) ||
-		    excluded_header_field(line, eof - line, exclude))
-			continue;
-
 		it = xcalloc(1, sizeof(*it));
 		it->key = xmemdupz(line, eof-line);
 		*tail = it;

I also think that "eof = next" (which I retained here) is off-by-one.
"next" here is not the newline, but the start of the next line. And I'm
guessing the code actually wanted the newline (otherwise "it->key" ends
up with the newline in it). But we cannot just subtract one, because if
we hit "eob", it really is in the right spot.

-Peff

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 20:51     ` René Scharfe
@ 2014-03-12 21:16       ` David Kastrup
  2014-03-12 21:45         ` René Scharfe
  0 siblings, 1 reply; 22+ messages in thread
From: David Kastrup @ 2014-03-12 21:16 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Jeff King, Quint Guvernator, git

René Scharfe <l.s.r@web.de> writes:

> Am 12.03.2014 20:39, schrieb Junio C Hamano:
>> Jeff King <peff@peff.net> writes:
>>
>>>>   static inline int standard_header_field(const char *field, size_t len)
>>>>   {
>>>> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
>>>> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
>>>> -		(len == 6 && !memcmp(field, "author ", 7)) ||
>>>> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
>>>> -		(len == 8 && !memcmp(field, "encoding ", 9)));
>>>> +	return ((len == 4 && starts_with(field, "tree ")) ||
>>>> +		(len == 6 && starts_with(field, "parent ")) ||
>>>> +		(len == 6 && starts_with(field, "author ")) ||
>>>> +		(len == 9 && starts_with(field, "committer ")) ||
>>>> +		(len == 8 && starts_with(field, "encoding ")));
>>>
>>> These extra "len" checks are interesting.  They look like an attempt to
>>> optimize lookup, since the caller will already have scanned forward to
>>> the space.
>
> I wonder what the performance impact might be.  The length checks are
> also needed for correctness, however, to avoid running over the end of
> the buffer.

Depends on whether memcmp is guaranteed to stop immediately on mismatch.
Then memcmp(field, "tree ", 5) cannot walk across a NUL byte in field.

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 21:14         ` Jeff King
@ 2014-03-12 21:39           ` Jeff King
  2014-03-12 22:06           ` Jeff King
  1 sibling, 0 replies; 22+ messages in thread
From: Jeff King @ 2014-03-12 21:39 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Quint Guvernator, git

On Wed, Mar 12, 2014 at 05:14:15PM -0400, Jeff King wrote:

> I also think that "eof = next" (which I retained here) is off-by-one.
> "next" here is not the newline, but the start of the next line. And I'm
> guessing the code actually wanted the newline (otherwise "it->key" ends
> up with the newline in it). But we cannot just subtract one, because if
> we hit "eob", it really is in the right spot.

I started on a patch for this, but found another interesting corner
case. If we do not find a newline and hit end-of-buffer (which
_shouldn't_ happen, as that is a malformed commit object), we set "next"
to "eob". But then we copy the whole string, including *next into the
"value" of the header.

So we intend to capture the trailing newline in the value, and do in the
normal case. But in the end-of-buffer case, we capture an extra NUL. I
think that's OK, because the eventual fate of the buffer is to become a
C-string. But it does mean that the result sometimes has a
newline-terminator and sometimes doesn't, and the calling code needs to
handle this when printing, or risk lines running together.

Should this function append a newline if there is not already one? If
it's a mergetag header, we feed the result to gpg, etc, and expect to
get the data out verbatim. We would not want to mess that up. OTOH, the
commit object is bogus (and possibly truncated) in the first place, so
it probably doesn't really matter. And the GPG signature on tag objects
has its own delimiters, so a stray newline present or not at the end
should not matter.

-Peff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 21:16       ` David Kastrup
@ 2014-03-12 21:45         ` René Scharfe
  0 siblings, 0 replies; 22+ messages in thread
From: René Scharfe @ 2014-03-12 21:45 UTC (permalink / raw)
  To: David Kastrup; +Cc: Junio C Hamano, Jeff King, Quint Guvernator, git

Am 12.03.2014 22:16, schrieb David Kastrup:
> René Scharfe <l.s.r@web.de> writes:
>
>> Am 12.03.2014 20:39, schrieb Junio C Hamano:
>>> Jeff King <peff@peff.net> writes:
>>>
>>>>>    static inline int standard_header_field(const char *field, size_t len)
>>>>>    {
>>>>> -	return ((len == 4 && !memcmp(field, "tree ", 5)) ||
>>>>> -		(len == 6 && !memcmp(field, "parent ", 7)) ||
>>>>> -		(len == 6 && !memcmp(field, "author ", 7)) ||
>>>>> -		(len == 9 && !memcmp(field, "committer ", 10)) ||
>>>>> -		(len == 8 && !memcmp(field, "encoding ", 9)));
>>>>> +	return ((len == 4 && starts_with(field, "tree ")) ||
>>>>> +		(len == 6 && starts_with(field, "parent ")) ||
>>>>> +		(len == 6 && starts_with(field, "author ")) ||
>>>>> +		(len == 9 && starts_with(field, "committer ")) ||
>>>>> +		(len == 8 && starts_with(field, "encoding ")));
>>>>
>>>> These extra "len" checks are interesting.  They look like an attempt to
>>>> optimize lookup, since the caller will already have scanned forward to
>>>> the space.
>>
>> I wonder what the performance impact might be.  The length checks are
>> also needed for correctness, however, to avoid running over the end of
>> the buffer.
>
> Depends on whether memcmp is guaranteed to stop immediately on mismatch.
> Then memcmp(field, "tree ", 5) cannot walk across a NUL byte in field.

I'm not sure we can rely on that property.  But anyway -- if field 
points to, say, a one-byte buffer containing the letter t, then memcmp 
would overrun that buffer.  If field was guaranteed to be NUL-terminated 
then at least starts_with would stop at the end, but the signature of 
standard_header_field() suggests that it can be used with arbitrary 
buffers, not just C strings.

René

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 21:14         ` Jeff King
  2014-03-12 21:39           ` Jeff King
@ 2014-03-12 22:06           ` Jeff King
  2014-03-12 22:38             ` Junio C Hamano
  1 sibling, 1 reply; 22+ messages in thread
From: Jeff King @ 2014-03-12 22:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Quint Guvernator, git

On Wed, Mar 12, 2014 at 05:14:15PM -0400, Jeff King wrote:

> One thing that bugs me about the current code is that the sub-function
> looks one past the end of the length given to it by the caller.
> Switching it to pass "eof - line + 1" resolves that, but is it right?
> 
> The character pointed at by "eof" is either:
> 
>   1. space, if our strchr(line, ' ') found something
> 
>   2. the first character of the next line, if our
>      memchr(line, '\n', eob - line) found something
> 
>   3. Whatever is at eob (end-of-buffer)

This misses a case. We might find no space at all, and eof is NULL. We
never dereference it, so we don't segfault, but it does cause a bogus
giant allocation.

I'm out of time for the day, but here is a test I started on that
demonstrates the failure:

diff --git a/t/t7513-commit-bogus-extra-headers.sh b/t/t7513-commit-bogus-extra-headers.sh
index e69de29..834db08 100755
--- a/t/t7513-commit-bogus-extra-headers.sh
+++ b/t/t7513-commit-bogus-extra-headers.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+test_description='check parsing of badly formed commit objects'
+. ./test-lib.sh
+
+EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904
+
+test_expect_success 'newline right after mergetag header' '
+	test_tick &&
+	cat >commit <<-EOF &&
+	tree $EMPTY_TREE
+	author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> $GIT_AUTHOR_DATE
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	mergetag
+
+	foo
+	EOF
+	commit=$(git hash-object -w -t commit commit) &&
+	cat >expect <<-EOF &&
+	todo...
+	EOF
+	git --no-pager show --show-signature $commit >actual &&
+	test_cmp expect actual
+'
+
+test_done

The "git show" fails with:

  fatal: Out of memory, malloc failed (tried to allocate 18446744073699764927 bytes)

So I think the whole function could use some refactoring to handle
corner cases better. I'll try to take a look tomorrow, but please feel
free if somebody else wants to take a crack at it.

-Peff

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 17:56 ` Jeff King
  2014-03-12 19:39   ` Junio C Hamano
@ 2014-03-12 22:37   ` Junio C Hamano
  2014-03-13  6:27     ` David Kastrup
  1 sibling, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2014-03-12 22:37 UTC (permalink / raw)
  To: Jeff King; +Cc: Quint Guvernator, git

Jeff King <peff@peff.net> writes:

> Thanks, I think this is a real readability improvement in most cases.
> ...
>
> I tried:
>
>   perl -i -lpe '
>     s/memcmp\(([^,]+), "(.*?)", (\d+)\)/
>      length($2) == $3 ?
>        qq{!starts_with($1, "$2")} :
>        $&
>     /ge
>   ' $(git ls-files '*.c')
>
> That comes up with almost the same results as yours, but misses a few
> cases that you caught (in addition to the need to simplify
> !!starts_with()):
>
>   - memcmp(foo, "bar", strlen("bar"))
>
>   - strings with interpolation, like "foo\n", which is actually 4
>     characters in length, not 5.
>
> But there were only a few of those, and I hand-verified them. So I feel
> pretty good that the changes are all correct transformations.
>
> That leaves the question of whether they are all improvements in
> readability (more on that below).

After reading the patch and the result of your Perl replacement, I'd
agree with the "correctness" but I am not as convinced as you seem
to be about the "real readability improvement in most cases."  "some
cases", perhaps, though.

Taking two random examples from an early and a late parts of the
patch:

--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -82,7 +82,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 				enum object_type type;
 				unsigned long size;
 				char *buffer = read_sha1_file(sha1, &type, &size);
-				if (memcmp(buffer, "object ", 7) ||
+				if (!starts_with(buffer, "object ") ||
 				    get_sha1_hex(buffer + 7, blob_sha1))
 					die("%s not a valid tag", sha1_to_hex(sha1));
 				free(buffer);

diff --git a/transport.c b/transport.c
index ca7bb44..a267822 100644
--- a/transport.c
+++ b/transport.c
@@ -1364,7 +1364,7 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,

 	while (other[len-1] == '/')
 		other[--len] = '\0';
-	if (len < 8 || memcmp(other + len - 8, "/objects", 8))
+	if (len < 8 || !starts_with(other + len - 8, "/objects"))
 		return 0;
 	/* Is this a git repository with refs? */
 	memcpy(other + len - 8, "/refs", 6);

The original hunks show that the code knows and relies on magic
numbers 7 and 8 very clearly and there are rooms for improvement.
The result after the conversion, however, still have the same magic
numbers, but one less of them each.  Doesn't it make it harder to
later spot the patterns to come up with a better abstraction that
does not rely on the magic number?  Especially in the first hunk, we
can spot the repeated 7s in the original that make it glaringly
clear that we might want a better abstraction there, but in the text
after the patch, there is less clue that encourages us to take a
look at that "buffer + 7" further to make sure we do not feed a
wrong string to get_sha1_hex() by mistake when we update the
surrounding code or the data format this codepath parses.

I think grepping for "memcmp()" that compares the same number of
bytes as a constant string, like you showed in that Perl scriptlet,
is a very good first step to locate where we might want to look for
improvements.  I however think that a mechanical replacement of all
such memcmp() with !start_with() is of a dubious value.

After finding the hunk in the cat-file.c shown above, and after
seeing many other similar patterns, one may realize that it is a
good use case for a variant of skip_prefix() that returns boolean,
which we discussed earlier, perhaps like so:

	if (!skip_over(buffer, "object ", &object_name) ||
            get_sha1_hex(object_name, blob_sha1))
		die(...);

and such a solution would be what truly eradicates the reliance of
magic constants that might break by miscounting bytes in string
constants.  I do not think mechanical replacement to !start_with()
is "going in the right direction and reaching a halfway to that
goal".  I honestly think that it instead is taking us into a wrong
direction, without really solving the use of brittle magic constants
and making remaining reliance of them even harder to spot.

So....

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 22:06           ` Jeff King
@ 2014-03-12 22:38             ` Junio C Hamano
  2014-03-13  3:33               ` Quint Guvernator
  0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2014-03-12 22:38 UTC (permalink / raw)
  To: Jeff King; +Cc: Quint Guvernator, git

Jeff King <peff@peff.net> writes:

> So I think the whole function could use some refactoring to handle
> corner cases better.  I'll try to take a look tomorrow, but please
> feel free if somebody else wants to take a crack at it.

Yup, thanks.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 22:38             ` Junio C Hamano
@ 2014-03-13  3:33               ` Quint Guvernator
  2014-03-13 17:46                 ` Junio C Hamano
  2014-03-14  4:57                 ` Jeff King
  0 siblings, 2 replies; 22+ messages in thread
From: Quint Guvernator @ 2014-03-13  3:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Git Mailing List

>From what I can gather, there seems to be opposition to specific
pieces of this patch.

The following area is clearly the most controversial:

>  static inline int standard_header_field(const char *field, size_t len)
>  {
> -    return ((len == 4 && !memcmp(field, "tree ", 5)) ||
> -            (len == 6 && !memcmp(field, "parent ", 7)) ||
> -            (len == 6 && !memcmp(field, "author ", 7)) ||
> -            (len == 9 && !memcmp(field, "committer ", 10)) ||
> -            (len == 8 && !memcmp(field, "encoding ", 9)));
> +    return ((len == 4 && starts_with(field, "tree ")) ||
> +            (len == 6 && starts_with(field, "parent ")) ||
> +            (len == 6 && starts_with(field, "author ")) ||
> +            (len == 9 && starts_with(field, "committer ")) ||
> +            (len == 8 && starts_with(field, "encoding ")));

I am happy to submit a version of this patch excluding this section
(and/or others), but it seems I've stumbled into a more fundamental
conversation about the place for helper functions in general (and
about refactoring skip_prefix()). I am working on this particular
change as a microproject, #14 on the list [1], and am not as familiar
with the conventions of the Git codebase as many of you on this list
are.

Junio said:

> The result after the conversion, however, still have the same magic
> numbers, but one less of them each.  Doesn't it make it harder to
> later spot the patterns to come up with a better abstraction that
> does not rely on the magic number?

It is _not_ my goal to make the code harder to maintain down the road.
So, at this point, which hunks (if any) are worth patching?

Quint


[1]: http://git.github.io/SoC-2014-Microprojects.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-12 22:37   ` Junio C Hamano
@ 2014-03-13  6:27     ` David Kastrup
  2014-03-13 17:47       ` Junio C Hamano
  0 siblings, 1 reply; 22+ messages in thread
From: David Kastrup @ 2014-03-13  6:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Quint Guvernator, git

Junio C Hamano <gitster@pobox.com> writes:

> Taking two random examples from an early and a late parts of the
> patch:
>
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -82,7 +82,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
>  				enum object_type type;
>  				unsigned long size;
>  				char *buffer = read_sha1_file(sha1, &type, &size);
> -				if (memcmp(buffer, "object ", 7) ||
> +				if (!starts_with(buffer, "object ") ||

[...]

> The original hunks show that the code knows and relies on magic
> numbers 7 and 8 very clearly and there are rooms for improvement.

Like: what if the file is empty?

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-13  3:33               ` Quint Guvernator
@ 2014-03-13 17:46                 ` Junio C Hamano
  2014-03-14  4:57                 ` Jeff King
  1 sibling, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2014-03-13 17:46 UTC (permalink / raw)
  To: Quint Guvernator; +Cc: Jeff King, Git Mailing List

Quint Guvernator <quintus.public@gmail.com> writes:

>> The result after the conversion, however, still have the same magic
>> numbers, but one less of them each.  Doesn't it make it harder to
>> later spot the patterns to come up with a better abstraction that
>> does not rely on the magic number?
>
> It is _not_ my goal to make the code harder to maintain down the road.

Good.

> So, at this point, which hunks (if any) are worth patching?

Will, I am not going through all the mechanical hits to memcmp() and
judge each and every one if it is a good idea to convert.  Anybody
who does so in order to tell you "which hunks are worth patching"
would end up being the one doing the real work, and at that point
there is nothing left to be credited as your work anymore ;-)

But as Peff said, there are good bits, like these ones just for a
few examples.

diff --git a/builtin/apply.c b/builtin/apply.c
index a7e72d5..16c20af 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1631,7 +1631,7 @@ static int parse_fragment(const char *line, unsigned long size,
 		 * l10n of "\ No newline..." is at least that long.
 		 */
 		case '\\':
-			if (len < 12 || memcmp(line, "\\ ", 2))
+			if (len < 12 || !starts_with(line, "\\ "))
 				return -1;
 			break;
 		}
@@ -1646,7 +1646,7 @@ static int parse_fragment(const char *line, unsigned long size,
 	 * it in the above loop because we hit oldlines == newlines == 0
 	 * before seeing it.
 	 */
-	if (12 < size && !memcmp(line, "\\ ", 2))
+	if (12 < size && starts_with(line, "\\ "))
 		offset += linelen(line, size);

 	patch->lines_added += added;

These two are about "An incomplete line marker begins with a
backslash and a SP" and there is no other significance in the
constant 2 (like, "after we recognise the match, we start scanning
the remainder of the line starting at the offset 2").

It is a tangent but I notice that these two parts (both in the
original and in the version after patch) contradict what the
incomplete last line marker should look like in a minor detail.

On the other hand, I think this one from nearby is iffy.

@@ -846,8 +846,8 @@ static int has_epoch_timestamp(const char *nameline)
 	 * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31
 	 * (west of GMT) or 1970-01-01 (east of GMT)
 	 */
-	if ((zoneoffset < 0 && memcmp(timestamp, "1969-12-31", 10)) ||
-	    (0 <= zoneoffset && memcmp(timestamp, "1970-01-01", 10)))
+	if ((zoneoffset < 0 && !starts_with(timestamp, "1969-12-31")) ||
+	    (0 <= zoneoffset && !starts_with(timestamp, "1970-01-01")))
 		return 0;

 	hourminute = (strtol(timestamp + 11, NULL, 10) * 60 +

If one looks at the post-context of the hunk, one would realize that
this codepath very intimately knows how the timestamp should look
like at the byte-offset level, not just "YYYY-MM-DD ought to be
10-byte long", but "there should be two-digit hour part after
skipping one byte after that YYYY-MM-DD part, followed by two-digit
minute part after further skipping one byte", knowing that these
details are guaranteed by the stamp_regexp[] pattern it earlier made
sure the given string would match.

I do not think it is a good idea to reduce this kind of precise
format knowledge from this function in the first place (after all,
this is parsing a header line in a traditional diff whose format is
known to us), and even if it were our eventual goal to reduce the
precise format knowledge, it would not help very much to get rid of
constant 10 only from these two memcmp() calls, and that is why I
think this hunk is iffy.

Hope the above shows what kind of thinking is needed to judge each
change from memcmp() to !starts_with().

Thanks.

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-13  6:27     ` David Kastrup
@ 2014-03-13 17:47       ` Junio C Hamano
  2014-03-13 17:55         ` Jeff King
  0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2014-03-13 17:47 UTC (permalink / raw)
  To: David Kastrup; +Cc: Jeff King, Quint Guvernator, git

David Kastrup <dak@gnu.org> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> Taking two random examples from an early and a late parts of the
>> patch:
>>
>> --- a/builtin/cat-file.c
>> +++ b/builtin/cat-file.c
>> @@ -82,7 +82,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
>>  				enum object_type type;
>>  				unsigned long size;
>>  				char *buffer = read_sha1_file(sha1, &type, &size);
>> -				if (memcmp(buffer, "object ", 7) ||
>> +				if (!starts_with(buffer, "object ") ||
>
> [...]
>
>> The original hunks show that the code knows and relies on magic
>> numbers 7 and 8 very clearly and there are rooms for improvement.
>
> Like: what if the file is empty?

Yes.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-13 17:47       ` Junio C Hamano
@ 2014-03-13 17:55         ` Jeff King
  0 siblings, 0 replies; 22+ messages in thread
From: Jeff King @ 2014-03-13 17:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: David Kastrup, Quint Guvernator, git

On Thu, Mar 13, 2014 at 10:47:28AM -0700, Junio C Hamano wrote:

> >> --- a/builtin/cat-file.c
> >> +++ b/builtin/cat-file.c
> >> @@ -82,7 +82,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
> >>  				enum object_type type;
> >>  				unsigned long size;
> >>  				char *buffer = read_sha1_file(sha1, &type, &size);
> >> -				if (memcmp(buffer, "object ", 7) ||
> >> +				if (!starts_with(buffer, "object ") ||
> >
> > [...]
> >
> >> The original hunks show that the code knows and relies on magic
> >> numbers 7 and 8 very clearly and there are rooms for improvement.
> >
> > Like: what if the file is empty?
> 
> Yes.

I think this one is actually OK. The result of read_sha1_file is
NUL-terminated, and we know that starts_with reads byte-by-byte (the
prior memcmp is wrong, but only if you care about accessing bytes past
the end of the NUL).

That whole piece of code seems silly, though, IMHO. It should be using
parse_tag or peel_to_type.

-Peff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-13  3:33               ` Quint Guvernator
  2014-03-13 17:46                 ` Junio C Hamano
@ 2014-03-14  4:57                 ` Jeff King
  2014-03-14 14:51                   ` Quint Guvernator
  1 sibling, 1 reply; 22+ messages in thread
From: Jeff King @ 2014-03-14  4:57 UTC (permalink / raw)
  To: Quint Guvernator; +Cc: Junio C Hamano, Git Mailing List

On Wed, Mar 12, 2014 at 11:33:50PM -0400, Quint Guvernator wrote:

> It is _not_ my goal to make the code harder to maintain down the road.
> So, at this point, which hunks (if any) are worth patching?

This discussion ended up encompassing a lot of other related cleanups. I
hope we didn't scare you away. :)

My understanding is that you were approaching this as a micro-project
for GSoC. I'd love it if you want to pick up and run with some of the
ideas discussed here. But as far as a microproject goes, I think it
would make sense to identify one or two no-brainer improvement spots by
hand, and submit a patch with just those (and I think Junio gave some
good guidelines in his reply).

-Peff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-14  4:57                 ` Jeff King
@ 2014-03-14 14:51                   ` Quint Guvernator
  2014-03-14 16:56                     ` Junio C Hamano
  0 siblings, 1 reply; 22+ messages in thread
From: Quint Guvernator @ 2014-03-14 14:51 UTC (permalink / raw)
  To: Jeff King; +Cc: Git Mailing List, Junio C Hamano

2014-03-14 0:57 GMT-04:00 Jeff King <peff@peff.net>:
> This discussion ended up encompassing a lot of other related cleanups. I
> hope we didn't scare you away. :)

I don't think you could; this community is much more accepting than
other software communities around the web. The fact that I received
constructive feedback rather than a lecture when formatting issues
slipped my mind (i.e. forgetting [PATCH v2]) is reason enough to stick
around!

> My understanding is that you were approaching this as a micro-project
> for GSoC. I'd love it if you want to pick up and run with some of the
> ideas discussed here. But as far as a microproject goes, I think it
> would make sense to identify one or two no-brainer improvement spots by
> hand, and submit a patch with just those (and I think Junio gave some
> good guidelines in his reply).

I agree with trying to push a few uncontroversial changes through. I'd
love to take a deeper look at these helper functions and related
cleanups…perhaps it would be worth it to identify a few key areas to
work on in addition to a main GSoC project? In fact, the project I'm
looking to take on (rebase --interactive) also involves code cleanup
and might not take all summer, so I could see how those could work
well together in a proposal.

I'll be re-reading this thread and working on this patch over the
weekend to try to identify the more straightforward hunks I could
submit in a patch.

Thanks Peff and everyone else for your help.
Quint

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] general style: replaces memcmp() with proper starts_with()
  2014-03-14 14:51                   ` Quint Guvernator
@ 2014-03-14 16:56                     ` Junio C Hamano
  0 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2014-03-14 16:56 UTC (permalink / raw)
  To: Quint Guvernator; +Cc: Jeff King, Git Mailing List

Quint Guvernator <quintus.public@gmail.com> writes:

> I'll be re-reading this thread and working on this patch over the
> weekend to try to identify the more straightforward hunks I could
> submit in a patch.

Thanks.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2014-03-14 16:57 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-12 14:43 [PATCH] general style: replaces memcmp() with proper starts_with() Quint Guvernator
2014-03-12 17:56 ` Jeff King
2014-03-12 19:39   ` Junio C Hamano
2014-03-12 19:49     ` Jeff King
2014-03-12 20:08       ` Junio C Hamano
2014-03-12 21:14         ` Jeff King
2014-03-12 21:39           ` Jeff King
2014-03-12 22:06           ` Jeff King
2014-03-12 22:38             ` Junio C Hamano
2014-03-13  3:33               ` Quint Guvernator
2014-03-13 17:46                 ` Junio C Hamano
2014-03-14  4:57                 ` Jeff King
2014-03-14 14:51                   ` Quint Guvernator
2014-03-14 16:56                     ` Junio C Hamano
2014-03-12 20:51     ` René Scharfe
2014-03-12 21:16       ` David Kastrup
2014-03-12 21:45         ` René Scharfe
2014-03-12 20:52     ` David Kastrup
2014-03-12 22:37   ` Junio C Hamano
2014-03-13  6:27     ` David Kastrup
2014-03-13 17:47       ` Junio C Hamano
2014-03-13 17:55         ` Jeff King

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).