git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: tboegi@web.de
To: git@vger.kernel.org
Cc: "Torsten Bögershausen" <tboegi@web.de>
Subject: [PATCH v7 2/2] convert: ce_compare_data() checks for a sha1 of a path
Date: Sat, 21 May 2016 12:01:49 +0200	[thread overview]
Message-ID: <1463824909-10229-1-git-send-email-tboegi@web.de> (raw)
In-Reply-To: <xmqqk2iphcqe.fsf@gitster.mtv.corp.google.com>

From: Torsten Bögershausen <tboegi@web.de>

To compare a file in the working tree with the index, convert_to_git() is used;
the result is hashed and the hash value is compared with ce->sha1.

Deep down would_convert_crlf_at_commit() is invoked, to check if CRLF
are converted or not.
The "new safer autocrlf handling" checks if CRLF had been in the index before,
and if so, the CRLF in the working tree are not converted.

During a merge, a file name in the working tree can have different blobs
in the index with different hash values.
Forwarding ce->sha1 from ce_compare_data() into crlf_to_git() makes sure
that would_convert_crlf_at_commit() looks at the appropriate blob.

Add a new parameter index_blob_sha1 to convert_to_git(), and forward the
sha1 from ce_compare_data() into convert_to_git(). Other callers use NULL
for index_blob_sha1, and the sha1 is determined from path
using get_sha1_from_cache(path). This is the same handling as before.

In the same spirit, forward the sha1 into would_convert_to_git().

While at it, rename has_cr_in_index() into blob_has_cr() and replace
0 with SAFE_CRLF_FALSE.

Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 V6 went into the wrong direction.
 V5 -> V7: Adds parameter index_blob_sha1 to convert_to_git() and would_convert_to_git().

 builtin/apply.c |  3 ++-
 builtin/blame.c |  2 +-
 cache.h         |  1 +
 combine-diff.c  |  3 ++-
 convert.c       | 34 ++++++++++++++++++++++------------
 convert.h       | 15 +++++++++++----
 diff.c          |  3 ++-
 dir.c           |  2 +-
 read-cache.c    |  4 +++-
 sha1_file.c     | 12 +++++++++---
 10 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 8e4da2e..0cf9a0a 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -2140,7 +2140,8 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error(_("unable to open or read %s"), path);
-		convert_to_git(path, buf->buf, buf->len, buf, 0);
+		convert_to_git(path, buf->buf, buf->len, buf,
+			       SAFE_CRLF_FALSE, NULL);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 21f42b0..1c523b6 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2377,7 +2377,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, 0);
+	convert_to_git(path, buf.buf, buf.len, &buf, SAFE_CRLF_FALSE, NULL);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index 15a2a10..868599e 100644
--- a/cache.h
+++ b/cache.h
@@ -605,6 +605,7 @@ extern int ie_modified(const struct index_state *, const struct cache_entry *, s
 
 #define HASH_WRITE_OBJECT 1
 #define HASH_FORMAT_CHECK 2
+#define HASH_USE_SHA_NOT_PATH 4
 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
 extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags);
 
diff --git a/combine-diff.c b/combine-diff.c
index 0e1d4b0..c4fa884 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -1053,7 +1053,8 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+				if (convert_to_git(elem->path, result, len,
+						   &buf, safe_crlf, NULL)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index f524b8d..f0eb4ed 100644
--- a/convert.c
+++ b/convert.c
@@ -217,23 +217,28 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
 	}
 }
 
-static int has_cr_in_index(const char *path)
+static int blob_has_cr(const unsigned char *index_blob_sha1)
 {
 	unsigned long sz;
 	void *data;
-	int has_cr;
-
-	data = read_blob_data_from_cache(path, &sz);
+	int has_cr = 0;
+	enum object_type type;
+	if (!index_blob_sha1)
+		return 0;
+	data = read_sha1_file(index_blob_sha1, &type, &sz);
 	if (!data)
 		return 0;
-	has_cr = memchr(data, '\r', sz) != NULL;
+	if (type == OBJ_BLOB)
+		has_cr = memchr(data, '\r', sz) != NULL;
+
 	free(data);
 	return has_cr;
 }
 
 static int crlf_to_git(const char *path, const char *src, size_t len,
 		       struct strbuf *buf,
-		       enum crlf_action crlf_action, enum safe_crlf checksafe)
+		       enum crlf_action crlf_action, enum safe_crlf checksafe,
+		       const unsigned char *index_blob_sha1)
 {
 	struct text_stat stats;
 	char *dst;
@@ -260,7 +265,9 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 			 * If the file in the index has any CR in it, do not convert.
 			 * This is the new safer autocrlf handling.
 			 */
-			if (has_cr_in_index(path))
+			if (!index_blob_sha1)
+				index_blob_sha1 = get_sha1_from_cache(path);
+			if (blob_has_cr(index_blob_sha1))
 				return 0;
 		}
 	}
@@ -853,7 +860,8 @@ const char *get_convert_attr_ascii(const char *path)
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+		   struct strbuf *dst, enum safe_crlf checksafe,
+		   const unsigned char *index_blob_sha1)
 {
 	int ret = 0;
 	const char *filter = NULL;
@@ -874,7 +882,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe, index_blob_sha1);
 	if (ret && dst) {
 		src = dst->buf;
 		len = dst->len;
@@ -883,7 +891,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 }
 
 void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
-			      enum safe_crlf checksafe)
+			      enum safe_crlf checksafe,
+			      const unsigned char *index_blob_sha1)
 {
 	struct conv_attrs ca;
 	convert_attrs(&ca, path);
@@ -894,7 +903,8 @@ void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
 	if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
 		die("%s: clean filter '%s' failed", path, ca.drv->name);
 
-	crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+	crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action,
+		    checksafe, index_blob_sha1);
 	ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
 }
 
@@ -949,7 +959,7 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | convert_to_git(path, src, len, dst, SAFE_CRLF_FALSE);
+	return ret | convert_to_git(path, src, len, dst, SAFE_CRLF_FALSE, NULL);
 }
 
 /*****************************************************************
diff --git a/convert.h b/convert.h
index ccf436b..60c46b8 100644
--- a/convert.h
+++ b/convert.h
@@ -38,19 +38,26 @@ extern const char *get_convert_attr_ascii(const char *path);
 
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-			  struct strbuf *dst, enum safe_crlf checksafe);
+			  struct strbuf *dst, enum safe_crlf checksafe,
+			  const unsigned char *index_blob_sha1);
+
 extern int convert_to_working_tree(const char *path, const char *src,
 				   size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len,
 			      struct strbuf *dst);
-static inline int would_convert_to_git(const char *path)
+static inline int would_convert_to_git(const char *path,
+				       const unsigned char *index_blob_sha1)
 {
-	return convert_to_git(path, NULL, 0, NULL, 0);
+	return convert_to_git(path, NULL, 0, NULL, SAFE_CRLF_FALSE,
+			      index_blob_sha1);
 }
+
 /* Precondition: would_convert_to_git_filter_fd(path) == true */
 extern void convert_to_git_filter_fd(const char *path, int fd,
 				     struct strbuf *dst,
-				     enum safe_crlf checksafe);
+				     enum safe_crlf checksafe,
+				     const unsigned char *index_blob_sha1);
+
 extern int would_convert_to_git_filter_fd(const char *path);
 
 /*****************************************************************
diff --git a/diff.c b/diff.c
index d3734d3..a8308e0 100644
--- a/diff.c
+++ b/diff.c
@@ -2810,7 +2810,8 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, crlf_warn)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf,
+				   crlf_warn, NULL)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/dir.c b/dir.c
index 656f272..5ac379d 100644
--- a/dir.c
+++ b/dir.c
@@ -713,7 +713,7 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
 				 !ce_stage(active_cache[pos]) &&
 				 ce_uptodate(active_cache[pos]) &&
-				 !would_convert_to_git(fname))
+				 !would_convert_to_git(fname, NULL))
 				hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
 			else
 				hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
diff --git a/read-cache.c b/read-cache.c
index a3ef967..c109b6d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -163,7 +163,9 @@ static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
 
 	if (fd >= 0) {
 		unsigned char sha1[20];
-		if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0))
+		unsigned flags = HASH_USE_SHA_NOT_PATH;
+		memcpy(sha1, ce->sha1, sizeof(sha1));
+		if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, flags))
 			match = hashcmp(sha1, ce->sha1);
 		/* index_fd() closed the file descriptor already */
 	}
diff --git a/sha1_file.c b/sha1_file.c
index d0f2aa0..52e5c6f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3275,6 +3275,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 {
 	int ret, re_allocated = 0;
 	int write_object = flags & HASH_WRITE_OBJECT;
+	const int valid_sha1 = flags & HASH_USE_SHA_NOT_PATH;
 
 	if (!type)
 		type = OBJ_BLOB;
@@ -3285,7 +3286,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-				   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
+				   write_object ? safe_crlf : SAFE_CRLF_FALSE,
+				   valid_sha1 ? sha1 : NULL)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
@@ -3313,13 +3315,15 @@ static int index_stream_convert_blob(unsigned char *sha1, int fd,
 {
 	int ret;
 	const int write_object = flags & HASH_WRITE_OBJECT;
+	const int valid_sha1 = flags & HASH_USE_SHA_NOT_PATH;
 	struct strbuf sbuf = STRBUF_INIT;
 
 	assert(path);
 	assert(would_convert_to_git_filter_fd(path));
 
 	convert_to_git_filter_fd(path, fd, &sbuf,
-				 write_object ? safe_crlf : SAFE_CRLF_FALSE);
+				 write_object ? safe_crlf : SAFE_CRLF_FALSE,
+				 valid_sha1 ? sha1 : NULL);
 
 	if (write_object)
 		ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
@@ -3396,6 +3400,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
 {
 	int ret;
+	const unsigned char *sha1_ce;
+	sha1_ce = flags & HASH_USE_SHA_NOT_PATH ? sha1 : NULL;
 
 	/*
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
@@ -3406,7 +3412,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 	else if (!S_ISREG(st->st_mode))
 		ret = index_pipe(sha1, fd, type, path, flags);
 	else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
-		 (path && would_convert_to_git(path)))
+		 (path && would_convert_to_git(path,sha1_ce)))
 		ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
 				 flags);
 	else
-- 
2.0.0.rc1.6318.g0c2c796

  parent reply	other threads:[~2016-05-21  9:56 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-13 13:49 Bug report: Duplicate CRLF rewrite warnings on commit Adam Dinwoodie
2016-05-13 16:43 ` Junio C Hamano
2016-05-14  5:40   ` Torsten Bögershausen
2016-05-14 11:17     ` Adam Dinwoodie
2016-05-13 18:12 ` Jeff King
2016-05-13 19:46   ` Junio C Hamano
2016-05-13 19:53     ` Jeff King
2016-05-15  6:08 ` [PATCH/RFC v1 0/1] Quickfix ?No duplicate " tboegi
2016-05-15  6:08 ` [PATCH/RFC v1 1/1] No " tboegi
2016-05-15  6:15   ` Eric Sunshine
2016-05-15  6:37 ` [PATCH v1 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15  6:38 ` [PATCH v1 1/3] t6038; use crlf on all platforms tboegi
2016-05-15  6:42   ` Eric Sunshine
2016-05-15  6:38 ` [PATCH v1 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15  6:45   ` Eric Sunshine
2016-05-15  6:38 ` [PATCH v1 3/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15  6:52   ` Eric Sunshine
2016-05-15 22:14   ` Junio C Hamano
2016-05-16 15:51     ` [PATCH v3 0/1] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-16 16:13       ` Junio C Hamano
2016-05-17  4:08         ` Torsten Bögershausen
2016-05-17 16:09           ` [PATCH v1 1/1] t6038; use crlf on all platforms tboegi
2016-05-17 18:39             ` Junio C Hamano
2016-05-17 16:41           ` [PATCH v4 0/2] CRLF: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 16:41           ` [PATCH v4 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-17 16:41           ` [PATCH v4 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 18:58             ` Junio C Hamano
2016-05-18  4:26               ` Torsten Bögershausen
2016-05-18 15:10                 ` Torsten Bögershausen
2016-05-19 14:21           ` [PATCH v5 0/2] CRLF: " tboegi
2016-05-19 23:10             ` Junio C Hamano
2016-05-19 14:21           ` [PATCH v5 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-19 14:21           ` [PATCH v5 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-19 23:03             ` Junio C Hamano
2016-05-20 17:12               ` [PATCH v6 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-20 17:12               ` [PATCH v6 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-20 17:46                 ` Junio C Hamano
2016-05-21 10:01               ` [PATCH v7 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-21 10:01               ` tboegi [this message]
2016-05-24 18:36                 ` [PATCH v7 2/2] convert: ce_compare_data() checks for a sha1 of a path Junio C Hamano
2016-05-16 15:51     ` [PATCH v3 1/1] " tboegi
2016-05-30 17:00     ` [PATCH v1 0/1] t6038-merge-text-auto.sh tboegi
2016-05-30 18:00       ` Junio C Hamano
2016-05-30 18:48         ` Junio C Hamano
2016-05-30 17:00     ` [PATCH v1 1/1] t6038: different eol for "Merge addition of text=auto" tboegi
2016-06-07 15:20     ` [PATCH v2 0/3] unified auto CRLF handling, V2 tboegi
2016-06-07 15:20     ` [PATCH v2 1/3] convert: unify the "auto" handling of CRLF tboegi
2016-06-07 15:20     ` [PATCH v2 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-06-07 15:20     ` [PATCH v2 3/3] Correct ce_compare_data() in a middle of a merge tboegi
2016-05-15 13:02 ` [PATCH v2 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15 13:02 ` [PATCH v2 1/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15 13:02 ` [PATCH v2 2/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15 13:02 ` [PATCH v2 3/3] t6038; use crlf on all platforms tboegi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1463824909-10229-1-git-send-email-tboegi@web.de \
    --to=tboegi@web.de \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).