git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: tboegi@web.de
To: git@vger.kernel.org
Cc: "Torsten Bögershausen" <tboegi@web.de>
Subject: [PATCH v3 1/1] convert: ce_compare_data() checks for a sha1 of a path
Date: Mon, 16 May 2016 17:51:31 +0200	[thread overview]
Message-ID: <1463413891-12529-1-git-send-email-tboegi@web.de> (raw)
In-Reply-To: <xmqq7fev55qk.fsf@gitster.mtv.corp.google.com>

From: Torsten Bögershausen <tboegi@web.de>

To compare a file in working tree with the index, convert_to_git() is used,
the the result is hashed and the hash value compared with ce->sha1.

Deep down would_convert_crlf_at_commit() is invoked, to check if CRLF
are converted or not: When a CRLF had been in the index before, CRLF in
the working tree are not converted.

While in a merge, a file name in the working tree has different blobs
in the index with different hash values.
Forwarding ce->sha1 from ce_compare_data() into crlf_to_git() makes sure
the would_convert_crlf_at_commit() looks at the appropriate blob.

Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 cache.h      |  1 +
 convert.c    | 35 +++++++++++++++++++++--------------
 convert.h    | 23 +++++++++++++++++++----
 read-cache.c |  4 +++-
 sha1_file.c  | 17 +++++++++++++----
 5 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/cache.h b/cache.h
index 160f8e3..5b25462 100644
--- a/cache.h
+++ b/cache.h
@@ -604,6 +604,7 @@ extern int ie_modified(const struct index_state *, const struct cache_entry *, s
 
 #define HASH_WRITE_OBJECT 1
 #define HASH_FORMAT_CHECK 2
+#define HASH_CE_HAS_SHA1  4
 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
 extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags);
 
diff --git a/convert.c b/convert.c
index f524b8d..dc86899 100644
--- a/convert.c
+++ b/convert.c
@@ -217,21 +217,25 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
 	}
 }
 
-static int has_cr_in_index(const char *path)
+static int has_cr_in_index(const char *path, const unsigned char *sha1)
 {
 	unsigned long sz;
 	void *data;
-	int has_cr;
-
-	data = read_blob_data_from_cache(path, &sz);
-	if (!data)
-		return 0;
-	has_cr = memchr(data, '\r', sz) != NULL;
+	int has_cr = 0;
+	enum object_type type = OBJ_BLOB;
+	if (sha1)
+		data = read_sha1_file(sha1, &type, &sz);
+	else
+		data = read_blob_data_from_cache(path, &sz);
+
+	if (data)
+		has_cr = memchr(data, '\r', sz) != NULL;
 	free(data);
 	return has_cr;
 }
 
-static int crlf_to_git(const char *path, const char *src, size_t len,
+static int crlf_to_git(const char *path, const unsigned char *sha1,
+		       const char *src, size_t len,
 		       struct strbuf *buf,
 		       enum crlf_action crlf_action, enum safe_crlf checksafe)
 {
@@ -260,7 +264,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 			 * If the file in the index has any CR in it, do not convert.
 			 * This is the new safer autocrlf handling.
 			 */
-			if (has_cr_in_index(path))
+			if (has_cr_in_index(path, sha1))
 				return 0;
 		}
 	}
@@ -852,8 +856,9 @@ const char *get_convert_attr_ascii(const char *path)
 	return "";
 }
 
-int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+int convert_to_git_ce_sha1(const char *path, const unsigned char *sha1,
+			   const char *src, size_t len,
+			   struct strbuf *dst, enum safe_crlf checksafe)
 {
 	int ret = 0;
 	const char *filter = NULL;
@@ -874,7 +879,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
+	ret |= crlf_to_git(path, sha1, src, len, dst, ca.crlf_action, checksafe);
 	if (ret && dst) {
 		src = dst->buf;
 		len = dst->len;
@@ -882,7 +887,9 @@ int convert_to_git(const char *path, const char *src, size_t len,
 	return ret | ident_to_git(path, src, len, dst, ca.ident);
 }
 
-void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
+void convert_to_git_filter_fd(const char *path,
+			      const unsigned char *sha1,
+			      int fd, struct strbuf *dst,
 			      enum safe_crlf checksafe)
 {
 	struct conv_attrs ca;
@@ -894,7 +901,7 @@ void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
 	if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
 		die("%s: clean filter '%s' failed", path, ca.drv->name);
 
-	crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+	crlf_to_git(path, sha1, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
 	ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
 }
 
diff --git a/convert.h b/convert.h
index ccf436b..1634d37 100644
--- a/convert.h
+++ b/convert.h
@@ -37,8 +37,16 @@ extern const char *get_wt_convert_stats_ascii(const char *path);
 extern const char *get_convert_attr_ascii(const char *path);
 
 /* returns 1 if *dst was used */
-extern int convert_to_git(const char *path, const char *src, size_t len,
-			  struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_git_ce_sha1(const char *path, const unsigned char *sha1,
+				  const char *src, size_t len,
+				  struct strbuf *dst, enum safe_crlf checksafe);
+
+static inline int convert_to_git(const char *path, const char *src, size_t len,
+				 struct strbuf *dst, enum safe_crlf checksafe)
+{
+	return convert_to_git_ce_sha1(path, NULL, src, len, dst, checksafe);
+}
+
 extern int convert_to_working_tree(const char *path, const char *src,
 				   size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len,
@@ -47,9 +55,16 @@ static inline int would_convert_to_git(const char *path)
 {
 	return convert_to_git(path, NULL, 0, NULL, 0);
 }
+static inline int would_convert_to_git_ce_sha1(const char *path,
+					       const unsigned char *sha1)
+{
+	return convert_to_git_ce_sha1(path, sha1, NULL, 0, NULL, 0);
+}
+
 /* Precondition: would_convert_to_git_filter_fd(path) == true */
-extern void convert_to_git_filter_fd(const char *path, int fd,
-				     struct strbuf *dst,
+extern void convert_to_git_filter_fd(const char *path,
+				     const unsigned char *sha1,
+				     int fd, struct strbuf *dst,
 				     enum safe_crlf checksafe);
 extern int would_convert_to_git_filter_fd(const char *path);
 
diff --git a/read-cache.c b/read-cache.c
index d9fb78b..4b0e3c3 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -163,7 +163,9 @@ static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
 
 	if (fd >= 0) {
 		unsigned char sha1[20];
-		if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0))
+		unsigned flags = HASH_CE_HAS_SHA1;
+		memcpy(sha1, ce->sha1, sizeof(sha1));
+		if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, flags))
 			match = hashcmp(sha1, ce->sha1);
 		/* index_fd() closed the file descriptor already */
 	}
diff --git a/sha1_file.c b/sha1_file.c
index d0f2aa0..dd013d5 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3275,6 +3275,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 {
 	int ret, re_allocated = 0;
 	int write_object = flags & HASH_WRITE_OBJECT;
+	const int valid_sha1 = flags & HASH_CE_HAS_SHA1;
 
 	if (!type)
 		type = OBJ_BLOB;
@@ -3284,8 +3285,11 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	 */
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
-		if (convert_to_git(path, buf, size, &nbuf,
-				   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
+		if (convert_to_git_ce_sha1(path,
+					   valid_sha1 ? sha1 : NULL,
+					   buf, size, &nbuf,
+					   write_object ? safe_crlf : SAFE_CRLF_FALSE)){
+
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
@@ -3313,12 +3317,15 @@ static int index_stream_convert_blob(unsigned char *sha1, int fd,
 {
 	int ret;
 	const int write_object = flags & HASH_WRITE_OBJECT;
+	const int valid_sha1 = flags & HASH_CE_HAS_SHA1;
 	struct strbuf sbuf = STRBUF_INIT;
 
 	assert(path);
 	assert(would_convert_to_git_filter_fd(path));
 
-	convert_to_git_filter_fd(path, fd, &sbuf,
+	convert_to_git_filter_fd(path,
+				 valid_sha1 ? sha1 : NULL,
+				 fd, &sbuf,
 				 write_object ? safe_crlf : SAFE_CRLF_FALSE);
 
 	if (write_object)
@@ -3396,6 +3403,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
 {
 	int ret;
+	const unsigned char *sha1_ce;
+	sha1_ce = flags & HASH_CE_HAS_SHA1 ? sha1 : NULL;
 
 	/*
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
@@ -3406,7 +3415,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 	else if (!S_ISREG(st->st_mode))
 		ret = index_pipe(sha1, fd, type, path, flags);
 	else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
-		 (path && would_convert_to_git(path)))
+		 (path && would_convert_to_git_ce_sha1(path,sha1_ce)))
 		ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
 				 flags);
 	else
-- 
2.0.0.rc1.6318.g0c2c796

  parent reply	other threads:[~2016-05-16 15:46 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-13 13:49 Bug report: Duplicate CRLF rewrite warnings on commit Adam Dinwoodie
2016-05-13 16:43 ` Junio C Hamano
2016-05-14  5:40   ` Torsten Bögershausen
2016-05-14 11:17     ` Adam Dinwoodie
2016-05-13 18:12 ` Jeff King
2016-05-13 19:46   ` Junio C Hamano
2016-05-13 19:53     ` Jeff King
2016-05-15  6:08 ` [PATCH/RFC v1 0/1] Quickfix ?No duplicate " tboegi
2016-05-15  6:08 ` [PATCH/RFC v1 1/1] No " tboegi
2016-05-15  6:15   ` Eric Sunshine
2016-05-15  6:37 ` [PATCH v1 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15  6:38 ` [PATCH v1 1/3] t6038; use crlf on all platforms tboegi
2016-05-15  6:42   ` Eric Sunshine
2016-05-15  6:38 ` [PATCH v1 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15  6:45   ` Eric Sunshine
2016-05-15  6:38 ` [PATCH v1 3/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15  6:52   ` Eric Sunshine
2016-05-15 22:14   ` Junio C Hamano
2016-05-16 15:51     ` [PATCH v3 0/1] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-16 16:13       ` Junio C Hamano
2016-05-17  4:08         ` Torsten Bögershausen
2016-05-17 16:09           ` [PATCH v1 1/1] t6038; use crlf on all platforms tboegi
2016-05-17 18:39             ` Junio C Hamano
2016-05-17 16:41           ` [PATCH v4 0/2] CRLF: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 16:41           ` [PATCH v4 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-17 16:41           ` [PATCH v4 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 18:58             ` Junio C Hamano
2016-05-18  4:26               ` Torsten Bögershausen
2016-05-18 15:10                 ` Torsten Bögershausen
2016-05-19 14:21           ` [PATCH v5 0/2] CRLF: " tboegi
2016-05-19 23:10             ` Junio C Hamano
2016-05-19 14:21           ` [PATCH v5 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-19 14:21           ` [PATCH v5 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-19 23:03             ` Junio C Hamano
2016-05-20 17:12               ` [PATCH v6 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-20 17:12               ` [PATCH v6 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-20 17:46                 ` Junio C Hamano
2016-05-21 10:01               ` [PATCH v7 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-21 10:01               ` [PATCH v7 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-24 18:36                 ` Junio C Hamano
2016-05-16 15:51     ` tboegi [this message]
2016-05-30 17:00     ` [PATCH v1 0/1] t6038-merge-text-auto.sh tboegi
2016-05-30 18:00       ` Junio C Hamano
2016-05-30 18:48         ` Junio C Hamano
2016-05-30 17:00     ` [PATCH v1 1/1] t6038: different eol for "Merge addition of text=auto" tboegi
2016-06-07 15:20     ` [PATCH v2 0/3] unified auto CRLF handling, V2 tboegi
2016-06-07 15:20     ` [PATCH v2 1/3] convert: unify the "auto" handling of CRLF tboegi
2016-06-07 15:20     ` [PATCH v2 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-06-07 15:20     ` [PATCH v2 3/3] Correct ce_compare_data() in a middle of a merge tboegi
2016-05-15 13:02 ` [PATCH v2 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15 13:02 ` [PATCH v2 1/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15 13:02 ` [PATCH v2 2/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15 13:02 ` [PATCH v2 3/3] t6038; use crlf on all platforms tboegi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1463413891-12529-1-git-send-email-tboegi@web.de \
    --to=tboegi@web.de \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).