From: tboegi@web.de
To: git@vger.kernel.org
Cc: "Torsten Bögershausen" <tboegi@web.de>
Subject: [PATCH v6 2/2] convert: ce_compare_data() checks for a sha1 of a path
Date: Fri, 20 May 2016 19:12:46 +0200 [thread overview]
Message-ID: <1463764366-21683-1-git-send-email-tboegi@web.de> (raw)
In-Reply-To: <xmqqk2iphcqe.fsf@gitster.mtv.corp.google.com>
From: Torsten Bögershausen <tboegi@web.de>
To compare a file in working tree with the index, convert_to_git() is used,
the result is hashed and the hash value compared with ce->sha1.
Deep down, would_convert_crlf_at_commit() is invoked to check whether CRLF
are converted or not: when CRLF was already present in the index, CRLF in
the working tree are not converted.
During a merge, a file name in the working tree can have different blobs
in the index, with different hash values.
Forwarding ce->sha1 from ce_compare_data() into crlf_to_git() makes sure
that would_convert_crlf_at_commit() looks at the appropriate blob.
Forward sha1 from ce_compare_data() into convert_to_git().
All other callers use NULL, in which case the sha1 is determined from path
using get_sha1_from_cache(path); this is the same handling as before.
Re-order the arguments for convert_to_git() according to their importance:
`src`, `len` and `dst` describe the place in memory where the conversion is done.
`path` is the file name to look up the attributes.
`sha1` is needed by the "new safer autocrlf handling".
`checksafe` determines whether a warning is printed or an error is raised.
In the same spirit, forward the sha1 into would_convert_to_git().
While at it, rename has_cr_in_index() to blob_has_cr().
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Changes since v6:
Decrease the messiness by 12%.
convert_to_git() has a re-ordered parameter list.
Describe what's going on more clearly in the commit message.
Cleanup: 0 -> SAFE_CRLF_FALSE at some places
---
builtin/apply.c | 3 ++-
builtin/blame.c | 2 +-
cache.h | 1 +
combine-diff.c | 4 +++-
convert.c | 38 +++++++++++++++++++++++++-------------
convert.h | 20 ++++++++++++++------
diff.c | 3 ++-
dir.c | 2 +-
read-cache.c | 4 +++-
sha1_file.c | 17 +++++++++++++----
10 files changed, 65 insertions(+), 29 deletions(-)
diff --git a/builtin/apply.c b/builtin/apply.c
index 8e4da2e..c01654a 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -2140,7 +2140,8 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
case S_IFREG:
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
return error(_("unable to open or read %s"), path);
- convert_to_git(path, buf->buf, buf->len, buf, 0);
+ convert_to_git(buf->buf, buf->len, buf,
+ path, NULL, SAFE_CRLF_FALSE);
return 0;
default:
return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 21f42b0..4a01e20 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2377,7 +2377,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
if (strbuf_read(&buf, 0, 0) < 0)
die_errno("failed to read from stdin");
}
- convert_to_git(path, buf.buf, buf.len, &buf, 0);
+ convert_to_git(buf.buf, buf.len, &buf, path, NULL, SAFE_CRLF_FALSE);
origin->file.ptr = buf.buf;
origin->file.size = buf.len;
pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index 15a2a10..868599e 100644
--- a/cache.h
+++ b/cache.h
@@ -605,6 +605,7 @@ extern int ie_modified(const struct index_state *, const struct cache_entry *, s
#define HASH_WRITE_OBJECT 1
#define HASH_FORMAT_CHECK 2
+#define HASH_USE_SHA_NOT_PATH 4
extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags);
diff --git a/combine-diff.c b/combine-diff.c
index 0e1d4b0..cac4c81 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -1053,7 +1053,9 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
if (is_file) {
struct strbuf buf = STRBUF_INIT;
- if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+ if (convert_to_git(result, len, &buf,
+ elem->path, NULL,
+ safe_crlf)) {
free(result);
result = strbuf_detach(&buf, &len);
result_size = len;
diff --git a/convert.c b/convert.c
index f524b8d..a58bb26 100644
--- a/convert.c
+++ b/convert.c
@@ -217,21 +217,26 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
}
}
-static int has_cr_in_index(const char *path)
+static int blob_has_cr(const unsigned char *sha1)
{
unsigned long sz;
void *data;
- int has_cr;
-
- data = read_blob_data_from_cache(path, &sz);
+ int has_cr = 0;
+ enum object_type type;
+ if (!sha1)
+ return 0;
+ data = read_sha1_file(sha1, &type, &sz);
if (!data)
return 0;
- has_cr = memchr(data, '\r', sz) != NULL;
+ if (type == OBJ_BLOB)
+ has_cr = memchr(data, '\r', sz) != NULL;
+
free(data);
return has_cr;
}
-static int crlf_to_git(const char *path, const char *src, size_t len,
+static int crlf_to_git(const char *path, const unsigned char *sha1,
+ const char *src, size_t len,
struct strbuf *buf,
enum crlf_action crlf_action, enum safe_crlf checksafe)
{
@@ -260,7 +265,9 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
* If the file in the index has any CR in it, do not convert.
* This is the new safer autocrlf handling.
*/
- if (has_cr_in_index(path))
+ if (!sha1)
+ sha1 = get_sha1_from_cache(path);
+ if (blob_has_cr(sha1))
return 0;
}
}
@@ -852,8 +859,10 @@ const char *get_convert_attr_ascii(const char *path)
return "";
}
-int convert_to_git(const char *path, const char *src, size_t len,
- struct strbuf *dst, enum safe_crlf checksafe)
+int convert_to_git(const char *src, size_t len,
+ struct strbuf *dst,
+ const char *path, const unsigned char *sha1,
+ enum safe_crlf checksafe)
{
int ret = 0;
const char *filter = NULL;
@@ -874,7 +883,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
src = dst->buf;
len = dst->len;
}
- ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
+ ret |= crlf_to_git(path, sha1, src, len, dst, ca.crlf_action, checksafe);
if (ret && dst) {
src = dst->buf;
len = dst->len;
@@ -882,7 +891,9 @@ int convert_to_git(const char *path, const char *src, size_t len,
return ret | ident_to_git(path, src, len, dst, ca.ident);
}
-void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
+void convert_to_git_filter_fd(const char *path,
+ const unsigned char *sha1,
+ int fd, struct strbuf *dst,
enum safe_crlf checksafe)
{
struct conv_attrs ca;
@@ -894,7 +905,7 @@ void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
die("%s: clean filter '%s' failed", path, ca.drv->name);
- crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+ crlf_to_git(path, sha1, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
}
@@ -949,7 +960,8 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
src = dst->buf;
len = dst->len;
}
- return ret | convert_to_git(path, src, len, dst, SAFE_CRLF_FALSE);
+ ret |= convert_to_git(src, len, dst, path, NULL, SAFE_CRLF_FALSE);
+ return ret;
}
/*****************************************************************
diff --git a/convert.h b/convert.h
index ccf436b..12fe767 100644
--- a/convert.h
+++ b/convert.h
@@ -37,19 +37,27 @@ extern const char *get_wt_convert_stats_ascii(const char *path);
extern const char *get_convert_attr_ascii(const char *path);
/* returns 1 if *dst was used */
-extern int convert_to_git(const char *path, const char *src, size_t len,
- struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_git(const char *src, size_t len,
+ struct strbuf *dst,
+ const char *path, const unsigned char *sha1,
+ enum safe_crlf checksafe);
+
extern int convert_to_working_tree(const char *path, const char *src,
size_t len, struct strbuf *dst);
extern int renormalize_buffer(const char *path, const char *src, size_t len,
struct strbuf *dst);
-static inline int would_convert_to_git(const char *path)
+
+static inline int would_convert_to_git(const char *path,
+ const unsigned char *sha1)
{
- return convert_to_git(path, NULL, 0, NULL, 0);
+ return convert_to_git(NULL, 0, NULL, path, sha1, SAFE_CRLF_FALSE);
}
+
+
/* Precondition: would_convert_to_git_filter_fd(path) == true */
-extern void convert_to_git_filter_fd(const char *path, int fd,
- struct strbuf *dst,
+extern void convert_to_git_filter_fd(const char *path,
+ const unsigned char *sha1,
+ int fd, struct strbuf *dst,
enum safe_crlf checksafe);
extern int would_convert_to_git_filter_fd(const char *path);
diff --git a/diff.c b/diff.c
index d3734d3..9c00973 100644
--- a/diff.c
+++ b/diff.c
@@ -2810,7 +2810,8 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
/*
* Convert from working tree format to canonical git format
*/
- if (convert_to_git(s->path, s->data, s->size, &buf, crlf_warn)) {
+ if (convert_to_git(s->data, s->size, &buf, s->path, NULL,
+ crlf_warn)) {
size_t size = 0;
munmap(s->data, s->size);
s->should_munmap = 0;
diff --git a/dir.c b/dir.c
index 656f272..5ac379d 100644
--- a/dir.c
+++ b/dir.c
@@ -713,7 +713,7 @@ static int add_excludes(const char *fname, const char *base, int baselen,
(pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
!ce_stage(active_cache[pos]) &&
ce_uptodate(active_cache[pos]) &&
- !would_convert_to_git(fname))
+ !would_convert_to_git(fname, NULL))
hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
else
hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
diff --git a/read-cache.c b/read-cache.c
index a3ef967..c109b6d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -163,7 +163,9 @@ static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
if (fd >= 0) {
unsigned char sha1[20];
- if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0))
+ unsigned flags = HASH_USE_SHA_NOT_PATH;
+ memcpy(sha1, ce->sha1, sizeof(sha1));
+ if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, flags))
match = hashcmp(sha1, ce->sha1);
/* index_fd() closed the file descriptor already */
}
diff --git a/sha1_file.c b/sha1_file.c
index d0f2aa0..48906b0 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3275,6 +3275,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
{
int ret, re_allocated = 0;
int write_object = flags & HASH_WRITE_OBJECT;
+ const int valid_sha1 = flags & HASH_USE_SHA_NOT_PATH;
if (!type)
type = OBJ_BLOB;
@@ -3284,8 +3285,11 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
*/
if ((type == OBJ_BLOB) && path) {
struct strbuf nbuf = STRBUF_INIT;
- if (convert_to_git(path, buf, size, &nbuf,
- write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
+ if (convert_to_git(
+ buf, size, &nbuf,path,
+ valid_sha1 ? sha1 : NULL,
+ write_object ? safe_crlf : SAFE_CRLF_FALSE)){
+
buf = strbuf_detach(&nbuf, &size);
re_allocated = 1;
}
@@ -3313,12 +3317,15 @@ static int index_stream_convert_blob(unsigned char *sha1, int fd,
{
int ret;
const int write_object = flags & HASH_WRITE_OBJECT;
+ const int valid_sha1 = flags & HASH_USE_SHA_NOT_PATH;
struct strbuf sbuf = STRBUF_INIT;
assert(path);
assert(would_convert_to_git_filter_fd(path));
- convert_to_git_filter_fd(path, fd, &sbuf,
+ convert_to_git_filter_fd(path,
+ valid_sha1 ? sha1 : NULL,
+ fd, &sbuf,
write_object ? safe_crlf : SAFE_CRLF_FALSE);
if (write_object)
@@ -3396,6 +3403,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
int ret;
+ const unsigned char *sha1_ce;
+ sha1_ce = flags & HASH_USE_SHA_NOT_PATH ? sha1 : NULL;
/*
* Call xsize_t() only when needed to avoid potentially unnecessary
@@ -3406,7 +3415,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
- (path && would_convert_to_git(path)))
+ (path && would_convert_to_git(path,sha1_ce)))
ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
flags);
else
--
2.0.0.rc1.6318.g0c2c796
next prev parent reply other threads:[~2016-05-20 17:07 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-05-13 13:49 Bug report: Duplicate CRLF rewrite warnings on commit Adam Dinwoodie
2016-05-13 16:43 ` Junio C Hamano
2016-05-14 5:40 ` Torsten Bögershausen
2016-05-14 11:17 ` Adam Dinwoodie
2016-05-13 18:12 ` Jeff King
2016-05-13 19:46 ` Junio C Hamano
2016-05-13 19:53 ` Jeff King
2016-05-15 6:08 ` [PATCH/RFC v1 0/1] Quickfix ?No duplicate " tboegi
2016-05-15 6:08 ` [PATCH/RFC v1 1/1] No " tboegi
2016-05-15 6:15 ` Eric Sunshine
2016-05-15 6:37 ` [PATCH v1 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15 6:38 ` [PATCH v1 1/3] t6038; use crlf on all platforms tboegi
2016-05-15 6:42 ` Eric Sunshine
2016-05-15 6:38 ` [PATCH v1 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15 6:45 ` Eric Sunshine
2016-05-15 6:38 ` [PATCH v1 3/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15 6:52 ` Eric Sunshine
2016-05-15 22:14 ` Junio C Hamano
2016-05-16 15:51 ` [PATCH v3 0/1] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-16 16:13 ` Junio C Hamano
2016-05-17 4:08 ` Torsten Bögershausen
2016-05-17 16:09 ` [PATCH v1 1/1] t6038; use crlf on all platforms tboegi
2016-05-17 18:39 ` Junio C Hamano
2016-05-17 16:41 ` [PATCH v4 0/2] CRLF: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 16:41 ` [PATCH v4 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-17 16:41 ` [PATCH v4 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-17 18:58 ` Junio C Hamano
2016-05-18 4:26 ` Torsten Bögershausen
2016-05-18 15:10 ` Torsten Bögershausen
2016-05-19 14:21 ` [PATCH v5 0/2] CRLF: " tboegi
2016-05-19 23:10 ` Junio C Hamano
2016-05-19 14:21 ` [PATCH v5 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-19 14:21 ` [PATCH v5 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-19 23:03 ` Junio C Hamano
2016-05-20 17:12 ` [PATCH v6 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-20 17:12 ` tboegi [this message]
2016-05-20 17:46 ` [PATCH v6 2/2] convert: ce_compare_data() checks for a sha1 of a path Junio C Hamano
2016-05-21 10:01 ` [PATCH v7 1/2] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-21 10:01 ` [PATCH v7 2/2] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-24 18:36 ` Junio C Hamano
2016-05-16 15:51 ` [PATCH v3 1/1] " tboegi
2016-05-30 17:00 ` [PATCH v1 0/1] t6038-merge-text-auto.sh tboegi
2016-05-30 18:00 ` Junio C Hamano
2016-05-30 18:48 ` Junio C Hamano
2016-05-30 17:00 ` [PATCH v1 1/1] t6038: different eol for "Merge addition of text=auto" tboegi
2016-06-07 15:20 ` [PATCH v2 0/3] unified auto CRLF handling, V2 tboegi
2016-06-07 15:20 ` [PATCH v2 1/3] convert: unify the "auto" handling of CRLF tboegi
2016-06-07 15:20 ` [PATCH v2 2/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-06-07 15:20 ` [PATCH v2 3/3] Correct ce_compare_data() in a middle of a merge tboegi
2016-05-15 13:02 ` [PATCH v2 0/3] CRLF-Handling: bug fix around ce_compare_data() tboegi
2016-05-15 13:02 ` [PATCH v2 1/3] read-cache: factor out get_sha1_from_index() helper tboegi
2016-05-15 13:02 ` [PATCH v2 2/3] convert: ce_compare_data() checks for a sha1 of a path tboegi
2016-05-15 13:02 ` [PATCH v2 3/3] t6038; use crlf on all platforms tboegi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1463764366-21683-1-git-send-email-tboegi@web.de \
--to=tboegi@web.de \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).