From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH 2/2] Fix oversimplified optimization for add_cache_entry().
Date: Fri, 24 Jun 2005 16:40:22 -0700 [thread overview]
Message-ID: <7vvf439vdl.fsf@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vaclgfynv.fsf@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Thu, 23 Jun 2005 16:20:36 -0700")
An earlier change to optimize directory-file conflict check
broke what "read-tree --emu23" expects. Introduce an explicit
flag to tell add_cache_entry() not to check for conflicts and
use it when reading an existing tree into an empty stage ---
by definition this case can never introduce such conflicts.
Resurrect the unoptimized directory-file conflict check code for
now as well. The new one did not handle higher stages properly.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
cache.h | 1
read-cache.c | 177 +++++++++++++++++++------------------
t/t1005-read-tree-m-2way-emu23.sh | 6 +
tree.c | 2
4 files changed, 95 insertions(+), 91 deletions(-)
cb13405368b0132ec3b3edcda22d32d89e9c1f85
diff --git a/cache.h b/cache.h
--- a/cache.h
+++ b/cache.h
@@ -130,6 +130,7 @@ extern int write_cache(int newfd, struct
extern int cache_name_pos(const char *name, int namelen);
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
+#define ADD_CACHE_SKIP_DF_CHECK 4 /* Ok to skip directory/file conflict checks */
extern int add_cache_entry(struct cache_entry *ce, int option);
extern int remove_cache_entry_at(int pos);
extern int remove_file_from_cache(char *path);
diff --git a/read-cache.c b/read-cache.c
--- a/read-cache.c
+++ b/read-cache.c
@@ -171,83 +171,6 @@ int ce_same_name(struct cache_entry *a,
return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
}
-/*
- * Do we have another file that has the beginning components being a
- * proper superset of the name we're trying to add?
- */
-static int has_file_name(const struct cache_entry *ce, int pos, int ok_to_replace)
-{
- int retval = 0;
- int len = ce_namelen(ce);
- const char *name = ce->name;
-
- while (pos < active_nr) {
- struct cache_entry *p = active_cache[pos++];
-
- if (len >= ce_namelen(p))
- break;
- if (memcmp(name, p->name, len))
- break;
- if (p->name[len] != '/')
- continue;
- retval = -1;
- if (!ok_to_replace)
- break;
- remove_cache_entry_at(--pos);
- }
- return retval;
-}
-
-/*
- * Do we have another file with a pathname that is a proper
- * subset of the name we're trying to add?
- */
-static int has_dir_name(const struct cache_entry *ce, int pos, int ok_to_replace)
-{
- int retval = 0;
- const char *name = ce->name;
- const char *slash = name + ce_namelen(ce);
-
- for (;;) {
- int len;
-
- for (;;) {
- if (*--slash == '/')
- break;
- if (slash <= ce->name)
- return retval;
- }
- len = slash - name;
-
- pos = cache_name_pos(name, len);
- if (pos >= 0) {
- retval = -1;
- if (ok_to_replace)
- break;
- remove_cache_entry_at(pos);
- continue;
- }
-
- /*
- * Trivial optimization: if we find an entry that
- * already matches the sub-directory, then we know
- * we're ok, and we can exit
- */
- pos = -pos-1;
- if (pos < active_nr) {
- struct cache_entry *p = active_cache[pos];
- if (ce_namelen(p) <= len)
- continue;
- if (p->name[len] != '/')
- continue;
- if (memcmp(p->name, name, len))
- continue;
- break;
- }
- }
- return retval;
-}
-
/* We may be in a situation where we already have path/file and path
* is being added, or we already have path and path/file is being
* added. Either one would result in a nonsense tree that has path
@@ -257,19 +180,98 @@ static int has_dir_name(const struct cac
* from the cache so the caller should recompute the insert position.
* When this happens, we return non-zero.
*/
-static int check_file_directory_conflict(const struct cache_entry *ce, int pos, int ok_to_replace)
+static int check_file_directory_conflict(const struct cache_entry *ce,
+ int ok_to_replace)
{
+ int pos, replaced = 0;
+ const char *path = ce->name;
+ int namelen = strlen(path);
+ int stage = ce_stage(ce);
+ char *pathbuf = xmalloc(namelen + 1);
+ char *cp;
+
+ memcpy(pathbuf, path, namelen + 1);
+
/*
- * We check if the path is a sub-path of a subsequent pathname
- * first, since removing those will not change the position
- * in the array
+ * We are inserting path/file. Do they have path registered at
+ * the same stage? We need to do this for all the levels of our
+ * subpath.
*/
- int retval = has_file_name(ce, pos, ok_to_replace);
- /*
- * Then check if the path might have a clashing sub-directory
- * before it.
+ cp = pathbuf;
+ while (1) {
+ char *ep = strchr(cp, '/');
+ int len;
+ if (!ep)
+ break;
+ *ep = 0; /* first cut it at slash */
+ len = ep - pathbuf;
+ pos = cache_name_pos(pathbuf,
+ ntohs(create_ce_flags(len, stage)));
+ if (0 <= pos) {
+ /* Our leading path component is registered as a file,
+ * and we are trying to make it a directory. This is
+ * bad.
+ */
+ if (!ok_to_replace) {
+ free(pathbuf);
+ return -1;
+ }
+ fprintf(stderr, "removing file '%s' to replace it with a directory to create '%s'.\n", pathbuf, path);
+ remove_cache_entry_at(pos);
+ replaced = 1;
+ }
+ *ep = '/'; /* then restore it and go downwards */
+ cp = ep + 1;
+ }
+ free(pathbuf);
+
+ /* Do we have an entry in the cache that makes our path a prefix
+ * of it? That is, are we creating a file where they already expect
+ * a directory there?
+ */
+ pos = cache_name_pos(path,
+ ntohs(create_ce_flags(namelen, stage)));
+
+ /* (0 <= pos) cannot happen because add_cache_entry()
+ * should have taken care of that case.
+ */
+ pos = -pos-1;
+
+ /* pos would point at an existing entry that would come immediately
+ * after our path. It could be the same as our path in higher stage,
+ * or different path but in a lower stage.
+ *
+ * E.g. when we are inserting path at stage 2,
+ *
+ * 1 path
+ * pos-> 3 path
+ * 2 path/file1
+ * 3 path/file1
+ * 2 path/file2
+ * 2 patho
+ *
+ * We need to examine pos, ignore it because it is at different
+ * stage, examine next to find the path/file at stage 2, and
+ * complain. We need to do this until we are not the leading
+ * path of an existing entry anymore.
*/
- return retval + has_dir_name(ce, pos, ok_to_replace);
+
+ while (pos < active_nr) {
+ struct cache_entry *other = active_cache[pos];
+ if (strncmp(other->name, path, namelen))
+ break; /* it is not our "subdirectory" anymore */
+ if ((ce_stage(other) == stage) &&
+ other->name[namelen] == '/') {
+ if (!ok_to_replace)
+ return -1;
+ fprintf(stderr, "removing file '%s' under '%s' to be replaced with a file\n", other->name, path);
+ remove_cache_entry_at(pos);
+ replaced = 1;
+ continue; /* cycle without updating pos */
+ }
+ pos++;
+ }
+ return replaced;
}
int add_cache_entry(struct cache_entry *ce, int option)
@@ -277,6 +279,7 @@ int add_cache_entry(struct cache_entry *
int pos;
int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
+ int skip_df_check = option & ADD_CACHE_SKIP_DF_CHECK;
pos = cache_name_pos(ce->name, ntohs(ce->ce_flags));
/* existing match? Just replace it */
@@ -302,7 +305,7 @@ int add_cache_entry(struct cache_entry *
if (!ok_to_add)
return -1;
- if (!ce_stage(ce) && check_file_directory_conflict(ce, pos, ok_to_replace)) {
+ if (!skip_df_check && check_file_directory_conflict(ce, ok_to_replace)) {
if (!ok_to_replace)
return -1;
pos = cache_name_pos(ce->name, ntohs(ce->ce_flags));
diff --git a/t/t1005-read-tree-m-2way-emu23.sh b/t/t1005-read-tree-m-2way-emu23.sh
--- a/t/t1005-read-tree-m-2way-emu23.sh
+++ b/t/t1005-read-tree-m-2way-emu23.sh
@@ -389,7 +389,7 @@ test_expect_success \
:'
# Emu23 can grok I having more than H. Make sure we did not
-# botch the conflict tests (Linus code botches this test).
+# botch the conflict tests (fixed).
test_expect_success \
'DF vs DF/DF case test (#2).' \
'rm -f .git/index &&
@@ -400,8 +400,8 @@ test_expect_success \
# This should fail because I and H have a conflict
# at DF.
if git-read-tree --emu23 $treeDF $treeDFDF
- then true ;# should be false
- else false ;# should be true
+ then false
+ else true
fi'
test_done
diff --git a/tree.c b/tree.c
--- a/tree.c
+++ b/tree.c
@@ -18,7 +18,7 @@ static int read_one_entry(unsigned char
memcpy(ce->name, base, baselen);
memcpy(ce->name + baselen, pathname, len+1);
memcpy(ce->sha1, sha1, 20);
- return add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
+ return add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DF_CHECK);
}
static int read_tree_recursive(void *buffer, unsigned long size,
------------
next prev parent reply other threads:[~2005-06-24 23:35 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-06-23 23:20 [PATCH 0/2] D/F conflicts fixes Junio C Hamano
2005-06-23 23:25 ` [PATCH 1/2] Add more tests for read-tree --emu23 Junio C Hamano
2005-06-24 23:40 ` Junio C Hamano [this message]
2005-06-25 0:57 ` [PATCH 2/2] Fix oversimplified optimization for add_cache_entry() Linus Torvalds
2005-06-25 2:40 ` Junio C Hamano
2005-06-25 9:16 ` [PATCH 0/9] " Junio C Hamano
2005-06-25 9:21 ` [PATCH 1/9] fix date parsing for GIT raw commit timestamp format Junio C Hamano
2005-06-25 9:22 ` [PATCH 2/9] git-commit-script: get commit message from an existing one Junio C Hamano
2005-06-25 9:22 ` [PATCH 3/9] git-cherry: find commits not merged upstream Junio C Hamano
2005-06-25 9:23 ` [PATCH 4/9] git-rebase-script: rebase local commits to new upstream head Junio C Hamano
2005-06-25 9:24 ` [PATCH 5/9] Add more tests for read-tree --emu23 Junio C Hamano
2005-06-25 9:24 ` [PATCH 6/9] git-merge-one-file-script: do not misinterpret rm failure Junio C Hamano
2005-06-25 9:25 ` [PATCH 7/9] Fix oversimplified optimization for add_cache_entry() Junio C Hamano
2005-06-25 9:25 ` [PATCH 8/9] http-pull: documentation updates Junio C Hamano
2005-06-25 9:26 ` [PATCH 9/9] Add a bit of developer documentation to pull.h Junio C Hamano
2005-06-26 1:02 ` [RFD] consider "git" wrapper semi-Porcelain Junio C Hamano
2005-06-26 1:21 ` Linus Torvalds
2005-06-26 12:02 ` Junio C Hamano
2005-06-26 12:36 ` Martijn Kuipers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vvf439vdl.fsf@assigned-by-dhcp.cox.net \
--to=junkio@cox.net \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).