git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH 2/2] Fix oversimplified optimization for add_cache_entry().
Date: Fri, 24 Jun 2005 16:40:22 -0700	[thread overview]
Message-ID: <7vvf439vdl.fsf@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vaclgfynv.fsf@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Thu, 23 Jun 2005 16:20:36 -0700")

An earlier change to optimize directory-file conflict check
broke what "read-tree --emu23" expects.  Introduce an explicit
flag to tell add_cache_entry() not to check for conflicts and
use it when reading an existing tree into an empty stage ---
by definition this case can never introduce such conflicts.

Resurrect the unoptimized directory-file conflict check code for
now as well.  The new one did not handle higher stages properly.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

 cache.h                           |    1 
 read-cache.c                      |  177 +++++++++++++++++++------------------
 t/t1005-read-tree-m-2way-emu23.sh |    6 +
 tree.c                            |    2 
 4 files changed, 95 insertions(+), 91 deletions(-)

cb13405368b0132ec3b3edcda22d32d89e9c1f85
diff --git a/cache.h b/cache.h
--- a/cache.h
+++ b/cache.h
@@ -130,6 +130,7 @@ extern int write_cache(int newfd, struct
 extern int cache_name_pos(const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
+#define ADD_CACHE_SKIP_DF_CHECK 4	/* Ok to skip directory/file conflict checks */
 extern int add_cache_entry(struct cache_entry *ce, int option);
 extern int remove_cache_entry_at(int pos);
 extern int remove_file_from_cache(char *path);
diff --git a/read-cache.c b/read-cache.c
--- a/read-cache.c
+++ b/read-cache.c
@@ -171,83 +171,6 @@ int ce_same_name(struct cache_entry *a, 
 	return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
 }
 
-/*
- * Do we have another file that has the beginning components being a
- * proper superset of the name we're trying to add?
- */
-static int has_file_name(const struct cache_entry *ce, int pos, int ok_to_replace)
-{
-	int retval = 0;
-	int len = ce_namelen(ce);
-	const char *name = ce->name;
-
-	while (pos < active_nr) {
-		struct cache_entry *p = active_cache[pos++];
-
-		if (len >= ce_namelen(p))
-			break;
-		if (memcmp(name, p->name, len))
-			break;
-		if (p->name[len] != '/')
-			continue;
-		retval = -1;
-		if (!ok_to_replace)
-			break;
-		remove_cache_entry_at(--pos);
-	}
-	return retval;
-}
-
-/*
- * Do we have another file with a pathname that is a proper
- * subset of the name we're trying to add?
- */
-static int has_dir_name(const struct cache_entry *ce, int pos, int ok_to_replace)
-{
-	int retval = 0;
-	const char *name = ce->name;
-	const char *slash = name + ce_namelen(ce);
-
-	for (;;) {
-		int len;
-
-		for (;;) {
-			if (*--slash == '/')
-				break;
-			if (slash <= ce->name)
-				return retval;
-		}
-		len = slash - name;
-
-		pos = cache_name_pos(name, len);
-		if (pos >= 0) {
-			retval = -1;
-			if (ok_to_replace)
-				break;
-			remove_cache_entry_at(pos);
-			continue;
-		}
-
-		/*
-		 * Trivial optimization: if we find an entry that
-		 * already matches the sub-directory, then we know
-		 * we're ok, and we can exit
-		 */
-		pos = -pos-1;
-		if (pos < active_nr) {
-			struct cache_entry *p = active_cache[pos];
-			if (ce_namelen(p) <= len)
-				continue;
-			if (p->name[len] != '/')
-				continue;
-			if (memcmp(p->name, name, len))
-				continue;
-			break;
-		}
-	}
-	return retval;
-}
-
 /* We may be in a situation where we already have path/file and path
  * is being added, or we already have path and path/file is being
  * added.  Either one would result in a nonsense tree that has path
@@ -257,19 +180,98 @@ static int has_dir_name(const struct cac
  * from the cache so the caller should recompute the insert position.
  * When this happens, we return non-zero.
  */
-static int check_file_directory_conflict(const struct cache_entry *ce, int pos, int ok_to_replace)
+static int check_file_directory_conflict(const struct cache_entry *ce,
+					 int ok_to_replace)
 {
+	int pos, replaced = 0;
+	const char *path = ce->name;
+	int namelen = strlen(path);
+	int stage = ce_stage(ce);
+	char *pathbuf = xmalloc(namelen + 1);
+	char *cp;
+
+	memcpy(pathbuf, path, namelen + 1);
+
 	/*
-	 * We check if the path is a sub-path of a subsequent pathname
-	 * first, since removing those will not change the position
-	 * in the array
+	 * We are inserting path/file.  Do they have path registered at
+	 * the same stage?  We need to do this for all the levels of our
+	 * subpath.
 	 */
-	int retval = has_file_name(ce, pos, ok_to_replace);
-	/*
-	 * Then check if the path might have a clashing sub-directory
-	 * before it.
+	cp = pathbuf;
+	while (1) {
+		char *ep = strchr(cp, '/');
+		int len;
+		if (!ep)
+			break;
+		*ep = 0;    /* first cut it at slash */
+		len = ep - pathbuf;
+		pos = cache_name_pos(pathbuf,
+				     ntohs(create_ce_flags(len, stage)));
+		if (0 <= pos) {
+			/* Our leading path component is registered as a file,
+			 * and we are trying to make it a directory.  This is
+			 * bad.
+			 */
+			if (!ok_to_replace) {
+				free(pathbuf);
+				return -1;
+			}
+			fprintf(stderr, "removing file '%s' to replace it with a directory to create '%s'.\n", pathbuf, path);
+			remove_cache_entry_at(pos);
+			replaced = 1;
+		}
+		*ep = '/';  /* then restore it and go downwards */
+		cp = ep + 1;
+	}
+	free(pathbuf);
+
+	/* Do we have an entry in the cache that makes our path a prefix
+	 * of it?  That is, are we creating a file where they already expect
+	 * a directory there?
+	 */
+	pos = cache_name_pos(path,
+			     ntohs(create_ce_flags(namelen, stage)));
+
+	/* (0 <= pos) cannot happen because add_cache_entry()
+	 * should have taken care of that case.
+	 */
+	pos = -pos-1;
+
+	/* pos would point at an existing entry that would come immediately
+	 * after our path.  It could be the same as our path in higher stage,
+	 * or different path but in a lower stage.
+	 *
+	 * E.g. when we are inserting path at stage 2,
+	 *
+	 *        1 path
+	 * pos->  3 path
+	 *        2 path/file1
+	 *        3 path/file1
+	 *        2 path/file2
+	 *        2 patho
+	 *
+	 * We need to examine pos, ignore it because it is at different
+	 * stage, examine next to find the path/file at stage 2, and
+	 * complain.  We need to do this until we are not the leading
+	 * path of an existing entry anymore.
 	 */
-	return retval + has_dir_name(ce, pos, ok_to_replace);
+
+	while (pos < active_nr) {
+		struct cache_entry *other = active_cache[pos];
+		if (strncmp(other->name, path, namelen))
+			break; /* it is not our "subdirectory" anymore */
+		if ((ce_stage(other) == stage) &&
+		    other->name[namelen] == '/') {
+			if (!ok_to_replace)
+				return -1;
+			fprintf(stderr, "removing file '%s' under '%s' to be replaced with a file\n", other->name, path);
+			remove_cache_entry_at(pos);
+			replaced = 1;
+			continue; /* cycle without updating pos */
+		}
+		pos++;
+	}
+	return replaced;
 }
 
 int add_cache_entry(struct cache_entry *ce, int option)
@@ -277,6 +279,7 @@ int add_cache_entry(struct cache_entry *
 	int pos;
 	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
 	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
+	int skip_df_check = option & ADD_CACHE_SKIP_DF_CHECK;
 	pos = cache_name_pos(ce->name, ntohs(ce->ce_flags));
 
 	/* existing match? Just replace it */
@@ -302,7 +305,7 @@ int add_cache_entry(struct cache_entry *
 	if (!ok_to_add)
 		return -1;
 
-	if (!ce_stage(ce) && check_file_directory_conflict(ce, pos, ok_to_replace)) {
+	if (!skip_df_check && check_file_directory_conflict(ce, ok_to_replace)) {
 		if (!ok_to_replace)
 			return -1;
 		pos = cache_name_pos(ce->name, ntohs(ce->ce_flags));
diff --git a/t/t1005-read-tree-m-2way-emu23.sh b/t/t1005-read-tree-m-2way-emu23.sh
--- a/t/t1005-read-tree-m-2way-emu23.sh
+++ b/t/t1005-read-tree-m-2way-emu23.sh
@@ -389,7 +389,7 @@ test_expect_success \
      :'
 
 # Emu23 can grok I having more than H.  Make sure we did not
-# botch the conflict tests (Linus code botches this test).
+# botch the conflict tests (fixed).
 test_expect_success \
     'DF vs DF/DF case test (#2).' \
     'rm -f .git/index &&
@@ -400,8 +400,8 @@ test_expect_success \
      # This should fail because I and H have a conflict
      # at DF.
      if git-read-tree --emu23 $treeDF $treeDFDF
-     then true  ;# should be false
-     else false ;# should be true
+     then false
+     else true
      fi'
 
 test_done
diff --git a/tree.c b/tree.c
--- a/tree.c
+++ b/tree.c
@@ -18,7 +18,7 @@ static int read_one_entry(unsigned char 
 	memcpy(ce->name, base, baselen);
 	memcpy(ce->name + baselen, pathname, len+1);
 	memcpy(ce->sha1, sha1, 20);
-	return add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
+	return add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DF_CHECK);
 }
 
 static int read_tree_recursive(void *buffer, unsigned long size,
------------


  parent reply	other threads:[~2005-06-24 23:35 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-06-23 23:20 [PATCH 0/2] D/F conflicts fixes Junio C Hamano
2005-06-23 23:25 ` [PATCH 1/2] Add more tests for read-tree --emu23 Junio C Hamano
2005-06-24 23:40 ` Junio C Hamano [this message]
2005-06-25  0:57   ` [PATCH 2/2] Fix oversimplified optimization for add_cache_entry() Linus Torvalds
2005-06-25  2:40     ` Junio C Hamano
2005-06-25  9:16       ` [PATCH 0/9] " Junio C Hamano
2005-06-25  9:21         ` [PATCH 1/9] fix date parsing for GIT raw commit timestamp format Junio C Hamano
2005-06-25  9:22         ` [PATCH 2/9] git-commit-script: get commit message from an existing one Junio C Hamano
2005-06-25  9:22         ` [PATCH 3/9] git-cherry: find commits not merged upstream Junio C Hamano
2005-06-25  9:23         ` [PATCH 4/9] git-rebase-script: rebase local commits to new upstream head Junio C Hamano
2005-06-25  9:24         ` [PATCH 5/9] Add more tests for read-tree --emu23 Junio C Hamano
2005-06-25  9:24         ` [PATCH 6/9] git-merge-one-file-script: do not misinterpret rm failure Junio C Hamano
2005-06-25  9:25         ` [PATCH 7/9] Fix oversimplified optimization for add_cache_entry() Junio C Hamano
2005-06-25  9:25         ` [PATCH 8/9] http-pull: documentation updates Junio C Hamano
2005-06-25  9:26         ` [PATCH 9/9] Add a bit of developer documentation to pull.h Junio C Hamano
2005-06-26  1:02         ` [RFD] consider "git" wrapper semi-Porcelain Junio C Hamano
2005-06-26  1:21           ` Linus Torvalds
2005-06-26 12:02             ` Junio C Hamano
2005-06-26 12:36               ` Martijn Kuipers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vvf439vdl.fsf@assigned-by-dhcp.cox.net \
    --to=junkio@cox.net \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).