git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
@ 2013-01-24  8:42 Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 2/7] sha1_file: separate alt object db from own repos and submodules's Nguyễn Thái Ngọc Duy
                   ` (6 more replies)
  0 siblings, 7 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy

We currently know if an object is loose or packed. We do not know if
it's from the repo's object database, or via alternates
mechanism. With this patch, sha1_object_info_extended() can tell if an
object comes from alternates source (and which one).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 How about this way instead: we keep track of where objects come from
 so we can verify object source when we create or update something
 that contains SHA-1.

 1/7 and 2/7 prepare for tracking object source. The rest verifies
 that new commits, trees, tags, indexes, refs or reflogs do not refer
 to an external source.

 This adds some cost when add_submodule_odb() is used. I did not
 measure, but I guess the added cost is much smaller compared to
 forking, especially on Windows. No breakages detected by the test
 suite, which is really good (or my code is really broken).

 builtin/index-pack.c |  2 +-
 cache.h              |  4 +++-
 fast-import.c        |  2 +-
 sha1_file.c          | 66 ++++++++++++++++++++++++++++++++++++----------------
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 43d364b..a7de3f8 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1393,7 +1393,7 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
 
 static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
 {
-	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1, NULL);
 
 	if (!p)
 		die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/cache.h b/cache.h
index c257953..92854ab 100644
--- a/cache.h
+++ b/cache.h
@@ -978,6 +978,7 @@ struct pack_window {
 extern struct packed_git {
 	struct packed_git *next;
 	struct pack_window *windows;
+	struct alternate_object_database *alt;
 	off_t pack_size;
 	const void *index_data;
 	size_t index_size;
@@ -1066,7 +1067,7 @@ extern void close_pack_windows(struct packed_git *);
 extern void unuse_pack(struct pack_window **);
 extern void free_pack_by_name(const char *);
 extern void clear_delta_base_cache(void);
-extern struct packed_git *add_packed_git(const char *, int, int);
+extern struct packed_git *add_packed_git(const char *, int, int, struct alternate_object_database *);
 extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
 extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
@@ -1102,6 +1103,7 @@ struct object_info {
 			unsigned int is_delta;
 		} packed;
 	} u;
+	struct alternate_object_database *alt;
 };
 extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
 
diff --git a/fast-import.c b/fast-import.c
index c2a814e..4bf732e 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -964,7 +964,7 @@ static void end_packfile(void)
 		idx_name = keep_pack(create_index());
 
 		/* Register the packfile with core git's machinery. */
-		new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+		new_p = add_packed_git(idx_name, strlen(idx_name), 1, NULL);
 		if (!new_p)
 			die("core git rejected index %s", idx_name);
 		all_packs[pack_id] = new_p;
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..afc7355 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -933,7 +933,8 @@ static void try_to_free_pack_memory(size_t size)
 	release_pack_memory(size, -1);
 }
 
-struct packed_git *add_packed_git(const char *path, int path_len, int local)
+struct packed_git *add_packed_git(const char *path, int path_len, int local,
+				  struct alternate_object_database *alt)
 {
 	static int have_set_try_to_free_routine;
 	struct stat st;
@@ -973,6 +974,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local)
 	p->mtime = st.st_mtime;
 	if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
 		hashclr(p->sha1);
+	p->alt = alt;
 	return p;
 }
 
@@ -1000,7 +1002,8 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
-static void prepare_packed_git_one(char *objdir, int local)
+static void prepare_packed_git_one(char *objdir, int local,
+				   struct alternate_object_database *alt)
 {
 	/* Ensure that this buffer is large enough so that we can
 	   append "/pack/" without clobbering the stack even if
@@ -1041,7 +1044,7 @@ static void prepare_packed_git_one(char *objdir, int local)
 		/* See if it really is a valid .idx file with corresponding
 		 * .pack file that we can map.
 		 */
-		p = add_packed_git(path, len + namelen, local);
+		p = add_packed_git(path, len + namelen, local, alt);
 		if (!p)
 			continue;
 		install_packed_git(p);
@@ -1110,11 +1113,11 @@ void prepare_packed_git(void)
 
 	if (prepare_packed_git_run_once)
 		return;
-	prepare_packed_git_one(get_object_directory(), 1);
+	prepare_packed_git_one(get_object_directory(), 1, NULL);
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
 		alt->name[-1] = 0;
-		prepare_packed_git_one(alt->base, 0);
+		prepare_packed_git_one(alt->base, 0, alt);
 		alt->name[-1] = '/';
 	}
 	rearrange_packed_git();
@@ -1215,15 +1218,19 @@ static int git_open_noatime(const char *name)
 	}
 }
 
-static int open_sha1_file(const unsigned char *sha1)
+static int open_sha1_file(const unsigned char *sha1,
+			  struct alternate_object_database **p_alt)
 {
 	int fd;
 	char *name = sha1_file_name(sha1);
 	struct alternate_object_database *alt;
 
 	fd = git_open_noatime(name);
-	if (fd >= 0)
+	if (fd >= 0) {
+		if (p_alt)
+			*p_alt = NULL;
 		return fd;
+	}
 
 	prepare_alt_odb();
 	errno = ENOENT;
@@ -1231,18 +1238,23 @@ static int open_sha1_file(const unsigned char *sha1)
 		name = alt->name;
 		fill_sha1_path(name, sha1);
 		fd = git_open_noatime(alt->base);
-		if (fd >= 0)
+		if (fd >= 0) {
+			if (p_alt)
+				*p_alt = alt;
 			return fd;
+		}
 	}
 	return -1;
 }
 
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+static void *map_sha1_file_extended(const unsigned char *sha1,
+				    unsigned long *size,
+				    struct alternate_object_database **p_alt)
 {
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1);
+	fd = open_sha1_file(sha1, p_alt);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1261,6 +1273,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 	return map;
 }
 
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+	return map_sha1_file_extended(sha1, size, NULL);
+}
+
 /*
  * There used to be a second loose object header format which
  * was meant to mimic the in-pack format, allowing for direct
@@ -2096,7 +2113,9 @@ static int fill_pack_entry(const unsigned char *sha1,
 	return 1;
 }
 
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry(const unsigned char *sha1,
+			   struct pack_entry *e,
+			   struct alternate_object_database **p_alt)
 {
 	struct packed_git *p;
 
@@ -2104,14 +2123,19 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) {
+		if (p_alt)
+			*p_alt = last_found_pack->alt;
 		return 1;
+	}
 
 	for (p = packed_git; p; p = p->next) {
 		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
 			continue;
 
 		last_found_pack = p;
+		if (p_alt)
+			*p_alt = p->alt;
 		return 1;
 	}
 	return 0;
@@ -2130,7 +2154,9 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 
 }
 
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
+static int sha1_loose_object_info(const unsigned char *sha1,
+				  unsigned long *sizep,
+				  struct alternate_object_database **p_alt)
 {
 	int status;
 	unsigned long mapsize, size;
@@ -2138,7 +2164,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
 	git_zstream stream;
 	char hdr[32];
 
-	map = map_sha1_file(sha1, &mapsize);
+	map = map_sha1_file_extended(sha1, &mapsize, p_alt);
 	if (!map)
 		return error("unable to find %s", sha1_to_hex(sha1));
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
@@ -2168,9 +2194,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 		return co->type;
 	}
 
-	if (!find_pack_entry(sha1, &e)) {
+	if (!find_pack_entry(sha1, &e, &oi->alt)) {
 		/* Most likely it's a loose object. */
-		status = sha1_loose_object_info(sha1, oi->sizep);
+		status = sha1_loose_object_info(sha1, oi->sizep, &oi->alt);
 		if (status >= 0) {
 			oi->whence = OI_LOOSE;
 			return status;
@@ -2178,7 +2204,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 
 		/* Not a loose object; someone else may have just packed it. */
 		reprepare_packed_git();
-		if (!find_pack_entry(sha1, &e))
+		if (!find_pack_entry(sha1, &e, &oi->alt))
 			return status;
 	}
 
@@ -2213,7 +2239,7 @@ static void *read_packed_sha1(const unsigned char *sha1,
 	struct pack_entry e;
 	void *data;
 
-	if (!find_pack_entry(sha1, &e))
+	if (!find_pack_entry(sha1, &e, NULL))
 		return NULL;
 	data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
 	if (!data) {
@@ -2618,14 +2644,14 @@ int has_pack_index(const unsigned char *sha1)
 int has_sha1_pack(const unsigned char *sha1)
 {
 	struct pack_entry e;
-	return find_pack_entry(sha1, &e);
+	return find_pack_entry(sha1, &e, NULL);
 }
 
 int has_sha1_file(const unsigned char *sha1)
 {
 	struct pack_entry e;
 
-	if (find_pack_entry(sha1, &e))
+	if (find_pack_entry(sha1, &e, NULL))
 		return 1;
 	return has_loose_object(sha1);
 }
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/7] sha1_file: separate alt object db from own repos and submodules's
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 3/7] sha1_file: refuse to write commits referring to external objects Nguyễn Thái Ngọc Duy
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy

A submodule's object database may be imported to in-core object pool
for a quick peek without paying the price of running a separate git
command. These databases are marked in for stricter checks later to
avoid accidentially refering to a submodule's SHA-1 from main repo
(except in gitlinks).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h       |  4 +++-
 environment.c |  2 ++
 sha1_file.c   | 38 +++++++++++++++++++++++++++++---------
 submodule.c   |  2 +-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/cache.h b/cache.h
index 92854ab..b8d5826 100644
--- a/cache.h
+++ b/cache.h
@@ -561,6 +561,7 @@ extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
+extern int object_database_contaminated;
 
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
@@ -957,11 +958,12 @@ extern void remove_scheduled_dirs(void);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
+	int external;
 	char *name;
 	char base[FLEX_ARRAY]; /* more */
 } *alt_odb_list;
 extern void prepare_alt_odb(void);
-extern void read_info_alternates(const char * relative_base, int depth);
+extern void read_external_info_alternates(const char * relative_base, int depth);
 extern void add_to_alternates_file(const char *reference);
 typedef int alt_odb_fn(struct alternate_object_database *, void *);
 extern void foreach_alt_odb(alt_odb_fn, void*);
diff --git a/environment.c b/environment.c
index 85edd7f..3c90d95 100644
--- a/environment.c
+++ b/environment.c
@@ -65,6 +65,8 @@ unsigned long pack_size_limit_cfg;
 /* Parallel index stat data preload? */
 int core_preload_index = 0;
 
+int object_database_contaminated;
+
 /* This is set by setup_git_dir_gently() and/or git_default_config() */
 char *git_work_tree_cfg;
 static char *work_tree;
diff --git a/sha1_file.c b/sha1_file.c
index afc7355..af71122 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -58,6 +58,9 @@ static struct cached_object empty_tree = {
 
 static struct packed_git *last_found_pack;
 
+static void read_info_alternates(const char * relative_base,
+				 int depth, int external);
+
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
 	int i;
@@ -247,7 +250,10 @@ static int git_open_noatime(const char *name);
  * SHA1, an extra slash for the first level indirection, and the
  * terminating NUL.
  */
-static int link_alt_odb_entry(const char *entry, const char *relative_base, int depth)
+static int link_alt_odb_entry(const char *entry,
+			      const char *relative_base,
+			      int depth,
+			      int external)
 {
 	const char *objdir = get_object_directory();
 	struct alternate_object_database *ent;
@@ -277,6 +283,7 @@ static int link_alt_odb_entry(const char *entry, const char *relative_base, int
 	memcpy(ent->base, pathbuf.buf, pfxlen);
 	strbuf_release(&pathbuf);
 
+	ent->external = external;
 	ent->name = ent->base + pfxlen + 1;
 	ent->base[pfxlen + 3] = '/';
 	ent->base[pfxlen] = ent->base[entlen-1] = 0;
@@ -310,15 +317,19 @@ static int link_alt_odb_entry(const char *entry, const char *relative_base, int
 	ent->next = NULL;
 
 	/* recursively add alternates */
-	read_info_alternates(ent->base, depth + 1);
+	read_info_alternates(ent->base, depth + 1, 0);
 
 	ent->base[pfxlen] = '/';
 
+	if (external)
+		object_database_contaminated = 1;
+
 	return 0;
 }
 
 static void link_alt_odb_entries(const char *alt, int len, int sep,
-				 const char *relative_base, int depth)
+				 const char *relative_base,
+				 int depth, int external)
 {
 	struct string_list entries = STRING_LIST_INIT_NODUP;
 	char *alt_copy;
@@ -340,14 +351,16 @@ static void link_alt_odb_entries(const char *alt, int len, int sep,
 			error("%s: ignoring relative alternate object store %s",
 					relative_base, entry);
 		} else {
-			link_alt_odb_entry(entry, relative_base, depth);
+			link_alt_odb_entry(entry, relative_base,
+					   depth, external);
 		}
 	}
 	string_list_clear(&entries, 0);
 	free(alt_copy);
 }
 
-void read_info_alternates(const char * relative_base, int depth)
+static void read_info_alternates(const char * relative_base,
+				 int depth, int external)
 {
 	char *map;
 	size_t mapsz;
@@ -371,11 +384,18 @@ void read_info_alternates(const char * relative_base, int depth)
 	map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 
-	link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);
+	link_alt_odb_entries(map, mapsz, '\n', relative_base,
+			     depth, external);
 
 	munmap(map, mapsz);
 }
 
+void read_external_info_alternates(const char *relative_base,
+				   int depth)
+{
+	read_info_alternates(relative_base, depth, 1);
+}
+
 void add_to_alternates_file(const char *reference)
 {
 	struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
@@ -385,7 +405,7 @@ void add_to_alternates_file(const char *reference)
 	if (commit_lock_file(lock))
 		die("could not close alternates file");
 	if (alt_odb_tail)
-		link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0);
+		link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0, 0);
 }
 
 void foreach_alt_odb(alt_odb_fn fn, void *cb)
@@ -409,9 +429,9 @@ void prepare_alt_odb(void)
 	if (!alt) alt = "";
 
 	alt_odb_tail = &alt_odb_list;
-	link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
+	link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0, 0);
 
-	read_info_alternates(get_object_directory(), 0);
+	read_info_alternates(get_object_directory(), 0, 0);
 }
 
 static int has_loose_object_local(const unsigned char *sha1)
diff --git a/submodule.c b/submodule.c
index 2f55436..8e4e2ec 100644
--- a/submodule.c
+++ b/submodule.c
@@ -65,7 +65,7 @@ static int add_submodule_odb(const char *path)
 	alt_odb_list = alt_odb;
 
 	/* add possible alternates from the submodule */
-	read_info_alternates(objects_directory.buf, 0);
+	read_external_info_alternates(objects_directory.buf, 0);
 	prepare_alt_odb();
 done:
 	strbuf_release(&objects_directory);
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/7] sha1_file: refuse to write commits referring to external objects
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 2/7] sha1_file: separate alt object db from own repos and submodules's Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 4/7] sha1_file: refuse to write trees " Nguyễn Thái Ngọc Duy
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 sha1_file.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index af71122..5948dcb 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2615,12 +2615,46 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 	return move_temp_to_file(tmp_file, filename);
 }
 
+static void check_sha1_file_for_external_source(const char *buf,
+						unsigned long len,
+						const char *type)
+{
+	struct object_info oi;
+	unsigned char sha1[20];
+
+	/*
+	 * It is assumed that the object is well formatted. Otherwise
+	 * die() is waiting..
+	 */
+	if (!strcmp(type, "commit")) {
+		const char *tail = buf + len;
+		if (get_sha1_hex(buf + 5, sha1) < 0 ||
+		    sha1_object_info_extended(sha1, &oi) != OBJ_TREE ||
+		    (oi.alt && oi.alt->external))
+			die("cannot create a commit with external tree %s",
+			    sha1_to_hex(sha1));
+		buf += 46; /* "tree " + "hex sha1" + "\n" */
+
+		while (buf + 48 < tail && !memcmp(buf, "parent ", 7)) {
+			if (get_sha1_hex(buf + 7, sha1) < 0 ||
+			    sha1_object_info_extended(sha1, &oi) != OBJ_COMMIT ||
+			    (oi.alt && oi.alt->external))
+				die("cannot create a commit with external parent %s",
+				    sha1_to_hex(sha1));
+			buf += 48; /* "parent " + hex sha1 + "\n" */
+		}
+	}
+}
+
 int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 {
 	unsigned char sha1[20];
 	char hdr[32];
 	int hdrlen;
 
+	if (object_database_contaminated)
+		check_sha1_file_for_external_source(buf, len, type);
+
 	/* Normally if we have it in the pack then we do not bother writing
 	 * it out into .git/objects/??/?{38} file.
 	 */
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 4/7] sha1_file: refuse to write trees referring to external objects
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 2/7] sha1_file: separate alt object db from own repos and submodules's Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 3/7] sha1_file: refuse to write commits referring to external objects Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 5/7] sha1_file: refuse to write tags " Nguyễn Thái Ngọc Duy
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 sha1_file.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index 5948dcb..ec3a040 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2643,6 +2643,29 @@ static void check_sha1_file_for_external_source(const char *buf,
 				    sha1_to_hex(sha1));
 			buf += 48; /* "parent " + hex sha1 + "\n" */
 		}
+	} else if (!strcmp(type, "tree")) {
+		struct tree_desc desc;
+		struct name_entry entry;
+
+		init_tree_desc(&desc, buf, len);
+		while (tree_entry(&desc, &entry))
+			switch (entry.mode) {
+			case S_IFGITLINK:
+				/* we already know we probably don't have this SHA-1 */
+				break;
+			case S_IFDIR:
+				if (sha1_object_info_extended(entry.sha1, &oi) != OBJ_TREE ||
+				    (oi.alt && oi.alt->external))
+					die("cannot create a tree with external tree %s",
+					    sha1_to_hex(entry.sha1));
+				break;
+			default:
+				if (sha1_object_info_extended(entry.sha1, &oi) != OBJ_BLOB ||
+				    (oi.alt && oi.alt->external))
+					die("cannot create a tree with external blob %s",
+					    sha1_to_hex(entry.sha1));
+				break;
+			}
 	}
 }
 
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 5/7] sha1_file: refuse to write tags referring to external objects
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2013-01-24  8:42 ` [PATCH 4/7] sha1_file: refuse to write trees " Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24  8:42 ` [PATCH 6/7] read-cache: refuse to create index " Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 sha1_file.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index ec3a040..01681e5 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2666,6 +2666,12 @@ static void check_sha1_file_for_external_source(const char *buf,
 					    sha1_to_hex(entry.sha1));
 				break;
 			}
+	} else if (!strcmp(type, "tag")) {
+		if (get_sha1_hex(buf + 7, sha1) < 0 ||
+		    sha1_object_info_extended(sha1, &oi) != OBJ_TREE ||
+		    (oi.alt && oi.alt->external))
+			die("cannot create a tag with external tree %s",
+			    sha1_to_hex(sha1));
 	}
 }
 
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
                   ` (3 preceding siblings ...)
  2013-01-24  8:42 ` [PATCH 5/7] sha1_file: refuse to write tags " Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24 19:15   ` Junio C Hamano
  2013-01-24  8:42 ` [PATCH 7/7] refs: do not update ref(log) " Nguyễn Thái Ngọc Duy
  2013-01-24 17:45 ` [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Junio C Hamano
  6 siblings, 1 reply; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 read-cache.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index fda78bc..4579215 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1720,6 +1720,26 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 			      ce->name + common, ce_namelen(ce) - common);
 	}
 
+	if (object_database_contaminated) {
+		struct object_info oi;
+		switch (ce->ce_mode) {
+		case S_IFGITLINK:
+			break;
+		case S_IFDIR:
+			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_TREE ||
+			    (oi.alt && oi.alt->external))
+				die("cannot create index referring to an external tree %s",
+				    sha1_to_hex(ce->sha1));
+			break;
+		default:
+			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_BLOB ||
+				    (oi.alt && oi.alt->external))
+				die("cannot create index referring to an external blob %s",
+				    sha1_to_hex(ce->sha1));
+			break;
+		}
+	}
+
 	result = ce_write(c, fd, ondisk, size);
 	free(ondisk);
 	return result;
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 7/7] refs: do not update ref(log) referring to external objects
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
                   ` (4 preceding siblings ...)
  2013-01-24  8:42 ` [PATCH 6/7] read-cache: refuse to create index " Nguyễn Thái Ngọc Duy
@ 2013-01-24  8:42 ` Nguyễn Thái Ngọc Duy
  2013-01-24 17:45 ` [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Junio C Hamano
  6 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 refs.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/refs.c b/refs.c
index 541fec2..86003af 100644
--- a/refs.c
+++ b/refs.c
@@ -2014,6 +2014,18 @@ static int log_ref_write(const char *refname, const unsigned char *old_sha1,
 	char *logrec;
 	const char *committer;
 
+	if (object_database_contaminated) {
+		struct object_info oi;
+		if (sha1_object_info_extended(old_sha1, &oi) == -1 ||
+		    (oi.alt && oi.alt->external))
+			return error("cannot update reflog pointing to an external SHA-1 %s",
+				     sha1_to_hex(old_sha1));
+		if (sha1_object_info_extended(new_sha1, &oi) == -1 ||
+		    (oi.alt && oi.alt->external))
+			return error("cannot update reflog pointing to an external SHA-1 %s",
+				     sha1_to_hex(new_sha1));
+	}
+
 	if (log_all_ref_updates < 0)
 		log_all_ref_updates = !is_bare_repository();
 
@@ -2054,6 +2066,18 @@ int write_ref_sha1(struct ref_lock *lock,
 
 	if (!lock)
 		return -1;
+
+	if (object_database_contaminated) {
+		struct object_info oi;
+		if (sha1_object_info_extended(sha1, &oi) == -1 ||
+		    (oi.alt && oi.alt->external)) {
+			error("cannot update ref pointing to an external SHA-1 %s",
+			      sha1_to_hex(sha1));
+			unlock_ref(lock);
+			return -1;
+		}
+	}
+
 	if (!lock->force_write && !hashcmp(lock->old_sha1, sha1)) {
 		unlock_ref(lock);
 		return 0;
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
  2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
                   ` (5 preceding siblings ...)
  2013-01-24  8:42 ` [PATCH 7/7] refs: do not update ref(log) " Nguyễn Thái Ngọc Duy
@ 2013-01-24 17:45 ` Junio C Hamano
  2013-01-25  1:38   ` Duy Nguyen
  6 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-24 17:45 UTC (permalink / raw
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Jens Lehmann

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> We currently know if an object is loose or packed. We do not know if
> it's from the repo's object database, or via alternates
> mechanism. With this patch, sha1_object_info_extended() can tell if an
> object comes from alternates source (and which one).
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  How about this way instead: we keep track of where objects come from
>  so we can verify object source when we create or update something
>  that contains SHA-1.

The overall approach taken by this series may be worth considering, but
I do not think the check implemented here is correct.

An object may be found in an alternate odb but we may also have our
own copy of the same object.  You need to prove that a suspicious
object is visible to us *ONLY* through add_submodule_odb().

Once you do add_submodule_odb() to contaminate our object pool, you
make everything a suspicious object that needs to be checked; that
is the worst part of the story.

If I understand the test case that triggers the issue correctly, it
makes a merge in the top-level superproject, and while doing so,
tries to three-way merge gitlink changes, so that it can detect
fast-forward situation inside the submodule.  It should just be done
with running "git merge-base A B" there; after all, we are in the
tree-wide merge of the top-level superproject, which is already a
heavy-weight operation. I do not see a reason to make it more brittle
than necessary.

If we rip out add_submodule_odb() we do not have to ever worry about
this kind of object database breakages that are hard to track down.
It is an optimization to allow the code become more brittle, violate
the project boundary, and break the object database faster as a
result.  That is not a good optimization at all.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-24  8:42 ` [PATCH 6/7] read-cache: refuse to create index " Nguyễn Thái Ngọc Duy
@ 2013-01-24 19:15   ` Junio C Hamano
  2013-01-25  2:00     ` Duy Nguyen
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-24 19:15 UTC (permalink / raw
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Jens Lehmann

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  read-cache.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>
> diff --git a/read-cache.c b/read-cache.c
> index fda78bc..4579215 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1720,6 +1720,26 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
>  			      ce->name + common, ce_namelen(ce) - common);
>  	}
>  
> +	if (object_database_contaminated) {
> +		struct object_info oi;
> +		switch (ce->ce_mode) {
> +		case S_IFGITLINK:
> +			break;
> +		case S_IFDIR:
> +			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_TREE ||

This case should never happen.  Do we store any tree object in the
index in the first place?

> +			    (oi.alt && oi.alt->external))
> +				die("cannot create index referring to an external tree %s",
> +				    sha1_to_hex(ce->sha1));
> +			break;
> +		default:
> +			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_BLOB ||
> +				    (oi.alt && oi.alt->external))
> +				die("cannot create index referring to an external blob %s",
> +				    sha1_to_hex(ce->sha1));
> +			break;
> +		}
> +	}
> +
>  	result = ce_write(c, fd, ondisk, size);
>  	free(ondisk);
>  	return result;

I think the check you want to add is to the cache-tree code; before
writing the new index out, if you suspect you might have called
cache_tree_update() before, invalidate any hierarchy that is
recorded to produce a tree object that refers to an object that is
only available through external object store.

As to the logic to check if a object is something that we should
refuse to create a new dependent, I think you should not butcher
sha1_object_info_extended(), and instead add a new call that checks
if a given SHA-1 yields an object if you ignore alternate object
databases that are marked as "external", whose signature may
resemble:

	int has_sha1_file_proper(const unsigned char *sha1);

or something.

This is a tangent, but in addition, you may want to add an even
narrower variant that checks the same but ignoring _all_ alternate
object databases, "external" or not:

        int has_sha1_file_local(const unsigned char *sha1);

That may be useful if we want to later make the alternate safer to
use; instead of letting the codepath to create an object ask
has_sha1_file() to see if it already exists and refrain from writing
a new copy, we switch to ask has_sha1_file_locally() and even if an
alternate object database we borrow from has it, we keep our own
copy in our repository.

Not tested beyond syntax, but rebasing something like this patch on
top of your "mark external sources" change may take us closer to
that.

 cache.h     |  2 ++
 sha1_file.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/cache.h b/cache.h
index 1f96f65..8d651b6 100644
--- a/cache.h
+++ b/cache.h
@@ -766,6 +766,8 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename);
 
 extern int has_sha1_pack(const unsigned char *sha1);
 extern int has_sha1_file(const unsigned char *sha1);
+extern int has_sha1_file_proper(const unsigned char *sha1);
+extern int has_sha1_file_local(const unsigned char *sha1);
 extern int has_loose_object_nonlocal(const unsigned char *sha1);
 
 extern int has_pack_index(const unsigned char *sha1);
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..1a3192a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -420,11 +420,18 @@ static int has_loose_object_local(const unsigned char *sha1)
 	return !access(name, F_OK);
 }
 
-int has_loose_object_nonlocal(const unsigned char *sha1)
+enum odb_origin {
+	odb_default = 0, odb_proper, odb_local
+};
+
+static int has_loose_object_nonlocal_limited(const unsigned char *sha1,
+					     enum odb_origin origin)
 {
 	struct alternate_object_database *alt;
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
+		if (origin == odb_proper && 0 /* alt->external */)
+			continue;
 		fill_sha1_path(alt->name, sha1);
 		if (!access(alt->base, F_OK))
 			return 1;
@@ -432,6 +439,11 @@ int has_loose_object_nonlocal(const unsigned char *sha1)
 	return 0;
 }
 
+int has_loose_object_nonlocal(const unsigned char *sha1)
+{
+	return has_loose_object_nonlocal_limited(sha1, odb_default);
+}
+
 static int has_loose_object(const unsigned char *sha1)
 {
 	return has_loose_object_local(sha1) ||
@@ -2062,12 +2074,28 @@ int is_pack_valid(struct packed_git *p)
 	return !open_packed_git(p);
 }
 
+static int limit_pack_odb_origin(enum odb_origin origin, struct packed_git *p)
+{
+	switch (origin) {
+	default:
+		return 0;
+	case odb_proper:
+		return 0; /* p->external */
+	case odb_local:
+		return !p->pack_local;
+	}
+}
+
 static int fill_pack_entry(const unsigned char *sha1,
 			   struct pack_entry *e,
-			   struct packed_git *p)
+			   struct packed_git *p,
+			   enum odb_origin origin)
 {
 	off_t offset;
 
+	if (limit_pack_odb_origin(origin, p))
+		return 0;
+
 	if (p->num_bad_objects) {
 		unsigned i;
 		for (i = 0; i < p->num_bad_objects; i++)
@@ -2096,7 +2124,8 @@ static int fill_pack_entry(const unsigned char *sha1,
 	return 1;
 }
 
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry_limited(const unsigned char *sha1, struct pack_entry *e,
+				   enum odb_origin origin)
 {
 	struct packed_git *p;
 
@@ -2104,11 +2133,11 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack, origin))
 		return 1;
 
 	for (p = packed_git; p; p = p->next) {
-		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+		if (p == last_found_pack || !fill_pack_entry(sha1, e, p, origin))
 			continue;
 
 		last_found_pack = p;
@@ -2117,6 +2146,11 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	return 0;
 }
 
+static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+{
+	return find_pack_entry_limited(sha1, e, odb_default);
+}
+
 struct packed_git *find_sha1_pack(const unsigned char *sha1,
 				  struct packed_git *packs)
 {
@@ -2630,6 +2664,22 @@ int has_sha1_file(const unsigned char *sha1)
 	return has_loose_object(sha1);
 }
 
+int has_sha1_file_local(const unsigned char *sha1)
+{
+	struct pack_entry e;
+	if (find_pack_entry_limited(sha1, &e, odb_local))
+		return 1;
+	return has_loose_object_local(sha1);
+}
+
+int has_sha1_file_proper(const unsigned char *sha1)
+{
+	struct pack_entry e;
+	return (!!find_pack_entry_limited(sha1, &e, odb_proper) ||
+		has_loose_object_local(sha1) ||
+		has_loose_object_nonlocal_limited(sha1, odb_proper));
+}
+
 static void check_tree(const void *buf, size_t size)
 {
 	struct tree_desc desc;

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
  2013-01-24 17:45 ` [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Junio C Hamano
@ 2013-01-25  1:38   ` Duy Nguyen
  2013-01-25  3:58     ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Duy Nguyen @ 2013-01-25  1:38 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Fri, Jan 25, 2013 at 12:45 AM, Junio C Hamano <gitster@pobox.com> wrote:
>>  How about this way instead: we keep track of where objects come from
>>  so we can verify object source when we create or update something
>>  that contains SHA-1.
>
> The overall approach taken by this series may be worth considering, but
> I do not think the check implemented here is correct.
>
> An object may be found in an alternate odb but we may also have our
> own copy of the same object.  You need to prove that a suspicious
> object is visible to us *ONLY* through add_submodule_odb().

The way alt odbs are linked (new odbs area always at the end), if we
have the same copy, their copy will never be read (we check out alt
odbs from head to tail). So those duplicate suspicious objects are
actually invisible to us.

> Once you do add_submodule_odb() to contaminate our object pool, you
> make everything a suspicious object that needs to be checked; that
> is the worst part of the story.

And because we never really touch their alt copy, the returned alt
source is "ours", trusted and not checked. The check should only occur
on objects that we do not have.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-24 19:15   ` Junio C Hamano
@ 2013-01-25  2:00     ` Duy Nguyen
  2013-01-25 19:00       ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Duy Nguyen @ 2013-01-25  2:00 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Fri, Jan 25, 2013 at 2:15 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>> ---
>>  read-cache.c | 20 ++++++++++++++++++++
>>  1 file changed, 20 insertions(+)
>>
>> diff --git a/read-cache.c b/read-cache.c
>> index fda78bc..4579215 100644
>> --- a/read-cache.c
>> +++ b/read-cache.c
>> @@ -1720,6 +1720,26 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
>>                             ce->name + common, ce_namelen(ce) - common);
>>       }
>>
>> +     if (object_database_contaminated) {
>> +             struct object_info oi;
>> +             switch (ce->ce_mode) {
>> +             case S_IFGITLINK:
>> +                     break;
>> +             case S_IFDIR:
>> +                     if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_TREE ||
>
> This case should never happen.  Do we store any tree object in the
> index in the first place?

No it was copy/paste mistake (from cache-tree.c, before I deleted it
and added check_sha1_file_for_external_source in 3/7)

>> +                         (oi.alt && oi.alt->external))
>> +                             die("cannot create index referring to an external tree %s",
>> +                                 sha1_to_hex(ce->sha1));
>> +                     break;
>> +             default:
>> +                     if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_BLOB ||
>> +                                 (oi.alt && oi.alt->external))
>> +                             die("cannot create index referring to an external blob %s",
>> +                                 sha1_to_hex(ce->sha1));
>> +                     break;
>> +             }
>> +     }
>> +
>>       result = ce_write(c, fd, ondisk, size);
>>       free(ondisk);
>>       return result;
>
> I think the check you want to add is to the cache-tree code; before
> writing the new index out, if you suspect you might have called
> cache_tree_update() before, invalidate any hierarchy that is
> recorded to produce a tree object that refers to an object that is
> only available through external object store.

cache-tree is covered by check_sha1_file_for_external_source() when it
actually writes a tree (dry-run mode goes through unchecked). Even
when cache-tree is not involved, I do not want the index to point to
an non-existing SHA-1 ("git diff --cached" may fail next time, for
example).

> As to the logic to check if a object is something that we should
> refuse to create a new dependent, I think you should not butcher
> sha1_object_info_extended(), and instead add a new call that checks
> if a given SHA-1 yields an object if you ignore alternate object
> databases that are marked as "external", whose signature may
> resemble:
>
>         int has_sha1_file_proper(const unsigned char *sha1);
>
> or something.

Agreed.

> This is a tangent, but in addition, you may want to add an even
> narrower variant that checks the same but ignoring _all_ alternate
> object databases, "external" or not:
>
>         int has_sha1_file_local(const unsigned char *sha1);
>
> That may be useful if we want to later make the alternate safer to
> use; instead of letting the codepath to create an object ask
> has_sha1_file() to see if it already exists and refrain from writing
> a new copy, we switch to ask has_sha1_file_locally() and even if an
> alternate object database we borrow from has it, we keep our own
> copy in our repository.
>
> Not tested beyond syntax, but rebasing something like this patch on
> top of your "mark external sources" change may take us closer to
> that.

Thanks, will incorporate in the reroll.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
  2013-01-25  1:38   ` Duy Nguyen
@ 2013-01-25  3:58     ` Junio C Hamano
  2013-01-25  4:02       ` Duy Nguyen
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-25  3:58 UTC (permalink / raw
  To: Duy Nguyen; +Cc: git, Jens Lehmann

Duy Nguyen <pclouds@gmail.com> writes:

> On Fri, Jan 25, 2013 at 12:45 AM, Junio C Hamano <gitster@pobox.com> wrote:
>>>  How about this way instead: we keep track of where objects come from
>>>  so we can verify object source when we create or update something
>>>  that contains SHA-1.
>>
>> The overall approach taken by this series may be worth considering, but
>> I do not think the check implemented here is correct.
>>
>> An object may be found in an alternate odb but we may also have our
>> own copy of the same object.  You need to prove that a suspicious
>> object is visible to us *ONLY* through add_submodule_odb().
>
> The way alt odbs are linked (new odbs area always at the end), if we
> have the same copy, their copy will never be read (we check out alt
> odbs from head to tail). So those duplicate suspicious objects are
> actually invisible to us.

The way I read find_pack_entry() is that our heuristics to start
our search by looking at the pack in which we found an object the
last time will invalidate your assumption above.  Am I mistaken?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
  2013-01-25  3:58     ` Junio C Hamano
@ 2013-01-25  4:02       ` Duy Nguyen
  0 siblings, 0 replies; 21+ messages in thread
From: Duy Nguyen @ 2013-01-25  4:02 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Fri, Jan 25, 2013 at 10:58 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Fri, Jan 25, 2013 at 12:45 AM, Junio C Hamano <gitster@pobox.com> wrote:
>>>>  How about this way instead: we keep track of where objects come from
>>>>  so we can verify object source when we create or update something
>>>>  that contains SHA-1.
>>>
>>> The overall approach taken by this series may be worth considering, but
>>> I do not think the check implemented here is correct.
>>>
>>> An object may be found in an alternate odb but we may also have our
>>> own copy of the same object.  You need to prove that a suspicious
>>> object is visible to us *ONLY* through add_submodule_odb().
>>
>> The way alt odbs are linked (new odbs area always at the end), if we
>> have the same copy, their copy will never be read (we check out alt
>> odbs from head to tail). So those duplicate suspicious objects are
>> actually invisible to us.
>
> The way I read find_pack_entry() is that our heuristics to start
> our search by looking at the pack in which we found an object the
> last time will invalidate your assumption above.  Am I mistaken?

No, you are right. Loose objects are always searched from the start of
alt odb list. Packs are searched till the end then back to head again.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-25  2:00     ` Duy Nguyen
@ 2013-01-25 19:00       ` Junio C Hamano
  2013-01-28  5:48         ` Duy Nguyen
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-25 19:00 UTC (permalink / raw
  To: Duy Nguyen; +Cc: git, Jens Lehmann

Duy Nguyen <pclouds@gmail.com> writes:

> .... Even
> when cache-tree is not involved, I do not want the index to point to
> an non-existing SHA-1 ("git diff --cached" may fail next time, for
> example).

I think we have tests that explicitly add SHA-1 that names an object
that does not exist to the index and check failures; you may have to
think what to do with them.

>> This is a tangent, but in addition, you may want to add an even
>> narrower variant that checks the same but ignoring _all_ alternate
>> object databases, "external" or not:
>>
>>         int has_sha1_file_local(const unsigned char *sha1);
>>
>> That may be useful if we want to later make the alternate safer to
>> use; instead of letting the codepath to create an object ask
>> has_sha1_file() to see if it already exists and refrain from writing
>> a new copy, we switch to ask has_sha1_file_locally() and even if an
>> alternate object database we borrow from has it, we keep our own
>> copy in our repository.

This is not a tangent, but if you want to go this "forbid making our
repository depend on objects we do not have but we know about after
we peek submodule odb" route [*1*], write_sha1_file() needs to be
told about has_sha1_file_proper().  We may "git add" a new blob in
the superproject, the blob may not yet exist in *our* repository,
but may happen to already exist in the submodue odb.  In such a
case, write_sha1_file() has to write that blob in our repository,
without the existing has_sha1_file() check bypassing it.  Otherwise
our attempt to create a tree that contains that blob will fail,
saying that the blob only seems to exist to us via submodule odb but
not in our repository.


[Footnote]

*1* which I do not necessarily agree with---I am in favor of getting
rid of add_submodule_odb() to fix these issues at the root cause of
them.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-25 19:00       ` Junio C Hamano
@ 2013-01-28  5:48         ` Duy Nguyen
  2013-01-28  5:54           ` Junio C Hamano
  2013-01-28  6:18           ` Duy Nguyen
  0 siblings, 2 replies; 21+ messages in thread
From: Duy Nguyen @ 2013-01-28  5:48 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Sat, Jan 26, 2013 at 2:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
> This is not a tangent, but if you want to go this "forbid making our
> repository depend on objects we do not have but we know about after
> we peek submodule odb" route [*1*], write_sha1_file() needs to be
> told about has_sha1_file_proper().  We may "git add" a new blob in
> the superproject, the blob may not yet exist in *our* repository,
> but may happen to already exist in the submodue odb.  In such a
> case, write_sha1_file() has to write that blob in our repository,
> without the existing has_sha1_file() check bypassing it.  Otherwise
> our attempt to create a tree that contains that blob will fail,
> saying that the blob only seems to exist to us via submodule odb but
> not in our repository.

Another thing needs to be done for this to work. The current reading
order is packs first, loose objects next. If we create a local loose
duplicate of an alternate packed object, our local version will never
be read. Regardless the submodule odb issue, I think we should prefer
reading local loose objects over alternate packed ones.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  5:48         ` Duy Nguyen
@ 2013-01-28  5:54           ` Junio C Hamano
  2013-01-28  5:57             ` Duy Nguyen
  2013-01-28  6:18           ` Duy Nguyen
  1 sibling, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-28  5:54 UTC (permalink / raw
  To: Duy Nguyen; +Cc: git, Jens Lehmann

Duy Nguyen <pclouds@gmail.com> writes:

> On Sat, Jan 26, 2013 at 2:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> This is not a tangent, but if you want to go this "forbid making our
>> repository depend on objects we do not have but we know about after
>> we peek submodule odb" route [*1*], write_sha1_file() needs to be
>> told about has_sha1_file_proper().  We may "git add" a new blob in
>> the superproject, the blob may not yet exist in *our* repository,
>> but may happen to already exist in the submodue odb.  In such a
>> case, write_sha1_file() has to write that blob in our repository,
>> without the existing has_sha1_file() check bypassing it.  Otherwise
>> our attempt to create a tree that contains that blob will fail,
>> saying that the blob only seems to exist to us via submodule odb but
>> not in our repository.
>
> Another thing needs to be done for this to work. The current reading

For *what* to work???  I think the performance consideration is the
only thing that should drive the read-order; correctness should not
be affected.

> order is packs first, loose objects next. If we create a local loose
> duplicate of an alternate packed object, our local version will never
> be read. Regardless the submodule odb issue, I think we should prefer
> reading local loose objects over alternate packed ones.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  5:54           ` Junio C Hamano
@ 2013-01-28  5:57             ` Duy Nguyen
  2013-01-28  6:06               ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Duy Nguyen @ 2013-01-28  5:57 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Mon, Jan 28, 2013 at 12:54 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Sat, Jan 26, 2013 at 2:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
>>> This is not a tangent, but if you want to go this "forbid making our
>>> repository depend on objects we do not have but we know about after
>>> we peek submodule odb" route [*1*], write_sha1_file() needs to be
>>> told about has_sha1_file_proper().  We may "git add" a new blob in
>>> the superproject, the blob may not yet exist in *our* repository,
>>> but may happen to already exist in the submodue odb.  In such a
>>> case, write_sha1_file() has to write that blob in our repository,
>>> without the existing has_sha1_file() check bypassing it.  Otherwise
>>> our attempt to create a tree that contains that blob will fail,
>>> saying that the blob only seems to exist to us via submodule odb but
>>> not in our repository.
>>
>> Another thing needs to be done for this to work. The current reading
>
> For *what* to work???

The "forbid making our repository depend on objects we do not have but
we know about afterwe peek submodule odb"

> I think the performance consideration is the
> only thing that should drive the read-order; correctness should not
> be affected.
>
>> order is packs first, loose objects next. If we create a local loose
>> duplicate of an alternate packed object, our local version will never
>> be read. Regardless the submodule odb issue, I think we should prefer
>> reading local loose objects over alternate packed ones.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  5:57             ` Duy Nguyen
@ 2013-01-28  6:06               ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2013-01-28  6:06 UTC (permalink / raw
  To: Duy Nguyen; +Cc: git, Jens Lehmann

Duy Nguyen <pclouds@gmail.com> writes:

>>> Another thing needs to be done for this to work. The current reading
>>
>> For *what* to work???
>
> The "forbid making our repository depend on objects we do not have but
> we know about afterwe peek submodule odb"

With your "when our object database is contaminated, check objects
we base our new object on are available in local or our alternates"
together with the "when we try write_object(), do not bypass it with
has_sha1_file() check because that may find the object in submodule odb
we should *not* have access to; instead check with the same 'local
or our alternates' test" I brought up in the message you were
responding to, I do not think object read order does not make a
difference to our effort to prevent the object database breakage due
to temporary contamination by submodule objects.

>>> Regardless the submodule odb issue, I think we should prefer
>>> reading local loose objects over alternate packed ones.

I suspect you are alluding to make write_object() check with
has_sha1_file_locally() so that we can wean our repository from
existing alternates, but I do not think it is a right approach
(instead, you just fully repack locally if you want to dissociate
yourself from your alternates).  What I was suggesting was to change
it to check with has_sha1_file_proper(), to make sure we do not omit
writing an object we need to access to, when we know it will vanish
once we stop temporarily borrowing from the submodule object store.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  5:48         ` Duy Nguyen
  2013-01-28  5:54           ` Junio C Hamano
@ 2013-01-28  6:18           ` Duy Nguyen
  2013-01-28  6:36             ` Junio C Hamano
  1 sibling, 1 reply; 21+ messages in thread
From: Duy Nguyen @ 2013-01-28  6:18 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Mon, Jan 28, 2013 at 12:48 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> Regardless the submodule odb issue, I think we should prefer
> reading local loose objects over alternate packed ones.

I think I went from one problem to another and did not make it clear.
The reason behind this preference is security. With "all packs first"
reading order, someone can create a pack in the alternate source and
that pack will override the same local loose objects (local packed
ones are safe). If someone can create a malicious version with the
same SHA-1, we (well, the user) are in trouble. If the user uses this
repository directly then the malicious object can be used without
detected, even if it does not match the original SHA-1, as we do not
always verify content against its SHA-1 for common commands.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  6:18           ` Duy Nguyen
@ 2013-01-28  6:36             ` Junio C Hamano
  2013-01-28  6:57               ` Duy Nguyen
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-01-28  6:36 UTC (permalink / raw
  To: Duy Nguyen; +Cc: git, Jens Lehmann

Duy Nguyen <pclouds@gmail.com> writes:

> On Mon, Jan 28, 2013 at 12:48 PM, Duy Nguyen <pclouds@gmail.com> wrote:
>> Regardless the submodule odb issue, I think we should prefer
>> reading local loose objects over alternate packed ones.
>
> I think I went from one problem to another and did not make it clear.
> The reason behind this preference is security.

Reading from ours first would not help you at all if you are lacking
some that you do need from your local repository and the only
solution is not to borrow from untrustworthy sources, I think.

You borrow only from a trusted source in the first place, no?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects
  2013-01-28  6:36             ` Junio C Hamano
@ 2013-01-28  6:57               ` Duy Nguyen
  0 siblings, 0 replies; 21+ messages in thread
From: Duy Nguyen @ 2013-01-28  6:57 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Jens Lehmann

On Mon, Jan 28, 2013 at 1:36 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Mon, Jan 28, 2013 at 12:48 PM, Duy Nguyen <pclouds@gmail.com> wrote:
>>> Regardless the submodule odb issue, I think we should prefer
>>> reading local loose objects over alternate packed ones.
>>
>> I think I went from one problem to another and did not make it clear.
>> The reason behind this preference is security.
>
> Reading from ours first would not help you at all if you are lacking
> some that you do need from your local repository and the only
> solution is not to borrow from untrustworthy sources, I think.
>
> You borrow only from a trusted source in the first place, no?

Hmm.. yeah.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2013-01-28  6:58 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-24  8:42 [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Nguyễn Thái Ngọc Duy
2013-01-24  8:42 ` [PATCH 2/7] sha1_file: separate alt object db from own repos and submodules's Nguyễn Thái Ngọc Duy
2013-01-24  8:42 ` [PATCH 3/7] sha1_file: refuse to write commits referring to external objects Nguyễn Thái Ngọc Duy
2013-01-24  8:42 ` [PATCH 4/7] sha1_file: refuse to write trees " Nguyễn Thái Ngọc Duy
2013-01-24  8:42 ` [PATCH 5/7] sha1_file: refuse to write tags " Nguyễn Thái Ngọc Duy
2013-01-24  8:42 ` [PATCH 6/7] read-cache: refuse to create index " Nguyễn Thái Ngọc Duy
2013-01-24 19:15   ` Junio C Hamano
2013-01-25  2:00     ` Duy Nguyen
2013-01-25 19:00       ` Junio C Hamano
2013-01-28  5:48         ` Duy Nguyen
2013-01-28  5:54           ` Junio C Hamano
2013-01-28  5:57             ` Duy Nguyen
2013-01-28  6:06               ` Junio C Hamano
2013-01-28  6:18           ` Duy Nguyen
2013-01-28  6:36             ` Junio C Hamano
2013-01-28  6:57               ` Duy Nguyen
2013-01-24  8:42 ` [PATCH 7/7] refs: do not update ref(log) " Nguyễn Thái Ngọc Duy
2013-01-24 17:45 ` [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from Junio C Hamano
2013-01-25  1:38   ` Duy Nguyen
2013-01-25  3:58     ` Junio C Hamano
2013-01-25  4:02       ` Duy Nguyen

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).