git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
* [PATCH 0/2] Add commit-graph fuzzer and fix buffer overflow
@ 2018-12-05 22:32 Josh Steadmon
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-05 22:32 UTC (permalink / raw)
  To: git, stolee

Add a new fuzz test for the commit graph and fix a buffer read-overflow
that it discovered.

Josh Steadmon (2):
  commit-graph, fuzz: Add fuzzer for commit-graph
  commit-graph: fix buffer read-overflow

 .gitignore          |  1 +
 Makefile            |  1 +
 commit-graph.c      | 76 +++++++++++++++++++++++++++++++++------------
 fuzz-commit-graph.c | 18 +++++++++++
 4 files changed, 77 insertions(+), 19 deletions(-)
 create mode 100644 fuzz-commit-graph.c

-- 
2.20.0.rc2.403.gdbc3b29805-goog


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-05 22:32 [PATCH 0/2] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
@ 2018-12-05 22:32 ` Josh Steadmon
  2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
                     ` (2 more replies)
  2018-12-05 22:32 ` [PATCH 2/2] commit-graph: fix buffer read-overflow Josh Steadmon
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2 siblings, 3 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-05 22:32 UTC (permalink / raw)
  To: git, stolee

Breaks load_commit_graph_one() into a new function,
parse_commit_graph(). The latter function operates on arbitrary buffers,
which makes it suitable as a fuzzing target.

Adds fuzz-commit-graph.c, which provides a fuzzing entry point
compatible with libFuzzer (and possibly other fuzzing engines).

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 .gitignore          |  1 +
 Makefile            |  1 +
 commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
 fuzz-commit-graph.c | 18 +++++++++++++
 4 files changed, 66 insertions(+), 17 deletions(-)
 create mode 100644 fuzz-commit-graph.c

diff --git a/.gitignore b/.gitignore
index 0d77ea5894..8bcf153ed9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/fuzz-commit-graph
 /fuzz_corpora
 /fuzz-pack-headers
 /fuzz-pack-idx
diff --git a/Makefile b/Makefile
index 1a44c811aa..6b72f37c29 100644
--- a/Makefile
+++ b/Makefile
@@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
 
 ETAGS_TARGET = TAGS
 
+FUZZ_OBJS += fuzz-commit-graph.o
 FUZZ_OBJS += fuzz-pack-headers.o
 FUZZ_OBJS += fuzz-pack-idx.o
 
diff --git a/commit-graph.c b/commit-graph.c
index 40c855f185..0755359b1a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -46,6 +46,10 @@
 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
 			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
 
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+
 char *get_commit_graph_filename(const char *obj_dir)
 {
 	return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -84,16 +88,10 @@ static int commit_graph_compatible(struct repository *r)
 struct commit_graph *load_commit_graph_one(const char *graph_file)
 {
 	void *graph_map;
-	const unsigned char *data, *chunk_lookup;
 	size_t graph_size;
 	struct stat st;
-	uint32_t i;
-	struct commit_graph *graph;
+	struct commit_graph *ret;
 	int fd = git_open(graph_file);
-	uint64_t last_chunk_offset;
-	uint32_t last_chunk_id;
-	uint32_t graph_signature;
-	unsigned char graph_version, hash_version;
 
 	if (fd < 0)
 		return NULL;
@@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		die(_("graph file %s is too small"), graph_file);
 	}
 	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	ret = parse_commit_graph(graph_map, fd, graph_size);
+
+	if (ret == NULL) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		exit(1);
+	}
+
+	return ret;
+}
+
+/*
+ * This function is intended to be used only from load_commit_graph_one() or in
+ * fuzz tests.
+ */
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size)
+{
+	const unsigned char *data, *chunk_lookup;
+	uint32_t i;
+	struct commit_graph *graph;
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+	uint32_t graph_signature;
+	unsigned char graph_version, hash_version;
+
+	/*
+	 * This should already be checked in load_commit_graph_one, but we still
+	 * need a check here for when we're calling parse_commit_graph directly
+	 * from fuzz tests. We can omit the error message in that case.
+	 */
+	if (graph_size < GRAPH_MIN_SIZE)
+		return NULL;
+
 	data = (const unsigned char *)graph_map;
 
 	graph_signature = get_be32(data);
 	if (graph_signature != GRAPH_SIGNATURE) {
 		error(_("graph signature %X does not match signature %X"),
 		      graph_signature, GRAPH_SIGNATURE);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph_version = *(unsigned char*)(data + 4);
 	if (graph_version != GRAPH_VERSION) {
 		error(_("graph version %X does not match version %X"),
 		      graph_version, GRAPH_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	hash_version = *(unsigned char*)(data + 5);
 	if (hash_version != GRAPH_OID_VERSION) {
 		error(_("hash version %X does not match version %X"),
 		      hash_version, GRAPH_OID_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph = alloc_commit_graph();
@@ -152,7 +184,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
 			error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
 			      (uint32_t)chunk_offset);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		switch (chunk_id) {
@@ -187,7 +220,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 
 		if (chunk_repeated) {
 			error(_("chunk id %08x appears multiple times"), chunk_id);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
@@ -201,11 +235,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 	}
 
 	return graph;
-
-cleanup_fail:
-	munmap(graph_map, graph_size);
-	close(fd);
-	exit(1);
 }
 
 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
new file mode 100644
index 0000000000..420851d0d2
--- /dev/null
+++ b/fuzz-commit-graph.c
@@ -0,0 +1,18 @@
+#include "object-store.h"
+#include "commit-graph.h"
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	struct commit_graph *g;
+
+	g = parse_commit_graph((void *) data, -1, size);
+	if (g)
+		free(g);
+
+	return 0;
+}
-- 
2.20.0.rc2.403.gdbc3b29805-goog


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/2] commit-graph: fix buffer read-overflow
  2018-12-05 22:32 [PATCH 0/2] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
@ 2018-12-05 22:32 ` Josh Steadmon
  2018-12-06 13:11   ` Derrick Stolee
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2 siblings, 1 reply; 20+ messages in thread
From: Josh Steadmon @ 2018-12-05 22:32 UTC (permalink / raw)
  To: git, stolee

fuzz-commit-graph identified a case where Git will read past the end of
a buffer containing a commit graph if the graph's header has an
incorrect chunk count. A simple bounds check in parse_commit_graph()
prevents this.

Signed-off-by: Josh Steadmon <steadmon@google.com>
Helped-by: Derrick Stolee <stolee@gmail.com>
---
 commit-graph.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 0755359b1a..fee171a5f3 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -175,10 +175,19 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
 	last_chunk_offset = 8;
 	chunk_lookup = data + 8;
 	for (i = 0; i < graph->num_chunks; i++) {
-		uint32_t chunk_id = get_be32(chunk_lookup + 0);
-		uint64_t chunk_offset = get_be64(chunk_lookup + 4);
+		uint32_t chunk_id;
+		uint64_t chunk_offset;
 		int chunk_repeated = 0;
 
+		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH > data + graph_size) {
+			error(_("chunk lookup table entry missing; graph file may be incomplete"));
+			free(graph);
+			return NULL;
+		}
+
+		chunk_id = get_be32(chunk_lookup + 0);
+		chunk_offset = get_be64(chunk_lookup + 4);
+
 		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
 
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
-- 
2.20.0.rc2.403.gdbc3b29805-goog


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
@ 2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
  2018-12-06  1:00     ` Josh Steadmon
  2018-12-06  1:32   ` Junio C Hamano
  2018-12-06  4:47   ` Junio C Hamano
  2 siblings, 1 reply; 20+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2018-12-05 22:48 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, stolee


On Wed, Dec 05 2018, Josh Steadmon wrote:

> Breaks load_commit_graph_one() into a new function,
> parse_commit_graph(). The latter function operates on arbitrary buffers,
> which makes it suitable as a fuzzing target.
>
> Adds fuzz-commit-graph.c, which provides a fuzzing entry point
> compatible with libFuzzer (and possibly other fuzzing engines).
>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  .gitignore          |  1 +
>  Makefile            |  1 +
>  commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
>  fuzz-commit-graph.c | 18 +++++++++++++
>  4 files changed, 66 insertions(+), 17 deletions(-)
>  create mode 100644 fuzz-commit-graph.c
>
> diff --git a/.gitignore b/.gitignore
> index 0d77ea5894..8bcf153ed9 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -1,3 +1,4 @@
> +/fuzz-commit-graph
>  /fuzz_corpora
>  /fuzz-pack-headers
>  /fuzz-pack-idx
> diff --git a/Makefile b/Makefile
> index 1a44c811aa..6b72f37c29 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
>
>  ETAGS_TARGET = TAGS
>
> +FUZZ_OBJS += fuzz-commit-graph.o
>  FUZZ_OBJS += fuzz-pack-headers.o
>  FUZZ_OBJS += fuzz-pack-idx.o
>
> diff --git a/commit-graph.c b/commit-graph.c
> index 40c855f185..0755359b1a 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -46,6 +46,10 @@
>  #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
>  			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
>
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +
>  char *get_commit_graph_filename(const char *obj_dir)
>  {
>  	return xstrfmt("%s/info/commit-graph", obj_dir);
> @@ -84,16 +88,10 @@ static int commit_graph_compatible(struct repository *r)
>  struct commit_graph *load_commit_graph_one(const char *graph_file)
>  {
>  	void *graph_map;
> -	const unsigned char *data, *chunk_lookup;
>  	size_t graph_size;
>  	struct stat st;
> -	uint32_t i;
> -	struct commit_graph *graph;
> +	struct commit_graph *ret;
>  	int fd = git_open(graph_file);
> -	uint64_t last_chunk_offset;
> -	uint32_t last_chunk_id;
> -	uint32_t graph_signature;
> -	unsigned char graph_version, hash_version;
>
>  	if (fd < 0)
>  		return NULL;
> @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		die(_("graph file %s is too small"), graph_file);
>  	}
>  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +	ret = parse_commit_graph(graph_map, fd, graph_size);
> +
> +	if (ret == NULL) {

Code in git usually uses just !ret.

> +		munmap(graph_map, graph_size);
> +		close(fd);
> +		exit(1);

Ouch, exit(1) from load_commit_graph_one()? Can't we return NULL here
instead? Nasty to exit from a library routine, but then I see later...

> @@ -201,11 +235,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  	}
>
>  	return graph;
> -
> -cleanup_fail:
> -	munmap(graph_map, graph_size);
> -	close(fd);
> -	exit(1);
>  }

... ah, I see this is where you got the exit(1) from. So it was there
already.

>  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);
> +
> +	return 0;
> +}

I hadn't looked at this before, but see your 5e47215080 ("fuzz: add
basic fuzz testing target.", 2018-10-12) for some prior art.

There's instructions there for a very long "make" invocation. Would be
nice if this were friendlier and we could just do "make test-fuzz" or
something...

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
@ 2018-12-06  1:00     ` Josh Steadmon
  0 siblings, 0 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-06  1:00 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason; +Cc: git, stolee

On 2018.12.05 23:48, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Dec 05 2018, Josh Steadmon wrote:
> 
> > Breaks load_commit_graph_one() into a new function,
> > parse_commit_graph(). The latter function operates on arbitrary buffers,
> > which makes it suitable as a fuzzing target.
> >
> > Adds fuzz-commit-graph.c, which provides a fuzzing entry point
> > compatible with libFuzzer (and possibly other fuzzing engines).
> >
> > Signed-off-by: Josh Steadmon <steadmon@google.com>
> > ---
> >  .gitignore          |  1 +
> >  Makefile            |  1 +
> >  commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
> >  fuzz-commit-graph.c | 18 +++++++++++++
> >  4 files changed, 66 insertions(+), 17 deletions(-)
> >  create mode 100644 fuzz-commit-graph.c
> >
> > diff --git a/.gitignore b/.gitignore
> > index 0d77ea5894..8bcf153ed9 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -1,3 +1,4 @@
> > +/fuzz-commit-graph
> >  /fuzz_corpora
> >  /fuzz-pack-headers
> >  /fuzz-pack-idx
> > diff --git a/Makefile b/Makefile
> > index 1a44c811aa..6b72f37c29 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
> >
> >  ETAGS_TARGET = TAGS
> >
> > +FUZZ_OBJS += fuzz-commit-graph.o
> >  FUZZ_OBJS += fuzz-pack-headers.o
> >  FUZZ_OBJS += fuzz-pack-idx.o
> >
> > diff --git a/commit-graph.c b/commit-graph.c
> > index 40c855f185..0755359b1a 100644
> > --- a/commit-graph.c
> > +++ b/commit-graph.c
> > @@ -46,6 +46,10 @@
> >  #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
> >  			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
> >
> > +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> > +					size_t graph_size);
> > +
> > +
> >  char *get_commit_graph_filename(const char *obj_dir)
> >  {
> >  	return xstrfmt("%s/info/commit-graph", obj_dir);
> > @@ -84,16 +88,10 @@ static int commit_graph_compatible(struct repository *r)
> >  struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  {
> >  	void *graph_map;
> > -	const unsigned char *data, *chunk_lookup;
> >  	size_t graph_size;
> >  	struct stat st;
> > -	uint32_t i;
> > -	struct commit_graph *graph;
> > +	struct commit_graph *ret;
> >  	int fd = git_open(graph_file);
> > -	uint64_t last_chunk_offset;
> > -	uint32_t last_chunk_id;
> > -	uint32_t graph_signature;
> > -	unsigned char graph_version, hash_version;
> >
> >  	if (fd < 0)
> >  		return NULL;
> > @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  		die(_("graph file %s is too small"), graph_file);
> >  	}
> >  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> > +	ret = parse_commit_graph(graph_map, fd, graph_size);
> > +
> > +	if (ret == NULL) {
> 
> Code in git usually uses just !ret.

Will fix in V2, thanks.


> > +		munmap(graph_map, graph_size);
> > +		close(fd);
> > +		exit(1);
> 
> Ouch, exit(1) from load_commit_graph_one()? Can't we return NULL here
> instead? Nasty to exit from a library routine, but then I see later...
> 
> > @@ -201,11 +235,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  	}
> >
> >  	return graph;
> > -
> > -cleanup_fail:
> > -	munmap(graph_map, graph_size);
> > -	close(fd);
> > -	exit(1);
> >  }
> 
> ... ah, I see this is where you got the exit(1) from. So it was there
> already.
> 
> >  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> > diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> > new file mode 100644
> > index 0000000000..420851d0d2
> > --- /dev/null
> > +++ b/fuzz-commit-graph.c
> > @@ -0,0 +1,18 @@
> > +#include "object-store.h"
> > +#include "commit-graph.h"
> > +
> > +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> > +					size_t graph_size);
> > +
> > +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> > +
> > +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> > +{
> > +	struct commit_graph *g;
> > +
> > +	g = parse_commit_graph((void *) data, -1, size);
> > +	if (g)
> > +		free(g);
> > +
> > +	return 0;
> > +}
> 
> I hadn't looked at this before, but see your 5e47215080 ("fuzz: add
> basic fuzz testing target.", 2018-10-12) for some prior art.
> 
> There's instructions there for a very long "make" invocation. Would be
> nice if this were friendlier and we could just do "make test-fuzz" or
> something...

Yeah, the problem is that there are too many combinations of fuzzing
engine, sanitizer, and compiler to make any reasonable default here.
Even if you just stick with libFuzzer, address sanitizer, and clang, the
flags change radically depending on which version of clang you're using.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
  2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
@ 2018-12-06  1:32   ` Junio C Hamano
  2018-12-06  1:41     ` Junio C Hamano
  2018-12-06  4:47   ` Junio C Hamano
  2 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2018-12-06  1:32 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, stolee

Josh Steadmon <steadmon@google.com> writes:

> @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		die(_("graph file %s is too small"), graph_file);
>  	}
>  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +	ret = parse_commit_graph(graph_map, fd, graph_size);

OK, assuming that the new helper returns NULL from all places in the
original that would have jumped to the cleanup-fail label, this would
do the same thing (it may not be the right thing to exit, but that
is OK for the purpose of this change).

> +	if (ret == NULL) {
> +		munmap(graph_map, graph_size);
> +		close(fd);
> +		exit(1);
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * This function is intended to be used only from load_commit_graph_one() or in
> + * fuzz tests.
> + */

Hmph, is that necessary to say?  

"Right now, it has only these two callers" is sometimes handy for
those without good static analysis tools, like "git grep" ;-), but I
do not think of a reason why a new caller we'll add in the future,
who happens to have the data of the graph file in-core, should not
be allowed to call this function.


> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size)
> +{
> +	const unsigned char *data, *chunk_lookup;
> +	uint32_t i;
> +	struct commit_graph *graph;
> +	uint64_t last_chunk_offset;
> +	uint32_t last_chunk_id;
> +	uint32_t graph_signature;
> +	unsigned char graph_version, hash_version;
> +
> +	/*
> +	 * This should already be checked in load_commit_graph_one, but we still
> +	 * need a check here for when we're calling parse_commit_graph directly
> +	 * from fuzz tests. We can omit the error message in that case.
> +	 */

In the same spirit, I am dubious of the longer-term value of this
comment.  As an explanation of why this conversion is correct in the
proposed log message for this change, it perfectly makes sense,
though.

> +	if (graph_size < GRAPH_MIN_SIZE)
> +		return NULL;
> +

The load_commit_graph_one() grabbed graph_map out of xmmap() so it
is guaranteed to be non-NULL, but we need to check graph_map != NULL
when we're calling this directly from the fuzz tests, exactly in the
same spirit that we check graph_size above.  Besides, these are to
make sure future callers won't misuse the API.

>  	data = (const unsigned char *)graph_map;

And the reset of the function is the same as the original modulo
jumping to the cleanup_fail label has been replaced with returning
NULL.

Looks good.

> ...
> -
> -cleanup_fail:
> -	munmap(graph_map, graph_size);
> -	close(fd);
> -	exit(1);
>  }
>  
>  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);
> +
> +	return 0;
> +}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-06  1:32   ` Junio C Hamano
@ 2018-12-06  1:41     ` Junio C Hamano
  0 siblings, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2018-12-06  1:41 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, stolee

Junio C Hamano <gitster@pobox.com> writes:

>> +	if (graph_size < GRAPH_MIN_SIZE)
>> +		return NULL;
>> +
>
> The load_commit_graph_one() grabbed graph_map out of xmmap() so it
> is guaranteed to be non-NULL, but we need to check graph_map != NULL
> when we're calling this directly from the fuzz tests, exactly in the
> same spirit that we check graph_size above.  Besides, these are to
> make sure future callers won't misuse the API.

Insert "Please check graph_map != NULL here, too." before the above
paragraph.

>>  	data = (const unsigned char *)graph_map;
>
> And the reset of the function is the same as the original modulo
> jumping to the cleanup_fail label has been replaced with returning
> NULL.
>
> Looks good.

Of course, s/reset/rest/ is what I meant.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
  2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
  2018-12-06  1:32   ` Junio C Hamano
@ 2018-12-06  4:47   ` Junio C Hamano
  2 siblings, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2018-12-06  4:47 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, stolee

Josh Steadmon <steadmon@google.com> writes:

> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);

As it is perfectly OK to free(NULL), please lose "if (g)" and a
level of indentation; otherwise, "make coccicheck" would complain.

Thanks.

> +	return 0;
> +}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] commit-graph: fix buffer read-overflow
  2018-12-05 22:32 ` [PATCH 2/2] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-06 13:11   ` Derrick Stolee
  0 siblings, 0 replies; 20+ messages in thread
From: Derrick Stolee @ 2018-12-06 13:11 UTC (permalink / raw)
  To: Josh Steadmon, git

On 12/5/2018 5:32 PM, Josh Steadmon wrote:
>   
> +		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH > data + graph_size) {
> +			error(_("chunk lookup table entry missing; graph file may be incomplete"));
> +			free(graph);
> +			return NULL;
> +		}

Something I forgot earlier: there are several tests in 
t5318-commit-graph.sh that use 'git commit-graph verify' to ensure we 
hit these error conditions on a corrupted commit-graph file. Could you 
try adding a test there that looks for this error message?

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow
  2018-12-05 22:32 [PATCH 0/2] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
  2018-12-05 22:32 ` [PATCH 2/2] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-06 20:20 ` Josh Steadmon
  2018-12-06 20:20   ` [PATCH v2 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
                     ` (3 more replies)
  2 siblings, 4 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-06 20:20 UTC (permalink / raw)
  To: git, gitster, stolee, avarab

Add a new fuzz test for the commit graph and fix a buffer read-overflow
that it discovered. Additionally, fix the Makefile instructions for
building fuzzers.

Changes since V1:
  * Moved the parse_commit_graph() declaration to the header file, since
    we don't mind if others use it.
  * Moved some unnecessary comments into commit messages.
  * Fixed some style issues.
  * Added a test case for detecting commit graphs with missing chunk
    lookup entries.
  * Ævar's comments on the Makefile made me realize the fuzzer build
    instructions were using the wrong variable. Added a new commit to
    fix this.

Josh Steadmon (3):
  commit-graph, fuzz: Add fuzzer for commit-graph
  commit-graph: fix buffer read-overflow
  Makefile: correct example fuzz build

 .gitignore              |  1 +
 Makefile                |  3 +-
 commit-graph.c          | 67 +++++++++++++++++++++++++++++------------
 commit-graph.h          |  3 ++
 fuzz-commit-graph.c     | 16 ++++++++++
 t/t5318-commit-graph.sh | 28 +++++++++++++++++
 6 files changed, 98 insertions(+), 20 deletions(-)
 create mode 100644 fuzz-commit-graph.c

Range-diff against v1:
1:  53e62baaa8 ! 1:  0b57ecbe1b commit-graph, fuzz: Add fuzzer for commit-graph
    @@ -4,7 +4,9 @@
     
         Breaks load_commit_graph_one() into a new function,
         parse_commit_graph(). The latter function operates on arbitrary buffers,
    -    which makes it suitable as a fuzzing target.
    +    which makes it suitable as a fuzzing target. Since parse_commit_graph()
    +    is only called by load_commit_graph_one() (and the fuzzer described
    +    below), we omit error messages that would be duplicated by the caller.
     
         Adds fuzz-commit-graph.c, which provides a fuzzing entry point
         compatible with libFuzzer (and possibly other fuzzing engines).
    @@ -35,17 +37,6 @@
      diff --git a/commit-graph.c b/commit-graph.c
      --- a/commit-graph.c
      +++ b/commit-graph.c
    -@@
    - #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
    - 			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
    - 
    -+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
    -+					size_t graph_size);
    -+
    -+
    - char *get_commit_graph_filename(const char *obj_dir)
    - {
    - 	return xstrfmt("%s/info/commit-graph", obj_dir);
     @@
      struct commit_graph *load_commit_graph_one(const char *graph_file)
      {
    @@ -70,7 +61,7 @@
      	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
     +	ret = parse_commit_graph(graph_map, fd, graph_size);
     +
    -+	if (ret == NULL) {
    ++	if (!ret) {
     +		munmap(graph_map, graph_size);
     +		close(fd);
     +		exit(1);
    @@ -79,10 +70,6 @@
     +	return ret;
     +}
     +
    -+/*
    -+ * This function is intended to be used only from load_commit_graph_one() or in
    -+ * fuzz tests.
    -+ */
     +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
     +					size_t graph_size)
     +{
    @@ -94,11 +81,9 @@
     +	uint32_t graph_signature;
     +	unsigned char graph_version, hash_version;
     +
    -+	/*
    -+	 * This should already be checked in load_commit_graph_one, but we still
    -+	 * need a check here for when we're calling parse_commit_graph directly
    -+	 * from fuzz tests. We can omit the error message in that case.
    -+	 */
    ++	if (!graph_map)
    ++		return NULL;
    ++
     +	if (graph_size < GRAPH_MIN_SIZE)
     +		return NULL;
     +
    @@ -162,12 +147,25 @@
      
      static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
     
    + diff --git a/commit-graph.h b/commit-graph.h
    + --- a/commit-graph.h
    + +++ b/commit-graph.h
    +@@
    + 
    + struct commit_graph *load_commit_graph_one(const char *graph_file);
    + 
    ++struct commit_graph *parse_commit_graph(void *graph_map, int fd,
    ++					size_t graph_size);
    ++
    + /*
    +  * Return 1 if and only if the repository has a commit-graph
    +  * file and generation numbers are computed in that file.
    +
      diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
      new file mode 100644
      --- /dev/null
      +++ b/fuzz-commit-graph.c
     @@
    -+#include "object-store.h"
     +#include "commit-graph.h"
     +
     +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
    @@ -179,9 +177,8 @@
     +{
     +	struct commit_graph *g;
     +
    -+	g = parse_commit_graph((void *) data, -1, size);
    -+	if (g)
    -+		free(g);
    ++	g = parse_commit_graph((void *)data, -1, size);
    ++	free(g);
     +
     +	return 0;
     +}
2:  ad2e761f44 ! 2:  af45c2337f commit-graph: fix buffer read-overflow
    @@ -22,7 +22,8 @@
     +		uint64_t chunk_offset;
      		int chunk_repeated = 0;
      
    -+		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH > data + graph_size) {
    ++		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH >
    ++		    data + graph_size) {
     +			error(_("chunk lookup table entry missing; graph file may be incomplete"));
     +			free(graph);
     +			return NULL;
    @@ -34,3 +35,49 @@
      		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
      
      		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
    +
    + diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
    + --- a/t/t5318-commit-graph.sh
    + +++ b/t/t5318-commit-graph.sh
    +@@
    + 	test_i18ngrep "$grepstr" err
    + }
    + 
    ++
    ++# usage: corrupt_and_zero_graph_then_verify <corrupt_position> <data> <zero_position> <string>
    ++# Manipulates the commit-graph file at <corrupt_position> by inserting the data,
    ++# then zeros the file starting at <zero_position>. Finally, runs
    ++# 'git commit-graph verify' and places the output in the file 'err'. Tests 'err'
    ++# for the given string.
    ++corrupt_and_zero_graph_then_verify() {
    ++	corrupt_pos=$1
    ++	data="${2:-\0}"
    ++	zero_pos=$3
    ++	grepstr=$4
    ++	orig_size=$(stat --format=%s $objdir/info/commit-graph)
    ++	cd "$TRASH_DIRECTORY/full" &&
    ++	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
    ++	cp $objdir/info/commit-graph commit-graph-backup &&
    ++	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$corrupt_pos" conv=notrunc &&
    ++	truncate --size=$zero_pos $objdir/info/commit-graph &&
    ++	truncate --size=$orig_size $objdir/info/commit-graph &&
    ++	test_must_fail git commit-graph verify 2>test_err &&
    ++	grep -v "^+" test_err >err &&
    ++	test_i18ngrep "$grepstr" err
    ++}
    ++
    + test_expect_success 'detect bad signature' '
    + 	corrupt_graph_and_verify 0 "\0" \
    + 		"graph signature"
    +@@
    + 		"incorrect checksum"
    + '
    + 
    ++test_expect_success 'detect truncated graph' '
    ++	corrupt_and_zero_graph_then_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
    ++		$GRAPH_CHUNK_LOOKUP_OFFSET "chunk lookup table entry missing"
    ++'
    ++
    + test_expect_success 'git fsck (checks commit-graph)' '
    + 	cd "$TRASH_DIRECTORY/full" &&
    + 	git fsck &&
-:  ---------- > 3:  7519fc76df Makefile: correct example fuzz build
-- 
2.20.0.rc2.10.g7519fc76df


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 1/3] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
@ 2018-12-06 20:20   ` Josh Steadmon
  2018-12-06 20:20   ` [PATCH v2 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-06 20:20 UTC (permalink / raw)
  To: git, gitster, stolee, avarab

Breaks load_commit_graph_one() into a new function,
parse_commit_graph(). The latter function operates on arbitrary buffers,
which makes it suitable as a fuzzing target. Since parse_commit_graph()
is only called by load_commit_graph_one() (and the fuzzer described
below), we omit error messages that would be duplicated by the caller.

Adds fuzz-commit-graph.c, which provides a fuzzing entry point
compatible with libFuzzer (and possibly other fuzzing engines).

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 .gitignore          |  1 +
 Makefile            |  1 +
 commit-graph.c      | 53 ++++++++++++++++++++++++++++++---------------
 commit-graph.h      |  3 +++
 fuzz-commit-graph.c | 16 ++++++++++++++
 5 files changed, 57 insertions(+), 17 deletions(-)
 create mode 100644 fuzz-commit-graph.c

diff --git a/.gitignore b/.gitignore
index 0d77ea5894..8bcf153ed9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/fuzz-commit-graph
 /fuzz_corpora
 /fuzz-pack-headers
 /fuzz-pack-idx
diff --git a/Makefile b/Makefile
index 1a44c811aa..6b72f37c29 100644
--- a/Makefile
+++ b/Makefile
@@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
 
 ETAGS_TARGET = TAGS
 
+FUZZ_OBJS += fuzz-commit-graph.o
 FUZZ_OBJS += fuzz-pack-headers.o
 FUZZ_OBJS += fuzz-pack-idx.o
 
diff --git a/commit-graph.c b/commit-graph.c
index 40c855f185..07dd410f3c 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -84,16 +84,10 @@ static int commit_graph_compatible(struct repository *r)
 struct commit_graph *load_commit_graph_one(const char *graph_file)
 {
 	void *graph_map;
-	const unsigned char *data, *chunk_lookup;
 	size_t graph_size;
 	struct stat st;
-	uint32_t i;
-	struct commit_graph *graph;
+	struct commit_graph *ret;
 	int fd = git_open(graph_file);
-	uint64_t last_chunk_offset;
-	uint32_t last_chunk_id;
-	uint32_t graph_signature;
-	unsigned char graph_version, hash_version;
 
 	if (fd < 0)
 		return NULL;
@@ -108,27 +102,55 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		die(_("graph file %s is too small"), graph_file);
 	}
 	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	ret = parse_commit_graph(graph_map, fd, graph_size);
+
+	if (!ret) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		exit(1);
+	}
+
+	return ret;
+}
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size)
+{
+	const unsigned char *data, *chunk_lookup;
+	uint32_t i;
+	struct commit_graph *graph;
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+	uint32_t graph_signature;
+	unsigned char graph_version, hash_version;
+
+	if (!graph_map)
+		return NULL;
+
+	if (graph_size < GRAPH_MIN_SIZE)
+		return NULL;
+
 	data = (const unsigned char *)graph_map;
 
 	graph_signature = get_be32(data);
 	if (graph_signature != GRAPH_SIGNATURE) {
 		error(_("graph signature %X does not match signature %X"),
 		      graph_signature, GRAPH_SIGNATURE);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph_version = *(unsigned char*)(data + 4);
 	if (graph_version != GRAPH_VERSION) {
 		error(_("graph version %X does not match version %X"),
 		      graph_version, GRAPH_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	hash_version = *(unsigned char*)(data + 5);
 	if (hash_version != GRAPH_OID_VERSION) {
 		error(_("hash version %X does not match version %X"),
 		      hash_version, GRAPH_OID_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph = alloc_commit_graph();
@@ -152,7 +174,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
 			error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
 			      (uint32_t)chunk_offset);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		switch (chunk_id) {
@@ -187,7 +210,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 
 		if (chunk_repeated) {
 			error(_("chunk id %08x appears multiple times"), chunk_id);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
@@ -201,11 +225,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 	}
 
 	return graph;
-
-cleanup_fail:
-	munmap(graph_map, graph_size);
-	close(fd);
-	exit(1);
 }
 
 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
diff --git a/commit-graph.h b/commit-graph.h
index 9db40b4d3a..813e7c19f1 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -54,6 +54,9 @@ struct commit_graph {
 
 struct commit_graph *load_commit_graph_one(const char *graph_file);
 
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
 /*
  * Return 1 if and only if the repository has a commit-graph
  * file and generation numbers are computed in that file.
diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
new file mode 100644
index 0000000000..cf790c9d04
--- /dev/null
+++ b/fuzz-commit-graph.c
@@ -0,0 +1,16 @@
+#include "commit-graph.h"
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	struct commit_graph *g;
+
+	g = parse_commit_graph((void *)data, -1, size);
+	free(g);
+
+	return 0;
+}
-- 
2.20.0.rc2.10.g7519fc76df


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 2/3] commit-graph: fix buffer read-overflow
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-06 20:20   ` [PATCH v2 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
@ 2018-12-06 20:20   ` Josh Steadmon
  2018-12-07  9:07     ` Jeff King
  2018-12-07 13:33     ` Derrick Stolee
  2018-12-06 20:20   ` [PATCH v2 3/3] Makefile: correct example fuzz build Josh Steadmon
  2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  3 siblings, 2 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-06 20:20 UTC (permalink / raw)
  To: git, gitster, stolee, avarab

fuzz-commit-graph identified a case where Git will read past the end of
a buffer containing a commit graph if the graph's header has an
incorrect chunk count. A simple bounds check in parse_commit_graph()
prevents this.

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 commit-graph.c          | 14 ++++++++++++--
 t/t5318-commit-graph.sh | 28 ++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 07dd410f3c..224a5f161e 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -165,10 +165,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
 	last_chunk_offset = 8;
 	chunk_lookup = data + 8;
 	for (i = 0; i < graph->num_chunks; i++) {
-		uint32_t chunk_id = get_be32(chunk_lookup + 0);
-		uint64_t chunk_offset = get_be64(chunk_lookup + 4);
+		uint32_t chunk_id;
+		uint64_t chunk_offset;
 		int chunk_repeated = 0;
 
+		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH >
+		    data + graph_size) {
+			error(_("chunk lookup table entry missing; graph file may be incomplete"));
+			free(graph);
+			return NULL;
+		}
+
+		chunk_id = get_be32(chunk_lookup + 0);
+		chunk_offset = get_be64(chunk_lookup + 4);
+
 		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
 
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 5fe21db99f..2503cb0345 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -384,6 +384,29 @@ corrupt_graph_and_verify() {
 	test_i18ngrep "$grepstr" err
 }
 
+
+# usage: corrupt_and_zero_graph_then_verify <corrupt_position> <data> <zero_position> <string>
+# Manipulates the commit-graph file at <corrupt_position> by inserting the data,
+# then zeros the file starting at <zero_position>. Finally, runs
+# 'git commit-graph verify' and places the output in the file 'err'. Tests 'err'
+# for the given string.
+corrupt_and_zero_graph_then_verify() {
+	corrupt_pos=$1
+	data="${2:-\0}"
+	zero_pos=$3
+	grepstr=$4
+	orig_size=$(stat --format=%s $objdir/info/commit-graph)
+	cd "$TRASH_DIRECTORY/full" &&
+	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
+	cp $objdir/info/commit-graph commit-graph-backup &&
+	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$corrupt_pos" conv=notrunc &&
+	truncate --size=$zero_pos $objdir/info/commit-graph &&
+	truncate --size=$orig_size $objdir/info/commit-graph &&
+	test_must_fail git commit-graph verify 2>test_err &&
+	grep -v "^+" test_err >err &&
+	test_i18ngrep "$grepstr" err
+}
+
 test_expect_success 'detect bad signature' '
 	corrupt_graph_and_verify 0 "\0" \
 		"graph signature"
@@ -484,6 +507,11 @@ test_expect_success 'detect invalid checksum hash' '
 		"incorrect checksum"
 '
 
+test_expect_success 'detect truncated graph' '
+	corrupt_and_zero_graph_then_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
+		$GRAPH_CHUNK_LOOKUP_OFFSET "chunk lookup table entry missing"
+'
+
 test_expect_success 'git fsck (checks commit-graph)' '
 	cd "$TRASH_DIRECTORY/full" &&
 	git fsck &&
-- 
2.20.0.rc2.10.g7519fc76df


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 3/3] Makefile: correct example fuzz build
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-06 20:20   ` [PATCH v2 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
  2018-12-06 20:20   ` [PATCH v2 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-06 20:20   ` Josh Steadmon
  2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  3 siblings, 0 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-06 20:20 UTC (permalink / raw)
  To: git, gitster, stolee, avarab

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6b72f37c29..bbcfc2bc9f 100644
--- a/Makefile
+++ b/Makefile
@@ -3104,7 +3104,7 @@ cover_db_html: cover_db
 # An example command to build against libFuzzer from LLVM 4.0.0:
 #
 # make CC=clang CXX=clang++ \
-#      FUZZ_CXXFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
+#      CFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
 #      LIB_FUZZING_ENGINE=/usr/lib/llvm-4.0/lib/libFuzzer.a \
 #      fuzz-all
 #
-- 
2.20.0.rc2.10.g7519fc76df


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/3] commit-graph: fix buffer read-overflow
  2018-12-06 20:20   ` [PATCH v2 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-07  9:07     ` Jeff King
  2018-12-07 13:33     ` Derrick Stolee
  1 sibling, 0 replies; 20+ messages in thread
From: Jeff King @ 2018-12-07  9:07 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, gitster, stolee, avarab

On Thu, Dec 06, 2018 at 12:20:54PM -0800, Josh Steadmon wrote:

> diff --git a/commit-graph.c b/commit-graph.c
> index 07dd410f3c..224a5f161e 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -165,10 +165,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
>  	last_chunk_offset = 8;
>  	chunk_lookup = data + 8;
>  	for (i = 0; i < graph->num_chunks; i++) {
> -		uint32_t chunk_id = get_be32(chunk_lookup + 0);
> -		uint64_t chunk_offset = get_be64(chunk_lookup + 4);
> +		uint32_t chunk_id;
> +		uint64_t chunk_offset;
>  		int chunk_repeated = 0;
>  
> +		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH >
> +		    data + graph_size) {
> +			error(_("chunk lookup table entry missing; graph file may be incomplete"));
> +			free(graph);
> +			return NULL;
> +		}

Is it possible to overflow the addition here? E.g., if I'm on a 32-bit
system and the truncated chunk appears right at the 4GB limit, in which
case we wrap back around? I guess that's pretty implausible, since it
would mean that the mmap is bumping up against the end of the address
space. I didn't check, but I wouldn't be surprised if sane operating
systems avoid allocating those addresses.

But I think you could write this as:

  if (data + graph_size - chunk_lookup < GRAPH_CHUNKLOOKUP_WIDTH)

to avoid overflow (we know that "data + graph_size" is sane because
that's our mmap, and chunk_lookup is somewhere between "data" and "data
+ graph_size", so the result is between 0 and graph_size).

I dunno. I think I've convinced myself it's a non-issue here, but it may
be good to get in the habit of writing these sorts of offset checks in
an overflow-proof order.

-Peff

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/3] commit-graph: fix buffer read-overflow
  2018-12-06 20:20   ` [PATCH v2 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
  2018-12-07  9:07     ` Jeff King
@ 2018-12-07 13:33     ` Derrick Stolee
  1 sibling, 0 replies; 20+ messages in thread
From: Derrick Stolee @ 2018-12-07 13:33 UTC (permalink / raw)
  To: Josh Steadmon, git, gitster, avarab

On 12/6/2018 3:20 PM, Josh Steadmon wrote:
> +
> +# usage: corrupt_and_zero_graph_then_verify <corrupt_position> <data> <zero_position> <string>
> +# Manipulates the commit-graph file at <corrupt_position> by inserting the data,
> +# then zeros the file starting at <zero_position>. Finally, runs
> +# 'git commit-graph verify' and places the output in the file 'err'. Tests 'err'
> +# for the given string.
> +corrupt_and_zero_graph_then_verify() {

This method is very similar to to 'corrupt_graph_and_verify()', the only 
difference being the zero_pos, which zeroes the graph.

Could it instead be a modification of corrupt_graph_and_verify() where 
$4 is interpreted as zero_pos, and if it is blank we don't do the 
truncation?

> +test_expect_success 'detect truncated graph' '
> +	corrupt_and_zero_graph_then_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
> +		$GRAPH_CHUNK_LOOKUP_OFFSET "chunk lookup table entry missing"
> +'
> +

Thanks for this! I think it's valuable to keep explicit tests around 
that were discovered from your fuzz tests. Specifically, I can repeat 
the test when I get around to the next file format.

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow
  2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
                     ` (2 preceding siblings ...)
  2018-12-06 20:20   ` [PATCH v2 3/3] Makefile: correct example fuzz build Josh Steadmon
@ 2018-12-07 22:27   ` Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
                       ` (2 more replies)
  3 siblings, 3 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-07 22:27 UTC (permalink / raw)
  To: git; +Cc: gitster, stolee, avarab, peff

Ad a new fuzz test for the commit graph and fix a buffer read-overflow
that it discovered. Additionally, fix the Makefile instructions for
building fuzzers.

Changes since V2:
  * Avoid pointer arithmetic overflow when checking the graph's chunk
    count.
  * Merge the corrupt_graph_and_verify and
    corrupt_and_zero_graph_then_verify test functions.

Josh Steadmon (3):
  commit-graph, fuzz: Add fuzzer for commit-graph
  commit-graph: fix buffer read-overflow
  Makefile: correct example fuzz build

 .gitignore              |  1 +
 Makefile                |  3 +-
 commit-graph.c          | 67 +++++++++++++++++++++++++++++------------
 commit-graph.h          |  3 ++
 fuzz-commit-graph.c     | 16 ++++++++++
 t/t5318-commit-graph.sh | 15 +++++++--
 6 files changed, 83 insertions(+), 22 deletions(-)
 create mode 100644 fuzz-commit-graph.c

Range-diff against v2:
1:  af45c2337f ! 1:  675d58ecea commit-graph: fix buffer read-overflow
    @@ -22,8 +22,8 @@
     +		uint64_t chunk_offset;
      		int chunk_repeated = 0;
      
    -+		if (chunk_lookup + GRAPH_CHUNKLOOKUP_WIDTH >
    -+		    data + graph_size) {
    ++		if (data + graph_size - chunk_lookup <
    ++		    GRAPH_CHUNKLOOKUP_WIDTH) {
     +			error(_("chunk lookup table entry missing; graph file may be incomplete"));
     +			free(graph);
     +			return NULL;
    @@ -40,31 +40,34 @@
      --- a/t/t5318-commit-graph.sh
      +++ b/t/t5318-commit-graph.sh
     @@
    - 	test_i18ngrep "$grepstr" err
    - }
    + GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4))
    + GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES))
      
    -+
    -+# usage: corrupt_and_zero_graph_then_verify <corrupt_position> <data> <zero_position> <string>
    -+# Manipulates the commit-graph file at <corrupt_position> by inserting the data,
    -+# then zeros the file starting at <zero_position>. Finally, runs
    -+# 'git commit-graph verify' and places the output in the file 'err'. Tests 'err'
    -+# for the given string.
    -+corrupt_and_zero_graph_then_verify() {
    -+	corrupt_pos=$1
    -+	data="${2:-\0}"
    -+	zero_pos=$3
    -+	grepstr=$4
    +-# usage: corrupt_graph_and_verify <position> <data> <string>
    ++# usage: corrupt_graph_and_verify <position> <data> <string> [<zero_pos>]
    + # Manipulates the commit-graph file at the position
    +-# by inserting the data, then runs 'git commit-graph verify'
    ++# by inserting the data, optionally zeroing the file
    ++# starting at <zero_pos>, then runs 'git commit-graph verify'
    + # and places the output in the file 'err'. Test 'err' for
    + # the given string.
    + corrupt_graph_and_verify() {
    + 	pos=$1
    + 	data="${2:-\0}"
    + 	grepstr=$3
     +	orig_size=$(stat --format=%s $objdir/info/commit-graph)
    -+	cd "$TRASH_DIRECTORY/full" &&
    -+	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
    -+	cp $objdir/info/commit-graph commit-graph-backup &&
    -+	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$corrupt_pos" conv=notrunc &&
    ++	zero_pos=${4:-${orig_size}}
    + 	cd "$TRASH_DIRECTORY/full" &&
    + 	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
    + 	cp $objdir/info/commit-graph commit-graph-backup &&
    + 	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc &&
     +	truncate --size=$zero_pos $objdir/info/commit-graph &&
     +	truncate --size=$orig_size $objdir/info/commit-graph &&
    -+	test_must_fail git commit-graph verify 2>test_err &&
    -+	grep -v "^+" test_err >err &&
    -+	test_i18ngrep "$grepstr" err
    -+}
    + 	test_must_fail git commit-graph verify 2>test_err &&
    + 	grep -v "^+" test_err >err
    + 	test_i18ngrep "$grepstr" err
    + }
    + 
     +
      test_expect_success 'detect bad signature' '
      	corrupt_graph_and_verify 0 "\0" \
    @@ -73,9 +76,9 @@
      		"incorrect checksum"
      '
      
    -+test_expect_success 'detect truncated graph' '
    -+	corrupt_and_zero_graph_then_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
    -+		$GRAPH_CHUNK_LOOKUP_OFFSET "chunk lookup table entry missing"
    ++test_expect_success 'detect incorrect chunk count' '
    ++	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
    ++		"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
     +'
     +
      test_expect_success 'git fsck (checks commit-graph)' '
2:  7519fc76df = 2:  06a32bfe8b Makefile: correct example fuzz build
-- 
2.20.0.rc2.12.g4c11c11dec


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 1/3] commit-graph, fuzz: Add fuzzer for commit-graph
  2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
@ 2018-12-07 22:27     ` Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 3/3] Makefile: correct example fuzz build Josh Steadmon
  2 siblings, 0 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-07 22:27 UTC (permalink / raw)
  To: git; +Cc: gitster, stolee, avarab, peff

Breaks load_commit_graph_one() into a new function,
parse_commit_graph(). The latter function operates on arbitrary buffers,
which makes it suitable as a fuzzing target. Since parse_commit_graph()
is only called by load_commit_graph_one() (and the fuzzer described
below), we omit error messages that would be duplicated by the caller.

Adds fuzz-commit-graph.c, which provides a fuzzing entry point
compatible with libFuzzer (and possibly other fuzzing engines).

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 .gitignore          |  1 +
 Makefile            |  1 +
 commit-graph.c      | 53 ++++++++++++++++++++++++++++++---------------
 commit-graph.h      |  3 +++
 fuzz-commit-graph.c | 16 ++++++++++++++
 5 files changed, 57 insertions(+), 17 deletions(-)
 create mode 100644 fuzz-commit-graph.c

diff --git a/.gitignore b/.gitignore
index 0d77ea5894..8bcf153ed9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/fuzz-commit-graph
 /fuzz_corpora
 /fuzz-pack-headers
 /fuzz-pack-idx
diff --git a/Makefile b/Makefile
index 1a44c811aa..6b72f37c29 100644
--- a/Makefile
+++ b/Makefile
@@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
 
 ETAGS_TARGET = TAGS
 
+FUZZ_OBJS += fuzz-commit-graph.o
 FUZZ_OBJS += fuzz-pack-headers.o
 FUZZ_OBJS += fuzz-pack-idx.o
 
diff --git a/commit-graph.c b/commit-graph.c
index 40c855f185..07dd410f3c 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -84,16 +84,10 @@ static int commit_graph_compatible(struct repository *r)
 struct commit_graph *load_commit_graph_one(const char *graph_file)
 {
 	void *graph_map;
-	const unsigned char *data, *chunk_lookup;
 	size_t graph_size;
 	struct stat st;
-	uint32_t i;
-	struct commit_graph *graph;
+	struct commit_graph *ret;
 	int fd = git_open(graph_file);
-	uint64_t last_chunk_offset;
-	uint32_t last_chunk_id;
-	uint32_t graph_signature;
-	unsigned char graph_version, hash_version;
 
 	if (fd < 0)
 		return NULL;
@@ -108,27 +102,55 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		die(_("graph file %s is too small"), graph_file);
 	}
 	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	ret = parse_commit_graph(graph_map, fd, graph_size);
+
+	if (!ret) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		exit(1);
+	}
+
+	return ret;
+}
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size)
+{
+	const unsigned char *data, *chunk_lookup;
+	uint32_t i;
+	struct commit_graph *graph;
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+	uint32_t graph_signature;
+	unsigned char graph_version, hash_version;
+
+	if (!graph_map)
+		return NULL;
+
+	if (graph_size < GRAPH_MIN_SIZE)
+		return NULL;
+
 	data = (const unsigned char *)graph_map;
 
 	graph_signature = get_be32(data);
 	if (graph_signature != GRAPH_SIGNATURE) {
 		error(_("graph signature %X does not match signature %X"),
 		      graph_signature, GRAPH_SIGNATURE);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph_version = *(unsigned char*)(data + 4);
 	if (graph_version != GRAPH_VERSION) {
 		error(_("graph version %X does not match version %X"),
 		      graph_version, GRAPH_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	hash_version = *(unsigned char*)(data + 5);
 	if (hash_version != GRAPH_OID_VERSION) {
 		error(_("hash version %X does not match version %X"),
 		      hash_version, GRAPH_OID_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph = alloc_commit_graph();
@@ -152,7 +174,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
 			error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
 			      (uint32_t)chunk_offset);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		switch (chunk_id) {
@@ -187,7 +210,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 
 		if (chunk_repeated) {
 			error(_("chunk id %08x appears multiple times"), chunk_id);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
@@ -201,11 +225,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
 	}
 
 	return graph;
-
-cleanup_fail:
-	munmap(graph_map, graph_size);
-	close(fd);
-	exit(1);
 }
 
 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
diff --git a/commit-graph.h b/commit-graph.h
index 9db40b4d3a..813e7c19f1 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -54,6 +54,9 @@ struct commit_graph {
 
 struct commit_graph *load_commit_graph_one(const char *graph_file);
 
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
 /*
  * Return 1 if and only if the repository has a commit-graph
  * file and generation numbers are computed in that file.
diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
new file mode 100644
index 0000000000..cf790c9d04
--- /dev/null
+++ b/fuzz-commit-graph.c
@@ -0,0 +1,16 @@
+#include "commit-graph.h"
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	struct commit_graph *g;
+
+	g = parse_commit_graph((void *)data, -1, size);
+	free(g);
+
+	return 0;
+}
-- 
2.20.0.rc2.12.g4c11c11dec


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 2/3] commit-graph: fix buffer read-overflow
  2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
@ 2018-12-07 22:27     ` Josh Steadmon
  2018-12-09  4:01       ` Junio C Hamano
  2018-12-07 22:27     ` [PATCH v3 3/3] Makefile: correct example fuzz build Josh Steadmon
  2 siblings, 1 reply; 20+ messages in thread
From: Josh Steadmon @ 2018-12-07 22:27 UTC (permalink / raw)
  To: git; +Cc: gitster, stolee, avarab, peff

fuzz-commit-graph identified a case where Git will read past the end of
a buffer containing a commit graph if the graph's header has an
incorrect chunk count. A simple bounds check in parse_commit_graph()
prevents this.

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 commit-graph.c          | 14 ++++++++++++--
 t/t5318-commit-graph.sh | 15 +++++++++++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 07dd410f3c..836d65a1d3 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -165,10 +165,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
 	last_chunk_offset = 8;
 	chunk_lookup = data + 8;
 	for (i = 0; i < graph->num_chunks; i++) {
-		uint32_t chunk_id = get_be32(chunk_lookup + 0);
-		uint64_t chunk_offset = get_be64(chunk_lookup + 4);
+		uint32_t chunk_id;
+		uint64_t chunk_offset;
 		int chunk_repeated = 0;
 
+		if (data + graph_size - chunk_lookup <
+		    GRAPH_CHUNKLOOKUP_WIDTH) {
+			error(_("chunk lookup table entry missing; graph file may be incomplete"));
+			free(graph);
+			return NULL;
+		}
+
+		chunk_id = get_be32(chunk_lookup + 0);
+		chunk_offset = get_be64(chunk_lookup + 4);
+
 		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
 
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 5fe21db99f..5b6b44b78e 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -366,24 +366,30 @@ GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \
 GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4))
 GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES))
 
-# usage: corrupt_graph_and_verify <position> <data> <string>
+# usage: corrupt_graph_and_verify <position> <data> <string> [<zero_pos>]
 # Manipulates the commit-graph file at the position
-# by inserting the data, then runs 'git commit-graph verify'
+# by inserting the data, optionally zeroing the file
+# starting at <zero_pos>, then runs 'git commit-graph verify'
 # and places the output in the file 'err'. Test 'err' for
 # the given string.
 corrupt_graph_and_verify() {
 	pos=$1
 	data="${2:-\0}"
 	grepstr=$3
+	orig_size=$(stat --format=%s $objdir/info/commit-graph)
+	zero_pos=${4:-${orig_size}}
 	cd "$TRASH_DIRECTORY/full" &&
 	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
 	cp $objdir/info/commit-graph commit-graph-backup &&
 	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc &&
+	truncate --size=$zero_pos $objdir/info/commit-graph &&
+	truncate --size=$orig_size $objdir/info/commit-graph &&
 	test_must_fail git commit-graph verify 2>test_err &&
 	grep -v "^+" test_err >err
 	test_i18ngrep "$grepstr" err
 }
 
+
 test_expect_success 'detect bad signature' '
 	corrupt_graph_and_verify 0 "\0" \
 		"graph signature"
@@ -484,6 +490,11 @@ test_expect_success 'detect invalid checksum hash' '
 		"incorrect checksum"
 '
 
+test_expect_success 'detect incorrect chunk count' '
+	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \
+		"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
+'
+
 test_expect_success 'git fsck (checks commit-graph)' '
 	cd "$TRASH_DIRECTORY/full" &&
 	git fsck &&
-- 
2.20.0.rc2.12.g4c11c11dec


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 3/3] Makefile: correct example fuzz build
  2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
  2018-12-07 22:27     ` [PATCH v3 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-07 22:27     ` Josh Steadmon
  2 siblings, 0 replies; 20+ messages in thread
From: Josh Steadmon @ 2018-12-07 22:27 UTC (permalink / raw)
  To: git; +Cc: gitster, stolee, avarab, peff

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6b72f37c29..bbcfc2bc9f 100644
--- a/Makefile
+++ b/Makefile
@@ -3104,7 +3104,7 @@ cover_db_html: cover_db
 # An example command to build against libFuzzer from LLVM 4.0.0:
 #
 # make CC=clang CXX=clang++ \
-#      FUZZ_CXXFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
+#      CFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
 #      LIB_FUZZING_ENGINE=/usr/lib/llvm-4.0/lib/libFuzzer.a \
 #      fuzz-all
 #
-- 
2.20.0.rc2.12.g4c11c11dec


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/3] commit-graph: fix buffer read-overflow
  2018-12-07 22:27     ` [PATCH v3 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
@ 2018-12-09  4:01       ` Junio C Hamano
  0 siblings, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2018-12-09  4:01 UTC (permalink / raw)
  To: Josh Steadmon; +Cc: git, stolee, avarab, peff

Josh Steadmon <steadmon@google.com> writes:

> diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
> index 5fe21db99f..5b6b44b78e 100755
> --- a/t/t5318-commit-graph.sh
> +++ b/t/t5318-commit-graph.sh
> @@ -366,24 +366,30 @@ GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \
>  GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4))
>  GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES))
>  
> -# usage: corrupt_graph_and_verify <position> <data> <string>
> +# usage: corrupt_graph_and_verify <position> <data> <string> [<zero_pos>]
>  # Manipulates the commit-graph file at the position
> -# by inserting the data, then runs 'git commit-graph verify'
> +# by inserting the data, optionally zeroing the file
> +# starting at <zero_pos>, then runs 'git commit-graph verify'
>  # and places the output in the file 'err'. Test 'err' for
>  # the given string.
>  corrupt_graph_and_verify() {
>  	pos=$1
>  	data="${2:-\0}"
>  	grepstr=$3
> +	orig_size=$(stat --format=%s $objdir/info/commit-graph)

"stat(1)" is not so portable, so you'll get complaints from minority
platform users later.  So is "truncate(1)".

> +	zero_pos=${4:-${orig_size}}
>  	cd "$TRASH_DIRECTORY/full" &&
>  	test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
>  	cp $objdir/info/commit-graph commit-graph-backup &&
>  	printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc &&
> +	truncate --size=$zero_pos $objdir/info/commit-graph &&
> +	truncate --size=$orig_size $objdir/info/commit-graph &&
>  	test_must_fail git commit-graph verify 2>test_err &&
>  	grep -v "^+" test_err >err
>  	test_i18ngrep "$grepstr" err
>  }
>  
> +
>  test_expect_success 'detect bad signature' '
>  	corrupt_graph_and_verify 0 "\0" \
>  		"graph signature"
> @@ -484,6 +490,11 @@ test_expect_success 'detect invalid checksum hash' '
>  		"incorrect checksum"
>  '
>  
> +test_expect_success 'detect incorrect chunk count' '
> +	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\xff" \

Implementations of printf(1) may not grok "\xff" as a valid
representation of "\377".  The shell built-in of dash(1) for example
would not work with this.

> +		"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
> +'
> +
>  test_expect_success 'git fsck (checks commit-graph)' '
>  	cd "$TRASH_DIRECTORY/full" &&
>  	git fsck &&

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, back to index

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-05 22:32 [PATCH 0/2] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
2018-12-05 22:32 ` [PATCH 1/2] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
2018-12-05 22:48   ` Ævar Arnfjörð Bjarmason
2018-12-06  1:00     ` Josh Steadmon
2018-12-06  1:32   ` Junio C Hamano
2018-12-06  1:41     ` Junio C Hamano
2018-12-06  4:47   ` Junio C Hamano
2018-12-05 22:32 ` [PATCH 2/2] commit-graph: fix buffer read-overflow Josh Steadmon
2018-12-06 13:11   ` Derrick Stolee
2018-12-06 20:20 ` [PATCH v2 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
2018-12-06 20:20   ` [PATCH v2 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
2018-12-06 20:20   ` [PATCH v2 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
2018-12-07  9:07     ` Jeff King
2018-12-07 13:33     ` Derrick Stolee
2018-12-06 20:20   ` [PATCH v2 3/3] Makefile: correct example fuzz build Josh Steadmon
2018-12-07 22:27   ` [PATCH v3 0/3] Add commit-graph fuzzer and fix buffer overflow Josh Steadmon
2018-12-07 22:27     ` [PATCH v3 1/3] commit-graph, fuzz: Add fuzzer for commit-graph Josh Steadmon
2018-12-07 22:27     ` [PATCH v3 2/3] commit-graph: fix buffer read-overflow Josh Steadmon
2018-12-09  4:01       ` Junio C Hamano
2018-12-07 22:27     ` [PATCH v3 3/3] Makefile: correct example fuzz build Josh Steadmon

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox