git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 3/4] Convert zlib.c to size_t
@ 2017-08-10 18:13 Martin Koegler
  2017-08-10 18:13 ` [PATCH 4/4] Fix delta offset overflow Martin Koegler
  2017-08-10 21:52 ` [PATCH 3/4] Convert zlib.c to size_t Junio C Hamano
  0 siblings, 2 replies; 6+ messages in thread
From: Martin Koegler @ 2017-08-10 18:13 UTC (permalink / raw
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 builtin/pack-objects.c | 10 +++++-----
 cache.h                | 12 ++++++------
 pack-check.c           |  6 +++---
 pack.h                 |  2 +-
 sha1_file.c            |  6 +++---
 wrapper.c              |  8 ++++----
 zlib.c                 |  8 ++++----
 7 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d94fd17..aa70f80 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -222,15 +222,15 @@ static void copy_pack_data(struct sha1file *f,
 		struct packed_git *p,
 		struct pack_window **w_curs,
 		off_t offset,
-		off_t len)
+		size_t len)
 {
 	unsigned char *in;
-	unsigned long avail;
+	size_t avail;
 
 	while (len) {
 		in = use_pack(p, w_curs, offset, &avail);
 		if (avail > len)
-			avail = (unsigned long)len;
+			avail = len;
 		sha1write(f, in, avail);
 		offset += avail;
 		len -= avail;
@@ -1388,8 +1388,8 @@ static void check_object(struct object_entry *entry)
 		struct pack_window *w_curs = NULL;
 		const unsigned char *base_ref = NULL;
 		struct object_entry *base_entry;
-		unsigned long used, used_0;
-		unsigned long avail;
+		size_t used, used_0;
+		size_t avail;
 		off_t ofs;
 		unsigned char *buf, c;
 
diff --git a/cache.h b/cache.h
index 26a3eaa..9185763 100644
--- a/cache.h
+++ b/cache.h
@@ -42,10 +42,10 @@
 #include <zlib.h>
 typedef struct git_zstream {
 	z_stream z;
-	unsigned long avail_in;
-	unsigned long avail_out;
-	unsigned long total_in;
-	unsigned long total_out;
+	size_t avail_in;
+	size_t avail_out;
+	size_t total_in;
+	size_t total_out;
 	unsigned char *next_in;
 	unsigned char *next_out;
 } git_zstream;
@@ -62,7 +62,7 @@ void git_deflate_end(git_zstream *);
 int git_deflate_abort(git_zstream *);
 int git_deflate_end_gently(git_zstream *);
 int git_deflate(git_zstream *, int flush);
-unsigned long git_deflate_bound(git_zstream *, unsigned long);
+size_t git_deflate_bound(git_zstream *, size_t);
 
 /* The length in bytes and in hex digits of an object name (SHA-1 value). */
 #define GIT_SHA1_RAWSZ 20
@@ -1678,7 +1678,7 @@ extern int open_pack_index(struct packed_git *);
  */
 extern void close_pack_index(struct packed_git *);
 
-extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
+extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, size_t *);
 extern void close_pack_windows(struct packed_git *);
 extern void close_all_packs(void);
 extern void unuse_pack(struct pack_window **);
diff --git a/pack-check.c b/pack-check.c
index 6f7714f..8cc2364 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -24,13 +24,13 @@ static int compare_entries(const void *e1, const void *e2)
 }
 
 int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, off_t len, unsigned int nr)
+		   off_t offset, size_t len, unsigned int nr)
 {
 	const uint32_t *index_crc;
 	uint32_t data_crc = crc32(0, NULL, 0);
 
 	do {
-		unsigned long avail;
+		size_t avail;
 		void *data = use_pack(p, w_curs, offset, &avail);
 		if (avail > len)
 			avail = len;
@@ -65,7 +65,7 @@ static int verify_packfile(struct packed_git *p,
 
 	git_SHA1_Init(&ctx);
 	do {
-		unsigned long remaining;
+		size_t remaining;
 		unsigned char *in = use_pack(p, w_curs, offset, &remaining);
 		offset += remaining;
 		if (!pack_sig_ofs)
diff --git a/pack.h b/pack.h
index 8294341..ca89ad0 100644
--- a/pack.h
+++ b/pack.h
@@ -78,7 +78,7 @@ struct progress;
 typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*);
 
 extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1);
-extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
+extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, size_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
 extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
 extern off_t write_pack_header(struct sha1file *f, uint32_t);
diff --git a/sha1_file.c b/sha1_file.c
index 97b39b0..3428172 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1228,7 +1228,7 @@ static int in_window(struct pack_window *win, off_t offset)
 unsigned char *use_pack(struct packed_git *p,
 		struct pack_window **w_cursor,
 		off_t offset,
-		unsigned long *left)
+		size_t *left)
 {
 	struct pack_window *win = *w_cursor;
 
@@ -2122,8 +2122,8 @@ int unpack_object_header(struct packed_git *p,
 			 size_t *sizep)
 {
 	unsigned char *base;
-	unsigned long left;
-	unsigned long used;
+	size_t left;
+	size_t used;
 	enum object_type type;
 
 	/* use_pack() assures us we have [base, base + 20) available
diff --git a/wrapper.c b/wrapper.c
index 36630e5..c4253f7 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -67,11 +67,11 @@ static void *do_xmalloc(size_t size, int gentle)
 			ret = malloc(1);
 		if (!ret) {
 			if (!gentle)
-				die("Out of memory, malloc failed (tried to allocate %lu bytes)",
-				    (unsigned long)size);
+				die("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
+				    (uintmax_t)size);
 			else {
-				error("Out of memory, malloc failed (tried to allocate %lu bytes)",
-				      (unsigned long)size);
+				error("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
+				      (uintmax_t)size);
 				return NULL;
 			}
 		}
diff --git a/zlib.c b/zlib.c
index 4223f1a..e98e499 100644
--- a/zlib.c
+++ b/zlib.c
@@ -29,7 +29,7 @@ static const char *zerr_to_string(int status)
  */
 /* #define ZLIB_BUF_MAX ((uInt)-1) */
 #define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
-static inline uInt zlib_buf_cap(unsigned long len)
+static inline uInt zlib_buf_cap(size_t len)
 {
 	return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
 }
@@ -46,8 +46,8 @@ static void zlib_pre_call(git_zstream *s)
 
 static void zlib_post_call(git_zstream *s)
 {
-	unsigned long bytes_consumed;
-	unsigned long bytes_produced;
+	size_t bytes_consumed;
+	size_t bytes_produced;
 
 	bytes_consumed = s->z.next_in - s->next_in;
 	bytes_produced = s->z.next_out - s->next_out;
@@ -150,7 +150,7 @@ int git_inflate(git_zstream *strm, int flush)
 #define deflateBound(c,s)  ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
 #endif
 
-unsigned long git_deflate_bound(git_zstream *strm, unsigned long size)
+size_t git_deflate_bound(git_zstream *strm, size_t size)
 {
 	return deflateBound(&strm->z, size);
 }
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/4] Fix delta offset overflow
  2017-08-10 18:13 [PATCH 3/4] Convert zlib.c to size_t Martin Koegler
@ 2017-08-10 18:13 ` Martin Koegler
  2017-08-10 20:49   ` Junio C Hamano
  2017-08-10 21:52 ` [PATCH 3/4] Convert zlib.c to size_t Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Martin Koegler @ 2017-08-10 18:13 UTC (permalink / raw
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Prevent generating delta offsets beyond 4G.

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 diff-delta.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/diff-delta.c b/diff-delta.c
index 3d5e1ef..633883e 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -454,6 +454,9 @@ create_delta(const struct delta_index *index,
 			moff += msize;
 			msize = left;
 
+			if (moff > 0xffffffff)
+				msize = 0;
+
 			if (msize < 4096) {
 				int j;
 				val = 0;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/4] Fix delta offset overflow
  2017-08-10 18:13 ` [PATCH 4/4] Fix delta offset overflow Martin Koegler
@ 2017-08-10 20:49   ` Junio C Hamano
  2017-08-11  6:57     ` Martin Koegler
  0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2017-08-10 20:49 UTC (permalink / raw
  To: Martin Koegler; +Cc: git, Johannes.Schindelin

Martin Koegler <martin.koegler@chello.at> writes:

> From: Martin Koegler <martin.koegler@chello.at>
>
> Prevent generating delta offsets beyond 4G.
>
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> ---
>  diff-delta.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/diff-delta.c b/diff-delta.c
> index 3d5e1ef..633883e 100644
> --- a/diff-delta.c
> +++ b/diff-delta.c
> @@ -454,6 +454,9 @@ create_delta(const struct delta_index *index,
>  			moff += msize;
>  			msize = left;
>  
> +			if (moff > 0xffffffff)
> +				msize = 0;
> +

The lower 4-byte of moff (before incrementing it with msize) were
already encoded to the output stream before this hunk.  Shouldn't
we be checking if moff would fit in uint32_t _before_ that happens?

IOW, in a much earlier part of that "while (data < top)" loop, there
is a code that tries to find a match that gives us a large msize by
iterating over index->hash[], and it sets msize and moff as a potential
location that we may want to use.  If moff is already big there, then
we shouldn't consider it a match because we cannot encode its location
using 4-byte anyway, no?

Cutting it off at here by resetting msize to 0 might help the next
iteration (I didn't check, but is the effect of it is to corrupt the
"val" rolling checksum and make it unlikely that the hash
computation would not find a correct match?) but it somehow feels
like closing the barn door after the horse has already bolted...

>  			if (msize < 4096) {
>  				int j;
>  				val = 0;

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 3/4] Convert zlib.c to size_t
  2017-08-10 18:13 [PATCH 3/4] Convert zlib.c to size_t Martin Koegler
  2017-08-10 18:13 ` [PATCH 4/4] Fix delta offset overflow Martin Koegler
@ 2017-08-10 21:52 ` Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2017-08-10 21:52 UTC (permalink / raw
  To: Martin Koegler; +Cc: git, Johannes.Schindelin

Martin Koegler <martin.koegler@chello.at> writes:

> From: Martin Koegler <martin.koegler@chello.at>
>
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> ---

Thanks.  I haven't thought things through but this looks sensible.
Will queue.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/4] Fix delta offset overflow
  2017-08-10 20:49   ` Junio C Hamano
@ 2017-08-11  6:57     ` Martin Koegler
  2017-08-11 18:50       ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Martin Koegler @ 2017-08-11  6:57 UTC (permalink / raw
  To: Junio C Hamano; +Cc: Martin Koegler, git, Johannes.Schindelin

On Thu, Aug 10, 2017 at 01:49:24PM -0700, Junio C Hamano wrote:
> The lower 4-byte of moff (before incrementing it with msize) were
> already encoded to the output stream before this hunk.  Shouldn't
> we be checking if moff would fit in uint32_t _before_ that happens?

moff is otherwise only decremented or assigned with an offset generated by
create_delta_index. These offsets are limited by 4GB.

Any larger offets would be a programming bug - so qualify for just a "assert".
 
> IOW, in a much earlier part of that "while (data < top)" loop, there
> is a code that tries to find a match that gives us a large msize by
> iterating over index->hash[], and it sets msize and moff as a potential
> location that we may want to use.  If moff is already big there, then
> we shouldn't consider it a match because we cannot encode its location
> using 4-byte anyway, no?

Recovering from an incorrect moff would add more complex code for a case, that
should not happen. A bailout would be sufficient.
 
> Cutting it off at here by resetting msize to 0 might help the next
> iteration (I didn't check, but is the effect of it is to corrupt the
> "val" rolling checksum and make it unlikely that the hash
> computation would not find a correct match?) but it somehow feels
> like closing the barn door after the horse has already bolted...

The current code produces incorrect deltas - its not just a checksum issue.

By the way: 

Somebody interested in JGIT should also look at these two bugs:

https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaEncoder.java
copy would also encode beyond 4GB - producing truncated delta offset.

https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BinaryDelta.java
apply uses int for decoding length values.

Regards,
Martin

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/4] Fix delta offset overflow
  2017-08-11  6:57     ` Martin Koegler
@ 2017-08-11 18:50       ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2017-08-11 18:50 UTC (permalink / raw
  To: Martin Koegler; +Cc: git, Johannes.Schindelin, Shawn Pearce

Martin Koegler <martin.koegler@chello.at> writes:

> On Thu, Aug 10, 2017 at 01:49:24PM -0700, Junio C Hamano wrote:
>> The lower 4-byte of moff (before incrementing it with msize) were
>> already encoded to the output stream before this hunk.  Shouldn't
>> we be checking if moff would fit in uint32_t _before_ that happens?
>
> moff is otherwise only decremented or assigned with an offset generated by
> create_delta_index. These offsets are limited by 4GB.
>
> Any larger offets would be a programming bug - so qualify for just a "assert".

OK, in that case, I agree that a check before encoding moff into
(upto) 4 output bytes is unnecessary.  Sorry, I didn't read the
function that populates index->hash[] before responding, and I admit
that I haven't read it for a while.

>> Cutting it off at here by resetting msize to 0 might help the next
>> iteration (I didn't check, but is the effect of it is to corrupt the
>> "val" rolling checksum and make it unlikely that the hash
>> computation would not find a correct match?) but it somehow feels
>> like closing the barn door after the horse has already bolted...
>
> The current code produces incorrect deltas - its not just a checksum issue.

Again, I mis-read what role msize was playing in the original (or in
your update).  I'd need to re-read that part of the code to make
sure I get how your change will fix the issue.

Thanks.

> By the way: 
>
> Somebody interested in JGIT should also look at these two bugs:
>
> https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaEncoder.java
> copy would also encode beyond 4GB - producing truncated delta offset.
>
> https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BinaryDelta.java
> apply uses int for decoding length values.

I'll cc: an obvious suspect; thanks for the note.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-08-11 18:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-10 18:13 [PATCH 3/4] Convert zlib.c to size_t Martin Koegler
2017-08-10 18:13 ` [PATCH 4/4] Fix delta offset overflow Martin Koegler
2017-08-10 20:49   ` Junio C Hamano
2017-08-11  6:57     ` Martin Koegler
2017-08-11 18:50       ` Junio C Hamano
2017-08-10 21:52 ` [PATCH 3/4] Convert zlib.c to size_t Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).