git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/9] Convert pack-objects to size_t
@ 2017-08-12  8:47 Martin Koegler
  2017-08-12  8:47 ` [PATCH 2/9] Convert index-pack " Martin Koegler
                   ` (9 more replies)
  0 siblings, 10 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 builtin/pack-objects.c | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index aa70f80..f8db283 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -56,7 +56,7 @@ static struct pack_idx_option pack_idx_opts;
 static const char *base_name;
 static int progress = 1;
 static int window = 10;
-static unsigned long pack_size_limit;
+static size_t pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
@@ -72,11 +72,11 @@ static int use_bitmap_index = -1;
 static int write_bitmap_index;
 static uint16_t write_bitmap_options;
 
-static unsigned long delta_cache_size = 0;
-static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
-static unsigned long cache_max_small_delta_size = 1000;
+static size_t delta_cache_size = 0;
+static size_t max_delta_cache_size = 256 * 1024 * 1024;
+static size_t cache_max_small_delta_size = 1000;
 
-static unsigned long window_memory_limit = 0;
+static size_t window_memory_limit = 0;
 
 /*
  * stats
@@ -124,11 +124,11 @@ static void *get_delta(struct object_entry *entry)
 	return delta_buf;
 }
 
-static unsigned long do_compress(void **pptr, unsigned long size)
+static size_t do_compress(void **pptr, size_t size)
 {
 	git_zstream stream;
 	void *in, *out;
-	unsigned long maxsize;
+	size_t maxsize;
 
 	git_deflate_init(&stream, pack_compression_level);
 	maxsize = git_deflate_bound(&stream, size);
@@ -149,13 +149,13 @@ static unsigned long do_compress(void **pptr, unsigned long size)
 	return stream.total_out;
 }
 
-static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f,
+static size_t write_large_blob_data(struct git_istream *st, struct sha1file *f,
 					   const unsigned char *sha1)
 {
 	git_zstream stream;
 	unsigned char ibuf[1024 * 16];
 	unsigned char obuf[1024 * 16];
-	unsigned long olen = 0;
+	size_t olen = 0;
 
 	git_deflate_init(&stream, pack_compression_level);
 
@@ -196,7 +196,7 @@ static int check_pack_inflate(struct packed_git *p,
 		struct pack_window **w_curs,
 		off_t offset,
 		off_t len,
-		unsigned long expect)
+		size_t expect)
 {
 	git_zstream stream;
 	unsigned char fakebuf[4096], *in;
@@ -238,13 +238,13 @@ static void copy_pack_data(struct sha1file *f,
 }
 
 /* Return 0 if we will bust the pack-size limit */
-static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
-					   unsigned long limit, int usable_delta)
+static size_t write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
+				    size_t limit, int usable_delta)
 {
 	size_t size, datalen;
 	unsigned char header[MAX_PACK_OBJECT_HEADER],
 		      dheader[MAX_PACK_OBJECT_HEADER];
-	unsigned hdrlen;
+	size_t hdrlen;
 	enum object_type type;
 	void *buf;
 	struct git_istream *st = NULL;
@@ -350,17 +350,17 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
 
 /* Return 0 if we will bust the pack-size limit */
 static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
-				unsigned long limit, int usable_delta)
+				size_t limit, int usable_delta)
 {
 	struct packed_git *p = entry->in_pack;
 	struct pack_window *w_curs = NULL;
 	struct revindex_entry *revidx;
 	off_t offset;
 	enum object_type type = entry->type;
-	off_t datalen;
+	size_t datalen;
 	unsigned char header[MAX_PACK_OBJECT_HEADER],
 		      dheader[MAX_PACK_OBJECT_HEADER];
-	unsigned hdrlen;
+	size_t hdrlen;
 
 	if (entry->delta)
 		type = (allow_ofs_delta && entry->delta->idx.offset) ?
@@ -431,7 +431,7 @@ static off_t write_object(struct sha1file *f,
 			  struct object_entry *entry,
 			  off_t write_offset)
 {
-	unsigned long limit;
+	size_t limit;
 	off_t len;
 	int usable_delta, to_reuse;
 
@@ -1120,7 +1120,7 @@ struct pbase_tree_cache {
 	int ref;
 	int temporary;
 	void *tree_data;
-	unsigned long tree_size;
+	size_t tree_size;
 };
 
 static struct pbase_tree_cache *(pbase_tree_cache[256]);
@@ -1759,8 +1759,8 @@ struct unpacked {
 	unsigned depth;
 };
 
-static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
-			   unsigned long delta_size)
+static int delta_cacheable(size_t src_size, size_t trg_size,
+			   size_t delta_size)
 {
 	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
 		return 0;
@@ -1801,7 +1801,7 @@ static pthread_mutex_t progress_mutex;
 #endif
 
 static int try_delta(struct unpacked *trg, struct unpacked *src,
-		     unsigned max_depth, unsigned long *mem_usage)
+		     unsigned max_depth, size_t *mem_usage)
 {
 	struct object_entry *trg_entry = trg->entry;
 	struct object_entry *src_entry = src->entry;
@@ -1962,9 +1962,9 @@ static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 	return m;
 }
 
-static unsigned long free_unpacked(struct unpacked *n)
+static size_t free_unpacked(struct unpacked *n)
 {
-	unsigned long freed_mem = sizeof_delta_index(n->index);
+	size_t freed_mem = sizeof_delta_index(n->index);
 	free_delta_index(n->index);
 	n->index = NULL;
 	if (n->data) {
@@ -1981,7 +1981,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 {
 	uint32_t i, idx = 0, count = 0;
 	struct unpacked *array;
-	unsigned long mem_usage = 0;
+	size_t mem_usage = 0;
 
 	array = xcalloc(window, sizeof(struct unpacked));
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/9] Convert index-pack to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12 13:51   ` Ramsay Jones
  2017-08-12  8:47 ` [PATCH 3/9] Convert unpack-objects " Martin Koegler
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 builtin/index-pack.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 7f3ccd0..bf2d728 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -435,7 +435,7 @@ static int is_delta_type(enum object_type type)
 	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 }
 
-static void *unpack_entry_data(off_t offset, unsigned long size,
+static void *unpack_entry_data(off_t offset, size_t size,
 			       enum object_type type, unsigned char *sha1)
 {
 	static char fixed_buf[8192];
@@ -444,10 +444,10 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
 	void *buf;
 	git_SHA_CTX c;
 	char hdr[32];
-	int hdrlen;
+	size_t hdrlen;
 
 	if (!is_delta_type(type)) {
-		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), size) + 1;
+		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %" PRIuMAX, typename(type), (uintmax_t)size) + 1;
 		git_SHA1_Init(&c);
 		git_SHA1_Update(&c, hdr, hdrlen);
 	} else
@@ -489,7 +489,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
 			      unsigned char *sha1)
 {
 	unsigned char *p;
-	unsigned long size, c;
+	size_t size, c;
 	off_t base_offset;
 	unsigned shift;
 	void *data;
@@ -551,11 +551,11 @@ static void *unpack_raw_entry(struct object_entry *obj,
 }
 
 static void *unpack_data(struct object_entry *obj,
-			 int (*consume)(const unsigned char *, unsigned long, void *),
+			 int (*consume)(const unsigned char *, size_t, void *),
 			 void *cb_data)
 {
 	off_t from = obj[0].idx.offset + obj[0].hdr_size;
-	off_t len = obj[1].idx.offset - from;
+	size_t len = obj[1].idx.offset - from;
 	unsigned char *data, *inbuf;
 	git_zstream stream;
 	int status;
@@ -728,10 +728,10 @@ struct compare_data {
 	struct object_entry *entry;
 	struct git_istream *st;
 	unsigned char *buf;
-	unsigned long buf_size;
+	size_t buf_size;
 };
 
-static int compare_objects(const unsigned char *buf, unsigned long size,
+static int compare_objects(const unsigned char *buf, size_t size,
 			   void *cb_data)
 {
 	struct compare_data *data = cb_data;
@@ -783,7 +783,7 @@ static int check_collison(struct object_entry *entry)
 }
 
 static void sha1_object(const void *data, struct object_entry *obj_entry,
-			unsigned long size, enum object_type type,
+			size_t size, enum object_type type,
 			const struct object_id *oid)
 {
 	void *new_data = NULL;
@@ -1311,11 +1311,11 @@ static int write_compressed(struct sha1file *f, void *in, unsigned int size)
 
 static struct object_entry *append_obj_to_pack(struct sha1file *f,
 			       const unsigned char *sha1, void *buf,
-			       unsigned long size, enum object_type type)
+			       size_t size, enum object_type type)
 {
 	struct object_entry *obj = &objects[nr_objects++];
 	unsigned char header[10];
-	unsigned long s = size;
+	size_t s = size;
 	int n = 0;
 	unsigned char c = (type << 4) | (s & 15);
 	s >>= 4;
@@ -1585,10 +1585,10 @@ static void show_pack_info(int stat_only)
 			chain_histogram[obj_stat[i].delta_depth - 1]++;
 		if (stat_only)
 			continue;
-		printf("%s %-6s %" PRIuMAX " %lu %" PRIuMAX,
+		printf("%s %-6s %" PRIuMAX " %" PRIuMAX " %" PRIuMAX,
 		       oid_to_hex(&obj->idx.oid),
 		       typename(obj->real_type), (uintmax_t)obj->size,
-		       (unsigned long)(obj[1].idx.offset - obj->idx.offset),
+		       (uintmax_t)(obj[1].idx.offset - obj->idx.offset),
 		       (uintmax_t)obj->idx.offset);
 		if (is_delta_type(obj->type)) {
 			struct object_entry *bobj = &objects[obj_stat[i].base_object_no];
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/9] Convert unpack-objects to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
  2017-08-12  8:47 ` [PATCH 2/9] Convert index-pack " Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12 14:07   ` Martin Ågren
  2017-08-12  8:47 ` [PATCH 4/9] Convert archive functions " Martin Koegler
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 builtin/unpack-objects.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 001dd4b..0d8b6b3 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -31,7 +31,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
  */
 struct obj_buffer {
 	char *buffer;
-	unsigned long size;
+	size_t size;
 };
 
 static struct decoration obj_decorate;
@@ -41,7 +41,7 @@ static struct obj_buffer *lookup_object_buffer(struct object *base)
 	return lookup_decoration(&obj_decorate, base);
 }
 
-static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
+static void add_object_buffer(struct object *object, char *buffer, size_t size)
 {
 	struct obj_buffer *obj;
 	obj = xcalloc(1, sizeof(struct obj_buffer));
@@ -93,7 +93,7 @@ static void use(int bytes)
 		die(_("pack exceeds maximum allowed size"));
 }
 
-static void *get_data(unsigned long size)
+static void *get_data(size_t size)
 {
 	git_zstream stream;
 	void *buf = xmallocz(size);
@@ -130,7 +130,7 @@ struct delta_info {
 	struct object_id base_oid;
 	unsigned nr;
 	off_t base_offset;
-	unsigned long size;
+	size_t size;
 	void *delta;
 	struct delta_info *next;
 };
@@ -139,7 +139,7 @@ static struct delta_info *delta_list;
 
 static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
 			      off_t base_offset,
-			      void *delta, unsigned long size)
+			      void *delta, size_t size)
 {
 	struct delta_info *info = xmalloc(sizeof(*info));
 
@@ -226,7 +226,7 @@ static void write_rest(void)
 }
 
 static void added_object(unsigned nr, enum object_type type,
-			 void *data, unsigned long size);
+			 void *data, size_t size);
 
 /*
  * Write out nr-th object from the list, now we know the contents
@@ -234,7 +234,7 @@ static void added_object(unsigned nr, enum object_type type,
  * to be checked at the end.
  */
 static void write_object(unsigned nr, enum object_type type,
-			 void *buf, unsigned long size)
+			 void *buf, size_t size)
 {
 	if (!strict) {
 		if (write_sha1_file(buf, size, typename(type), obj_list[nr].oid.hash) < 0)
@@ -291,7 +291,7 @@ static void resolve_delta(unsigned nr, enum object_type type,
  * resolve all the deltified objects that are based on it.
  */
 static void added_object(unsigned nr, enum object_type type,
-			 void *data, unsigned long size)
+			 void *data, size_t size)
 {
 	struct delta_info **p = &delta_list;
 	struct delta_info *info;
@@ -310,7 +310,7 @@ static void added_object(unsigned nr, enum object_type type,
 	}
 }
 
-static void unpack_non_delta_entry(enum object_type type, unsigned long size,
+static void unpack_non_delta_entry(enum object_type type, size_t size,
 				   unsigned nr)
 {
 	void *buf = get_data(size);
@@ -436,7 +436,7 @@ static void unpack_one(unsigned nr)
 {
 	unsigned shift;
 	unsigned char *pack;
-	unsigned long size, c;
+	size_t size, c;
 	enum object_type type;
 
 	obj_list[nr].offset = consumed_bytes;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 4/9] Convert archive functions to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
  2017-08-12  8:47 ` [PATCH 2/9] Convert index-pack " Martin Koegler
  2017-08-12  8:47 ` [PATCH 3/9] Convert unpack-objects " Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12  8:47 ` [PATCH 5/9] Convert various things " Martin Koegler
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

---
 archive-tar.c | 16 ++++++++--------
 archive-zip.c | 22 +++++++++++-----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 719673d..ee56b2b 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -12,7 +12,7 @@
 #define BLOCKSIZE	(RECORDSIZE * 20)
 
 static char block[BLOCKSIZE];
-static unsigned long offset;
+static size_t offset;
 
 static int tar_umask = 002;
 
@@ -50,12 +50,12 @@ static void write_if_needed(void)
  * queues up writes, so that all our write(2) calls write exactly one
  * full block; pads writes to RECORDSIZE
  */
-static void do_write_blocked(const void *data, unsigned long size)
+static void do_write_blocked(const void *data, size_t size)
 {
 	const char *buf = data;
 
 	if (offset) {
-		unsigned long chunk = BLOCKSIZE - offset;
+		size_t chunk = BLOCKSIZE - offset;
 		if (size < chunk)
 			chunk = size;
 		memcpy(block + offset, buf, chunk);
@@ -77,7 +77,7 @@ static void do_write_blocked(const void *data, unsigned long size)
 
 static void finish_record(void)
 {
-	unsigned long tail;
+	size_t tail;
 	tail = offset % RECORDSIZE;
 	if (tail)  {
 		memset(block + offset, 0, RECORDSIZE - tail);
@@ -86,7 +86,7 @@ static void finish_record(void)
 	write_if_needed();
 }
 
-static void write_blocked(const void *data, unsigned long size)
+static void write_blocked(const void *data, size_t size)
 {
 	do_write_blocked(data, size);
 	finish_record();
@@ -198,10 +198,10 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen)
 
 static void prepare_header(struct archiver_args *args,
 			   struct ustar_header *header,
-			   unsigned int mode, unsigned long size)
+			   unsigned int mode, size_t size)
 {
 	xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 07777);
-	xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? size : 0);
+	xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? (unsigned long)size : 0);
 	xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) args->time);
 
 	xsnprintf(header->uid, sizeof(header->uid), "%07o", 0);
@@ -219,7 +219,7 @@ static void prepare_header(struct archiver_args *args,
 
 static void write_extended_header(struct archiver_args *args,
 				  const unsigned char *sha1,
-				  const void *buffer, unsigned long size)
+				  const void *buffer, size_t size)
 {
 	struct ustar_header header;
 	unsigned int mode;
diff --git a/archive-zip.c b/archive-zip.c
index 4492d64..3a54d80 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -186,12 +186,12 @@ static uint32_t clamp32(uintmax_t n)
 	return (n < max) ? n : max;
 }
 
-static void *zlib_deflate_raw(void *data, unsigned long size,
+static void *zlib_deflate_raw(void *data, size_t size,
 			      int compression_level,
-			      unsigned long *compressed_size)
+			      size_t *compressed_size)
 {
 	git_zstream stream;
-	unsigned long maxsize;
+	size_t maxsize;
 	void *buffer;
 	int result;
 
@@ -219,9 +219,9 @@ static void *zlib_deflate_raw(void *data, unsigned long size,
 	return buffer;
 }
 
-static void write_zip_data_desc(unsigned long size,
-				unsigned long compressed_size,
-				unsigned long crc)
+static void write_zip_data_desc(size_t size,
+				size_t compressed_size,
+				uint32_t crc)
 {
 	if (size >= 0xffffffff || compressed_size >= 0xffffffff) {
 		struct zip64_data_desc trailer;
@@ -243,9 +243,9 @@ static void write_zip_data_desc(unsigned long size,
 }
 
 static void set_zip_header_data_desc(struct zip_local_header *header,
-				     unsigned long size,
-				     unsigned long compressed_size,
-				     unsigned long crc)
+				     size_t size,
+				     size_t compressed_size,
+				     uint32_t crc)
 {
 	copy_le32(header->crc32, crc);
 	copy_le32(header->compressed_size, compressed_size);
@@ -287,8 +287,8 @@ static int write_zip_entry(struct archiver_args *args,
 	size_t header_extra_size = ZIP_EXTRA_MTIME_SIZE;
 	int need_zip64_extra = 0;
 	unsigned long attr2;
-	unsigned long compressed_size;
-	unsigned long crc;
+	size_t compressed_size;
+	uint32_t crc;
 	int method;
 	unsigned char *out;
 	void *deflated = NULL;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 5/9] Convert various things to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (2 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 4/9] Convert archive functions " Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12 13:27   ` Martin Ågren
  2017-08-12  8:47 ` [PATCH 6/9] Use size_t for config parsing Martin Koegler
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

---
 bisect.c                | 2 +-
 blame.c                 | 2 +-
 builtin/fmt-merge-msg.c | 2 +-
 builtin/mktag.c         | 2 +-
 dir.c                   | 4 ++--
 dir.h                   | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/bisect.c b/bisect.c
index 2549eaf..0580c82 100644
--- a/bisect.c
+++ b/bisect.c
@@ -131,7 +131,7 @@ static void show_list(const char *debug, int counted, int nr,
 		struct commit *commit = p->item;
 		unsigned flags = commit->object.flags;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = read_sha1_file(commit->object.sha1, &type, &size);
 		const char *subject_start;
 		int subject_len;
diff --git a/blame.c b/blame.c
index 739a280..f628b42 100644
--- a/blame.c
+++ b/blame.c
@@ -1621,7 +1621,7 @@ static const char *get_next_line(const char *start, const char *end)
 static int prepare_lines(struct blame_scoreboard *sb)
 {
 	const char *buf = sb->final_buf;
-	unsigned long len = sb->final_buf_size;
+	size_t len = sb->final_buf_size;
 	const char *end = buf + len;
 	const char *p;
 	int *lineno;
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index 61ab796..3faf100 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -464,7 +464,7 @@ static void fmt_merge_msg_title(struct strbuf *out,
 static void fmt_tag_signature(struct strbuf *tagbuf,
 			      struct strbuf *sig,
 			      const char *buf,
-			      unsigned long len)
+			      size_t len)
 {
 	const char *tag_body = strstr(buf, "\n\n");
 	if (tag_body) {
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 0663106..ff919a7 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -34,7 +34,7 @@ static int verify_object(const unsigned char *sha1, const char *expected_type)
 	return ret;
 }
 
-static int verify_tag(char *buffer, unsigned long size)
+static int verify_tag(char *buffer, size_t size)
 {
 	int typelen;
 	char type[20];
diff --git a/dir.c b/dir.c
index f161c26..0c7c767 100644
--- a/dir.c
+++ b/dir.c
@@ -180,7 +180,7 @@ static size_t common_prefix_len(const struct pathspec *pathspec)
  */
 char *common_prefix(const struct pathspec *pathspec)
 {
-	unsigned long len = common_prefix_len(pathspec);
+	size_t len = common_prefix_len(pathspec);
 
 	return len ? xmemdupz(pathspec->items[0].match, len) : NULL;
 }
@@ -2673,7 +2673,7 @@ static void load_sha1_stat(struct sha1_stat *sha1_stat,
 	sha1_stat->valid = 1;
 }
 
-struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+struct untracked_cache *read_untracked_extension(const void *data, size_t sz)
 {
 	struct untracked_cache *uc;
 	struct read_data rd;
diff --git a/dir.h b/dir.h
index e371705..709c72c 100644
--- a/dir.h
+++ b/dir.h
@@ -349,7 +349,7 @@ void untracked_cache_remove_from_index(struct index_state *, const char *);
 void untracked_cache_add_to_index(struct index_state *, const char *);
 
 void free_untracked_cache(struct untracked_cache *);
-struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
+struct untracked_cache *read_untracked_extension(const void *data, size_t sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 void add_untracked_cache(struct index_state *istate);
 void remove_untracked_cache(struct index_state *istate);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 6/9] Use size_t for config parsing
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (3 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 5/9] Convert various things " Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12  8:47 ` [PATCH 7/9] Convert ref-filter to size_t Martin Koegler
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 builtin/pack-objects.c |  6 +++---
 config.c               | 27 ++++++++++++++++++++++-----
 config.h               |  1 +
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f8db283..0086da7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2450,7 +2450,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		return 0;
 	}
 	if (!strcmp(k, "pack.windowmemory")) {
-		window_memory_limit = git_config_ulong(k, v);
+		window_memory_limit = git_config_size_t(k, v);
 		return 0;
 	}
 	if (!strcmp(k, "pack.depth")) {
@@ -2458,11 +2458,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		return 0;
 	}
 	if (!strcmp(k, "pack.deltacachesize")) {
-		max_delta_cache_size = git_config_int(k, v);
+		max_delta_cache_size = git_config_size_t(k, v);
 		return 0;
 	}
 	if (!strcmp(k, "pack.deltacachelimit")) {
-		cache_max_small_delta_size = git_config_int(k, v);
+		cache_max_small_delta_size = git_config_size_t(k, v);
 		return 0;
 	}
 	if (!strcmp(k, "pack.writebitmaphashcache")) {
diff --git a/config.c b/config.c
index 831cf8b..444b1d1 100644
--- a/config.c
+++ b/config.c
@@ -863,6 +863,15 @@ static int git_parse_ssize_t(const char *value, ssize_t *ret)
 	return 1;
 }
 
+static int git_parse_size_t(const char *value, size_t *ret)
+{
+	uintmax_t tmp;
+	if (!git_parse_unsigned(value, &tmp, maximum_signed_value_of_type(size_t)))
+		return 0;
+	*ret = tmp;
+	return 1;
+}
+
 NORETURN
 static void die_bad_number(const char *name, const char *value)
 {
@@ -929,6 +938,14 @@ ssize_t git_config_ssize_t(const char *name, const char *value)
 	return ret;
 }
 
+size_t git_config_size_t(const char *name, const char *value)
+{
+	size_t ret;
+	if (!git_parse_size_t(value, &ret))
+		die_bad_number(name, value);
+	return ret;
+}
+
 int git_parse_maybe_bool(const char *value)
 {
 	if (!value)
@@ -1105,7 +1122,7 @@ static int git_default_core_config(const char *var, const char *value)
 
 	if (!strcmp(var, "core.packedgitwindowsize")) {
 		int pgsz_x2 = getpagesize() * 2;
-		packed_git_window_size = git_config_ulong(var, value);
+		packed_git_window_size = git_config_size_t(var, value);
 
 		/* This value must be multiple of (pagesize * 2) */
 		packed_git_window_size /= pgsz_x2;
@@ -1116,17 +1133,17 @@ static int git_default_core_config(const char *var, const char *value)
 	}
 
 	if (!strcmp(var, "core.bigfilethreshold")) {
-		big_file_threshold = git_config_ulong(var, value);
+		big_file_threshold = git_config_size_t(var, value);
 		return 0;
 	}
 
 	if (!strcmp(var, "core.packedgitlimit")) {
-		packed_git_limit = git_config_ulong(var, value);
+		packed_git_limit = git_config_size_t(var, value);
 		return 0;
 	}
 
 	if (!strcmp(var, "core.deltabasecachelimit")) {
-		delta_base_cache_limit = git_config_ulong(var, value);
+		delta_base_cache_limit = git_config_size_t(var, value);
 		return 0;
 	}
 
@@ -1360,7 +1377,7 @@ int git_default_config(const char *var, const char *value, void *dummy)
 	}
 
 	if (!strcmp(var, "pack.packsizelimit")) {
-		pack_size_limit_cfg = git_config_ulong(var, value);
+		pack_size_limit_cfg = git_config_size_t(var, value);
 		return 0;
 	}
 
diff --git a/config.h b/config.h
index fb76729..c264ee8 100644
--- a/config.h
+++ b/config.h
@@ -54,6 +54,7 @@ extern int git_config_int(const char *, const char *);
 extern int64_t git_config_int64(const char *, const char *);
 extern unsigned long git_config_ulong(const char *, const char *);
 extern ssize_t git_config_ssize_t(const char *, const char *);
+extern size_t git_config_size_t(const char *, const char *);
 extern int git_config_bool_or_int(const char *, const char *, int *);
 extern int git_config_bool(const char *, const char *);
 extern int git_config_maybe_bool(const char *, const char *);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 7/9] Convert ref-filter to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (4 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 6/9] Use size_t for config parsing Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12  8:47 ` [PATCH 8/9] Convert tree-walk " Martin Koegler
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 ref-filter.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 5c903a5..30f249c 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -724,7 +724,7 @@ static int grab_objectname(const char *name, const unsigned char *sha1,
 }
 
 /* See grab_values */
-static void grab_common_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_common_values(struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	int i;
 
@@ -739,7 +739,7 @@ static void grab_common_values(struct atom_value *val, int deref, struct object
 			v->s = typename(obj->type);
 		else if (!strcmp(name, "objectsize")) {
 			v->value = sz;
-			v->s = xstrfmt("%lu", sz);
+			v->s = xstrfmt("%" PRIuMAX, (uintmax_t)sz);
 		}
 		else if (deref)
 			grab_objectname(name, obj->oid.hash, v, &used_atom[i]);
@@ -747,7 +747,7 @@ static void grab_common_values(struct atom_value *val, int deref, struct object
 }
 
 /* See grab_values */
-static void grab_tag_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_tag_values(struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	int i;
 	struct tag *tag = (struct tag *) obj;
@@ -769,7 +769,7 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob
 }
 
 /* See grab_values */
-static void grab_commit_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_commit_values(struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	int i;
 	struct commit *commit = (struct commit *) obj;
@@ -786,7 +786,7 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
 		}
 		else if (!strcmp(name, "numparent")) {
 			v->value = commit_list_count(commit->parents);
-			v->s = xstrfmt("%lu", (unsigned long)v->value);
+			v->s = xstrfmt("%" PRIuMAX, (uintmax_t)v->value);
 		}
 		else if (!strcmp(name, "parent")) {
 			struct commit_list *parents;
@@ -802,7 +802,7 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
 	}
 }
 
-static const char *find_wholine(const char *who, int wholen, const char *buf, unsigned long sz)
+static const char *find_wholine(const char *who, int wholen, const char *buf, size_t sz)
 {
 	const char *eol;
 	while (*buf) {
@@ -848,7 +848,7 @@ static const char *copy_email(const char *buf)
 	return xmemdupz(email, eoemail + 1 - email);
 }
 
-static char *copy_subject(const char *buf, unsigned long len)
+static char *copy_subject(const char *buf, size_t len)
 {
 	char *r = xmemdupz(buf, len);
 	int i;
@@ -898,7 +898,7 @@ static void grab_date(const char *buf, struct atom_value *v, const char *atomnam
 }
 
 /* See grab_values */
-static void grab_person(const char *who, struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_person(const char *who, struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	int i;
 	int wholen = strlen(who);
@@ -957,11 +957,11 @@ static void grab_person(const char *who, struct atom_value *val, int deref, stru
 	}
 }
 
-static void find_subpos(const char *buf, unsigned long sz,
-			const char **sub, unsigned long *sublen,
-			const char **body, unsigned long *bodylen,
-			unsigned long *nonsiglen,
-			const char **sig, unsigned long *siglen)
+static void find_subpos(const char *buf, size_t sz,
+			const char **sub, size_t *sublen,
+			const char **body, size_t *bodylen,
+			size_t *nonsiglen,
+			const char **sig, size_t *siglen)
 {
 	const char *eol;
 	/* skip past header until we hit empty line */
@@ -1005,7 +1005,7 @@ static void find_subpos(const char *buf, unsigned long sz,
  * If 'lines' is greater than 0, append that many lines from the given
  * 'buf' of length 'size' to the given strbuf.
  */
-static void append_lines(struct strbuf *out, const char *buf, unsigned long size, int lines)
+static void append_lines(struct strbuf *out, const char *buf, size_t size, int lines)
 {
 	int i;
 	const char *sp, *eol;
@@ -1026,11 +1026,11 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
-	unsigned long sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1100,7 +1100,7 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf, size_t sz)
 {
 	grab_common_values(val, deref, obj, buf, sz);
 	switch (obj->type) {
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 8/9] Convert tree-walk to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (5 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 7/9] Convert ref-filter to size_t Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12  8:47 ` [PATCH 9/9] Convert xdiff-interface " Martin Koegler
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 tree-walk.c | 17 +++++++++--------
 tree-walk.h |  4 ++--
 tree.h      |  2 +-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tree-walk.c b/tree-walk.c
index 7c9f9e3..a7d8b2a 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -22,10 +22,11 @@ static const char *get_mode(const char *str, unsigned int *modep)
 	return str;
 }
 
-static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
+static int decode_tree_entry(struct tree_desc *desc, const char *buf, size_t size, struct strbuf *err)
 {
 	const char *path;
-	unsigned int mode, len;
+	unsigned int mode;
+	size_t len;
 
 	if (size < 23 || buf[size - 21]) {
 		strbuf_addstr(err, _("too-short tree object"));
@@ -51,7 +52,7 @@ static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned l
 	return 0;
 }
 
-static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
+static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, size_t size, struct strbuf *err)
 {
 	desc->buffer = buffer;
 	desc->size = size;
@@ -60,7 +61,7 @@ static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, u
 	return 0;
 }
 
-void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
+void init_tree_desc(struct tree_desc *desc, const void *buffer, size_t size)
 {
 	struct strbuf err = STRBUF_INIT;
 	if (init_tree_desc_internal(desc, buffer, size, &err))
@@ -68,7 +69,7 @@ void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long si
 	strbuf_release(&err);
 }
 
-int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
+int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, size_t size)
 {
 	struct strbuf err = STRBUF_INIT;
 	int result = init_tree_desc_internal(desc, buffer, size, &err);
@@ -106,8 +107,8 @@ static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err
 {
 	const void *buf = desc->buffer;
 	const unsigned char *end = desc->entry.oid->hash + 20;
-	unsigned long size = desc->size;
-	unsigned long len = end - (const unsigned char *)buf;
+	size_t size = desc->size;
+	size_t len = end - (const unsigned char *)buf;
 
 	if (size < len)
 		die(_("too-short tree file"));
@@ -487,7 +488,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 
 struct dir_state {
 	void *tree;
-	unsigned long size;
+	size_t size;
 	unsigned char sha1[20];
 };
 
diff --git a/tree-walk.h b/tree-walk.h
index 68bb78b..9160cc2 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -32,8 +32,8 @@ static inline int tree_entry_len(const struct name_entry *ne)
 
 void update_tree_entry(struct tree_desc *);
 int update_tree_entry_gently(struct tree_desc *);
-void init_tree_desc(struct tree_desc *desc, const void *buf, unsigned long size);
-int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size);
+void init_tree_desc(struct tree_desc *desc, const void *buf, size_t size);
+int init_tree_desc_gently(struct tree_desc *desc, const void *buf, size_t size);
 
 /*
  * Helper function that does both tree_entry_extract() and update_tree_entry()
diff --git a/tree.h b/tree.h
index 1dac7fc..69b9233 100644
--- a/tree.h
+++ b/tree.h
@@ -9,7 +9,7 @@ struct strbuf;
 struct tree {
 	struct object object;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 };
 
 struct tree *lookup_tree(const struct object_id *oid);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 9/9] Convert xdiff-interface to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (6 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 8/9] Convert tree-walk " Martin Koegler
@ 2017-08-12  8:47 ` Martin Koegler
  2017-08-12  9:59 ` [PATCH 1/9] Convert pack-objects " Torsten Bögershausen
  2017-08-12 13:47 ` Ramsay Jones
  9 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-12  8:47 UTC (permalink / raw)
  To: git, gitster, Johannes.Schindelin; +Cc: Martin Koegler

From: Martin Koegler <martin.koegler@chello.at>

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
---
 combine-diff.c     |  2 +-
 diff.c             | 28 ++++++++++++++--------------
 diffcore-pickaxe.c |  4 ++--
 xdiff-interface.c  |  6 +++---
 xdiff-interface.h  |  6 +++---
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/combine-diff.c b/combine-diff.c
index acf39ec..ad5d177 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -343,7 +343,7 @@ struct combine_diff_state {
 	struct sline *lost_bucket;
 };
 
-static void consume_line(void *state_, char *line, unsigned long len)
+static void consume_line(void *state_, char *line, size_t len)
 {
 	struct combine_diff_state *state = state_;
 	if (5 < len && !memcmp("@@ -", line, 4)) {
diff --git a/diff.c b/diff.c
index dd6ff0a..1f56205 100644
--- a/diff.c
+++ b/diff.c
@@ -464,7 +464,7 @@ static struct diff_tempfile {
 	struct tempfile tempfile;
 } diff_temp[2];
 
-typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
+typedef size_t (*sane_truncate_fn)(char *line, size_t len);
 
 struct emit_callback {
 	int color_diff;
@@ -519,7 +519,7 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 }
 
 /* like fill_mmfile, but only for size, so we can avoid retrieving blob */
-static unsigned long diff_filespec_size(struct diff_filespec *one)
+static size_t diff_filespec_size(struct diff_filespec *one)
 {
 	if (!DIFF_FILE_VALID(one))
 		return 0;
@@ -1522,7 +1522,7 @@ struct diff_words_buffer {
 	int orig_nr, orig_alloc;
 };
 
-static void diff_words_append(char *line, unsigned long len,
+static void diff_words_append(char *line, size_t len,
 		struct diff_words_buffer *buffer)
 {
 	ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc);
@@ -1651,7 +1651,7 @@ static int color_words_output_graph_prefix(struct diff_words_data *diff_words)
 	}
 }
 
-static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
+static void fn_out_diff_words_aux(void *priv, char *line, size_t len)
 {
 	struct diff_words_data *diff_words = priv;
 	struct diff_words_style *style = diff_words->style;
@@ -1962,10 +1962,10 @@ const char *diff_line_prefix(struct diff_options *opt)
 	return msgbuf->buf;
 }
 
-static unsigned long sane_truncate_line(struct emit_callback *ecb, char *line, unsigned long len)
+static size_t sane_truncate_line(struct emit_callback *ecb, char *line, size_t len)
 {
 	const char *cp;
-	unsigned long allot;
+	size_t allot;
 	size_t l = len;
 
 	if (ecb->truncate)
@@ -1995,7 +1995,7 @@ static void find_lno(const char *line, struct emit_callback *ecbdata)
 	ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10);
 }
 
-static void fn_out_consume(void *priv, char *line, unsigned long len)
+static void fn_out_consume(void *priv, char *line, size_t len)
 {
 	struct emit_callback *ecbdata = priv;
 	const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
@@ -2199,7 +2199,7 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
 	return x;
 }
 
-static void diffstat_consume(void *priv, char *line, unsigned long len)
+static void diffstat_consume(void *priv, char *line, size_t len)
 {
 	struct diffstat_t *diffstat = priv;
 	struct diffstat_file *x = diffstat->files[diffstat->nr - 1];
@@ -2854,7 +2854,7 @@ struct checkdiff_t {
 	unsigned status;
 };
 
-static int is_conflict_marker(const char *line, int marker_size, unsigned long len)
+static int is_conflict_marker(const char *line, int marker_size, size_t len)
 {
 	char firstchar;
 	int cnt;
@@ -2877,7 +2877,7 @@ static int is_conflict_marker(const char *line, int marker_size, unsigned long l
 	return 1;
 }
 
-static void checkdiff_consume(void *priv, char *line, unsigned long len)
+static void checkdiff_consume(void *priv, char *line, size_t len)
 {
 	struct checkdiff_t *data = priv;
 	int marker_size = data->conflict_marker_size;
@@ -3692,7 +3692,7 @@ void diff_free_filespec_data(struct diff_filespec *s)
 
 static void prep_temp_blob(const char *path, struct diff_tempfile *temp,
 			   void *blob,
-			   unsigned long size,
+			   size_t size,
 			   const struct object_id *oid,
 			   int mode)
 {
@@ -5301,7 +5301,7 @@ struct patch_id_t {
 	int patchlen;
 };
 
-static int remove_space(char *line, int len)
+static size_t remove_space(char *line, size_t len)
 {
 	int i;
 	char *dst = line;
@@ -5314,10 +5314,10 @@ static int remove_space(char *line, int len)
 	return dst - line;
 }
 
-static void patch_id_consume(void *priv, char *line, unsigned long len)
+static void patch_id_consume(void *priv, char *line, size_t len)
 {
 	struct patch_id_t *data = priv;
-	int new_len;
+	size_t new_len;
 
 	/* Ignore line numbers when computing the SHA1 of the patch */
 	if (starts_with(line, "@@ -"))
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 341529b..db73cb4 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -19,7 +19,7 @@ struct diffgrep_cb {
 	int hit;
 };
 
-static void diffgrep_consume(void *priv, char *line, unsigned long len)
+static void diffgrep_consume(void *priv, char *line, size_t len)
 {
 	struct diffgrep_cb *data = priv;
 	regmatch_t regmatch;
@@ -70,7 +70,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
 static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
 {
 	unsigned int cnt;
-	unsigned long sz;
+	size_t sz;
 	const char *data;
 
 	sz = mf->size;
diff --git a/xdiff-interface.c b/xdiff-interface.c
index d82cd4a..65cb08b 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -57,12 +57,12 @@ int parse_hunk_header(char *line, int len,
 	return -!!memcmp(cp, " @@", 3);
 }
 
-static void consume_one(void *priv_, char *s, unsigned long size)
+static void consume_one(void *priv_, char *s, size_t size)
 {
 	struct xdiff_emit_state *priv = priv_;
 	char *ep;
 	while (size) {
-		unsigned long this_size;
+		size_t this_size;
 		ep = memchr(s, '\n', size);
 		this_size = (ep == NULL) ? size : (ep - s + 1);
 		priv->consume(priv->consume_callback_data, s, this_size);
@@ -197,7 +197,7 @@ void read_mmblob(mmfile_t *ptr, const struct object_id *oid)
 }
 
 #define FIRST_FEW_BYTES 8000
-int buffer_is_binary(const char *ptr, unsigned long size)
+int buffer_is_binary(const char *ptr, size_t size)
 {
 	if (FIRST_FEW_BYTES < size)
 		size = FIRST_FEW_BYTES;
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 6f6ba90..c7c1676 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -11,18 +11,18 @@
  */
 #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023)
 
-typedef void (*xdiff_emit_consume_fn)(void *, char *, unsigned long);
+typedef void (*xdiff_emit_consume_fn)(void *, char *, size_t);
 
 int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *ecb);
 int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 		  xdiff_emit_consume_fn fn, void *consume_callback_data,
 		  xpparam_t const *xpp, xdemitconf_t const *xecfg);
-int parse_hunk_header(char *line, int len,
+int parse_hunk_header(char *line, int size_t,
 		      int *ob, int *on,
 		      int *nb, int *nn);
 int read_mmfile(mmfile_t *ptr, const char *filename);
 void read_mmblob(mmfile_t *ptr, const struct object_id *oid);
-int buffer_is_binary(const char *ptr, unsigned long size);
+int buffer_is_binary(const char *ptr, size_t size);
 
 extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line, int cflags);
 extern void xdiff_clear_find_func(xdemitconf_t *xecfg);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (7 preceding siblings ...)
  2017-08-12  8:47 ` [PATCH 9/9] Convert xdiff-interface " Martin Koegler
@ 2017-08-12  9:59 ` Torsten Bögershausen
  2017-08-13 18:27   ` Martin Koegler
  2017-08-12 13:47 ` Ramsay Jones
  9 siblings, 1 reply; 27+ messages in thread
From: Torsten Bögershausen @ 2017-08-12  9:59 UTC (permalink / raw)
  To: Martin Koegler; +Cc: git, gitster, Johannes.Schindelin

Thanks for working on this - unfortunatly the patch does not apply on
git.git/master.

Which baseline did you use ?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/9] Convert various things to size_t
  2017-08-12  8:47 ` [PATCH 5/9] Convert various things " Martin Koegler
@ 2017-08-12 13:27   ` Martin Ågren
  2017-08-13 17:48     ` Martin Koegler
  0 siblings, 1 reply; 27+ messages in thread
From: Martin Ågren @ 2017-08-12 13:27 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Git Mailing List, Junio C Hamano, Johannes.Schindelin

On 12 August 2017 at 10:47, Martin Koegler <martin.koegler@chello.at> wrote:
> From: Martin Koegler <martin.koegler@chello.at>
>
> ---
>  bisect.c                | 2 +-
>  blame.c                 | 2 +-
>  builtin/fmt-merge-msg.c | 2 +-
>  builtin/mktag.c         | 2 +-
>  dir.c                   | 4 ++--
>  dir.h                   | 2 +-
>  6 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/bisect.c b/bisect.c
> index 2549eaf..0580c82 100644
> --- a/bisect.c
> +++ b/bisect.c
> @@ -131,7 +131,7 @@ static void show_list(const char *debug, int counted, int nr,
>                 struct commit *commit = p->item;
>                 unsigned flags = commit->object.flags;
>                 enum object_type type;
> -               unsigned long size;
> +               size_t size;
>                 char *buf = read_sha1_file(commit->object.sha1, &type, &size);
>                 const char *subject_start;
>                 int subject_len;

Would this need to be done in a patch where read_sha1_file is converted?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
                   ` (8 preceding siblings ...)
  2017-08-12  9:59 ` [PATCH 1/9] Convert pack-objects " Torsten Bögershausen
@ 2017-08-12 13:47 ` Ramsay Jones
  2017-08-13 18:30   ` Martin Koegler
  9 siblings, 1 reply; 27+ messages in thread
From: Ramsay Jones @ 2017-08-12 13:47 UTC (permalink / raw)
  To: Martin Koegler, git, gitster, Johannes.Schindelin



On 12/08/17 09:47, Martin Koegler wrote:
> From: Martin Koegler <martin.koegler@chello.at>
> 
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> ---
>  builtin/pack-objects.c | 48 ++++++++++++++++++++++++------------------------
>  1 file changed, 24 insertions(+), 24 deletions(-)
> 
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index aa70f80..f8db283 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -56,7 +56,7 @@ static struct pack_idx_option pack_idx_opts;
>  static const char *base_name;
>  static int progress = 1;
>  static int window = 10;
> -static unsigned long pack_size_limit;
> +static size_t pack_size_limit;
>  static int depth = 50;
>  static int delta_search_threads;
>  static int pack_to_stdout;
> @@ -72,11 +72,11 @@ static int use_bitmap_index = -1;
>  static int write_bitmap_index;
>  static uint16_t write_bitmap_options;
>  
> -static unsigned long delta_cache_size = 0;
> -static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
> -static unsigned long cache_max_small_delta_size = 1000;
> +static size_t delta_cache_size = 0;
> +static size_t max_delta_cache_size = 256 * 1024 * 1024;
> +static size_t cache_max_small_delta_size = 1000;
>  
> -static unsigned long window_memory_limit = 0;
> +static size_t window_memory_limit = 0;
>  
>  /*
>   * stats
> @@ -124,11 +124,11 @@ static void *get_delta(struct object_entry *entry)
>  	return delta_buf;
>  }
>  
> -static unsigned long do_compress(void **pptr, unsigned long size)
> +static size_t do_compress(void **pptr, size_t size)
>  {
>  	git_zstream stream;
>  	void *in, *out;
> -	unsigned long maxsize;
> +	size_t maxsize;
>  
>  	git_deflate_init(&stream, pack_compression_level);
>  	maxsize = git_deflate_bound(&stream, size);
> @@ -149,13 +149,13 @@ static unsigned long do_compress(void **pptr, unsigned long size)
>  	return stream.total_out;
>  }
>  
> -static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f,
> +static size_t write_large_blob_data(struct git_istream *st, struct sha1file *f,
>  					   const unsigned char *sha1)
>  {
>  	git_zstream stream;
>  	unsigned char ibuf[1024 * 16];
>  	unsigned char obuf[1024 * 16];
> -	unsigned long olen = 0;
> +	size_t olen = 0;
>  
>  	git_deflate_init(&stream, pack_compression_level);
>  
> @@ -196,7 +196,7 @@ static int check_pack_inflate(struct packed_git *p,
>  		struct pack_window **w_curs,
>  		off_t offset,
>  		off_t len,
> -		unsigned long expect)
> +		size_t expect)
>  {
>  	git_zstream stream;
>  	unsigned char fakebuf[4096], *in;
> @@ -238,13 +238,13 @@ static void copy_pack_data(struct sha1file *f,
>  }
>  
>  /* Return 0 if we will bust the pack-size limit */
> -static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
> -					   unsigned long limit, int usable_delta)
> +static size_t write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
> +				    size_t limit, int usable_delta)
>  {
>  	size_t size, datalen;
>  	unsigned char header[MAX_PACK_OBJECT_HEADER],
>  		      dheader[MAX_PACK_OBJECT_HEADER];
> -	unsigned hdrlen;
> +	size_t hdrlen;
>  	enum object_type type;
>  	void *buf;
>  	struct git_istream *st = NULL;
> @@ -350,17 +350,17 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
>  
>  /* Return 0 if we will bust the pack-size limit */
>  static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
> -				unsigned long limit, int usable_delta)
> +				size_t limit, int usable_delta)
>  {
>  	struct packed_git *p = entry->in_pack;
>  	struct pack_window *w_curs = NULL;
>  	struct revindex_entry *revidx;
>  	off_t offset;
>  	enum object_type type = entry->type;
> -	off_t datalen;
> +	size_t datalen;

On 32-bit Linux, off_t is 64-bit and size_t is 32-bit.

Later in this function, datalen is set the the difference of
two (revidx) off_t offsets. I haven't applied and tried this
on my 32-bit Linux install yet (that installation only gets
booted about once a week), but I suspect a 'possible data-loss'
warning.

For other reasons, I don't think it is not possible for
a 32-bit system to create a packfile with an individual object
of size > 4GB anyway, so this may not be a problem in practice.
(a 32-bit system should be able to read such a packfile, presumably
created on a 64-bit system, and refuse to load such an object, of
course).

ATB,
Ramsay Jones

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Convert index-pack to size_t
  2017-08-12  8:47 ` [PATCH 2/9] Convert index-pack " Martin Koegler
@ 2017-08-12 13:51   ` Ramsay Jones
  0 siblings, 0 replies; 27+ messages in thread
From: Ramsay Jones @ 2017-08-12 13:51 UTC (permalink / raw)
  To: Martin Koegler, git, gitster, Johannes.Schindelin



On 12/08/17 09:47, Martin Koegler wrote:
> From: Martin Koegler <martin.koegler@chello.at>
> 
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> ---
>  builtin/index-pack.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index 7f3ccd0..bf2d728 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -435,7 +435,7 @@ static int is_delta_type(enum object_type type)
>  	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
>  }
>  
> -static void *unpack_entry_data(off_t offset, unsigned long size,
> +static void *unpack_entry_data(off_t offset, size_t size,
>  			       enum object_type type, unsigned char *sha1)
>  {
>  	static char fixed_buf[8192];
> @@ -444,10 +444,10 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
>  	void *buf;
>  	git_SHA_CTX c;
>  	char hdr[32];
> -	int hdrlen;
> +	size_t hdrlen;
>  
>  	if (!is_delta_type(type)) {
> -		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), size) + 1;
> +		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %" PRIuMAX, typename(type), (uintmax_t)size) + 1;
>  		git_SHA1_Init(&c);
>  		git_SHA1_Update(&c, hdr, hdrlen);
>  	} else
> @@ -489,7 +489,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
>  			      unsigned char *sha1)
>  {
>  	unsigned char *p;
> -	unsigned long size, c;
> +	size_t size, c;
>  	off_t base_offset;
>  	unsigned shift;
>  	void *data;
> @@ -551,11 +551,11 @@ static void *unpack_raw_entry(struct object_entry *obj,
>  }
>  
>  static void *unpack_data(struct object_entry *obj,
> -			 int (*consume)(const unsigned char *, unsigned long, void *),
> +			 int (*consume)(const unsigned char *, size_t, void *),
>  			 void *cb_data)
>  {
>  	off_t from = obj[0].idx.offset + obj[0].hdr_size;
> -	off_t len = obj[1].idx.offset - from;
> +	size_t len = obj[1].idx.offset - from;

Similar comment to previous patch.

ATB,
Ramsay Jones


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/9] Convert unpack-objects to size_t
  2017-08-12  8:47 ` [PATCH 3/9] Convert unpack-objects " Martin Koegler
@ 2017-08-12 14:07   ` Martin Ågren
  2017-08-13 18:25     ` Martin Koegler
  0 siblings, 1 reply; 27+ messages in thread
From: Martin Ågren @ 2017-08-12 14:07 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Git Mailing List, Junio C Hamano, Johannes.Schindelin

On 12 August 2017 at 10:47, Martin Koegler <martin.koegler@chello.at> wrote:
> From: Martin Koegler <martin.koegler@chello.at>
>
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> ---
>  builtin/unpack-objects.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
> index 001dd4b..0d8b6b3 100644
> --- a/builtin/unpack-objects.c
> +++ b/builtin/unpack-objects.c
> @@ -31,7 +31,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
>   */
>  struct obj_buffer {
>         char *buffer;
> -       unsigned long size;
> +       size_t size;
>  };
>
>  static struct decoration obj_decorate;
> @@ -41,7 +41,7 @@ static struct obj_buffer *lookup_object_buffer(struct object *base)
>         return lookup_decoration(&obj_decorate, base);
>  }
>
> -static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
> +static void add_object_buffer(struct object *object, char *buffer, size_t size)
>  {
>         struct obj_buffer *obj;
>         obj = xcalloc(1, sizeof(struct obj_buffer));
> @@ -93,7 +93,7 @@ static void use(int bytes)
>                 die(_("pack exceeds maximum allowed size"));
>  }
>
> -static void *get_data(unsigned long size)
> +static void *get_data(size_t size)
>  {
>         git_zstream stream;
>         void *buf = xmallocz(size);

"size" is handed over to a "git_zstream" and goes through zlib.c,
eventually ending up in zlib, which is outside Git's control, and which
seems to work with "uLong"s. How do these kind of changes interact with
zlib? For example, I wonder about this line further down in get_data:

if (stream.total_out == size && ret == Z_STREAM_END)

If total_out isn't converted, I guess this would never hit if "size" is
too large. And if total_out /is/ converted, I guess we'd risk truncation
in zlib_pre_call in zlib.c. Maybe that might cause Git and zlib to have
different ideas about how much data is available and/or should be
processed. Maybe we could then hit things like this in git.c:

if (s->z.total_out != s->total_out + bytes_produced)
        die("BUG: total_out mismatch");

I am not very familiar with zlib, so apologies if this is just noise...

Martin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/9] Convert various things to size_t
  2017-08-12 13:27   ` Martin Ågren
@ 2017-08-13 17:48     ` Martin Koegler
  0 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-13 17:48 UTC (permalink / raw)
  To: Martin Ågren
  Cc: Martin Koegler, Git Mailing List, Junio C Hamano,
	Johannes.Schindelin

On Sat, Aug 12, 2017 at 03:27:21PM +0200, Martin Ågren wrote:
> On 12 August 2017 at 10:47, Martin Koegler <martin.koegler@chello.at> wrote:
> > From: Martin Koegler <martin.koegler@chello.at>
> >
> > ---
> >  bisect.c                | 2 +-
> >  blame.c                 | 2 +-
> >  builtin/fmt-merge-msg.c | 2 +-
> >  builtin/mktag.c         | 2 +-
> >  dir.c                   | 4 ++--
> >  dir.h                   | 2 +-
> >  6 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/bisect.c b/bisect.c
> > index 2549eaf..0580c82 100644
> > --- a/bisect.c
> > +++ b/bisect.c
> > @@ -131,7 +131,7 @@ static void show_list(const char *debug, int counted, int nr,
> >                 struct commit *commit = p->item;
> >                 unsigned flags = commit->object.flags;
> >                 enum object_type type;
> > -               unsigned long size;
> > +               size_t size;
> >                 char *buf = read_sha1_file(commit->object.sha1, &type, &size);
> >                 const char *subject_start;
> >                 int subject_len;
> 
> Would this need to be done in a patch where read_sha1_file is converted?

I missed this first, because it is debug code requiring a code change to be enabled. 

It probably should be merged in my second patch. On the other hand that patch is already considered too large.

Regards,
Martin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/9] Convert unpack-objects to size_t
  2017-08-12 14:07   ` Martin Ågren
@ 2017-08-13 18:25     ` Martin Koegler
  0 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-13 18:25 UTC (permalink / raw)
  To: Martin Ågren
  Cc: Martin Koegler, Git Mailing List, Junio C Hamano,
	Johannes.Schindelin

On Sat, Aug 12, 2017 at 04:07:50PM +0200, Martin Ågren wrote:
> On 12 August 2017 at 10:47, Martin Koegler <martin.koegler@chello.at> wrote:
> "size" is handed over to a "git_zstream" and goes through zlib.c,
> eventually ending up in zlib, which is outside Git's control, and which
> seems to work with "uLong"s. How do these kind of changes interact with
> zlib? For example, I wonder about this line further down in get_data:
> 
> if (stream.total_out == size && ret == Z_STREAM_END)
> 
> If total_out isn't converted, I guess this would never hit if "size" is
> too large. And if total_out /is/ converted, I guess we'd risk truncation

I posted a patch changing git_zstream.

> in zlib_pre_call in zlib.c. Maybe that might cause Git and zlib to have
> different ideas about how much data is available and/or should be
> processed. Maybe we could then hit things like this in git.c:
> 
> if (s->z.total_out != s->total_out + bytes_produced)
>         die("BUG: total_out mismatch");
> 
> I am not very familiar with zlib, so apologies if this is just noise...

You are right, if sizeof(size_t) != sizeof(unsigned long), there can be truncations.

Currently, an object size is read/passed as ulong including to the memory allocation functions.
(x)malloc & Co take a length - so the whole GIT code might assume a larger object size than the
memory allocation functions.
Migrating everything to size_t means, that we move the truncation locations to places, where values 
enter/leave GIT.

My patches are just a starting point to fix the size handling. They concentrate of fixing data types - 
not avoiding any possible overflow.  Merging them will already be a challenging task, because they 
touch many functions and will likely conflict with other changes (eg. moving functions).

Regards,
Martin


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-12  9:59 ` [PATCH 1/9] Convert pack-objects " Torsten Bögershausen
@ 2017-08-13 18:27   ` Martin Koegler
  0 siblings, 0 replies; 27+ messages in thread
From: Martin Koegler @ 2017-08-13 18:27 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Martin Koegler, git, gitster, Johannes.Schindelin

On Sat, Aug 12, 2017 at 11:59:15AM +0200, Torsten Bögershausen wrote:
> Thanks for working on this - unfortunatly the patch does not apply on
> git.git/master.
> 
> Which baseline did you use ?

next - 98096fd7a85b93626db8757f944f2d8ffdf7e96a
It accumulated to 19 patches.

Regards,
Martin 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-12 13:47 ` Ramsay Jones
@ 2017-08-13 18:30   ` Martin Koegler
  2017-08-13 19:45     ` Ramsay Jones
  0 siblings, 1 reply; 27+ messages in thread
From: Martin Koegler @ 2017-08-13 18:30 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Martin Koegler, git, gitster, Johannes.Schindelin

On Sat, Aug 12, 2017 at 02:47:25PM +0100, Ramsay Jones wrote:
> On 32-bit Linux, off_t is 64-bit and size_t is 32-bit.

--- t.c ---
#include <stddef.h>
#include <stdio.h>

int main()
{
printf("%d %d\n", sizeof(size_t), sizeof(off_t));
}
------------
$ gcc -m32 -o t t.c
$ ./t.c
4 4

So is that really true?

Regards,
Martin 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-13 18:30   ` Martin Koegler
@ 2017-08-13 19:45     ` Ramsay Jones
  2017-08-13 22:15       ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Ramsay Jones @ 2017-08-13 19:45 UTC (permalink / raw)
  To: Martin Koegler; +Cc: git, gitster, Johannes.Schindelin



On 13/08/17 19:30, Martin Koegler wrote:
> On Sat, Aug 12, 2017 at 02:47:25PM +0100, Ramsay Jones wrote:
>> On 32-bit Linux, off_t is 64-bit and size_t is 32-bit.
> 
> --- t.c ---
> #include <stddef.h>
> #include <stdio.h>
> 
> int main()
> {
> printf("%d %d\n", sizeof(size_t), sizeof(off_t));
> }
> ------------
> $ gcc -m32 -o t t.c
> $ ./t.c
> 4 4
> 
> So is that really true?

It should be, see commit b97e911643 ("Support for large files
on 32bit systems.", 17-02-2007), where you can see that the
_FILE_OFFSET_BITS macro is set to 64. This asks <stdio.h> et.al.,
to use the "Large File System" API and a 64-bit off_t.

I can't boot my 32-bit installation at the moment, and it seems
that my 64-bit multilib system is not playing ball:

  $ gcc -m32 -D_FILE_OFFSET_BITS=64 -o t t.c
  In file included from /usr/include/stdio.h:27:0,
                   from t.c:2:
  /usr/include/features.h:367:25: fatal error: sys/cdefs.h: No such file or directory
  compilation terminated.
  $ 

ATB,
Ramsay Jones


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-13 19:45     ` Ramsay Jones
@ 2017-08-13 22:15       ` Junio C Hamano
  2017-08-14 17:08         ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2017-08-13 22:15 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Martin Koegler, git, Johannes.Schindelin

Ramsay Jones <ramsay@ramsayjones.plus.com> writes:

> It should be, see commit b97e911643 ("Support for large files
> on 32bit systems.", 17-02-2007), where you can see that the
> _FILE_OFFSET_BITS macro is set to 64. This asks <stdio.h> et.al.,
> to use the "Large File System" API and a 64-bit off_t.

Correct.  Roughly speaking, we should view off_t as size of things
you can store in a file, and size_t as size of things you can have
in core.  When we allocate a region of memory, and then repeatedly
fill it with some data and hand that memory to a helper function
e.g. git_deflate(), each of these calls should expect to get data
whose size can be representable by size_t, and if that is shorter
than ulong which we currently use, we are artificially limiting our
potential by using a type that is narrower than necessary.  

The result from these helper functions that are repeatedly called
may be sent to a file as the output from the loop.  If that logic in
the outer loop wants to keep a tally of the total size of data they
processed, that number may not be fit in size_t and instead may
require off_t.

One interesting question is which of these two types we should use
for the size of objects Git uses.  

Most of the "interesting" operations done by Git require that the
thing is in core as a whole before we can do anything (e.g. compare
two such things to produce delta, have one in core and apply patch),
so it is tempting that we deal with size_t, but at the lowest level
to serve as a SCM, i.e. recording the state of a file at each
version, we actually should be able to exceed the in-core
limit---both "git add" of a huge file whose contents would not fit
in-core and "git checkout" of a huge blob whose inflated contents
would not fit in-core should (in theory, modulo bugs) be able to
exercise the streaming interface to handle such case without holding
everything in-core at once.  So from that point of view, even size_t
may not be the "correct" type to use.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-13 22:15       ` Junio C Hamano
@ 2017-08-14 17:08         ` Junio C Hamano
  2017-08-14 19:31           ` Ramsay Jones
  2017-08-16 20:22           ` Martin Koegler
  0 siblings, 2 replies; 27+ messages in thread
From: Junio C Hamano @ 2017-08-14 17:08 UTC (permalink / raw)
  To: Martin Koegler, Ramsay Jones; +Cc: git, Johannes.Schindelin

Junio C Hamano <gitster@pobox.com> writes:

> One interesting question is which of these two types we should use
> for the size of objects Git uses.  
>
> Most of the "interesting" operations done by Git require that the
> thing is in core as a whole before we can do anything (e.g. compare
> two such things to produce delta, have one in core and apply patch),
> so it is tempting that we deal with size_t, but at the lowest level
> to serve as a SCM, i.e. recording the state of a file at each
> version, we actually should be able to exceed the in-core
> limit---both "git add" of a huge file whose contents would not fit
> in-core and "git checkout" of a huge blob whose inflated contents
> would not fit in-core should (in theory, modulo bugs) be able to
> exercise the streaming interface to handle such case without holding
> everything in-core at once.  So from that point of view, even size_t
> may not be the "correct" type to use.

A few additions to the above observations.

 - We have varint that encodes how far the location from a delta
   representation of an object to its base object in the packfile.
   Both encoding and decoding sides in the current code use off_t to
   represent this offset, so we can already reference an object that
   is far in the same packfile as a base.

 - I think it is OK in practice to limit the size of individual
   objects to size_t (i.e. on 32-bit arch, you cannot interact with
   a repository with an object whose size exceeds 4GB).  Using off_t
   would allow occasional ultra-huge objects that can only be added
   and checked in via the streaming API on such a platform, but I
   suspect that it may become too much of a hassle to maintain.

   It may help reducing the maintenance if we introduced obj_size_t
   that is defined to be size_t for now, so that we can later swap
   it to ofs_t or some larger type when we know we do need to
   support objects whose size cannot be expressed in size_t, but I
   do not offhand know what the pros-and-cons with such an approach
   would look like.

Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-14 17:08         ` Junio C Hamano
@ 2017-08-14 19:31           ` Ramsay Jones
  2017-08-14 19:58             ` Junio C Hamano
  2017-08-14 20:32             ` Torsten Bögershausen
  2017-08-16 20:22           ` Martin Koegler
  1 sibling, 2 replies; 27+ messages in thread
From: Ramsay Jones @ 2017-08-14 19:31 UTC (permalink / raw)
  To: Junio C Hamano, Martin Koegler; +Cc: git, Johannes.Schindelin



On 14/08/17 18:08, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
>> One interesting question is which of these two types we should use
>> for the size of objects Git uses.  
>>
>> Most of the "interesting" operations done by Git require that the
>> thing is in core as a whole before we can do anything (e.g. compare
>> two such things to produce delta, have one in core and apply patch),
>> so it is tempting that we deal with size_t, but at the lowest level
>> to serve as a SCM, i.e. recording the state of a file at each
>> version, we actually should be able to exceed the in-core
>> limit---both "git add" of a huge file whose contents would not fit
>> in-core and "git checkout" of a huge blob whose inflated contents
>> would not fit in-core should (in theory, modulo bugs) be able to
>> exercise the streaming interface to handle such case without holding
>> everything in-core at once.  So from that point of view, even size_t
>> may not be the "correct" type to use.
> 
> A few additions to the above observations.
> 
>  - We have varint that encodes how far the location from a delta
>    representation of an object to its base object in the packfile.
>    Both encoding and decoding sides in the current code use off_t to
>    represent this offset, so we can already reference an object that
>    is far in the same packfile as a base.
> 
>  - I think it is OK in practice to limit the size of individual
>    objects to size_t (i.e. on 32-bit arch, you cannot interact with
>    a repository with an object whose size exceeds 4GB).  Using off_t
>    would allow occasional ultra-huge objects that can only be added
>    and checked in via the streaming API on such a platform, but I
>    suspect that it may become too much of a hassle to maintain.

In a previous comment, I said that (on 32-bit Linux) it was likely
that an object of > 4GB could not be handled correctly anyway. (more
likely > 2GB). This was based on the code from (quite some) years ago.
In particular, before you added the "streaming API". So, maybe a 32-bit
arch _should_ be able to handle objects as large as the LFS API allows.
(Ignoring, for the moment, that I think anybody who puts files of that
size into an SCM probably gets what they deserve. :-P ).

The two patches I commented on, however, changed the type of some
variables from off_t to size_t. In general, the patches did not
seem to make anything worse, but these type changes could potentially
do harm. Hence my comment. (I still haven't tried the patches on my
32-bit Linux system. I only boot it up about once a week, and I would
rather wait until the patches are in the 'pu' branch before testing).

ATB,
Ramsay Jones


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-14 19:31           ` Ramsay Jones
@ 2017-08-14 19:58             ` Junio C Hamano
  2017-08-14 20:32             ` Torsten Bögershausen
  1 sibling, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2017-08-14 19:58 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Martin Koegler, git, Johannes.Schindelin

Ramsay Jones <ramsay@ramsayjones.plus.com> writes:

> In a previous comment, I said that (on 32-bit Linux) it was likely
> that an object of > 4GB could not be handled correctly anyway. (more
> likely > 2GB). This was based on the code from (quite some) years ago.
> In particular, before you added the "streaming API". So, maybe a 32-bit
> arch _should_ be able to handle objects as large as the LFS API allows.
> (Ignoring, for the moment, that I think anybody who puts files of that
> size into an SCM probably gets what they deserve. :-P ).
>
> The two patches I commented on, however, changed the type of some
> variables from off_t to size_t. In general, the patches did not
> seem to make anything worse, but these type changes could potentially
> do harm. Hence my comment. (I still haven't tried the patches on my
> 32-bit Linux system. I only boot it up about once a week, and I would
> rather wait until the patches are in the 'pu' branch before testing).

We are in perfect agreement.

I didn't mean to say that it is OK to replace off_t with size_t
without a good reason, especially when the current code (at least
the part I looked at anyway, like the OFS_DELTA part) seems to use
off_t correctly, and your review comments are very much appreciated,
so is the effort started by Martin to take us in the direction of
using types more appropriate than "ulong".

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-14 19:31           ` Ramsay Jones
  2017-08-14 19:58             ` Junio C Hamano
@ 2017-08-14 20:32             ` Torsten Bögershausen
  2017-08-15  0:30               ` Ramsay Jones
  1 sibling, 1 reply; 27+ messages in thread
From: Torsten Bögershausen @ 2017-08-14 20:32 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Junio C Hamano, Martin Koegler, git, Johannes.Schindelin

On Mon, Aug 14, 2017 at 08:31:50PM +0100, Ramsay Jones wrote:
> 
> 
> On 14/08/17 18:08, Junio C Hamano wrote:
> > Junio C Hamano <gitster@pobox.com> writes:
> > 
> >> One interesting question is which of these two types we should use
> >> for the size of objects Git uses.  
> >>
> >> Most of the "interesting" operations done by Git require that the
> >> thing is in core as a whole before we can do anything (e.g. compare
> >> two such things to produce delta, have one in core and apply patch),
> >> so it is tempting that we deal with size_t, but at the lowest level
> >> to serve as a SCM, i.e. recording the state of a file at each
> >> version, we actually should be able to exceed the in-core
> >> limit---both "git add" of a huge file whose contents would not fit
> >> in-core and "git checkout" of a huge blob whose inflated contents
> >> would not fit in-core should (in theory, modulo bugs) be able to
> >> exercise the streaming interface to handle such case without holding
> >> everything in-core at once.  So from that point of view, even size_t
> >> may not be the "correct" type to use.
> > 
> > A few additions to the above observations.
> > 
> >  - We have varint that encodes how far the location from a delta
> >    representation of an object to its base object in the packfile.
> >    Both encoding and decoding sides in the current code use off_t to
> >    represent this offset, so we can already reference an object that
> >    is far in the same packfile as a base.
> > 
> >  - I think it is OK in practice to limit the size of individual
> >    objects to size_t (i.e. on 32-bit arch, you cannot interact with
> >    a repository with an object whose size exceeds 4GB).  Using off_t
> >    would allow occasional ultra-huge objects that can only be added
> >    and checked in via the streaming API on such a platform, but I
> >    suspect that it may become too much of a hassle to maintain.
> 
> In a previous comment, I said that (on 32-bit Linux) it was likely
> that an object of > 4GB could not be handled correctly anyway. (more
> likely > 2GB). This was based on the code from (quite some) years ago.
> In particular, before you added the "streaming API". So, maybe a 32-bit
> arch _should_ be able to handle objects as large as the LFS API allows.
> (Ignoring, for the moment, that I think anybody who puts files of that
> size into an SCM probably gets what they deserve. :-P ).

In general, yes.
I had a patch that allowed a 32 bit linux to commit a file >4GB.
There is however a restriction: The file must not yet be known to the
index. Otherwise the "diff" machinery kicks in, and tries to
compare the "old" and the "new" versions, which means that -both-
must fit into "memory" at the same time. Memory means here the available
address space rather then real memory.
So there may be room for improvements for 32 bit systems, but that is another
story, which can be developped independent of the ulong->size_t conversion.

> 
> The two patches I commented on, however, changed the type of some
> variables from off_t to size_t. In general, the patches did not
> seem to make anything worse, but these type changes could potentially
> do harm. Hence my comment. (I still haven't tried the patches on my
> 32-bit Linux system. I only boot it up about once a week, and I would
> rather wait until the patches are in the 'pu' branch before testing).
> 
> ATB,
> Ramsay Jones
> 

And here (in agreement with the answer from Junio):
We should not change any off_t into size_t, since that means a possible
downgrade.
Whenever an off_t is converted into size_t, a function in git-compat-util.h is
used:

static inline size_t xsize_t(off_t len)
{
	if (len > (size_t) len)
		die("Cannot handle files this big");
	return (size_t)len;
}

Having written this, it may be a good idea to define a similar function
xulong() which is used when we do calls into zlib.

Thanks to Martin for working on this.
If there is a branch on a public repo, I can e.g. run the test suite on
different systems.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-14 20:32             ` Torsten Bögershausen
@ 2017-08-15  0:30               ` Ramsay Jones
  0 siblings, 0 replies; 27+ messages in thread
From: Ramsay Jones @ 2017-08-15  0:30 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Junio C Hamano, Martin Koegler, git, Johannes.Schindelin



On 14/08/17 21:32, Torsten Bögershausen wrote:
> In general, yes.
> I had a patch that allowed a 32 bit linux to commit a file >4GB.
> There is however a restriction: The file must not yet be known to the
> index. Otherwise the "diff" machinery kicks in, and tries to
> compare the "old" and the "new" versions, which means that -both-
> must fit into "memory" at the same time. Memory means here the available
> address space rather then real memory.
> So there may be room for improvements for 32 bit systems, but that is another
> story, which can be developped independent of the ulong->size_t conversion.

Oh, I absolutely agree.

> Thanks to Martin for working on this.

Indeed! Thanks Martin.

ATB,
Ramsay Jones



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-14 17:08         ` Junio C Hamano
  2017-08-14 19:31           ` Ramsay Jones
@ 2017-08-16 20:22           ` Martin Koegler
  2017-08-17 10:38             ` Torsten Bögershausen
  1 sibling, 1 reply; 27+ messages in thread
From: Martin Koegler @ 2017-08-16 20:22 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Martin Koegler, Ramsay Jones, git, Johannes.Schindelin

On Mon, Aug 14, 2017 at 10:08:05AM -0700, Junio C Hamano wrote:
>    It may help reducing the maintenance if we introduced obj_size_t
>    that is defined to be size_t for now, so that we can later swap
>    it to ofs_t or some larger type when we know we do need to
>    support objects whose size cannot be expressed in size_t, but I
>    do not offhand know what the pros-and-cons with such an approach
>    would look like.

Where should the use of obj_size_t end and the use of size_t start? 

We often determine a object size and then pass it to malloc. 
We would start with a larger datatyp and then truncate for memory allocation, which use size_t.

Regards,
Martin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Convert pack-objects to size_t
  2017-08-16 20:22           ` Martin Koegler
@ 2017-08-17 10:38             ` Torsten Bögershausen
  0 siblings, 0 replies; 27+ messages in thread
From: Torsten Bögershausen @ 2017-08-17 10:38 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Junio C Hamano, Ramsay Jones, git, Johannes.Schindelin

On Wed, Aug 16, 2017 at 10:22:39PM +0200, Martin Koegler wrote:
> On Mon, Aug 14, 2017 at 10:08:05AM -0700, Junio C Hamano wrote:
> >    It may help reducing the maintenance if we introduced obj_size_t
> >    that is defined to be size_t for now, so that we can later swap
> >    it to ofs_t or some larger type when we know we do need to
> >    support objects whose size cannot be expressed in size_t, but I
> >    do not offhand know what the pros-and-cons with such an approach
> >    would look like.
> 
> Where should the use of obj_size_t end and the use of size_t start? 
> 
> We often determine a object size and then pass it to malloc. 
> We would start with a larger datatyp and then truncate for memory allocation, which use size_t.

The truncation is done with xsize_t:
The "old" sha1_file.c has something like this:
    idx_size = xsize_t(st.st_size);

I personally don't think that obj_size_t gives us so much.
There are "objects" which are "on disk", and they may have a length off_t,
And there are "objects" loaded into memory, and they have a length size_t.
And everybody can check that we use the right type.
Additionally I don't like it very much, when object size in memory is counted
in a 64 bit value (and this will happen if  obj_size_ = off_t == 64bit)

Anyhow, to answer your question:
All calles xmalloc() must be prepared like this:

p = xmalloc(xsize_t(size_of_object));

That should do the trick.

> 
> Regards,
> Martin

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2017-08-17 10:38 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-12  8:47 [PATCH 1/9] Convert pack-objects to size_t Martin Koegler
2017-08-12  8:47 ` [PATCH 2/9] Convert index-pack " Martin Koegler
2017-08-12 13:51   ` Ramsay Jones
2017-08-12  8:47 ` [PATCH 3/9] Convert unpack-objects " Martin Koegler
2017-08-12 14:07   ` Martin Ågren
2017-08-13 18:25     ` Martin Koegler
2017-08-12  8:47 ` [PATCH 4/9] Convert archive functions " Martin Koegler
2017-08-12  8:47 ` [PATCH 5/9] Convert various things " Martin Koegler
2017-08-12 13:27   ` Martin Ågren
2017-08-13 17:48     ` Martin Koegler
2017-08-12  8:47 ` [PATCH 6/9] Use size_t for config parsing Martin Koegler
2017-08-12  8:47 ` [PATCH 7/9] Convert ref-filter to size_t Martin Koegler
2017-08-12  8:47 ` [PATCH 8/9] Convert tree-walk " Martin Koegler
2017-08-12  8:47 ` [PATCH 9/9] Convert xdiff-interface " Martin Koegler
2017-08-12  9:59 ` [PATCH 1/9] Convert pack-objects " Torsten Bögershausen
2017-08-13 18:27   ` Martin Koegler
2017-08-12 13:47 ` Ramsay Jones
2017-08-13 18:30   ` Martin Koegler
2017-08-13 19:45     ` Ramsay Jones
2017-08-13 22:15       ` Junio C Hamano
2017-08-14 17:08         ` Junio C Hamano
2017-08-14 19:31           ` Ramsay Jones
2017-08-14 19:58             ` Junio C Hamano
2017-08-14 20:32             ` Torsten Bögershausen
2017-08-15  0:30               ` Ramsay Jones
2017-08-16 20:22           ` Martin Koegler
2017-08-17 10:38             ` Torsten Bögershausen

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).