git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/3] Drop strbuf's 'eof' marker, and make read_line a first class citizen.
  2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
@ 2007-09-17  9:19 ` Pierre Habouzit
  2007-09-17 11:48 ` [PATCH 2/3] fast-import was using dbuf's, replace them with strbuf's Pierre Habouzit
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Pierre Habouzit @ 2007-09-17  9:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

read_line is now strbuf_getline, and is a first class citizen, it returns 0
when reading a line worked, EOF else.

The ->eof marker was used non-locally by fast-import.c, mimic the same
behaviour using a static int in "read_next_command", that now returns -1 on
EOF, and avoids to call strbuf_getline when it's in EOF state.

Also no longer automagically strbuf_release the buffer, it's counter
intuitive and breaks fast-import in a very subtle way.

Note: being at EOF implies that command_buf.len == 0.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 builtin-checkout-index.c |    5 ++---
 builtin-update-index.c   |    8 ++++----
 fast-import.c            |   34 +++++++++++++++++++---------------
 fetch.c                  |    4 ++--
 mktree.c                 |    4 ++--
 strbuf.c                 |   20 ++++++++------------
 strbuf.h                 |    5 ++---
 7 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/builtin-checkout-index.c b/builtin-checkout-index.c
index 85e8efe..a18ecc4 100644
--- a/builtin-checkout-index.c
+++ b/builtin-checkout-index.c
@@ -277,9 +277,7 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
 		while (1) {
 			char *path_name;
 			const char *p;
-
-			read_line(&buf, stdin, line_termination);
-			if (buf.eof)
+			if (strbuf_getline(&buf, stdin, line_termination) == EOF)
 				break;
 			if (line_termination && buf.buf[0] == '"')
 				path_name = unquote_c_style(buf.buf, NULL);
@@ -292,6 +290,7 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
 			if (path_name != buf.buf)
 				free(path_name);
 		}
+		strbuf_release(&buf);
 	}
 
 	if (all)
diff --git a/builtin-update-index.c b/builtin-update-index.c
index 0b60513..acd5ab5 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -320,8 +320,7 @@ static void read_index_info(int line_termination)
 		 * This format is to put higher order stages into the
 		 * index file and matches git-ls-files --stage output.
 		 */
-		read_line(&buf, stdin, line_termination);
-		if (buf.eof)
+		if (strbuf_getline(&buf, stdin, line_termination) == EOF)
 			break;
 
 		errno = 0;
@@ -383,6 +382,7 @@ static void read_index_info(int line_termination)
 	bad_line:
 		die("malformed index info %s", buf.buf);
 	}
+	strbuf_release(&buf);
 }
 
 static const char update_index_usage[] =
@@ -710,8 +710,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		while (1) {
 			char *path_name;
 			const char *p;
-			read_line(&buf, stdin, line_termination);
-			if (buf.eof)
+			if (strbuf_getline(&buf, stdin, line_termination) == EOF)
 				break;
 			if (line_termination && buf.buf[0] == '"')
 				path_name = unquote_c_style(buf.buf, NULL);
@@ -726,6 +725,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			if (path_name != buf.buf)
 				free(path_name);
 		}
+		strbuf_release(&buf);
 	}
 
  finish:
diff --git a/fast-import.c b/fast-import.c
index 1866d34..da04566 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1584,20 +1584,25 @@ static void dump_marks(void)
 			mark_file, strerror(errno));
 }
 
-static void read_next_command(void)
+static int read_next_command(void)
 {
+	static int stdin_eof = 0;
+
+	if (stdin_eof) {
+		unread_command_buf = 0;
+		return EOF;
+	}
+
 	do {
 		if (unread_command_buf) {
 			unread_command_buf = 0;
-			if (command_buf.eof)
-				return;
 		} else {
 			struct recent_command *rc;
 
 			strbuf_detach(&command_buf);
-			read_line(&command_buf, stdin, '\n');
-			if (command_buf.eof)
-				return;
+			stdin_eof = strbuf_getline(&command_buf, stdin, '\n');
+			if (stdin_eof)
+				return EOF;
 
 			rc = rc_free;
 			if (rc)
@@ -1616,6 +1621,8 @@ static void read_next_command(void)
 			cmd_tail = rc;
 		}
 	} while (command_buf.buf[0] == '#');
+
+	return 0;
 }
 
 static void skip_optional_lf(void)
@@ -1648,8 +1655,7 @@ static void *cmd_data (size_t *size)
 		size_t term_len = command_buf.len - 5 - 2;
 
 		for (;;) {
-			read_line(&command_buf, stdin, '\n');
-			if (command_buf.eof)
+			if (strbuf_getline(&command_buf, stdin, '\n') == EOF)
 				die("EOF in data (terminator '%s' not found)", term);
 			if (term_len == command_buf.len
 				&& !strcmp(term, command_buf.buf))
@@ -2095,7 +2101,7 @@ static void cmd_new_commit(void)
 	}
 
 	/* file_change* */
-	while (!command_buf.eof && command_buf.len > 0) {
+	while (command_buf.len > 0) {
 		if (!prefixcmp(command_buf.buf, "M "))
 			file_change_m(b);
 		else if (!prefixcmp(command_buf.buf, "D "))
@@ -2110,7 +2116,8 @@ static void cmd_new_commit(void)
 			unread_command_buf = 1;
 			break;
 		}
-		read_next_command();
+		if (read_next_command() == EOF)
+			break;
 	}
 
 	/* build the tree and the commit */
@@ -2375,11 +2382,8 @@ int main(int argc, const char **argv)
 	prepare_packed_git();
 	start_packfile();
 	set_die_routine(die_nicely);
-	for (;;) {
-		read_next_command();
-		if (command_buf.eof)
-			break;
-		else if (!strcmp("blob", command_buf.buf))
+	while (read_next_command() != EOF) {
+		if (!strcmp("blob", command_buf.buf))
 			cmd_new_blob();
 		else if (!prefixcmp(command_buf.buf, "commit "))
 			cmd_new_commit();
diff --git a/fetch.c b/fetch.c
index c256e6f..b1c1f07 100644
--- a/fetch.c
+++ b/fetch.c
@@ -222,8 +222,7 @@ int pull_targets_stdin(char ***target, const char ***write_ref)
 		char *rf_one = NULL;
 		char *tg_one;
 
-		read_line(&buf, stdin, '\n');
-		if (buf.eof)
+		if (strbuf_getline(&buf, stdin, '\n') == EOF)
 			break;
 		tg_one = buf.buf;
 		rf_one = strchr(tg_one, '\t');
@@ -239,6 +238,7 @@ int pull_targets_stdin(char ***target, const char ***write_ref)
 		(*write_ref)[targets] = rf_one ? xstrdup(rf_one) : NULL;
 		targets++;
 	}
+	strbuf_release(&buf);
 	return targets;
 }
 
diff --git a/mktree.c b/mktree.c
index 5dab4bd..9c137de 100644
--- a/mktree.c
+++ b/mktree.c
@@ -88,8 +88,7 @@ int main(int ac, char **av)
 		enum object_type type;
 		char *path;
 
-		read_line(&sb, stdin, line_termination);
-		if (sb.eof)
+		if (strbuf_getline(&sb, stdin, line_termination) == EOF)
 			break;
 		ptr = sb.buf;
 		/* Input is non-recursive ls-tree output format
@@ -121,6 +120,7 @@ int main(int ac, char **av)
 		if (path != ntr)
 			free(path);
 	}
+	strbuf_release(&sb);
 	write_tree(sha1);
 	puts(sha1_to_hex(sha1));
 	exit(0);
diff --git a/strbuf.c b/strbuf.c
index c5f9e2a..59383ac 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -17,7 +17,6 @@ void strbuf_reset(struct strbuf *sb)
 {
 	if (sb->len)
 		strbuf_setlen(sb, 0);
-	sb->eof = 0;
 }
 
 char *strbuf_detach(struct strbuf *sb)
@@ -145,14 +144,13 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
 	return sb->len - oldlen;
 }
 
-void read_line(struct strbuf *sb, FILE *fp, int term)
+int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
 {
 	int ch;
-	if (feof(fp)) {
-		strbuf_release(sb);
-		sb->eof = 1;
-		return;
-	}
+
+	strbuf_grow(sb, 0);
+	if (feof(fp))
+		return EOF;
 
 	strbuf_reset(sb);
 	while ((ch = fgetc(fp)) != EOF) {
@@ -161,11 +159,9 @@ void read_line(struct strbuf *sb, FILE *fp, int term)
 		strbuf_grow(sb, 1);
 		sb->buf[sb->len++] = ch;
 	}
-	if (ch == EOF && sb->len == 0) {
-		strbuf_release(sb);
-		sb->eof = 1;
-	}
+	if (ch == EOF && sb->len == 0)
+		return EOF;
 
-	strbuf_grow(sb, 1);
 	sb->buf[sb->len] = '\0';
+	return 0;
 }
diff --git a/strbuf.h b/strbuf.h
index f163c63..b2cbd97 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -44,11 +44,10 @@
 struct strbuf {
 	size_t alloc;
 	size_t len;
-	int eof;
 	char *buf;
 };
 
-#define STRBUF_INIT  { 0, 0, 0, NULL }
+#define STRBUF_INIT  { 0, 0, NULL }
 
 /*----- strbuf life cycle -----*/
 extern void strbuf_init(struct strbuf *, size_t);
@@ -101,6 +100,6 @@ extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
 /* XXX: if read fails, any partial read is undone */
 extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
 
-extern void read_line(struct strbuf *, FILE *, int);
+extern int strbuf_getline(struct strbuf *, FILE *, int);
 
 #endif /* STRBUF_H */
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] fast-import was using dbuf's, replace them with strbuf's.
  2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
  2007-09-17  9:19 ` [PATCH 1/3] Drop strbuf's 'eof' marker, and make read_line a first class citizen Pierre Habouzit
@ 2007-09-17 11:48 ` Pierre Habouzit
  2007-09-17 12:00 ` [PATCH 3/3] fast-import optimization: Pierre Habouzit
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Pierre Habouzit @ 2007-09-17 11:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 fast-import.c |  177 ++++++++++++++++++++++-----------------------------------
 1 files changed, 68 insertions(+), 109 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index da04566..e456eab 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -250,12 +250,6 @@ struct tag
 	unsigned char sha1[20];
 };
 
-struct dbuf
-{
-	void *buffer;
-	size_t capacity;
-};
-
 struct hash_list
 {
 	struct hash_list *next;
@@ -323,8 +317,8 @@ static unsigned int tree_entry_alloc = 1000;
 static void *avail_tree_entry;
 static unsigned int avail_tree_table_sz = 100;
 static struct avail_tree_content **avail_tree_table;
-static struct dbuf old_tree;
-static struct dbuf new_tree;
+static struct strbuf old_tree = STRBUF_INIT;
+static struct strbuf new_tree = STRBUF_INIT;
 
 /* Branch data */
 static unsigned long max_active_branches = 5;
@@ -346,7 +340,7 @@ static struct recent_command *cmd_tail = &cmd_hist;
 static struct recent_command *rc_free;
 static unsigned int cmd_save = 100;
 static uintmax_t next_mark;
-static struct dbuf new_data;
+static struct strbuf new_data = STRBUF_INIT;
 
 static void write_branch_report(FILE *rpt, struct branch *b)
 {
@@ -566,17 +560,6 @@ static char *pool_strdup(const char *s)
 	return r;
 }
 
-static void size_dbuf(struct dbuf *b, size_t maxlen)
-{
-	if (b->buffer) {
-		if (b->capacity >= maxlen)
-			return;
-		free(b->buffer);
-	}
-	b->capacity = ((maxlen / 1024) + 1) * 1024;
-	b->buffer = xmalloc(b->capacity);
-}
-
 static void insert_mark(uintmax_t idnum, struct object_entry *oe)
 {
 	struct mark_set *s = marks;
@@ -1005,8 +988,7 @@ static size_t encode_header(
 
 static int store_object(
 	enum object_type type,
-	void *dat,
-	size_t datlen,
+	struct strbuf *dat,
 	struct last_object *last,
 	unsigned char *sha1out,
 	uintmax_t mark)
@@ -1020,10 +1002,10 @@ static int store_object(
 	z_stream s;
 
 	hdrlen = sprintf((char*)hdr,"%s %lu", typename(type),
-		(unsigned long)datlen) + 1;
+		(unsigned long)dat->len) + 1;
 	SHA1_Init(&c);
 	SHA1_Update(&c, hdr, hdrlen);
-	SHA1_Update(&c, dat, datlen);
+	SHA1_Update(&c, dat->buf, dat->len);
 	SHA1_Final(sha1, &c);
 	if (sha1out)
 		hashcpy(sha1out, sha1);
@@ -1044,9 +1026,9 @@ static int store_object(
 
 	if (last && last->data && last->depth < max_depth) {
 		delta = diff_delta(last->data, last->len,
-			dat, datlen,
+			dat->buf, dat->len,
 			&deltalen, 0);
-		if (delta && deltalen >= datlen) {
+		if (delta && deltalen >= dat->len) {
 			free(delta);
 			delta = NULL;
 		}
@@ -1059,8 +1041,8 @@ static int store_object(
 		s.next_in = delta;
 		s.avail_in = deltalen;
 	} else {
-		s.next_in = dat;
-		s.avail_in = datlen;
+		s.next_in = (void *)dat->buf;
+		s.avail_in = dat->len;
 	}
 	s.avail_out = deflateBound(&s, s.avail_in);
 	s.next_out = out = xmalloc(s.avail_out);
@@ -1083,8 +1065,8 @@ static int store_object(
 
 			memset(&s, 0, sizeof(s));
 			deflateInit(&s, zlib_compression_level);
-			s.next_in = dat;
-			s.avail_in = datlen;
+			s.next_in = (void *)dat->buf;
+			s.avail_in = dat->len;
 			s.avail_out = deflateBound(&s, s.avail_in);
 			s.next_out = out = xrealloc(out, s.avail_out);
 			while (deflate(&s, Z_FINISH) == Z_OK)
@@ -1118,7 +1100,7 @@ static int store_object(
 	} else {
 		if (last)
 			last->depth = 0;
-		hdrlen = encode_header(type, datlen, hdr);
+		hdrlen = encode_header(type, dat->len, hdr);
 		write_or_die(pack_data->pack_fd, hdr, hdrlen);
 		pack_size += hdrlen;
 	}
@@ -1131,9 +1113,9 @@ static int store_object(
 	if (last) {
 		if (!last->no_free)
 			free(last->data);
-		last->data = dat;
 		last->offset = e->offset;
-		last->len = datlen;
+		last->data = dat->buf;
+		last->len = dat->len;
 	}
 	return 0;
 }
@@ -1229,14 +1211,10 @@ static int tecmp1 (const void *_a, const void *_b)
 		b->name->str_dat, b->name->str_len, b->versions[1].mode);
 }
 
-static void mktree(struct tree_content *t,
-	int v,
-	unsigned long *szp,
-	struct dbuf *b)
+static void mktree(struct tree_content *t, int v, struct strbuf *b)
 {
 	size_t maxlen = 0;
 	unsigned int i;
-	char *c;
 
 	if (!v)
 		qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp0);
@@ -1248,27 +1226,22 @@ static void mktree(struct tree_content *t,
 			maxlen += t->entries[i]->name->str_len + 34;
 	}
 
-	size_dbuf(b, maxlen);
-	c = b->buffer;
+	strbuf_reset(b);
+	strbuf_grow(b, maxlen);
 	for (i = 0; i < t->entry_count; i++) {
 		struct tree_entry *e = t->entries[i];
 		if (!e->versions[v].mode)
 			continue;
-		c += sprintf(c, "%o", (unsigned int)e->versions[v].mode);
-		*c++ = ' ';
-		strcpy(c, e->name->str_dat);
-		c += e->name->str_len + 1;
-		hashcpy((unsigned char*)c, e->versions[v].sha1);
-		c += 20;
+		strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
+					e->name->str_dat, '\0');
+		strbuf_add(b, e->versions[v].sha1, 20);
 	}
-	*szp = c - (char*)b->buffer;
 }
 
 static void store_tree(struct tree_entry *root)
 {
 	struct tree_content *t = root->tree;
 	unsigned int i, j, del;
-	unsigned long new_len;
 	struct last_object lo;
 	struct object_entry *le;
 
@@ -1288,16 +1261,16 @@ static void store_tree(struct tree_entry *root)
 		lo.depth = 0;
 		lo.no_free = 0;
 	} else {
-		mktree(t, 0, &lo.len, &old_tree);
-		lo.data = old_tree.buffer;
+		mktree(t, 0, &old_tree);
+		lo.len  = old_tree.len;
+		lo.data = old_tree.buf;
 		lo.offset = le->offset;
 		lo.depth = t->delta_depth;
 		lo.no_free = 1;
 	}
 
-	mktree(t, 1, &new_len, &new_tree);
-	store_object(OBJ_TREE, new_tree.buffer, new_len,
-		&lo, root->versions[1].sha1, 0);
+	mktree(t, 1, &new_tree);
+	store_object(OBJ_TREE, &new_tree, &lo, root->versions[1].sha1, 0);
 
 	t->delta_depth = lo.depth;
 	for (i = 0, j = 0, del = 0; i < t->entry_count; i++) {
@@ -1642,11 +1615,10 @@ static void cmd_mark(void)
 		next_mark = 0;
 }
 
-static void *cmd_data (size_t *size)
+static void cmd_data(struct strbuf *sb)
 {
-	struct strbuf buffer;
+	strbuf_reset(sb);
 
-	strbuf_init(&buffer, 0);
 	if (prefixcmp(command_buf.buf, "data "))
 		die("Expected 'data n' command, found: %s", command_buf.buf);
 
@@ -1660,8 +1632,8 @@ static void *cmd_data (size_t *size)
 			if (term_len == command_buf.len
 				&& !strcmp(term, command_buf.buf))
 				break;
-			strbuf_addbuf(&buffer, &command_buf);
-			strbuf_addch(&buffer, '\n');
+			strbuf_addbuf(sb, &command_buf);
+			strbuf_addch(sb, '\n');
 		}
 		free(term);
 	}
@@ -1671,7 +1643,7 @@ static void *cmd_data (size_t *size)
 		length = strtoul(command_buf.buf + 5, NULL, 10);
 
 		while (n < length) {
-			size_t s = strbuf_fread(&buffer, length - n, stdin);
+			size_t s = strbuf_fread(sb, length - n, stdin);
 			if (!s && feof(stdin))
 				die("EOF in data (%lu bytes remaining)",
 					(unsigned long)(length - n));
@@ -1680,8 +1652,6 @@ static void *cmd_data (size_t *size)
 	}
 
 	skip_optional_lf();
-	*size = buffer.len;
-	return strbuf_detach(&buffer);
 }
 
 static int validate_raw_date(const char *src, char *result, int maxlen)
@@ -1744,15 +1714,14 @@ static char *parse_ident(const char *buf)
 
 static void cmd_new_blob(void)
 {
-	size_t l;
-	void *d;
+	struct strbuf buf;
 
 	read_next_command();
 	cmd_mark();
-	d = cmd_data(&l);
-
-	if (store_object(OBJ_BLOB, d, l, &last_blob, NULL, next_mark))
-		free(d);
+	strbuf_init(&buf, 0);
+	cmd_data(&buf);
+	if (store_object(OBJ_BLOB, &buf, &last_blob, NULL, next_mark))
+		strbuf_release(&buf);
 }
 
 static void unload_one_branch(void)
@@ -1848,14 +1817,15 @@ static void file_change_m(struct branch *b)
 	}
 
 	if (inline_data) {
-		size_t l;
-		void *d;
+		struct strbuf buf;
+
 		if (!p_uq)
 			p = p_uq = xstrdup(p);
 		read_next_command();
-		d = cmd_data(&l);
-		if (store_object(OBJ_BLOB, d, l, &last_blob, sha1, 0))
-			free(d);
+		strbuf_init(&buf, 0);
+		cmd_data(&buf);
+		if (store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0))
+			strbuf_release(&buf);
 	} else if (oe) {
 		if (oe->type != OBJ_BLOB)
 			die("Not a blob (actually a %s): %s",
@@ -2062,9 +2032,8 @@ static struct hash_list *cmd_merge(unsigned int *count)
 
 static void cmd_new_commit(void)
 {
+	static struct strbuf msg = STRBUF_INIT;
 	struct branch *b;
-	void *msg;
-	size_t msglen;
 	char *sp;
 	char *author = NULL;
 	char *committer = NULL;
@@ -2089,7 +2058,7 @@ static void cmd_new_commit(void)
 	}
 	if (!committer)
 		die("Expected committer but didn't get one");
-	msg = cmd_data(&msglen);
+	cmd_data(&msg);
 	read_next_command();
 	cmd_from(b);
 	merge_list = cmd_merge(&merge_count);
@@ -2124,46 +2093,39 @@ static void cmd_new_commit(void)
 	store_tree(&b->branch_tree);
 	hashcpy(b->branch_tree.versions[0].sha1,
 		b->branch_tree.versions[1].sha1);
-	size_dbuf(&new_data, 114 + msglen
-		+ merge_count * 49
-		+ (author
-			? strlen(author) + strlen(committer)
-			: 2 * strlen(committer)));
-	sp = new_data.buffer;
-	sp += sprintf(sp, "tree %s\n",
+
+	strbuf_reset(&new_data);
+	strbuf_addf(&new_data, "tree %s\n",
 		sha1_to_hex(b->branch_tree.versions[1].sha1));
 	if (!is_null_sha1(b->sha1))
-		sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
+		strbuf_addf(&new_data, "parent %s\n", sha1_to_hex(b->sha1));
 	while (merge_list) {
 		struct hash_list *next = merge_list->next;
-		sp += sprintf(sp, "parent %s\n", sha1_to_hex(merge_list->sha1));
+		strbuf_addf(&new_data, "parent %s\n", sha1_to_hex(merge_list->sha1));
 		free(merge_list);
 		merge_list = next;
 	}
-	sp += sprintf(sp, "author %s\n", author ? author : committer);
-	sp += sprintf(sp, "committer %s\n", committer);
-	*sp++ = '\n';
-	memcpy(sp, msg, msglen);
-	sp += msglen;
+	strbuf_addf(&new_data,
+		"author %s\n"
+		"committer %s\n"
+		"\n",
+		author ? author : committer, committer);
+	strbuf_addbuf(&new_data, &msg);
 	free(author);
 	free(committer);
-	free(msg);
 
-	if (!store_object(OBJ_COMMIT,
-		new_data.buffer, sp - (char*)new_data.buffer,
-		NULL, b->sha1, next_mark))
+	if (!store_object(OBJ_COMMIT, &new_data, NULL, b->sha1, next_mark))
 		b->pack_id = pack_id;
 	b->last_commit = object_count_by_type[OBJ_COMMIT];
 }
 
 static void cmd_new_tag(void)
 {
+	static struct strbuf msg = STRBUF_INIT;
 	char *sp;
 	const char *from;
 	char *tagger;
 	struct branch *s;
-	void *msg;
-	size_t msglen;
 	struct tag *t;
 	uintmax_t from_mark = 0;
 	unsigned char sha1[20];
@@ -2214,24 +2176,21 @@ static void cmd_new_tag(void)
 
 	/* tag payload/message */
 	read_next_command();
-	msg = cmd_data(&msglen);
+	cmd_data(&msg);
 
 	/* build the tag object */
-	size_dbuf(&new_data, 67+strlen(t->name)+strlen(tagger)+msglen);
-	sp = new_data.buffer;
-	sp += sprintf(sp, "object %s\n", sha1_to_hex(sha1));
-	sp += sprintf(sp, "type %s\n", commit_type);
-	sp += sprintf(sp, "tag %s\n", t->name);
-	sp += sprintf(sp, "tagger %s\n", tagger);
-	*sp++ = '\n';
-	memcpy(sp, msg, msglen);
-	sp += msglen;
+	strbuf_reset(&new_data);
+	strbuf_addf(&new_data,
+		"object %s\n"
+		"type %s\n"
+		"tag %s\n"
+		"tagger %s\n"
+		"\n",
+		sha1_to_hex(sha1), commit_type, t->name, tagger);
+	strbuf_addbuf(&new_data, &msg);
 	free(tagger);
-	free(msg);
 
-	if (store_object(OBJ_TAG, new_data.buffer,
-		sp - (char*)new_data.buffer,
-		NULL, t->sha1, 0))
+	if (store_object(OBJ_TAG, &new_data, NULL, t->sha1, 0))
 		t->pack_id = MAX_PACK_ID;
 	else
 		t->pack_id = pack_id;
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] fast-import optimization:
  2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
  2007-09-17  9:19 ` [PATCH 1/3] Drop strbuf's 'eof' marker, and make read_line a first class citizen Pierre Habouzit
  2007-09-17 11:48 ` [PATCH 2/3] fast-import was using dbuf's, replace them with strbuf's Pierre Habouzit
@ 2007-09-17 12:00 ` Pierre Habouzit
  2007-09-17 13:35 ` [PATCH 0/3] the return of the strbuf Pierre Habouzit
  2007-09-18  7:56 ` Junio C Hamano
  4 siblings, 0 replies; 11+ messages in thread
From: Pierre Habouzit @ 2007-09-17 12:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Now that cmd_data acts on a strbuf, make last_object stashed buffer be a
strbuf as well. On new stash, don't free the last stashed buffer, rather
swap it with the one you will stash, this way, callers of store_object can
act on static strbufs, and at some point, fast-import won't allocate new
memory for objects buffers.
---
 fast-import.c |   52 ++++++++++++++++++++--------------------------------
 1 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index e456eab..e2a4834 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -182,11 +182,10 @@ struct mark_set
 
 struct last_object
 {
-	void *data;
-	unsigned long len;
+	struct strbuf data;
 	uint32_t offset;
 	unsigned int depth;
-	unsigned no_free:1;
+	unsigned no_swap : 1;
 };
 
 struct mem_pool
@@ -310,7 +309,7 @@ static struct mark_set *marks;
 static const char* mark_file;
 
 /* Our last blob */
-static struct last_object last_blob;
+static struct last_object last_blob = { STRBUF_INIT, 0, 0, 0 };
 
 /* Tree management */
 static unsigned int tree_entry_alloc = 1000;
@@ -950,9 +949,7 @@ static void end_packfile(void)
 	free(old_p);
 
 	/* We can't carry a delta across packfiles. */
-	free(last_blob.data);
-	last_blob.data = NULL;
-	last_blob.len = 0;
+	strbuf_release(&last_blob.data);
 	last_blob.offset = 0;
 	last_blob.depth = 0;
 }
@@ -1024,8 +1021,8 @@ static int store_object(
 		return 1;
 	}
 
-	if (last && last->data && last->depth < max_depth) {
-		delta = diff_delta(last->data, last->len,
+	if (last && last->data.buf && last->depth < max_depth) {
+		delta = diff_delta(last->data.buf, last->data.len,
 			dat->buf, dat->len,
 			&deltalen, 0);
 		if (delta && deltalen >= dat->len) {
@@ -1111,11 +1108,14 @@ static int store_object(
 	free(out);
 	free(delta);
 	if (last) {
-		if (!last->no_free)
-			free(last->data);
+		if (last->no_swap) {
+			last->data = *dat;
+		} else {
+			struct strbuf tmp = *dat;
+			*dat = last->data;
+			last->data = tmp;
+		}
 		last->offset = e->offset;
-		last->data = dat->buf;
-		last->len = dat->len;
 	}
 	return 0;
 }
@@ -1242,7 +1242,7 @@ static void store_tree(struct tree_entry *root)
 {
 	struct tree_content *t = root->tree;
 	unsigned int i, j, del;
-	struct last_object lo;
+	struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
 	struct object_entry *le;
 
 	if (!is_null_sha1(root->versions[1].sha1))
@@ -1254,19 +1254,11 @@ static void store_tree(struct tree_entry *root)
 	}
 
 	le = find_object(root->versions[0].sha1);
-	if (!S_ISDIR(root->versions[0].mode)
-		|| !le
-		|| le->pack_id != pack_id) {
-		lo.data = NULL;
-		lo.depth = 0;
-		lo.no_free = 0;
-	} else {
+	if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
 		mktree(t, 0, &old_tree);
-		lo.len  = old_tree.len;
-		lo.data = old_tree.buf;
+		lo.data = old_tree;
 		lo.offset = le->offset;
 		lo.depth = t->delta_depth;
-		lo.no_free = 1;
 	}
 
 	mktree(t, 1, &new_tree);
@@ -1714,14 +1706,12 @@ static char *parse_ident(const char *buf)
 
 static void cmd_new_blob(void)
 {
-	struct strbuf buf;
+	static struct strbuf buf = STRBUF_INIT;
 
 	read_next_command();
 	cmd_mark();
-	strbuf_init(&buf, 0);
 	cmd_data(&buf);
-	if (store_object(OBJ_BLOB, &buf, &last_blob, NULL, next_mark))
-		strbuf_release(&buf);
+	store_object(OBJ_BLOB, &buf, &last_blob, NULL, next_mark);
 }
 
 static void unload_one_branch(void)
@@ -1817,15 +1807,13 @@ static void file_change_m(struct branch *b)
 	}
 
 	if (inline_data) {
-		struct strbuf buf;
+		static struct strbuf buf = STRBUF_INIT;
 
 		if (!p_uq)
 			p = p_uq = xstrdup(p);
 		read_next_command();
-		strbuf_init(&buf, 0);
 		cmd_data(&buf);
-		if (store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0))
-			strbuf_release(&buf);
+		store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0);
 	} else if (oe) {
 		if (oe->type != OBJ_BLOB)
 			die("Not a blob (actually a %s): %s",
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 0/3] the return of the strbuf
@ 2007-09-17 12:52 Pierre Habouzit
  2007-09-17  9:19 ` [PATCH 1/3] Drop strbuf's 'eof' marker, and make read_line a first class citizen Pierre Habouzit
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Pierre Habouzit @ 2007-09-17 12:52 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 1049 bytes --]

  While getting rid of ->eof in strbuf (as it was somehow tasteless). It
made me aware of the fact that fast-import.c was using a custom buffer
implementation (I think that was the fourth if not the fifth). So here
is the series that eradicates it.

  Trying to understand fast-import.c code, I happened to remark that it
was possible to avoid many reallocations, just by reusing old buffers
rather than dropping them (this was not possible in a readable way
before, but it is now, and uses the same mechanisms that was garbage
collecting buffers, to swap them instead).

  I've not enough stuff to do real-life tests of the old fast-import and
the new one, but I wouldn't be surprised that it gives a quite nice
speed improvements for tools that use long fast-import batches. If not,
well, the code is shorter and more readable, hence it's still a gain.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
                   ` (2 preceding siblings ...)
  2007-09-17 12:00 ` [PATCH 3/3] fast-import optimization: Pierre Habouzit
@ 2007-09-17 13:35 ` Pierre Habouzit
  2007-09-18  3:57   ` Shawn O. Pearce
  2007-09-18  7:56 ` Junio C Hamano
  4 siblings, 1 reply; 11+ messages in thread
From: Pierre Habouzit @ 2007-09-17 13:35 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 1577 bytes --]

On lun, sep 17, 2007 at 12:52:11 +0000, Pierre Habouzit wrote:
>   While getting rid of ->eof in strbuf (as it was somehow tasteless). It
> made me aware of the fact that fast-import.c was using a custom buffer
> implementation (I think that was the fourth if not the fifth). So here
> is the series that eradicates it.
> 
>   Trying to understand fast-import.c code, I happened to remark that it
> was possible to avoid many reallocations, just by reusing old buffers
> rather than dropping them (this was not possible in a readable way
> before, but it is now, and uses the same mechanisms that was garbage
> collecting buffers, to swap them instead).
> 
>   I've not enough stuff to do real-life tests of the old fast-import and
> the new one, but I wouldn't be surprised that it gives a quite nice
> speed improvements for tools that use long fast-import batches. If not,
> well, the code is shorter and more readable, hence it's still a gain.

Shawn: Johannes makes me remark that you are git-fast-import author,
hence may want to be Cc-ed of that series, so here is a mail so that you
don't miss the thread.


The list: it's often hard to know who you should Cc on a given change,
I use format.headers to force Junio and git@, but sometimes you want a
different set of persons. I wonder if this could not be wired in the
repository, as a .gitattribute extension ?


-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-17 13:35 ` [PATCH 0/3] the return of the strbuf Pierre Habouzit
@ 2007-09-18  3:57   ` Shawn O. Pearce
  2007-09-20 11:49     ` Johannes Schindelin
  0 siblings, 1 reply; 11+ messages in thread
From: Shawn O. Pearce @ 2007-09-18  3:57 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

Pierre Habouzit <madcoder@debian.org> wrote:
> On lun, sep 17, 2007 at 12:52:11 +0000, Pierre Habouzit wrote:
> >   While getting rid of ->eof in strbuf (as it was somehow tasteless). It
> > made me aware of the fact that fast-import.c was using a custom buffer
> > implementation (I think that was the fourth if not the fifth). So here
> > is the series that eradicates it.
> 
> Shawn: Johannes makes me remark that you are git-fast-import author,
> hence may want to be Cc-ed of that series, so here is a mail so that you
> don't miss the thread.

Thanks.  The subject caught my attention.  The patches look good and
fast-import is usually maintained by me applying them and Junio pulling
from my fast-import tree.  I'm doing that now.

BTW, I haven't thanked you for doing this cleanup work.  It really
is appreciated.  The code is definately more readable in the end.
Thanks.
 
> The list: it's often hard to know who you should Cc on a given change,
> I use format.headers to force Junio and git@, but sometimes you want a
> different set of persons. I wonder if this could not be wired in the
> repository, as a .gitattribute extension ?

The Linux kernel folks were talking about this not too long ago
and the discussion spilled onto the git list when someone CC'd it
in the middle of the thread.  If I recall correctly it ended off with
this is probably something that a build script should be doing, not
something Git should do natively, as the data is readily available
from tools like git-log.

Indeed in this case if you do:

 $ git log --pretty=format:%an --since=6.months.ago -- fast-import.c \
      | sort | uniq -c | sort -nr
  14 Shawn O. Pearce
   3 Pierre Habouzit
   3 Junio C Hamano
   2 Simon Hausmann
   2 Alex Riesen
   1 Theodore Ts'o
   1 Sven Verdoolaege
   1 Sami Farin
   1 Nicolas Pitre
   1 Luiz Fernando N. Capitulino
   1 Dana L. How
 
So you'd probably want to CC me on stuff in that file.  On the other hand
looking at something else that's much more important to Git:

  $ git log --pretty=format:%an --since=6.months.ago -- builtin-pack-objects.c \
       | sort | uniq -c | sort -nr
  38 Nicolas Pitre
  12 Junio C Hamano
   4 Dana L. How
   3 Martin Koegler
   3 Linus Torvalds
   3 Brian Downing
   2 Theodore Ts'o
   2 Dana How
   1 Luiz Fernando N. Capitulino
   1 Geert Bosch
   1 Alex Riesen

I don't think anyone would argue that CC'ing Nico and Junio on all
sizeable pack-objects changes would be prudent.  Dana too actually
as he has been active in here recently.  Meanwhile I don't make
the 6 month cut.  ;-)

The other alternative that men with real computing horsepower at
their disposal can ask is through git-blame:

  $ git blame -C -C -w --porcelain builtin-pack-objects.c | grep 'author ' \
       | sort | uniq -c | sort -nr
  52 author Nicolas Pitre
  39 author Junio C Hamano
  22 author Linus Torvalds
   6 author Shawn O. Pearce
   6 author Dana L. How
   3 author Martin Koegler
  ...

Again Nico scores very high (52 commits surviving in current `next`)
but so does Junio and Linus.  The output of git-blame may actually
be a better indicator of who Junio likes to CC when changes are
made in this module.

Doing something like this automatically based on the filepaths shown
by `git diff --name-only $cmit^ $cmt` could be expensive in terms
of CPU time, and might offer little gain for an old hand who knows
where the maintainer boundaries tend to be, but it does really help
someone who is newer to the project and might not know who is the
best person to talk to.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
                   ` (3 preceding siblings ...)
  2007-09-17 13:35 ` [PATCH 0/3] the return of the strbuf Pierre Habouzit
@ 2007-09-18  7:56 ` Junio C Hamano
  4 siblings, 0 replies; 11+ messages in thread
From: Junio C Hamano @ 2007-09-18  7:56 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

Thanks, will queue in 'pu' and hopefully soon move to 'next'.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-18  3:57   ` Shawn O. Pearce
@ 2007-09-20 11:49     ` Johannes Schindelin
  2007-09-20 14:19       ` Shawn O. Pearce
  2007-09-20 21:41       ` Junio C Hamano
  0 siblings, 2 replies; 11+ messages in thread
From: Johannes Schindelin @ 2007-09-20 11:49 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Pierre Habouzit, git

Hi,

On Mon, 17 Sep 2007, Shawn O. Pearce wrote:


>  $ git log --pretty=format:%an --since=6.months.ago -- fast-import.c \
>       | sort | uniq -c | sort -nr
>   14 Shawn O. Pearce
>    3 Pierre Habouzit
>    3 Junio C Hamano
>    2 Simon Hausmann
>    2 Alex Riesen
>    1 Theodore Ts'o
>    1 Sven Verdoolaege
>    1 Sami Farin
>    1 Nicolas Pitre
>    1 Luiz Fernando N. Capitulino
>    1 Dana L. How

FWIW I'd do

git shortlog -n --since=6.months.ago HEAD -- fast-import.c|grep "^[A-Z]"

instead...

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-20 11:49     ` Johannes Schindelin
@ 2007-09-20 14:19       ` Shawn O. Pearce
  2007-09-20 21:41       ` Junio C Hamano
  1 sibling, 0 replies; 11+ messages in thread
From: Shawn O. Pearce @ 2007-09-20 14:19 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Pierre Habouzit, git

Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> On Mon, 17 Sep 2007, Shawn O. Pearce wrote:
> 
> >  $ git log --pretty=format:%an --since=6.months.ago -- fast-import.c \
> >       | sort | uniq -c | sort -nr
...
> FWIW I'd do
> 
> git shortlog -n --since=6.months.ago HEAD -- fast-import.c|grep "^[A-Z]"
> 
> instead...

Yea, Junio pointed out how stupid I was being on #git.  I don't
know why I didn't think of using shortlog here as this is one
of the things it was built for.  Whatever.  I forgot my git-fu
on Monday.  :)

-- 
Shawn.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-20 11:49     ` Johannes Schindelin
  2007-09-20 14:19       ` Shawn O. Pearce
@ 2007-09-20 21:41       ` Junio C Hamano
  2007-09-20 21:52         ` Johannes Schindelin
  1 sibling, 1 reply; 11+ messages in thread
From: Junio C Hamano @ 2007-09-20 21:41 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Shawn O. Pearce, Pierre Habouzit, git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> On Mon, 17 Sep 2007, Shawn O. Pearce wrote:
>
>>  $ git log --pretty=format:%an --since=6.months.ago -- fast-import.c \
>>       | sort | uniq -c | sort -nr
>>   14 Shawn O. Pearce
>>    3 Pierre Habouzit
>>    3 Junio C Hamano
>>    2 Simon Hausmann
>>    2 Alex Riesen
>>    1 Theodore Ts'o
>>    1 Sven Verdoolaege
>>    1 Sami Farin
>>    1 Nicolas Pitre
>>    1 Luiz Fernando N. Capitulino
>>    1 Dana L. How
>
> FWIW I'd do
>
> git shortlog -n --since=6.months.ago HEAD -- fast-import.c|grep "^[A-Z]"
>
> instead...

I am sure you certainly meant a single command without grep, like:

	$ git shortlog -n -s --since=6.months.ago HEAD -- fast-import.c

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] the return of the strbuf
  2007-09-20 21:41       ` Junio C Hamano
@ 2007-09-20 21:52         ` Johannes Schindelin
  0 siblings, 0 replies; 11+ messages in thread
From: Johannes Schindelin @ 2007-09-20 21:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, Pierre Habouzit, git

Hi,

On Thu, 20 Sep 2007, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > On Mon, 17 Sep 2007, Shawn O. Pearce wrote:
> >
> >>  $ git log --pretty=format:%an --since=6.months.ago -- fast-import.c \
> >>       | sort | uniq -c | sort -nr
> >>   14 Shawn O. Pearce
> >>    3 Pierre Habouzit
> >>    3 Junio C Hamano
> >>    2 Simon Hausmann
> >>    2 Alex Riesen
> >>    1 Theodore Ts'o
> >>    1 Sven Verdoolaege
> >>    1 Sami Farin
> >>    1 Nicolas Pitre
> >>    1 Luiz Fernando N. Capitulino
> >>    1 Dana L. How
> >
> > FWIW I'd do
> >
> > git shortlog -n --since=6.months.ago HEAD -- fast-import.c|grep "^[A-Z]"
> >
> > instead...
> 
> I am sure you certainly meant a single command without grep, like:
> 
> 	$ git shortlog -n -s --since=6.months.ago HEAD -- fast-import.c

Hehe.  I said "I'd do" ;-)

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2007-09-20 21:53 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-17 12:52 [PATCH 0/3] the return of the strbuf Pierre Habouzit
2007-09-17  9:19 ` [PATCH 1/3] Drop strbuf's 'eof' marker, and make read_line a first class citizen Pierre Habouzit
2007-09-17 11:48 ` [PATCH 2/3] fast-import was using dbuf's, replace them with strbuf's Pierre Habouzit
2007-09-17 12:00 ` [PATCH 3/3] fast-import optimization: Pierre Habouzit
2007-09-17 13:35 ` [PATCH 0/3] the return of the strbuf Pierre Habouzit
2007-09-18  3:57   ` Shawn O. Pearce
2007-09-20 11:49     ` Johannes Schindelin
2007-09-20 14:19       ` Shawn O. Pearce
2007-09-20 21:41       ` Junio C Hamano
2007-09-20 21:52         ` Johannes Schindelin
2007-09-18  7:56 ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).