git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] New strbuf APIs: splice and embed.
  2007-09-15 14:12 [RFC] strbuf's in builtin-apply Pierre Habouzit
@ 2007-09-15 13:56 ` Pierre Habouzit
  2007-09-16  0:57   ` Junio C Hamano
  2007-09-15 14:04 ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 13:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

* strbuf_splice replace a portion of the buffer with another.
* strbuf_embed replace a strbuf buffer with the given one, that should be
  malloc'ed. Then it enforces strbuf's invariants. If alloc > len, then this
  function has negligible cost, else it will perform a realloc, possibly
  with a cost.
---
 strbuf.c |   31 +++++++++++++++++++++++++++++--
 strbuf.h |    5 +++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/strbuf.c b/strbuf.c
index d919047..ad82eaf 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -24,14 +24,22 @@ char *strbuf_detach(struct strbuf *sb) {
 	return res;
 }
 
+void strbuf_embed(struct strbuf *sb, void *buf, size_t len, size_t alloc) {
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
 void strbuf_grow(struct strbuf *sb, size_t extra) {
 	if (sb->len + extra + 1 <= sb->len)
 		die("you want to use way too much memory");
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
-void strbuf_rtrim(struct strbuf *sb)
-{
+void strbuf_rtrim(struct strbuf *sb) {
 	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
 		sb->len--;
 	sb->buf[sb->len] = '\0';
@@ -48,6 +56,25 @@ void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
 	strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to splice outside from the buffer");
+	if (pos > sb->len)
+		pos = sb->len;
+	if (pos + len > sb->len)
+		len = sb->len - pos;
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
 void strbuf_add(struct strbuf *sb, const void *data, size_t len) {
 	strbuf_grow(sb, len);
 	memcpy(sb->buf + sb->len, data, len);
diff --git a/strbuf.h b/strbuf.h
index 21fc111..7e009d1 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -55,6 +55,7 @@ extern void strbuf_init(struct strbuf *, size_t);
 extern void strbuf_release(struct strbuf *);
 extern void strbuf_reset(struct strbuf *);
 extern char *strbuf_detach(struct strbuf *);
+extern void strbuf_embed(struct strbuf *, void *, size_t, size_t);
 
 /*----- strbuf size related -----*/
 static inline size_t strbuf_avail(struct strbuf *sb) {
@@ -81,6 +82,10 @@ static inline void strbuf_addch(struct strbuf *sb, int c) {
 /* inserts after pos, or appends if pos >= sb->len */
 extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
 
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+						  const void *, size_t);
+
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] New strbuf APIs: splice and attach.
  2007-09-16 17:21 ` Pierre Habouzit
  2007-09-15 13:56   ` [PATCH] Now that cache.h needs strbuf.h, remove useless includes Pierre Habouzit
@ 2007-09-15 13:56   ` Pierre Habouzit
  2007-09-16 20:20     ` Florian Weimer
  2007-09-15 21:50   ` [PATCH] Refactor replace_encoding_header Pierre Habouzit
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 13:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

* strbuf_splice replace a portion of the buffer with another.
* strbuf_attach replace a strbuf buffer with the given one, that should be
  malloc'ed. Then it enforces strbuf's invariants. If alloc > len, then this
  function has negligible cost, else it will perform a realloc, possibly
  with a cost.

Also some style issues are fixed now.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 strbuf.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 strbuf.h |    5 ++++
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/strbuf.c b/strbuf.c
index d919047..ff551ac 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -1,30 +1,45 @@
 #include "cache.h"
 #include "strbuf.h"
 
-void strbuf_init(struct strbuf *sb, size_t hint) {
+void strbuf_init(struct strbuf *sb, size_t hint)
+{
 	memset(sb, 0, sizeof(*sb));
 	if (hint)
 		strbuf_grow(sb, hint);
 }
 
-void strbuf_release(struct strbuf *sb) {
+void strbuf_release(struct strbuf *sb)
+{
 	free(sb->buf);
 	memset(sb, 0, sizeof(*sb));
 }
 
-void strbuf_reset(struct strbuf *sb) {
+void strbuf_reset(struct strbuf *sb)
+{
 	if (sb->len)
 		strbuf_setlen(sb, 0);
 	sb->eof = 0;
 }
 
-char *strbuf_detach(struct strbuf *sb) {
+char *strbuf_detach(struct strbuf *sb)
+{
 	char *res = sb->buf;
 	strbuf_init(sb, 0);
 	return res;
 }
 
-void strbuf_grow(struct strbuf *sb, size_t extra) {
+void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
+{
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_grow(struct strbuf *sb, size_t extra)
+{
 	if (sb->len + extra + 1 <= sb->len)
 		die("you want to use way too much memory");
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
@@ -37,24 +52,44 @@ void strbuf_rtrim(struct strbuf *sb)
 	sb->buf[sb->len] = '\0';
 }
 
-void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) {
+void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
+{
 	strbuf_grow(sb, len);
-	if (pos >= sb->len) {
-		pos = sb->len;
-	} else {
-		memmove(sb->buf + pos + len, sb->buf + pos, sb->len - pos);
-	}
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	memmove(sb->buf + pos + len, sb->buf + pos, sb->len - pos);
 	memcpy(sb->buf + pos, data, len);
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_add(struct strbuf *sb, const void *data, size_t len) {
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to use way too much memory");
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	if (pos + len > sb->len)
+		die("`pos + len' is too far after the end of the buffer");
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
+void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
 	strbuf_grow(sb, len);
 	memcpy(sb->buf + sb->len, data, len);
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...) {
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
 	int len;
 	va_list ap;
 
@@ -76,7 +111,8 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) {
 	strbuf_setlen(sb, sb->len + len);
 }
 
-size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) {
+size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
+{
 	size_t res;
 
 	strbuf_grow(sb, size);
@@ -110,7 +146,8 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
 	return sb->len - oldlen;
 }
 
-void read_line(struct strbuf *sb, FILE *fp, int term) {
+void read_line(struct strbuf *sb, FILE *fp, int term)
+{
 	int ch;
 	if (feof(fp)) {
 		strbuf_release(sb);
diff --git a/strbuf.h b/strbuf.h
index 21fc111..f163c63 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -55,6 +55,7 @@ extern void strbuf_init(struct strbuf *, size_t);
 extern void strbuf_release(struct strbuf *);
 extern void strbuf_reset(struct strbuf *);
 extern char *strbuf_detach(struct strbuf *);
+extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
 
 /*----- strbuf size related -----*/
 static inline size_t strbuf_avail(struct strbuf *sb) {
@@ -81,6 +82,10 @@ static inline void strbuf_addch(struct strbuf *sb, int c) {
 /* inserts after pos, or appends if pos >= sb->len */
 extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
 
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+						  const void *, size_t);
+
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] Now that cache.h needs strbuf.h, remove useless includes.
  2007-09-16 17:21 ` Pierre Habouzit
@ 2007-09-15 13:56   ` Pierre Habouzit
  2007-09-15 13:56   ` [PATCH] New strbuf APIs: splice and attach Pierre Habouzit
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 13:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 archive-tar.c            |    1 -
 builtin-apply.c          |    1 -
 builtin-blame.c          |    1 -
 builtin-checkout-index.c |    1 -
 builtin-commit-tree.c    |    1 -
 builtin-fetch--tool.c    |    1 -
 builtin-rerere.c         |    1 -
 builtin-stripspace.c     |    1 -
 builtin-tag.c            |    1 -
 builtin-update-index.c   |    1 -
 cache-tree.c             |    1 -
 convert.c                |    1 -
 diff.c                   |    1 -
 fast-import.c            |    1 -
 fetch.c                  |    1 -
 imap-send.c              |    1 -
 mktag.c                  |    1 -
 mktree.c                 |    1 -
 sha1_file.c              |    1 -
 strbuf.c                 |    1 -
 20 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index cc94cf3..a87bc4b 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -3,7 +3,6 @@
  */
 #include "cache.h"
 #include "commit.h"
-#include "strbuf.h"
 #include "tar.h"
 #include "builtin.h"
 #include "archive.h"
diff --git a/builtin-apply.c b/builtin-apply.c
index 9735b47..ae708d7 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -12,7 +12,6 @@
 #include "blob.h"
 #include "delta.h"
 #include "builtin.h"
-#include "strbuf.h"
 
 /*
  *  --check turns on checking that the working tree matches the
diff --git a/builtin-blame.c b/builtin-blame.c
index b004f06..e364b6c 100644
--- a/builtin-blame.c
+++ b/builtin-blame.c
@@ -18,7 +18,6 @@
 #include "cache-tree.h"
 #include "path-list.h"
 #include "mailmap.h"
-#include "strbuf.h"
 
 static char blame_usage[] =
 "git-blame [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-p] [-w] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
diff --git a/builtin-checkout-index.c b/builtin-checkout-index.c
index 153ba7d..85e8efe 100644
--- a/builtin-checkout-index.c
+++ b/builtin-checkout-index.c
@@ -38,7 +38,6 @@
  */
 #include "builtin.h"
 #include "cache.h"
-#include "strbuf.h"
 #include "quote.h"
 #include "cache-tree.h"
 
diff --git a/builtin-commit-tree.c b/builtin-commit-tree.c
index 325334f..88b0ab3 100644
--- a/builtin-commit-tree.c
+++ b/builtin-commit-tree.c
@@ -8,7 +8,6 @@
 #include "tree.h"
 #include "builtin.h"
 #include "utf8.h"
-#include "strbuf.h"
 
 #define BLOCKING (1ul << 14)
 
diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c
index 90bdc32..514a3cc 100644
--- a/builtin-fetch--tool.c
+++ b/builtin-fetch--tool.c
@@ -2,7 +2,6 @@
 #include "cache.h"
 #include "refs.h"
 #include "commit.h"
-#include "strbuf.h"
 
 static char *get_stdin(void)
 {
diff --git a/builtin-rerere.c b/builtin-rerere.c
index 826d346..58288f6 100644
--- a/builtin-rerere.c
+++ b/builtin-rerere.c
@@ -1,7 +1,6 @@
 #include "builtin.h"
 #include "cache.h"
 #include "path-list.h"
-#include "strbuf.h"
 #include "xdiff/xdiff.h"
 #include "xdiff-interface.h"
 
diff --git a/builtin-stripspace.c b/builtin-stripspace.c
index c4cf2f0..1ce2847 100644
--- a/builtin-stripspace.c
+++ b/builtin-stripspace.c
@@ -1,6 +1,5 @@
 #include "builtin.h"
 #include "cache.h"
-#include "strbuf.h"
 
 /*
  * Returns the length of a line, without trailing spaces.
diff --git a/builtin-tag.c b/builtin-tag.c
index 9f29342..82ebda1 100644
--- a/builtin-tag.c
+++ b/builtin-tag.c
@@ -8,7 +8,6 @@
 
 #include "cache.h"
 #include "builtin.h"
-#include "strbuf.h"
 #include "refs.h"
 #include "tag.h"
 #include "run-command.h"
diff --git a/builtin-update-index.c b/builtin-update-index.c
index e02431c..0b60513 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -4,7 +4,6 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "cache.h"
-#include "strbuf.h"
 #include "quote.h"
 #include "cache-tree.h"
 #include "tree-walk.h"
diff --git a/cache-tree.c b/cache-tree.c
index 8f53c99..5471844 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -1,5 +1,4 @@
 #include "cache.h"
-#include "strbuf.h"
 #include "tree.h"
 #include "cache-tree.h"
 
diff --git a/convert.c b/convert.c
index 00a341c..508d30b 100644
--- a/convert.c
+++ b/convert.c
@@ -1,7 +1,6 @@
 #include "cache.h"
 #include "attr.h"
 #include "run-command.h"
-#include "strbuf.h"
 
 /*
  * convert.c - convert a file when checking it out and checking it in.
diff --git a/diff.c b/diff.c
index d46dd11..a5b69ed 100644
--- a/diff.c
+++ b/diff.c
@@ -9,7 +9,6 @@
 #include "xdiff-interface.h"
 #include "color.h"
 #include "attr.h"
-#include "strbuf.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
diff --git a/fast-import.c b/fast-import.c
index 2c0bfb9..1866d34 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -149,7 +149,6 @@ Format of STDIN stream:
 #include "pack.h"
 #include "refs.h"
 #include "csum-file.h"
-#include "strbuf.h"
 #include "quote.h"
 
 #define PACK_ID_BITS 16
diff --git a/fetch.c b/fetch.c
index dd6ed9e..c256e6f 100644
--- a/fetch.c
+++ b/fetch.c
@@ -6,7 +6,6 @@
 #include "tag.h"
 #include "blob.h"
 #include "refs.h"
-#include "strbuf.h"
 
 int get_tree = 0;
 int get_history = 0;
diff --git a/imap-send.c b/imap-send.c
index ecd4216..86e4a0f 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -23,7 +23,6 @@
  */
 
 #include "cache.h"
-#include "strbuf.h"
 
 typedef struct store_conf {
 	char *name;
diff --git a/mktag.c b/mktag.c
index 7567f9e..b05260c 100644
--- a/mktag.c
+++ b/mktag.c
@@ -1,5 +1,4 @@
 #include "cache.h"
-#include "strbuf.h"
 #include "tag.h"
 
 /*
diff --git a/mktree.c b/mktree.c
index 3891cd9..5dab4bd 100644
--- a/mktree.c
+++ b/mktree.c
@@ -4,7 +4,6 @@
  * Copyright (c) Junio C Hamano, 2006
  */
 #include "cache.h"
-#include "strbuf.h"
 #include "quote.h"
 #include "tree.h"
 
diff --git a/sha1_file.c b/sha1_file.c
index 64b5b46..59325d4 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -14,7 +14,6 @@
 #include "tag.h"
 #include "tree.h"
 #include "refs.h"
-#include "strbuf.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
diff --git a/strbuf.c b/strbuf.c
index ff551ac..c5f9e2a 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -1,5 +1,4 @@
 #include "cache.h"
-#include "strbuf.h"
 
 void strbuf_init(struct strbuf *sb, size_t hint)
 {
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] builtin-apply: use strbuf's instead of buffer_desc's.
  2007-09-15 14:12 [RFC] strbuf's in builtin-apply Pierre Habouzit
  2007-09-15 13:56 ` [PATCH] New strbuf APIs: splice and embed Pierre Habouzit
@ 2007-09-15 14:04 ` Pierre Habouzit
  2007-09-16  0:56   ` Junio C Hamano
  2007-09-15 17:07 ` [RFC] strbuf's in builtin-apply Pierre Habouzit
  2007-09-16 17:21 ` Pierre Habouzit
  3 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 14:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 builtin-apply.c |  203 ++++++++++++++++++++-----------------------------------
 1 files changed, 73 insertions(+), 130 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 7057d0d..6c24f40 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1442,39 +1442,34 @@ static void show_stats(struct patch *patch)
 	free(qname);
 }
 
-static int read_old_data(struct stat *st, const char *path, char **buf_p, unsigned long *alloc_p, unsigned long *size_p)
+static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 {
 	int fd;
-	unsigned long got;
 	unsigned long nsize;
 	char *nbuf;
-	unsigned long size = *size_p;
-	char *buf = *buf_p;
 
 	switch (st->st_mode & S_IFMT) {
 	case S_IFLNK:
-		return readlink(path, buf, size) != size;
+		strbuf_grow(buf, st->st_size);
+		if (readlink(path, buf->buf, st->st_size) != st->st_size)
+			return -1;
+		strbuf_setlen(buf, st->st_size);
+		return 0;
 	case S_IFREG:
 		fd = open(path, O_RDONLY);
 		if (fd < 0)
 			return error("unable to open %s", path);
-		got = 0;
-		for (;;) {
-			ssize_t ret = xread(fd, buf + got, size - got);
-			if (ret <= 0)
-				break;
-			got += ret;
+		if (strbuf_read(buf, fd, st->st_size) < 0) {
+			close(fd);
+			return -1;
 		}
 		close(fd);
-		nsize = got;
-		nbuf = convert_to_git(path, buf, &nsize);
-		if (nbuf) {
-			free(buf);
-			*buf_p = nbuf;
-			*alloc_p = nsize;
-			*size_p = nsize;
-		}
-		return got != size;
+
+		nsize = buf->len;
+		nbuf = convert_to_git(path, buf->buf, &nsize);
+		if (nbuf)
+			strbuf_embed(buf, nbuf, nsize, nsize);
+		return 0;
 	default:
 		return -1;
 	}
@@ -1579,12 +1574,6 @@ static void remove_last_line(const char **rbuf, int *rsize)
 	*rsize = offset + 1;
 }
 
-struct buffer_desc {
-	char *buffer;
-	unsigned long size;
-	unsigned long alloc;
-};
-
 static int apply_line(char *output, const char *patch, int plen)
 {
 	/* plen is number of bytes to be copied from patch,
@@ -1654,10 +1643,9 @@ static int apply_line(char *output, const char *patch, int plen)
 	return output + plen - buf;
 }
 
-static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, int inaccurate_eof)
+static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int inaccurate_eof)
 {
 	int match_beginning, match_end;
-	char *buf = desc->buffer;
 	const char *patch = frag->patch;
 	int offset, size = frag->size;
 	char *old = xmalloc(size);
@@ -1768,24 +1756,17 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 	lines = 0;
 	pos = frag->newpos;
 	for (;;) {
-		offset = find_offset(buf, desc->size,
+		offset = find_offset(buf->buf, buf->len,
 				     oldlines, oldsize, pos, &lines);
-		if (match_end && offset + oldsize != desc->size)
+		if (match_end && offset + oldsize != buf->len)
 			offset = -1;
 		if (match_beginning && offset)
 			offset = -1;
 		if (offset >= 0) {
-			int diff;
-			unsigned long size, alloc;
-
 			if (new_whitespace == strip_whitespace &&
-			    (desc->size - oldsize - offset == 0)) /* end of file? */
+			    (buf->len - oldsize - offset == 0)) /* end of file? */
 				newsize -= new_blank_lines_at_end;
 
-			diff = newsize - oldsize;
-			size = desc->size + diff;
-			alloc = desc->alloc;
-
 			/* Warn if it was necessary to reduce the number
 			 * of context lines.
 			 */
@@ -1795,19 +1776,8 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 					" to apply fragment at %d\n",
 					leading, trailing, pos + lines);
 
-			if (size > alloc) {
-				alloc = size + 8192;
-				desc->alloc = alloc;
-				buf = xrealloc(buf, alloc);
-				desc->buffer = buf;
-			}
-			desc->size = size;
-			memmove(buf + offset + newsize,
-				buf + offset + oldsize,
-				size - offset - newsize);
-			memcpy(buf + offset, newlines, newsize);
+			strbuf_splice(buf, offset, oldsize, newlines, newsize);
 			offset = 0;
-
 			break;
 		}
 
@@ -1843,12 +1813,11 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 	return offset;
 }
 
-static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
+static int apply_binary_fragment(struct strbuf *buf, struct patch *patch)
 {
-	unsigned long dst_size;
 	struct fragment *fragment = patch->fragments;
-	void *data;
-	void *result;
+	unsigned long len;
+	void *dst;
 
 	/* Binary patch is irreversible without the optional second hunk */
 	if (apply_in_reverse) {
@@ -1859,29 +1828,23 @@ static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
 				     ? patch->new_name : patch->old_name);
 		fragment = fragment->next;
 	}
-	data = (void*) fragment->patch;
 	switch (fragment->binary_patch_method) {
 	case BINARY_DELTA_DEFLATED:
-		result = patch_delta(desc->buffer, desc->size,
-				     data,
-				     fragment->size,
-				     &dst_size);
-		free(desc->buffer);
-		desc->buffer = result;
-		break;
+		dst = patch_delta(buf->buf, buf->len, fragment->patch,
+				  fragment->size, &len);
+		if (!dst)
+			return -1;
+		/* XXX patch_delta NUL-terminates */
+		strbuf_embed(buf, dst, len, len + 1);
+		return 0;
 	case BINARY_LITERAL_DEFLATED:
-		free(desc->buffer);
-		desc->buffer = data;
-		dst_size = fragment->size;
-		break;
+		strbuf_embed(buf, fragment->patch, fragment->size, fragment->size);
+		return 0;
 	}
-	if (!desc->buffer)
-		return -1;
-	desc->size = desc->alloc = dst_size;
-	return 0;
+	return -1;
 }
 
-static int apply_binary(struct buffer_desc *desc, struct patch *patch)
+static int apply_binary(struct strbuf *buf, struct patch *patch)
 {
 	const char *name = patch->old_name ? patch->old_name : patch->new_name;
 	unsigned char sha1[20];
@@ -1900,7 +1863,7 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 		/* See if the old one matches what the patch
 		 * applies to.
 		 */
-		hash_sha1_file(desc->buffer, desc->size, blob_type, sha1);
+		hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
 		if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix))
 			return error("the patch applies to '%s' (%s), "
 				     "which does not match the "
@@ -1909,16 +1872,14 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 	}
 	else {
 		/* Otherwise, the old one must be empty. */
-		if (desc->size)
+		if (buf->len)
 			return error("the patch applies to an empty "
 				     "'%s' but it is not empty", name);
 	}
 
 	get_sha1_hex(patch->new_sha1_prefix, sha1);
 	if (is_null_sha1(sha1)) {
-		free(desc->buffer);
-		desc->alloc = desc->size = 0;
-		desc->buffer = NULL;
+		strbuf_release(buf);
 		return 0; /* deletion patch */
 	}
 
@@ -1926,43 +1887,44 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 		/* We already have the postimage */
 		enum object_type type;
 		unsigned long size;
+		char *result;
 
-		free(desc->buffer);
-		desc->buffer = read_sha1_file(sha1, &type, &size);
-		if (!desc->buffer)
+		result = read_sha1_file(sha1, &type, &size);
+		if (!result)
 			return error("the necessary postimage %s for "
 				     "'%s' cannot be read",
 				     patch->new_sha1_prefix, name);
-		desc->alloc = desc->size = size;
-	}
-	else {
-		/* We have verified desc matches the preimage;
+		/* XXX read_sha1_file NUL-terminates */
+		strbuf_embed(buf, result, size, size + 1);
+	} else {
+		/* We have verified buf matches the preimage;
 		 * apply the patch data to it, which is stored
 		 * in the patch->fragments->{patch,size}.
 		 */
-		if (apply_binary_fragment(desc, patch))
+		if (apply_binary_fragment(buf, patch))
 			return error("binary patch does not apply to '%s'",
 				     name);
 
 		/* verify that the result matches */
-		hash_sha1_file(desc->buffer, desc->size, blob_type, sha1);
+		hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
 		if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix))
-			return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)", name, patch->new_sha1_prefix, sha1_to_hex(sha1));
+			return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)",
+				name, patch->new_sha1_prefix, sha1_to_hex(sha1));
 	}
 
 	return 0;
 }
 
-static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
+static int apply_fragments(struct strbuf *buf, struct patch *patch)
 {
 	struct fragment *frag = patch->fragments;
 	const char *name = patch->old_name ? patch->old_name : patch->new_name;
 
 	if (patch->is_binary)
-		return apply_binary(desc, patch);
+		return apply_binary(buf, patch);
 
 	while (frag) {
-		if (apply_one_fragment(desc, frag, patch->inaccurate_eof)) {
+		if (apply_one_fragment(buf, frag, patch->inaccurate_eof)) {
 			error("patch failed: %s:%ld", name, frag->oldpos);
 			if (!apply_with_reject)
 				return -1;
@@ -1973,76 +1935,57 @@ static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
 	return 0;
 }
 
-static int read_file_or_gitlink(struct cache_entry *ce, char **buf_p,
-				unsigned long *size_p)
+static int read_file_or_gitlink(struct cache_entry *ce, struct strbuf *buf)
 {
 	if (!ce)
 		return 0;
 
 	if (S_ISGITLINK(ntohl(ce->ce_mode))) {
-		*buf_p = xmalloc(100);
-		*size_p = snprintf(*buf_p, 100,
-			"Subproject commit %s\n", sha1_to_hex(ce->sha1));
+		strbuf_grow(buf, 100);
+		strbuf_addf(buf, "Subproject commit %s\n", sha1_to_hex(ce->sha1));
 	} else {
 		enum object_type type;
-		*buf_p = read_sha1_file(ce->sha1, &type, size_p);
-		if (!*buf_p)
+		unsigned long sz;
+		char *result;
+
+		result = read_sha1_file(ce->sha1, &type, &sz);
+		if (!result)
 			return -1;
+		/* XXX read_sha1_file NUL-terminates */
+		strbuf_embed(buf, result, sz, sz + 1);
 	}
 	return 0;
 }
 
 static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *ce)
 {
-	char *buf;
-	unsigned long size, alloc;
-	struct buffer_desc desc;
+	struct strbuf buf;
 
-	size = 0;
-	alloc = 0;
-	buf = NULL;
+	strbuf_init(&buf, 0);
 	if (cached) {
-		if (read_file_or_gitlink(ce, &buf, &size))
+		if (read_file_or_gitlink(ce, &buf))
 			return error("read of %s failed", patch->old_name);
-		alloc = size;
 	} else if (patch->old_name) {
 		if (S_ISGITLINK(patch->old_mode)) {
-			if (ce)
-				read_file_or_gitlink(ce, &buf, &size);
-			else {
+			if (ce) {
+				read_file_or_gitlink(ce, &buf);
+			} else {
 				/*
 				 * There is no way to apply subproject
 				 * patch without looking at the index.
 				 */
 				patch->fragments = NULL;
-				size = 0;
 			}
-		}
-		else {
-			size = xsize_t(st->st_size);
-			alloc = size + 8192;
-			buf = xmalloc(alloc);
-			if (read_old_data(st, patch->old_name,
-					  &buf, &alloc, &size))
-				return error("read of %s failed",
-					     patch->old_name);
+		} else {
+			if (read_old_data(st, patch->old_name, &buf))
+				return error("read of %s failed", patch->old_name);
 		}
 	}
 
-	desc.size = size;
-	desc.alloc = alloc;
-	desc.buffer = buf;
-
-	if (apply_fragments(&desc, patch) < 0)
+	if (apply_fragments(&buf, patch) < 0)
 		return -1; /* note with --reject this succeeds. */
-
-	/* NUL terminate the result */
-	if (desc.alloc <= desc.size)
-		desc.buffer = xrealloc(desc.buffer, desc.size + 1);
-	desc.buffer[desc.size] = 0;
-
-	patch->result = desc.buffer;
-	patch->resultsize = desc.size;
+	patch->result = buf.buf;
+	patch->resultsize = buf.len;
 
 	if (0 < patch->is_delete && patch->resultsize)
 		return error("removal patch leaves file contents");
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC] strbuf's in builtin-apply
@ 2007-09-15 14:12 Pierre Habouzit
  2007-09-15 13:56 ` [PATCH] New strbuf APIs: splice and embed Pierre Habouzit
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 14:12 UTC (permalink / raw)
  To: git, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1907 bytes --]

  I promised it, here it comes. It comes though with some possible
problems, so I'd like to see them discussed first.

  builtin-apply.c was messing with its custom buffers a lot,
reallocating the buffers and so on, hence I had to create strbuf_embed
to have a strbuf-safe API doing exactly the same. Though, because of the
"I want an extra NUL in the end"-invariant, this can come with a quite
high price. in the second patch, there is a hunk:


-		nsize = got;
-		nbuf = convert_to_git(path, buf, &nsize);
-		if (nbuf) {
-			free(buf);
-			*buf_p = nbuf;
-			*alloc_p = nsize;
-			*size_p = nsize;
-		}
-		return got != size;
+
+		nsize = buf->len;
+		nbuf = convert_to_git(path, buf->buf, &nsize);
+		if (nbuf)
+			strbuf_embed(buf, nbuf, nsize, nsize);
+		return 0;

  Here, I've not been able to check if convert_to_git was in fact always
dealing with a NUL-terminated buffer (That would be in fact nsize+1) or
not, hence here this strbuf_embed will likely perform a realloc. I don't
know git enough to know if this can become an horrible burden though.

  Another suspicious hunk is:

-	data = (void*) fragment->patch;
[...]
 	case BINARY_LITERAL_DEFLATED:
-		free(desc->buffer);
-		desc->buffer = data;
-		dst_size = fragment->size;
-		break;
+		strbuf_embed(buf, fragment->patch, fragment->size, fragment->size);
+		return 0;

  TTBOMK the ->patch pointer is a pointer inside a buffer, not a buffer
that has been malloc'ed (and there are code paths before my patch that
would still realloc the buffer so I don't think I introduce an issue).
It passes the test-suite without crashing, but well, maybe this should
be a copy instead.

  The rest is pretty straightforward.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] strbuf's in builtin-apply
  2007-09-15 14:12 [RFC] strbuf's in builtin-apply Pierre Habouzit
  2007-09-15 13:56 ` [PATCH] New strbuf APIs: splice and embed Pierre Habouzit
  2007-09-15 14:04 ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
@ 2007-09-15 17:07 ` Pierre Habouzit
  2007-09-16 17:21 ` Pierre Habouzit
  3 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 17:07 UTC (permalink / raw)
  To: git, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1497 bytes --]

On Sat, Sep 15, 2007 at 02:12:10PM +0000, Pierre Habouzit wrote:
> +		nsize = buf->len;
> +		nbuf = convert_to_git(path, buf->buf, &nsize);
> +		if (nbuf)
> +			strbuf_embed(buf, nbuf, nsize, nsize);

  Okay, I managed to be able to be sure that convert_to_git always have
an extra ending NUL byte, with an intermediate patch.  So now I call
strbuf_embed with nsize, nsize + 1 which has negligible cost.

  Though this question remains:

>   Another suspicious hunk is:
> 
> -	data = (void*) fragment->patch;
> [...]
>  	case BINARY_LITERAL_DEFLATED:
> -		free(desc->buffer);
> -		desc->buffer = data;
> -		dst_size = fragment->size;
> -		break;
> +		strbuf_embed(buf, fragment->patch, fragment->size, fragment->size);
> +		return 0;
> 
>   TTBOMK the ->patch pointer is a pointer inside a buffer, not a buffer
> that has been malloc'ed (and there are code paths before my patch that
> would still realloc the buffer so I don't think I introduce an issue).
> It passes the test-suite without crashing, but well, maybe this should
> be a copy instead.
> 
>   The rest is pretty straightforward.
> 
> -- 
> ·O·  Pierre Habouzit
> ··O                                                madcoder@debian.org
> OOO                                                http://www.madism.org



-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] Refactor replace_encoding_header.
  2007-09-16 17:21 ` Pierre Habouzit
  2007-09-15 13:56   ` [PATCH] Now that cache.h needs strbuf.h, remove useless includes Pierre Habouzit
  2007-09-15 13:56   ` [PATCH] New strbuf APIs: splice and attach Pierre Habouzit
@ 2007-09-15 21:50   ` Pierre Habouzit
  2007-09-16  8:19   ` [PATCH] Remove preemptive allocations Pierre Habouzit
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-15 21:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

* Be more clever in how we search for "encoding ...\n": parse for real
  instead of the sloppy strstr's.
* use strbuf_splice to do the substring replacements.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 commit.c |   59 +++++++++++++++++++++++------------------------------------
 1 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/commit.c b/commit.c
index 6602e2c..13af933 100644
--- a/commit.c
+++ b/commit.c
@@ -648,47 +648,34 @@ static char *get_header(const struct commit *commit, const char *key)
 
 static char *replace_encoding_header(char *buf, const char *encoding)
 {
-	char *encoding_header = strstr(buf, "\nencoding ");
-	char *header_end = strstr(buf, "\n\n");
-	char *end_of_encoding_header;
-	int encoding_header_pos;
-	int encoding_header_len;
-	int new_len;
-	int need_len;
-	int buflen = strlen(buf) + 1;
-
-	if (!header_end)
-		header_end = buf + buflen;
-	if (!encoding_header || encoding_header >= header_end)
-		return buf;
-	encoding_header++;
-	end_of_encoding_header = strchr(encoding_header, '\n');
-	if (!end_of_encoding_header)
+	struct strbuf tmp;
+	size_t start, len;
+	char *cp = buf;
+
+	/* guess if there is an encoding header before a \n\n */
+	while (strncmp(cp, "encoding ", strlen("encoding "))) {
+		cp = strchr(cp, '\n');
+		if (!cp || *++cp == '\n')
+			return buf;
+	}
+	start = cp - buf;
+	cp = strchr(cp, '\n');
+	if (!cp)
 		return buf; /* should not happen but be defensive */
-	end_of_encoding_header++;
-
-	encoding_header_len = end_of_encoding_header - encoding_header;
-	encoding_header_pos = encoding_header - buf;
+	len = cp + 1 - (buf + start);
 
+	strbuf_init(&tmp, 0);
+	strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
 	if (is_encoding_utf8(encoding)) {
 		/* we have re-coded to UTF-8; drop the header */
-		memmove(encoding_header, end_of_encoding_header,
-			buflen - (encoding_header_pos + encoding_header_len));
-		return buf;
-	}
-	new_len = strlen(encoding);
-	need_len = new_len + strlen("encoding \n");
-	if (encoding_header_len < need_len) {
-		buf = xrealloc(buf, buflen + (need_len - encoding_header_len));
-		encoding_header = buf + encoding_header_pos;
-		end_of_encoding_header = encoding_header + encoding_header_len;
+		strbuf_splice(&tmp, start, len, NULL, 0);
+	} else {
+		/* just replaces XXXX in 'encoding XXXX\n' */
+		strbuf_splice(&tmp, start + strlen("encoding "),
+					  len - strlen("encoding \n"),
+					  encoding, strlen(encoding));
 	}
-	memmove(end_of_encoding_header + (need_len - encoding_header_len),
-		end_of_encoding_header,
-		buflen - (encoding_header_pos + encoding_header_len));
-	memcpy(encoding_header + 9, encoding, strlen(encoding));
-	encoding_header[9 + new_len] = '\n';
-	return buf;
+	return tmp.buf;
 }
 
 static char *logmsg_reencode(const struct commit *commit,
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] builtin-apply: use strbuf's instead of buffer_desc's.
  2007-09-15 14:04 ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
@ 2007-09-16  0:56   ` Junio C Hamano
  2007-09-16  8:15     ` Pierre Habouzit
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2007-09-16  0:56 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

Pierre Habouzit <madcoder@debian.org> writes:

>  1 files changed, 73 insertions(+), 130 deletions(-)

Nice reduction.

> -		}
> -		return got != size;
> +
> +		nsize = buf->len;
> +		nbuf = convert_to_git(path, buf->buf, &nsize);
> +		if (nbuf)
> +			strbuf_embed(buf, nbuf, nsize, nsize);
> +		return 0;


I suspect that changing the convert_to_git() interface to work
on strbuf instead of (char*, size_t *) pair might make things
simpler and easier.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] New strbuf APIs: splice and embed.
  2007-09-15 13:56 ` [PATCH] New strbuf APIs: splice and embed Pierre Habouzit
@ 2007-09-16  0:57   ` Junio C Hamano
  2007-09-16  8:10     ` Pierre Habouzit
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2007-09-16  0:57 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

Pierre Habouzit <madcoder@debian.org> writes:

> * strbuf_splice replace a portion of the buffer with another.
> * strbuf_embed replace a strbuf buffer with the given one, that should be
>   malloc'ed. Then it enforces strbuf's invariants. If alloc > len, then this
>   function has negligible cost, else it will perform a realloc, possibly
>   with a cost.

"embed" does not sound quite right, does it?  It is a reverse
operation of strbuf_detach() as far as I can tell.

> -void strbuf_rtrim(struct strbuf *sb)
> -{
> +void strbuf_rtrim(struct strbuf *sb) {
>  	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
>  		sb->len--;
>  	sb->buf[sb->len] = '\0';

This is changing the style in the wrong direction, isn't it?  We
start our functions like this:

	type name(proto)
        {
        	...

> +void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
> +				   const void *data, size_t dlen)
> +{
> +	if (pos + len < pos)
> +		die("you want to splice outside from the buffer");

That is a funny error message for an integer wrap-around check.

> +	if (pos > sb->len)
> +		pos = sb->len;

Shouldn't this be flagged as a programming error?

> +	if (pos + len > sb->len)
> +		len = sb->len - pos;

Likewise.

By the way, this is the kind of situation I wish everybody wrote
their comparison in textual order.  I had to draw a picture like
this to see what was going on.

        sb->buf
        xxxxxxxxxxxzzzzzxxxxxxxxxxx\0
        ^                         ^
        0                   sb->len
                   ^    ^               ^    ^        
                   pos  pos+len         pos  pos+len  
                   v                    v
                   yyyyyyyyyy           yyyyyyyyyy    
                             ^                    ^   
                             dlen                 dlen
                   -------------        --------------
                   pos < sb->len        sb->len < pos

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] New strbuf APIs: splice and embed.
  2007-09-16  0:57   ` Junio C Hamano
@ 2007-09-16  8:10     ` Pierre Habouzit
  0 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16  8:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 2178 bytes --]

On Sun, Sep 16, 2007 at 12:57:44AM +0000, Junio C Hamano wrote:
> Pierre Habouzit <madcoder@debian.org> writes:
> 
> > * strbuf_splice replace a portion of the buffer with another.
> > * strbuf_embed replace a strbuf buffer with the given one, that should be
> >   malloc'ed. Then it enforces strbuf's invariants. If alloc > len, then this
> >   function has negligible cost, else it will perform a realloc, possibly
> >   with a cost.
> 
> "embed" does not sound quite right, does it?  It is a reverse
> operation of strbuf_detach() as far as I can tell.

  Well I don't like either, and indeed strbuf_attach() seems better.

> > -void strbuf_rtrim(struct strbuf *sb)
> > -{
> > +void strbuf_rtrim(struct strbuf *sb) {
> >  	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
> >  		sb->len--;
> >  	sb->buf[sb->len] = '\0';
> 
> This is changing the style in the wrong direction, isn't it?  We
> start our functions like this:
> 
> 	type name(proto)
>         {

  Well that's what I usually do for my code, but I thought git was
putting the opening brace on the same line, my bad.

> > +void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
> > +				   const void *data, size_t dlen)
> > +{
> > +	if (pos + len < pos)
> > +		die("you want to splice outside from the buffer");
> 
> That is a funny error message for an integer wrap-around check.

  right ;)

> 
> > +	if (pos > sb->len)
> > +		pos = sb->len;
>
> Shouldn't this be flagged as a programming error?
> 
> > +	if (pos + len > sb->len)
> > +		len = sb->len - pos;
> 
> Likewise.

  I just attached the same semantics that was chose for strbuf_insert
when the insertion position is outside from the buffer. Though I can
enforce those to stay inside the buffer and just die(). I don't care
much I shall say. Maybe it hides some programmers being sloppy, hence is
a bad semantics. I'll propose a patch where the check die()s instead of
"fixing" the values.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] builtin-apply: use strbuf's instead of buffer_desc's.
  2007-09-16  0:56   ` Junio C Hamano
@ 2007-09-16  8:15     ` Pierre Habouzit
  0 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16  8:15 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 1040 bytes --]

On Sun, Sep 16, 2007 at 12:56:49AM +0000, Junio C Hamano wrote:
> Pierre Habouzit <madcoder@debian.org> writes:
> 
> >  1 files changed, 73 insertions(+), 130 deletions(-)
> 
> Nice reduction.
> 
> > -		}
> > -		return got != size;
> > +
> > +		nsize = buf->len;
> > +		nbuf = convert_to_git(path, buf->buf, &nsize);
> > +		if (nbuf)
> > +			strbuf_embed(buf, nbuf, nsize, nsize);
> > +		return 0;
> 
> 
> I suspect that changing the convert_to_git() interface to work
> on strbuf instead of (char*, size_t *) pair might make things
> simpler and easier.

  Well, yes, maybe it could use:
  (const char *, char *, size_t, struct strbuf *out);

  But this function is used elsewhere where there isn't strbuf's (yet ?)
so I wasn't willing to do such a big change. But as you seem to think it
would help, I'll evaluate where it's going.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] Remove preemptive allocations.
  2007-09-16 17:21 ` Pierre Habouzit
                     ` (2 preceding siblings ...)
  2007-09-15 21:50   ` [PATCH] Refactor replace_encoding_header Pierre Habouzit
@ 2007-09-16  8:19   ` Pierre Habouzit
  2007-09-16 13:51   ` [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's Pierre Habouzit
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16  8:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Careful profiling shows that we spend more time guessing what pattern
allocation will have, whereas we can delay it only at the point where
add_rfc2047 will be used and don't allocate huge memory area for the many
cases where it's not.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 commit.c |   35 +++++------------------------------
 1 files changed, 5 insertions(+), 30 deletions(-)

diff --git a/commit.c b/commit.c
index 13af933..85889f9 100644
--- a/commit.c
+++ b/commit.c
@@ -501,6 +501,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len,
 	return;
 
 needquote:
+	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
 	strbuf_addf(sb, "=?%s?q?", encoding);
 	for (i = last = 0; i < len; i++) {
 		unsigned ch = line[i] & 0xFF;
@@ -520,14 +521,6 @@ needquote:
 	strbuf_addstr(sb, "?=");
 }
 
-static unsigned long bound_rfc2047(unsigned long len, const char *encoding)
-{
-	/* upper bound of q encoded string of length 'len' */
-	unsigned long elen = strlen(encoding);
-
-	return len * 3 + elen + 100;
-}
-
 static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
 			 const char *line, enum date_mode dmode,
 			 const char *encoding)
@@ -560,8 +553,7 @@ static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb
 		add_rfc2047(sb, line, display_name_length, encoding);
 		strbuf_add(sb, name_tail, namelen - display_name_length);
 		strbuf_addch(sb, '\n');
-	}
-	else {
+	} else {
 		strbuf_addf(sb, "%s: %.*s%.*s\n", what,
 			      (fmt == CMIT_FMT_FULLER) ? 4 : 0,
 			      filler, namelen, line);
@@ -955,19 +947,12 @@ static void pp_header(enum cmit_fmt fmt,
 		 * FULLER shows both authors and dates.
 		 */
 		if (!memcmp(line, "author ", 7)) {
-			unsigned long len = linelen;
-			if (fmt == CMIT_FMT_EMAIL)
-				len = bound_rfc2047(linelen, encoding);
-			strbuf_grow(sb, len + 80);
+			strbuf_grow(sb, linelen + 80);
 			add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
 		}
-
 		if (!memcmp(line, "committer ", 10) &&
 		    (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
-			unsigned long len = linelen;
-			if (fmt == CMIT_FMT_EMAIL)
-				len = bound_rfc2047(linelen, encoding);
-			strbuf_grow(sb, len + 80);
+			strbuf_grow(sb, linelen + 80);
 			add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
 		}
 	}
@@ -982,7 +967,6 @@ static void pp_title_line(enum cmit_fmt fmt,
 			  int plain_non_ascii)
 {
 	struct strbuf title;
-	unsigned long len;
 
 	strbuf_init(&title, 80);
 
@@ -1004,16 +988,7 @@ static void pp_title_line(enum cmit_fmt fmt,
 		strbuf_add(&title, line, linelen);
 	}
 
-	/* Enough slop for the MIME header and rfc2047 */
-	len = bound_rfc2047(title.len, encoding) + 1000;
-	if (subject)
-		len += strlen(subject);
-	if (after_subject)
-		len += strlen(after_subject);
-	if (encoding)
-		len += strlen(encoding);
-
-	strbuf_grow(sb, title.len + len);
+	strbuf_grow(sb, title.len + 1024);
 	if (subject) {
 		strbuf_addstr(sb, subject);
 		add_rfc2047(sb, title.buf, title.len, encoding);
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's.
  2007-09-16 17:21 ` Pierre Habouzit
                     ` (3 preceding siblings ...)
  2007-09-16  8:19   ` [PATCH] Remove preemptive allocations Pierre Habouzit
@ 2007-09-16 13:51   ` Pierre Habouzit
  2007-09-16 18:27     ` Linus Torvalds
  2007-09-16 16:54   ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
  2007-09-16 17:28   ` [RFC] strbuf's in builtin-apply Pierre Habouzit
  6 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16 13:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

* Now, those functions take an "out" strbuf argument, where they store their
  result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
  call them this way:
    convert_to_git(path, sb->buf, sb->len, sb);
  When doable, conversions are done in place for real, else the strbuf
  content is just replaced with the new one, transparentely for the caller.

If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:

    int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
    {
        int ret = 0;
        ret |= filter1(...., src, len, sb);
        if (ret) {
            src = sb->buf;
            len = sb->len;
        }
        ret |= filter2(...., src, len, sb);
        if (ret) {
            src = sb->buf;
            len = sb->len;
        }
        ....
        return ret | filtern(..., src, len, sb);
    }

That's why subfilters the convert_to_* functions called were also rewritten
to work this way.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 builtin-apply.c   |   27 ++--
 builtin-archive.c |   78 +++++------
 cache.h           |    6 +-
 convert.c         |  414 ++++++++++++++++++++++-------------------------------
 diff.c            |   12 +-
 entry.c           |   10 +-
 sha1_file.c       |   10 +-
 7 files changed, 240 insertions(+), 317 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 7057d0d..9735b47 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1446,8 +1446,7 @@ static int read_old_data(struct stat *st, const char *path, char **buf_p, unsign
 {
 	int fd;
 	unsigned long got;
-	unsigned long nsize;
-	char *nbuf;
+	struct strbuf nbuf;
 	unsigned long size = *size_p;
 	char *buf = *buf_p;
 
@@ -1466,13 +1465,12 @@ static int read_old_data(struct stat *st, const char *path, char **buf_p, unsign
 			got += ret;
 		}
 		close(fd);
-		nsize = got;
-		nbuf = convert_to_git(path, buf, &nsize);
-		if (nbuf) {
+		strbuf_init(&nbuf, 0);
+		if (convert_to_git(path, buf, size, &nbuf)) {
 			free(buf);
-			*buf_p = nbuf;
-			*alloc_p = nsize;
-			*size_p = nsize;
+			*buf_p = nbuf.buf;
+			*alloc_p = nbuf.alloc;
+			*size_p = nbuf.len;
 		}
 		return got != size;
 	default:
@@ -2438,7 +2436,7 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
 static int try_create_file(const char *path, unsigned int mode, const char *buf, unsigned long size)
 {
 	int fd;
-	char *nbuf;
+	struct strbuf nbuf;
 
 	if (S_ISGITLINK(mode)) {
 		struct stat st;
@@ -2457,9 +2455,11 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
 	if (fd < 0)
 		return -1;
 
-	nbuf = convert_to_working_tree(path, buf, &size);
-	if (nbuf)
-		buf = nbuf;
+	strbuf_init(&nbuf, 0);
+	if (convert_to_working_tree(path, buf, size, &nbuf)) {
+		size = nbuf.len;
+		buf  = nbuf.buf;
+	}
 
 	while (size) {
 		int written = xwrite(fd, buf, size);
@@ -2472,8 +2472,7 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
 	}
 	if (close(fd) < 0)
 		die("closing file %s: %s", path, strerror(errno));
-	if (nbuf)
-		free(nbuf);
+	strbuf_release(&nbuf);
 	return 0;
 }
 
diff --git a/builtin-archive.c b/builtin-archive.c
index 6fa424d..843a9e3 100644
--- a/builtin-archive.c
+++ b/builtin-archive.c
@@ -81,22 +81,21 @@ static int run_remote_archiver(const char *remote, int argc,
 	return !!rv;
 }
 
-static void *format_subst(const struct commit *commit, const char *format,
-                          unsigned long *sizep)
+static void format_subst(const struct commit *commit,
+                         const char *src, size_t len,
+                         struct strbuf *buf)
 {
-	unsigned long len = *sizep;
-	const char *a = format;
-	struct strbuf result;
+	char *to_free = NULL;
 	struct strbuf fmt;
 
-	strbuf_init(&result, 0);
+	if (src == buf->buf)
+		to_free = strbuf_detach(buf);
 	strbuf_init(&fmt, 0);
-
 	for (;;) {
 		const char *b, *c;
 
-		b = memmem(a, len, "$Format:", 8);
-		if (!b || a + len < b + 9)
+		b = memmem(src, len, "$Format:", 8);
+		if (!b || src + len < b + 9)
 			break;
 		c = memchr(b + 8, '$', len - 8);
 		if (!c)
@@ -105,62 +104,57 @@ static void *format_subst(const struct commit *commit, const char *format,
 		strbuf_reset(&fmt);
 		strbuf_add(&fmt, b + 8, c - b - 8);
 
-		strbuf_add(&result, a, b - a);
-		format_commit_message(commit, fmt.buf, &result);
-		len -= c + 1 - a;
-		a = c + 1;
+		strbuf_add(buf, src, b - src);
+		format_commit_message(commit, fmt.buf, buf);
+		len -= c + 1 - src;
+		src  = c + 1;
 	}
-
-	strbuf_add(&result, a, len);
-
-	*sizep = result.len;
-
+	strbuf_add(buf, src, len);
 	strbuf_release(&fmt);
-	return strbuf_detach(&result);
+	free(to_free);
 }
 
-static void *convert_to_archive(const char *path,
-                                const void *src, unsigned long *sizep,
-                                const struct commit *commit)
+static int convert_to_archive(const char *path,
+                              const void *src, size_t len,
+                              struct strbuf *buf,
+                              const struct commit *commit)
 {
 	static struct git_attr *attr_export_subst;
 	struct git_attr_check check[1];
 
 	if (!commit)
-		return NULL;
+		return 0;
 
-        if (!attr_export_subst)
-                attr_export_subst = git_attr("export-subst", 12);
+	if (!attr_export_subst)
+		attr_export_subst = git_attr("export-subst", 12);
 
 	check[0].attr = attr_export_subst;
 	if (git_checkattr(path, ARRAY_SIZE(check), check))
-		return NULL;
+		return 0;
 	if (!ATTR_TRUE(check[0].value))
-		return NULL;
+		return 0;
 
-	return format_subst(commit, src, sizep);
+	format_subst(commit, src, len, buf);
+	return 1;
 }
 
 void *sha1_file_to_archive(const char *path, const unsigned char *sha1,
                            unsigned int mode, enum object_type *type,
-                           unsigned long *size,
+                           unsigned long *sizep,
                            const struct commit *commit)
 {
-	void *buffer, *converted;
+	void *buffer;
 
-	buffer = read_sha1_file(sha1, type, size);
+	buffer = read_sha1_file(sha1, type, sizep);
 	if (buffer && S_ISREG(mode)) {
-		converted = convert_to_working_tree(path, buffer, size);
-		if (converted) {
-			free(buffer);
-			buffer = converted;
-		}
-
-		converted = convert_to_archive(path, buffer, size, commit);
-		if (converted) {
-			free(buffer);
-			buffer = converted;
-		}
+		struct strbuf buf;
+
+		strbuf_init(&buf, 0);
+		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
+		convert_to_working_tree(path, buf.buf, buf.len, &buf);
+		convert_to_archive(path, buf.buf, buf.len, &buf, commit);
+		*sizep = buf.len;
+		buffer = buf.buf;
 	}
 
 	return buffer;
diff --git a/cache.h b/cache.h
index b9a461a..8650d62 100644
--- a/cache.h
+++ b/cache.h
@@ -2,6 +2,7 @@
 #define CACHE_H
 
 #include "git-compat-util.h"
+#include "strbuf.h"
 
 #include SHA1_HEADER
 #include <zlib.h>
@@ -590,8 +591,9 @@ extern void trace_printf(const char *format, ...);
 extern void trace_argv_printf(const char **argv, int count, const char *format, ...);
 
 /* convert.c */
-extern char *convert_to_git(const char *path, const char *src, unsigned long *sizep);
-extern char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep);
+/* returns 1 if *dst was used */
+extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* diff.c */
 extern int diff_auto_refresh_index;
diff --git a/convert.c b/convert.c
index d77c8eb..00a341c 100644
--- a/convert.c
+++ b/convert.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "attr.h"
 #include "run-command.h"
+#include "strbuf.h"
 
 /*
  * convert.c - convert a file when checking it out and checking it in.
@@ -80,24 +81,19 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
+static int crlf_to_git(const char *path, const char *src, size_t len,
+                       struct strbuf *buf, int action)
 {
-	char *buffer, *dst;
-	unsigned long size, nsize;
 	struct text_stat stats;
+	char *dst;
 
-	if ((action == CRLF_BINARY) || !auto_crlf)
-		return NULL;
-
-	size = *sizep;
-	if (!size)
-		return NULL;
-
-	gather_stats(src, size, &stats);
+	if ((action == CRLF_BINARY) || !auto_crlf || !len)
+		return 0;
 
+	gather_stats(src, len, &stats);
 	/* No CR? Nothing to convert, regardless. */
 	if (!stats.cr)
-		return NULL;
+		return 0;
 
 	if (action == CRLF_GUESS) {
 		/*
@@ -106,24 +102,17 @@ static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep
 		 * stuff?
 		 */
 		if (stats.cr != stats.crlf)
-			return NULL;
+			return 0;
 
 		/*
 		 * And add some heuristics for binary vs text, of course...
 		 */
-		if (is_binary(size, &stats))
-			return NULL;
+		if (is_binary(len, &stats))
+			return 0;
 	}
 
-	/*
-	 * Ok, allocate a new buffer, fill it in, and return it
-	 * to let the caller know that we switched buffers.
-	 */
-	nsize = size - stats.crlf;
-	buffer = xmalloc(nsize);
-	*sizep = nsize;
-
-	dst = buffer;
+	strbuf_grow(buf, len);
+	dst = buf->buf;
 	if (action == CRLF_GUESS) {
 		/*
 		 * If we guessed, we already know we rejected a file with
@@ -134,71 +123,72 @@ static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep
 			unsigned char c = *src++;
 			if (c != '\r')
 				*dst++ = c;
-		} while (--size);
+		} while (--len);
 	} else {
 		do {
 			unsigned char c = *src++;
-			if (! (c == '\r' && (1 < size && *src == '\n')))
+			if (! (c == '\r' && (1 < len && *src == '\n')))
 				*dst++ = c;
-		} while (--size);
+		} while (--len);
 	}
-
-	return buffer;
+	strbuf_setlen(buf, dst - buf->buf);
+	return 1;
 }
 
-static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
+static int crlf_to_worktree(const char *path, const char *src, size_t len,
+                            struct strbuf *buf, int action)
 {
-	char *buffer, *dst;
-	unsigned long size, nsize;
+	char *to_free = NULL;
 	struct text_stat stats;
-	unsigned char last;
 
 	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
 	    auto_crlf <= 0)
-		return NULL;
+		return 0;
 
-	size = *sizep;
-	if (!size)
-		return NULL;
+	if (!len)
+		return 0;
 
-	gather_stats(src, size, &stats);
+	gather_stats(src, len, &stats);
 
 	/* No LF? Nothing to convert, regardless. */
 	if (!stats.lf)
-		return NULL;
+		return 0;
 
 	/* Was it already in CRLF format? */
 	if (stats.lf == stats.crlf)
-		return NULL;
+		return 0;
 
 	if (action == CRLF_GUESS) {
 		/* If we have any bare CR characters, we're not going to touch it */
 		if (stats.cr != stats.crlf)
-			return NULL;
+			return 0;
 
-		if (is_binary(size, &stats))
-			return NULL;
+		if (is_binary(len, &stats))
+			return 0;
 	}
 
-	/*
-	 * Ok, allocate a new buffer, fill it in, and return it
-	 * to let the caller know that we switched buffers.
-	 */
-	nsize = size + stats.lf - stats.crlf;
-	buffer = xmalloc(nsize);
-	*sizep = nsize;
-	last = 0;
-
-	dst = buffer;
-	do {
-		unsigned char c = *src++;
-		if (c == '\n' && last != '\r')
-			*dst++ = '\r';
-		*dst++ = c;
-		last = c;
-	} while (--size);
-
-	return buffer;
+	/* are we "faking" in place editing ? */
+	if (src == buf->buf)
+		to_free = strbuf_detach(buf);
+
+	strbuf_grow(buf, len + stats.lf - stats.crlf);
+	for (;;) {
+		const char *nl = memchr(src, '\n', len);
+		if (!nl)
+			break;
+		if (nl > src && nl[-1] == '\r') {
+			strbuf_add(buf, src, nl + 1 - src);
+		} else {
+			strbuf_add(buf, src, nl - src);
+			strbuf_addstr(buf, "\r\n");
+		}
+		len -= nl + 1 - src;
+		src  = nl + 1;
+	}
+	strbuf_add(buf, src, len);
+
+	free(to_free);
+	return 1;
 }
 
 static int filter_buffer(const char *path, const char *src,
@@ -246,8 +236,8 @@ static int filter_buffer(const char *path, const char *src,
 	return (write_err || status);
 }
 
-static char *apply_filter(const char *path, const char *src,
-			  unsigned long *sizep, const char *cmd)
+static int apply_filter(const char *path, const char *src, size_t len,
+                        struct strbuf *dst, const char *cmd)
 {
 	/*
 	 * Create a pipeline to have the command filter the buffer's
@@ -255,21 +245,19 @@ static char *apply_filter(const char *path, const char *src,
 	 *
 	 * (child --> cmd) --> us
 	 */
-	const int SLOP = 4096;
 	int pipe_feed[2];
-	int status;
-	char *dst;
-	unsigned long dstsize, dstalloc;
+	int status, ret = 1;
 	struct child_process child_process;
+	struct strbuf nbuf;
 
 	if (!cmd)
-		return NULL;
+		return 0;
 
 	memset(&child_process, 0, sizeof(child_process));
 
 	if (pipe(pipe_feed) < 0) {
 		error("cannot create pipe to run external filter %s", cmd);
-		return NULL;
+		return 0;
 	}
 
 	fflush(NULL);
@@ -278,54 +266,37 @@ static char *apply_filter(const char *path, const char *src,
 		error("cannot fork to run external filter %s", cmd);
 		close(pipe_feed[0]);
 		close(pipe_feed[1]);
-		return NULL;
+		return 0;
 	}
 	if (!child_process.pid) {
 		dup2(pipe_feed[1], 1);
 		close(pipe_feed[0]);
 		close(pipe_feed[1]);
-		exit(filter_buffer(path, src, *sizep, cmd));
+		exit(filter_buffer(path, src, len, cmd));
 	}
 	close(pipe_feed[1]);
 
-	dstalloc = *sizep;
-	dst = xmalloc(dstalloc);
-	dstsize = 0;
-
-	while (1) {
-		ssize_t numread = xread(pipe_feed[0], dst + dstsize,
-					dstalloc - dstsize);
-
-		if (numread <= 0) {
-			if (!numread)
-				break;
-			error("read from external filter %s failed", cmd);
-			free(dst);
-			dst = NULL;
-			break;
-		}
-		dstsize += numread;
-		if (dstalloc <= dstsize + SLOP) {
-			dstalloc = dstsize + SLOP;
-			dst = xrealloc(dst, dstalloc);
-		}
+	strbuf_init(&nbuf, 0);
+	if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
+		error("read from external filter %s failed", cmd);
+		ret = 0;
 	}
 	if (close(pipe_feed[0])) {
-		error("read from external filter %s failed", cmd);
-		free(dst);
-		dst = NULL;
+		ret = error("read from external filter %s failed", cmd);
+		ret = 0;
 	}
-
 	status = finish_command(&child_process);
 	if (status) {
-		error("external filter %s failed %d", cmd, -status);
-		free(dst);
-		dst = NULL;
+		ret = error("external filter %s failed %d", cmd, -status);
+		ret = 0;
 	}
 
-	if (dst)
-		*sizep = dstsize;
-	return dst;
+	if (ret) {
+		*dst = nbuf;
+	} else {
+		strbuf_release(&nbuf);
+	}
+	return ret;
 }
 
 static struct convert_driver {
@@ -449,137 +420,104 @@ static int count_ident(const char *cp, unsigned long size)
 	return cnt;
 }
 
-static char *ident_to_git(const char *path, const char *src, unsigned long *sizep, int ident)
+static int ident_to_git(const char *path, const char *src, size_t len,
+                        struct strbuf *buf, int ident)
 {
-	int cnt;
-	unsigned long size;
-	char *dst, *buf;
+	char *dst, *dollar;
 
-	if (!ident)
-		return NULL;
-	size = *sizep;
-	cnt = count_ident(src, size);
-	if (!cnt)
-		return NULL;
-	buf = xmalloc(size);
-
-	for (dst = buf; size; size--) {
-		char ch = *src++;
-		*dst++ = ch;
-		if ((ch == '$') && (3 <= size) &&
-		    !memcmp("Id:", src, 3)) {
-			unsigned long rem = size - 3;
-			const char *cp = src + 3;
-			do {
-				ch = *cp++;
-				if (ch == '$')
-					break;
-				rem--;
-			} while (rem);
-			if (!rem)
-				continue;
+	if (!ident || !count_ident(src, len))
+		return 0;
+
+	strbuf_grow(buf, len);
+	dst = buf->buf;
+	for (;;) {
+		dollar = memchr(src, '$', len);
+		if (!dollar)
+			break;
+		memcpy(dst, src, dollar + 1 - src);
+		dst += dollar + 1 - src;
+		len -= dollar + 1 - src;
+		src  = dollar + 1;
+
+		if (len > 3 && !memcmp(src, "Id:", 3)) {
+			dollar = memchr(src + 3, '$', len - 3);
+			if (!dollar)
+				break;
 			memcpy(dst, "Id$", 3);
 			dst += 3;
-			size -= (cp - src);
-			src = cp;
+			len -= dollar + 1 - src;
+			src  = dollar + 1;
 		}
 	}
-
-	*sizep = dst - buf;
-	return buf;
+	memcpy(dst, src, len);
+	strbuf_setlen(buf, dst + len - buf->buf);
+	return 1;
 }
 
-static char *ident_to_worktree(const char *path, const char *src, unsigned long *sizep, int ident)
+static int ident_to_worktree(const char *path, const char *src, size_t len,
+                             struct strbuf *buf, int ident)
 {
-	int cnt;
-	unsigned long size;
-	char *dst, *buf;
 	unsigned char sha1[20];
+	char *to_free = NULL, *dollar;
+	int cnt;
 
 	if (!ident)
-		return NULL;
+		return 0;
 
-	size = *sizep;
-	cnt = count_ident(src, size);
+	cnt = count_ident(src, len);
 	if (!cnt)
-		return NULL;
+		return 0;
 
-	hash_sha1_file(src, size, "blob", sha1);
-	buf = xmalloc(size + cnt * 43);
-
-	for (dst = buf; size; size--) {
-		const char *cp;
-		/* Fetch next source character, move the pointer on */
-		char ch = *src++;
-		/* Copy the current character to the destination */
-		*dst++ = ch;
-		/* If the current character is "$" or there are less than three
-		 * remaining bytes or the two bytes following this one are not
-		 * "Id", then simply read the next character */
-		if ((ch != '$') || (size < 3) || memcmp("Id", src, 2))
-			continue;
-		/*
-		 * Here when
-		 *  - There are more than 2 bytes remaining
-		 *  - The current three bytes are "$Id"
-		 * with
-		 *  - ch == "$"
-		 *  - src[0] == "I"
-		 */
+	/* are we "faking" in place editing ? */
+	if (src == buf->buf)
+		to_free = strbuf_detach(buf);
+	hash_sha1_file(src, len, "blob", sha1);
 
-		/*
-		 * It's possible that an expanded Id has crept its way into the
-		 * repository, we cope with that by stripping the expansion out
-		 */
-		if (src[2] == ':') {
-			/* Expanded keywords have "$Id:" at the front */
+	strbuf_grow(buf, len + cnt * 43);
+	for (;;) {
+		/* step 1: run to the next '$' */
+		dollar = memchr(src, '$', len);
+		if (!dollar)
+			break;
+		strbuf_add(buf, src, dollar + 1 - src);
+		len -= dollar + 1 - src;
+		src  = dollar + 1;
 
-			/* discard up to but not including the closing $ */
-			unsigned long rem = size - 3;
-			/* Point at first byte after the ":" */
-			cp = src + 3;
-			/*
-			 * Throw away characters until either
-			 *  - we reach a "$"
-			 *  - we run out of bytes (rem == 0)
-			 */
-			do {
-				ch = *cp;
-				if (ch == '$')
-					break;
-				cp++;
-				rem--;
-			} while (rem);
-			/* If the above finished because it ran out of characters, then
-			 * this is an incomplete keyword, so don't run the expansion */
-			if (!rem)
-				continue;
-		} else if (src[2] == '$')
-			cp = src + 2;
-		else
-			/* Anything other than "$Id:XXX$" or $Id$ and we skip the
-			 * expansion */
+		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
+		if (len < 3 || memcmp("Id", src, 2))
 			continue;
 
-		/* cp is now pointing at the last $ of the keyword */
-
-		memcpy(dst, "Id: ", 4);
-		dst += 4;
-		memcpy(dst, sha1_to_hex(sha1), 40);
-		dst += 40;
-		*dst++ = ' ';
+		/* step 3: skip over Id$ or Id:xxxxx$ */
+		if (src[2] == '$') {
+			src += 3;
+			len -= 3;
+		} else if (src[2] == ':') {
+			/*
+			 * It's possible that an expanded Id has crept its way into the
+			 * repository, we cope with that by stripping the expansion out
+			 */
+			dollar = memchr(src + 3, '$', len - 3);
+			if (!dollar) {
+				/* incomplete keyword, no more '$', so just quit the loop */
+				break;
+			}
 
-		/* Adjust for the characters we've discarded */
-		size -= (cp - src);
-		src = cp;
+			len -= dollar + 1 - src;
+			src  = dollar + 1;
+		} else {
+			/* it wasn't a "Id$" or "Id:xxxx$" */
+			continue;
+		}
 
-		/* Copy the final "$" */
-		*dst++ = *src++;
-		size--;
+		/* step 4: substitute */
+		strbuf_addstr(buf, "Id: ");
+		strbuf_add(buf, sha1_to_hex(sha1), 40);
+		strbuf_addstr(buf, " $");
 	}
+	strbuf_add(buf, src, len);
 
-	*sizep = dst - buf;
-	return buf;
+	free(to_free);
+	return 1;
 }
 
 static int git_path_check_crlf(const char *path, struct git_attr_check *check)
@@ -618,13 +556,12 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check)
 	return !!ATTR_TRUE(value);
 }
 
-char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
+int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
 	struct git_attr_check check[3];
 	int crlf = CRLF_GUESS;
-	int ident = 0;
+	int ident = 0, ret = 0;
 	char *filter = NULL;
-	char *buf, *buf2;
 
 	setup_convert_check(check);
 	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
@@ -636,30 +573,25 @@ char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
 			filter = drv->clean;
 	}
 
-	buf = apply_filter(path, src, sizep, filter);
-
-	buf2 = crlf_to_git(path, buf ? buf : src, sizep, crlf);
-	if (buf2) {
-		free(buf);
-		buf = buf2;
+	ret |= apply_filter(path, src, len, dst, filter);
+	if (ret) {
+		src = dst->buf;
+		len = dst->len;
 	}
-
-	buf2 = ident_to_git(path, buf ? buf : src, sizep, ident);
-	if (buf2) {
-		free(buf);
-		buf = buf2;
+	ret |= crlf_to_git(path, src, len, dst, crlf);
+	if (ret) {
+		src = dst->buf;
+		len = dst->len;
 	}
-
-	return buf;
+	return ret | ident_to_git(path, src, len, dst, ident);
 }
 
-char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
+int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
 	struct git_attr_check check[3];
 	int crlf = CRLF_GUESS;
-	int ident = 0;
+	int ident = 0, ret = 0;
 	char *filter = NULL;
-	char *buf, *buf2;
 
 	setup_convert_check(check);
 	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
@@ -671,19 +603,15 @@ char *convert_to_working_tree(const char *path, const char *src, unsigned long *
 			filter = drv->smudge;
 	}
 
-	buf = ident_to_worktree(path, src, sizep, ident);
-
-	buf2 = crlf_to_worktree(path, buf ? buf : src, sizep, crlf);
-	if (buf2) {
-		free(buf);
-		buf = buf2;
+	ret |= ident_to_worktree(path, src, len, dst, ident);
+	if (ret) {
+		src = dst->buf;
+		len = dst->len;
 	}
-
-	buf2 = apply_filter(path, buf ? buf : src, sizep, filter);
-	if (buf2) {
-		free(buf);
-		buf = buf2;
+	ret |= crlf_to_worktree(path, src, len, dst, crlf);
+	if (ret) {
+		src = dst->buf;
+		len = dst->len;
 	}
-
-	return buf;
+	return ret | apply_filter(path, src, len, dst, filter);
 }
diff --git a/diff.c b/diff.c
index 693140b..d46dd11 100644
--- a/diff.c
+++ b/diff.c
@@ -1600,10 +1600,9 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 
 	if (!s->sha1_valid ||
 	    reuse_worktree_file(s->path, s->sha1, 0)) {
+		struct strbuf buf;
 		struct stat st;
 		int fd;
-		char *buf;
-		unsigned long size;
 
 		if (!strcmp(s->path, "-"))
 			return populate_from_stdin(s);
@@ -1644,13 +1643,12 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		size = s->size;
-		buf = convert_to_git(s->path, s->data, &size);
-		if (buf) {
+		strbuf_init(&buf, 0);
+		if (convert_to_git(s->path, s->data, s->size, &buf)) {
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
-			s->data = buf;
-			s->size = size;
+			s->data = buf.buf;
+			s->size = buf.len;
 			s->should_free = 1;
 		}
 	}
diff --git a/entry.c b/entry.c
index fc3a506..4a8c73b 100644
--- a/entry.c
+++ b/entry.c
@@ -104,7 +104,8 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 	long wrote;
 
 	switch (ntohl(ce->ce_mode) & S_IFMT) {
-		char *buf, *new;
+		char *new;
+		struct strbuf buf;
 		unsigned long size;
 
 	case S_IFREG:
@@ -116,10 +117,11 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 		/*
 		 * Convert from git internal format to working tree format
 		 */
-		buf = convert_to_working_tree(ce->name, new, &size);
-		if (buf) {
+		strbuf_init(&buf, 0);
+		if (convert_to_working_tree(ce->name, new, size, &buf)) {
 			free(new);
-			new = buf;
+			new = buf.buf;
+			size = buf.len;
 		}
 
 		if (to_tempfile) {
diff --git a/sha1_file.c b/sha1_file.c
index aea1096..64b5b46 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2343,12 +2343,12 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	 * Convert blobs to git internal format
 	 */
 	if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
-		unsigned long nsize = size;
-		char *nbuf = convert_to_git(path, buf, &nsize);
-		if (nbuf) {
+		struct strbuf nbuf;
+		strbuf_init(&nbuf, 0);
+		if (convert_to_git(path, buf, size, &nbuf)) {
 			munmap(buf, size);
-			size = nsize;
-			buf = nbuf;
+			size = nbuf.len;
+			buf = nbuf.buf;
 			re_allocated = 1;
 		}
 	}
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] builtin-apply: use strbuf's instead of buffer_desc's.
  2007-09-16 17:21 ` Pierre Habouzit
                     ` (4 preceding siblings ...)
  2007-09-16 13:51   ` [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's Pierre Habouzit
@ 2007-09-16 16:54   ` Pierre Habouzit
  2007-09-16 17:28   ` [RFC] strbuf's in builtin-apply Pierre Habouzit
  6 siblings, 0 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16 16:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
 builtin-apply.c |  212 +++++++++++++++++++------------------------------------
 1 files changed, 72 insertions(+), 140 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index ae708d7..05011bb 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1441,37 +1441,28 @@ static void show_stats(struct patch *patch)
 	free(qname);
 }
 
-static int read_old_data(struct stat *st, const char *path, char **buf_p, unsigned long *alloc_p, unsigned long *size_p)
+static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 {
 	int fd;
-	unsigned long got;
-	struct strbuf nbuf;
-	unsigned long size = *size_p;
-	char *buf = *buf_p;
 
 	switch (st->st_mode & S_IFMT) {
 	case S_IFLNK:
-		return readlink(path, buf, size) != size;
+		strbuf_grow(buf, st->st_size);
+		if (readlink(path, buf->buf, st->st_size) != st->st_size)
+			return -1;
+		strbuf_setlen(buf, st->st_size);
+		return 0;
 	case S_IFREG:
 		fd = open(path, O_RDONLY);
 		if (fd < 0)
 			return error("unable to open %s", path);
-		got = 0;
-		for (;;) {
-			ssize_t ret = xread(fd, buf + got, size - got);
-			if (ret <= 0)
-				break;
-			got += ret;
+		if (strbuf_read(buf, fd, st->st_size) < 0) {
+			close(fd);
+			return -1;
 		}
 		close(fd);
-		strbuf_init(&nbuf, 0);
-		if (convert_to_git(path, buf, size, &nbuf)) {
-			free(buf);
-			*buf_p = nbuf.buf;
-			*alloc_p = nbuf.alloc;
-			*size_p = nbuf.len;
-		}
-		return got != size;
+		convert_to_git(path, buf->buf, buf->len, buf);
+		return 0;
 	default:
 		return -1;
 	}
@@ -1576,12 +1567,6 @@ static void remove_last_line(const char **rbuf, int *rsize)
 	*rsize = offset + 1;
 }
 
-struct buffer_desc {
-	char *buffer;
-	unsigned long size;
-	unsigned long alloc;
-};
-
 static int apply_line(char *output, const char *patch, int plen)
 {
 	/* plen is number of bytes to be copied from patch,
@@ -1651,10 +1636,9 @@ static int apply_line(char *output, const char *patch, int plen)
 	return output + plen - buf;
 }
 
-static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, int inaccurate_eof)
+static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int inaccurate_eof)
 {
 	int match_beginning, match_end;
-	char *buf = desc->buffer;
 	const char *patch = frag->patch;
 	int offset, size = frag->size;
 	char *old = xmalloc(size);
@@ -1765,24 +1749,17 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 	lines = 0;
 	pos = frag->newpos;
 	for (;;) {
-		offset = find_offset(buf, desc->size,
+		offset = find_offset(buf->buf, buf->len,
 				     oldlines, oldsize, pos, &lines);
-		if (match_end && offset + oldsize != desc->size)
+		if (match_end && offset + oldsize != buf->len)
 			offset = -1;
 		if (match_beginning && offset)
 			offset = -1;
 		if (offset >= 0) {
-			int diff;
-			unsigned long size, alloc;
-
 			if (new_whitespace == strip_whitespace &&
-			    (desc->size - oldsize - offset == 0)) /* end of file? */
+			    (buf->len - oldsize - offset == 0)) /* end of file? */
 				newsize -= new_blank_lines_at_end;
 
-			diff = newsize - oldsize;
-			size = desc->size + diff;
-			alloc = desc->alloc;
-
 			/* Warn if it was necessary to reduce the number
 			 * of context lines.
 			 */
@@ -1792,19 +1769,8 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 					" to apply fragment at %d\n",
 					leading, trailing, pos + lines);
 
-			if (size > alloc) {
-				alloc = size + 8192;
-				desc->alloc = alloc;
-				buf = xrealloc(buf, alloc);
-				desc->buffer = buf;
-			}
-			desc->size = size;
-			memmove(buf + offset + newsize,
-				buf + offset + oldsize,
-				size - offset - newsize);
-			memcpy(buf + offset, newlines, newsize);
+			strbuf_splice(buf, offset, oldsize, newlines, newsize);
 			offset = 0;
-
 			break;
 		}
 
@@ -1840,12 +1806,11 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
 	return offset;
 }
 
-static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
+static int apply_binary_fragment(struct strbuf *buf, struct patch *patch)
 {
-	unsigned long dst_size;
 	struct fragment *fragment = patch->fragments;
-	void *data;
-	void *result;
+	unsigned long len;
+	void *dst;
 
 	/* Binary patch is irreversible without the optional second hunk */
 	if (apply_in_reverse) {
@@ -1856,29 +1821,24 @@ static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
 				     ? patch->new_name : patch->old_name);
 		fragment = fragment->next;
 	}
-	data = (void*) fragment->patch;
 	switch (fragment->binary_patch_method) {
 	case BINARY_DELTA_DEFLATED:
-		result = patch_delta(desc->buffer, desc->size,
-				     data,
-				     fragment->size,
-				     &dst_size);
-		free(desc->buffer);
-		desc->buffer = result;
-		break;
+		dst = patch_delta(buf->buf, buf->len, fragment->patch,
+				  fragment->size, &len);
+		if (!dst)
+			return -1;
+		/* XXX patch_delta NUL-terminates */
+		strbuf_attach(buf, dst, len, len + 1);
+		return 0;
 	case BINARY_LITERAL_DEFLATED:
-		free(desc->buffer);
-		desc->buffer = data;
-		dst_size = fragment->size;
-		break;
+		strbuf_reset(buf);
+		strbuf_add(buf, fragment->patch, fragment->size);
+		return 0;
 	}
-	if (!desc->buffer)
-		return -1;
-	desc->size = desc->alloc = dst_size;
-	return 0;
+	return -1;
 }
 
-static int apply_binary(struct buffer_desc *desc, struct patch *patch)
+static int apply_binary(struct strbuf *buf, struct patch *patch)
 {
 	const char *name = patch->old_name ? patch->old_name : patch->new_name;
 	unsigned char sha1[20];
@@ -1897,7 +1857,7 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 		/* See if the old one matches what the patch
 		 * applies to.
 		 */
-		hash_sha1_file(desc->buffer, desc->size, blob_type, sha1);
+		hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
 		if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix))
 			return error("the patch applies to '%s' (%s), "
 				     "which does not match the "
@@ -1906,16 +1866,14 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 	}
 	else {
 		/* Otherwise, the old one must be empty. */
-		if (desc->size)
+		if (buf->len)
 			return error("the patch applies to an empty "
 				     "'%s' but it is not empty", name);
 	}
 
 	get_sha1_hex(patch->new_sha1_prefix, sha1);
 	if (is_null_sha1(sha1)) {
-		free(desc->buffer);
-		desc->alloc = desc->size = 0;
-		desc->buffer = NULL;
+		strbuf_release(buf);
 		return 0; /* deletion patch */
 	}
 
@@ -1923,43 +1881,44 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
 		/* We already have the postimage */
 		enum object_type type;
 		unsigned long size;
+		char *result;
 
-		free(desc->buffer);
-		desc->buffer = read_sha1_file(sha1, &type, &size);
-		if (!desc->buffer)
+		result = read_sha1_file(sha1, &type, &size);
+		if (!result)
 			return error("the necessary postimage %s for "
 				     "'%s' cannot be read",
 				     patch->new_sha1_prefix, name);
-		desc->alloc = desc->size = size;
-	}
-	else {
-		/* We have verified desc matches the preimage;
+		/* XXX read_sha1_file NUL-terminates */
+		strbuf_attach(buf, result, size, size + 1);
+	} else {
+		/* We have verified buf matches the preimage;
 		 * apply the patch data to it, which is stored
 		 * in the patch->fragments->{patch,size}.
 		 */
-		if (apply_binary_fragment(desc, patch))
+		if (apply_binary_fragment(buf, patch))
 			return error("binary patch does not apply to '%s'",
 				     name);
 
 		/* verify that the result matches */
-		hash_sha1_file(desc->buffer, desc->size, blob_type, sha1);
+		hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
 		if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix))
-			return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)", name, patch->new_sha1_prefix, sha1_to_hex(sha1));
+			return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)",
+				name, patch->new_sha1_prefix, sha1_to_hex(sha1));
 	}
 
 	return 0;
 }
 
-static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
+static int apply_fragments(struct strbuf *buf, struct patch *patch)
 {
 	struct fragment *frag = patch->fragments;
 	const char *name = patch->old_name ? patch->old_name : patch->new_name;
 
 	if (patch->is_binary)
-		return apply_binary(desc, patch);
+		return apply_binary(buf, patch);
 
 	while (frag) {
-		if (apply_one_fragment(desc, frag, patch->inaccurate_eof)) {
+		if (apply_one_fragment(buf, frag, patch->inaccurate_eof)) {
 			error("patch failed: %s:%ld", name, frag->oldpos);
 			if (!apply_with_reject)
 				return -1;
@@ -1970,76 +1929,57 @@ static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
 	return 0;
 }
 
-static int read_file_or_gitlink(struct cache_entry *ce, char **buf_p,
-				unsigned long *size_p)
+static int read_file_or_gitlink(struct cache_entry *ce, struct strbuf *buf)
 {
 	if (!ce)
 		return 0;
 
 	if (S_ISGITLINK(ntohl(ce->ce_mode))) {
-		*buf_p = xmalloc(100);
-		*size_p = snprintf(*buf_p, 100,
-			"Subproject commit %s\n", sha1_to_hex(ce->sha1));
+		strbuf_grow(buf, 100);
+		strbuf_addf(buf, "Subproject commit %s\n", sha1_to_hex(ce->sha1));
 	} else {
 		enum object_type type;
-		*buf_p = read_sha1_file(ce->sha1, &type, size_p);
-		if (!*buf_p)
+		unsigned long sz;
+		char *result;
+
+		result = read_sha1_file(ce->sha1, &type, &sz);
+		if (!result)
 			return -1;
+		/* XXX read_sha1_file NUL-terminates */
+		strbuf_attach(buf, result, sz, sz + 1);
 	}
 	return 0;
 }
 
 static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *ce)
 {
-	char *buf;
-	unsigned long size, alloc;
-	struct buffer_desc desc;
+	struct strbuf buf;
 
-	size = 0;
-	alloc = 0;
-	buf = NULL;
+	strbuf_init(&buf, 0);
 	if (cached) {
-		if (read_file_or_gitlink(ce, &buf, &size))
+		if (read_file_or_gitlink(ce, &buf))
 			return error("read of %s failed", patch->old_name);
-		alloc = size;
 	} else if (patch->old_name) {
 		if (S_ISGITLINK(patch->old_mode)) {
-			if (ce)
-				read_file_or_gitlink(ce, &buf, &size);
-			else {
+			if (ce) {
+				read_file_or_gitlink(ce, &buf);
+			} else {
 				/*
 				 * There is no way to apply subproject
 				 * patch without looking at the index.
 				 */
 				patch->fragments = NULL;
-				size = 0;
 			}
-		}
-		else {
-			size = xsize_t(st->st_size);
-			alloc = size + 8192;
-			buf = xmalloc(alloc);
-			if (read_old_data(st, patch->old_name,
-					  &buf, &alloc, &size))
-				return error("read of %s failed",
-					     patch->old_name);
+		} else {
+			if (read_old_data(st, patch->old_name, &buf))
+				return error("read of %s failed", patch->old_name);
 		}
 	}
 
-	desc.size = size;
-	desc.alloc = alloc;
-	desc.buffer = buf;
-
-	if (apply_fragments(&desc, patch) < 0)
+	if (apply_fragments(&buf, patch) < 0)
 		return -1; /* note with --reject this succeeds. */
-
-	/* NUL terminate the result */
-	if (desc.alloc <= desc.size)
-		desc.buffer = xrealloc(desc.buffer, desc.size + 1);
-	desc.buffer[desc.size] = 0;
-
-	patch->result = desc.buffer;
-	patch->resultsize = desc.size;
+	patch->result = buf.buf;
+	patch->resultsize = buf.len;
 
 	if (0 < patch->is_delete && patch->resultsize)
 		return error("removal patch leaves file contents");
@@ -2459,19 +2399,11 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
 		size = nbuf.len;
 		buf  = nbuf.buf;
 	}
+	write_or_die(fd, buf, size);
+	strbuf_release(&nbuf);
 
-	while (size) {
-		int written = xwrite(fd, buf, size);
-		if (written < 0)
-			die("writing file %s: %s", path, strerror(errno));
-		if (!written)
-			die("out of space writing file %s", path);
-		buf += written;
-		size -= written;
-	}
 	if (close(fd) < 0)
 		die("closing file %s: %s", path, strerror(errno));
-	strbuf_release(&nbuf);
 	return 0;
 }
 
-- 
1.5.3.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [RFC] strbuf's in builtin-apply
  2007-09-15 14:12 [RFC] strbuf's in builtin-apply Pierre Habouzit
                   ` (2 preceding siblings ...)
  2007-09-15 17:07 ` [RFC] strbuf's in builtin-apply Pierre Habouzit
@ 2007-09-16 17:21 ` Pierre Habouzit
  2007-09-15 13:56   ` [PATCH] Now that cache.h needs strbuf.h, remove useless includes Pierre Habouzit
                     ` (6 more replies)
  3 siblings, 7 replies; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16 17:21 UTC (permalink / raw)
  To: git, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 1700 bytes --]

Following this mail will happen a new janitoring series. This is a
rewrite of the former, using Junio's advice to use strbufs in
convert_to_* functions. The patch hence becomes more intrusive than
before (in convert.c mostly). Note that this imply that now strbuf.h is
included from cache.h so all git sources see strbuf's.

The convert_to_git patches gain some marginal efficiency as the new API
makes the reuse of the buffers possible when in-place editing works
(e.g. the \r\n -> \n can be done in place, we save a malloc here). Else
nothing should have changed significantly.

The last 2 patches are new. The first one is a simplification of the
code splicing the "encoding" header in commit.c, reusing the logic
already in strbuf.c for that matter, and also making the parsing code
easier to read (IMHO).

The latter further simplify some code that was trying to guess if
rfc2047 encoding of some header was needed. Thanks to strbuf_grow, and
the fact that now at each point we can grow buffers (which was harder
before), I tried to wait until we are sure if rfc2047 encoding will be
needed or not to extend the buffer. I've benchmarked many tools (on real
repositories, with commiters having non ascii chars in their name) using
the pretty printer without noticeable changes in the numbers (and rather
again, a trend to be faster, but with less than a percent gain, so I
won't call it a real gain).

The series is based on next, as many patches are definitely not suitable
for master :)

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] strbuf's in builtin-apply
  2007-09-16 17:21 ` Pierre Habouzit
                     ` (5 preceding siblings ...)
  2007-09-16 16:54   ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
@ 2007-09-16 17:28   ` Pierre Habouzit
  2007-09-16 22:54     ` Junio C Hamano
  6 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16 17:28 UTC (permalink / raw)
  To: git, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 688 bytes --]

  Damn it I forgot to add -n to my format-patch invocation *again*. The
proper ordering is:

The builtin-apply part:
  [1/6] New strbuf APIs: splice and attach.
  [2/6] Rewrite convert_to_{git,working_tree} to use strbuf's.
  [3/6] Now that cache.h needs strbuf.h, remove useless includes.
  [4/6] builtin-apply: use strbuf's instead of buffer_desc's.

And the two somehow more independant patches (need 1/6 still):
  [5/6] Refactor replace_encoding_header.
  [6/6] Remove preemptive allocations.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's.
  2007-09-16 13:51   ` [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's Pierre Habouzit
@ 2007-09-16 18:27     ` Linus Torvalds
  0 siblings, 0 replies; 21+ messages in thread
From: Linus Torvalds @ 2007-09-16 18:27 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: Junio C Hamano, git



On Sun, 16 Sep 2007, Pierre Habouzit wrote:
..
>  7 files changed, 240 insertions(+), 317 deletions(-)

I like how all these patches seem to be removing more lines than 
they add.

The end result looks good from a visual standpoint too.

So:

	Acked-by: Linus Torvalds <torvalds@linux-foundation.org>

for the whole series.

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] New strbuf APIs: splice and attach.
  2007-09-15 13:56   ` [PATCH] New strbuf APIs: splice and attach Pierre Habouzit
@ 2007-09-16 20:20     ` Florian Weimer
  2007-09-16 20:51       ` Pierre Habouzit
  0 siblings, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2007-09-16 20:20 UTC (permalink / raw)
  To: git

* Pierre Habouzit:

> +void strbuf_grow(struct strbuf *sb, size_t extra)
> +{
>  	if (sb->len + extra + 1 <= sb->len)
>  		die("you want to use way too much memory");

By the way, this comparison is always false because sb->len is signed.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] New strbuf APIs: splice and attach.
  2007-09-16 20:20     ` Florian Weimer
@ 2007-09-16 20:51       ` Pierre Habouzit
  2007-09-17  5:43         ` Florian Weimer
  0 siblings, 1 reply; 21+ messages in thread
From: Pierre Habouzit @ 2007-09-16 20:51 UTC (permalink / raw)
  To: Florian Weimer; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 560 bytes --]

On Sun, Sep 16, 2007 at 08:20:06PM +0000, Florian Weimer wrote:
> * Pierre Habouzit:
> 
> > +void strbuf_grow(struct strbuf *sb, size_t extra)
> > +{
> >  	if (sb->len + extra + 1 <= sb->len)
> >  		die("you want to use way too much memory");
> 
> By the way, this comparison is always false because sb->len is signed.

  News to me. Actually it's not, it's a size_t :)

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] strbuf's in builtin-apply
  2007-09-16 17:28   ` [RFC] strbuf's in builtin-apply Pierre Habouzit
@ 2007-09-16 22:54     ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2007-09-16 22:54 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

Pierre Habouzit <madcoder@debian.org> writes:

> The builtin-apply part:
>   [1/6] New strbuf APIs: splice and attach.
>   [2/6] Rewrite convert_to_{git,working_tree} to use strbuf's.
>   [3/6] Now that cache.h needs strbuf.h, remove useless includes.
>   [4/6] builtin-apply: use strbuf's instead of buffer_desc's.
>
> And the two somehow more independant patches (need 1/6 still):
>   [5/6] Refactor replace_encoding_header.
>   [6/6] Remove preemptive allocations.

Quite nice.  Thanks, will queue.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] New strbuf APIs: splice and attach.
  2007-09-16 20:51       ` Pierre Habouzit
@ 2007-09-17  5:43         ` Florian Weimer
  0 siblings, 0 replies; 21+ messages in thread
From: Florian Weimer @ 2007-09-17  5:43 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: git

* Pierre Habouzit:

> On Sun, Sep 16, 2007 at 08:20:06PM +0000, Florian Weimer wrote:
>> * Pierre Habouzit:
>> 
>> > +void strbuf_grow(struct strbuf *sb, size_t extra)
>> > +{
>> >  	if (sb->len + extra + 1 <= sb->len)
>> >  		die("you want to use way too much memory");
>> 
>> By the way, this comparison is always false because sb->len is signed.
>
>   News to me. Actually it's not, it's a size_t :)

Ah, then this has changed somewhere.  It used to be int.  Good.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2007-09-17  5:43 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-15 14:12 [RFC] strbuf's in builtin-apply Pierre Habouzit
2007-09-15 13:56 ` [PATCH] New strbuf APIs: splice and embed Pierre Habouzit
2007-09-16  0:57   ` Junio C Hamano
2007-09-16  8:10     ` Pierre Habouzit
2007-09-15 14:04 ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
2007-09-16  0:56   ` Junio C Hamano
2007-09-16  8:15     ` Pierre Habouzit
2007-09-15 17:07 ` [RFC] strbuf's in builtin-apply Pierre Habouzit
2007-09-16 17:21 ` Pierre Habouzit
2007-09-15 13:56   ` [PATCH] Now that cache.h needs strbuf.h, remove useless includes Pierre Habouzit
2007-09-15 13:56   ` [PATCH] New strbuf APIs: splice and attach Pierre Habouzit
2007-09-16 20:20     ` Florian Weimer
2007-09-16 20:51       ` Pierre Habouzit
2007-09-17  5:43         ` Florian Weimer
2007-09-15 21:50   ` [PATCH] Refactor replace_encoding_header Pierre Habouzit
2007-09-16  8:19   ` [PATCH] Remove preemptive allocations Pierre Habouzit
2007-09-16 13:51   ` [PATCH] Rewrite convert_to_{git,working_tree} to use strbuf's Pierre Habouzit
2007-09-16 18:27     ` Linus Torvalds
2007-09-16 16:54   ` [PATCH] builtin-apply: use strbuf's instead of buffer_desc's Pierre Habouzit
2007-09-16 17:28   ` [RFC] strbuf's in builtin-apply Pierre Habouzit
2007-09-16 22:54     ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).