git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [RFC] Convert builtin-mailinfo.c to use The Better String Library.
@ 2007-09-04 20:50 Lukas Sandström
  2007-09-04 21:38 ` Alex Riesen
                   ` (3 more replies)
  0 siblings, 4 replies; 102+ messages in thread
From: Lukas Sandström @ 2007-09-04 20:50 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano

Hi.

This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c.

The patch doesn't pass all the tests in the test suite yet, but I thought I'd
send it out so people can decide if they like how the code looks.

I'm not sending a patch to add the library files at this time. I'll send
that patch when this patch is working.

The changes required to make it pass the tests shouldn't be very large.

/Lukas

[1] http://bstring.sourceforge.net/

---
 builtin-mailinfo.c |  795 ++++++++++++++++++++++++++--------------------------
 1 files changed, 392 insertions(+), 403 deletions(-)

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index d7cb11d..2ddc15d 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -5,14 +5,14 @@
 #include "cache.h"
 #include "builtin.h"
 #include "utf8.h"
+#include "bstring/bstrlib.h"
 
 static FILE *cmitmsg, *patchfile, *fin, *fout;
 
 static int keep_subject;
-static const char *metainfo_charset;
-static char line[1000];
-static char name[1000];
-static char email[1000];
+static bstring metainfo_charset;
+static bstring name;
+static bstring email;
 
 static enum  {
 	TE_DONTCARE, TE_QP, TE_BASE64,
@@ -21,321 +21,291 @@ static enum  {
 	TYPE_TEXT, TYPE_OTHER,
 } message_type;
 
-static char charset[256];
+static bstring charset;
 static int patch_lines;
-static char **p_hdr_data, **s_hdr_data;
+static bstring *p_hdr_data, *s_hdr_data;
 
 #define MAX_HDR_PARSED 10
 #define MAX_BOUNDARIES 5
 
-static char *sanity_check(char *name, char *email)
+static bstring sanity_check(bstring name, bstring email)
 {
-	int len = strlen(name);
-	if (len < 3 || len > 60)
+	static struct tagbstring email_ind = bsStatic("<@>");
+	if (blength(name) < 3 || blength(name) > 60)
 		return email;
-	if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>'))
+	if (binchr(name, 0, &email_ind) != BSTR_ERR)
 		return email;
 	return name;
 }
 
-static int bogus_from(char *line)
+static int bogus_from(const_bstring line)
 {
 	/* John Doe <johndoe> */
-	char *bra, *ket, *dst, *cp;
-
+	int bra, ket;
 	/* This is fallback, so do not bother if we already have an
 	 * e-mail address.
 	 */
-	if (*email)
+	if (blength(email))
 		return 0;
 
-	bra = strchr(line, '<');
-	if (!bra)
+	bra = bstrchr(line, '<');
+	if (bra == BSTR_ERR)
 		return 0;
-	ket = strchr(bra, '>');
-	if (!ket)
+	ket = bstrchrp(line, bra, '>');
+	if (ket == BSTR_ERR)
 		return 0;
 
-	for (dst = email, cp = bra+1; cp < ket; )
-		*dst++ = *cp++;
-	*dst = 0;
-	for (cp = line; isspace(*cp); cp++)
-		;
-	for (bra--; isspace(*bra); bra--)
-		*bra = 0;
-	cp = sanity_check(cp, email);
-	strcpy(name, cp);
+	bdestroy(email);
+	email = bmidstr(line, bra + 1, ket - bra - 1);
+
+	name = bmidstr(line, 0, bra);
+	btrimws(name);
+	bassign(name, sanity_check(name, email));
 	return 1;
 }
 
-static int handle_from(char *in_line)
+static int handle_from(const_bstring line)
 {
-	char line[1000];
-	char *at;
-	char *dst;
+	int at, es, ee;
+	static struct tagbstring email_delim = bsStatic(" \n\t\r\v\f<>");
 
-	strcpy(line, in_line);
-	at = strchr(line, '@');
-	if (!at)
+	at = bstrchr(line, '@');
+	if (at == BSTR_ERR)
 		return bogus_from(line);
 
 	/*
 	 * If we already have one email, don't take any confusing lines
 	 */
-	if (*email && strchr(at+1, '@'))
+	if (blength(email) && bstrchrp(line, '@', at + 1) != BSTR_ERR)
 		return 0;
 
 	/* Pick up the string around '@', possibly delimited with <>
-	 * pair; that is the email part.  White them out while copying.
+	 * pair; that is the email part.
 	 */
-	while (at > line) {
-		char c = at[-1];
-		if (isspace(c))
-			break;
-		if (c == '<') {
-			at[-1] = ' ';
-			break;
-		}
-		at--;
-	}
-	dst = email;
-	for (;;) {
-		unsigned char c = *at;
-		if (!c || c == '>' || isspace(c)) {
-			if (c == '>')
-				*at = ' ';
-			break;
-		}
-		*at++ = ' ';
-		*dst++ = c;
-	}
-	*dst++ = 0;
+
+	es = binchrr(line, at, &email_delim);
+	ee = binchr(line, at, &email_delim);
+	bdestroy(email);
+	email = bmidstr(line, es + 1, ee - es - 1);
 
 	/* The remainder is name.  It could be "John Doe <john.doe@xz>"
 	 * or "john.doe@xz (John Doe)", but we have whited out the
 	 * email part, so trim from both ends, possibly removing
 	 * the () pair at the end.
 	 */
-	at = line + strlen(line);
-	while (at > line) {
-		unsigned char c = *--at;
-		if (!isspace(c)) {
-			at[(c == ')') ? 0 : 1] = 0;
-			break;
-		}
-	}
 
-	at = line;
-	for (;;) {
-		unsigned char c = *at;
-		if (!c || !isspace(c)) {
-			if (c == '(')
-				at++;
-			break;
-		}
-		at++;
-	}
-	at = sanity_check(at, email);
-	strcpy(name, at);
+	bdestroy(name);
+	name = bstrcpy(line);
+	bdelete(name, es, ee - es + 1);
+	btrimws(name);
+	if (bchar(name, 0) == '(')
+		bdelete(name, 0, 1);
+	if (bchar(name, blength(name) - 1) == ')')
+		btrunc(name, blength(name) - 1);
+	
+	bassign(name, sanity_check(name, email));
 	return 1;
 }
 
-static int handle_header(char *line, char *data, int ofs)
-{
-	if (!line || !data)
-		return 1;
-
-	strcpy(data, line+ofs);
-
-	return 0;
-}
-
 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
  * to have enough heuristics to grok MIME encoded patches often found
  * on our mailing lists.  For example, we do not even treat header lines
  * case insensitively.
  */
 
-static int slurp_attr(const char *line, const char *name, char *attr)
+static bstring slurp_attr(const_bstring line, const char *name)
 {
-	const char *ends, *ap = strcasestr(line, name);
-	size_t sz;
+	int end, start;
+	static struct tagbstring endchars = bsStatic("; \t");
+	struct tagbstring bname;
 
-	if (!ap) {
-		*attr = 0;
-		return 0;
-	}
-	ap += strlen(name);
-	if (*ap == '"') {
-		ap++;
-		ends = "\"";
+	btfromcstr(bname, name);
+	start =  binstrcaseless(line, 0, &bname);
+	if (start == BSTR_ERR)
+		return NULL;
+	
+	start += blength(&bname);
+	if (blength(line) > start && bchar(line, start) == '"') {
+		start++;
+		if ((end = bstrchrp(line, start, '"')) == BSTR_ERR)
+			end = blength(line);
+		return bmidstr(line, start, end - start);
 	}
-	else
-		ends = "; \t";
-	sz = strcspn(ap, ends);
-	memcpy(attr, ap, sz);
-	attr[sz] = 0;
-	return 1;
+	if ((end = binchr(line, start, &endchars)) == BSTR_ERR)
+		end = blength(line);
+	return bmidstr(line, start, end - start);
 }
 
 struct content_type {
-	char *boundary;
-	int boundary_len;
+	bstring boundary;
 };
 
 static struct content_type content[MAX_BOUNDARIES];
 
 static struct content_type *content_top = content;
 
-static int handle_content_type(char *line)
+static int handle_content_type(const_bstring line)
 {
-	char boundary[256];
-
-	if (strcasestr(line, "text/") == NULL)
+	static struct tagbstring cmp_text = bsStatic("text/");
+	bstring attr, boundary;
+	
+	if (binstrcaseless(line, 0, &cmp_text) == BSTR_ERR)
 		 message_type = TYPE_OTHER;
-	if (slurp_attr(line, "boundary=", boundary + 2)) {
-		memcpy(boundary, "--", 2);
+
+	if ((attr = slurp_attr(line, "boundary="))) {
+		boundary = bfromcstr("--");
+		bconcat(boundary, attr);
+		bdestroy(attr);
 		if (content_top++ >= &content[MAX_BOUNDARIES]) {
 			fprintf(stderr, "Too many boundaries to handle\n");
 			exit(1);
 		}
-		content_top->boundary_len = strlen(boundary);
-		content_top->boundary = xmalloc(content_top->boundary_len+1);
-		strcpy(content_top->boundary, boundary);
+		content_top->boundary = boundary;
+		return 0;
 	}
-	if (slurp_attr(line, "charset=", charset)) {
-		int i, c;
-		for (i = 0; (c = charset[i]) != 0; i++)
-			charset[i] = tolower(c);
+	if ((attr = slurp_attr(line, "charset="))) {
+		if (btolower(attr) == BSTR_ERR)
+			die("Couldn't convert %s to lowercase.\n", attr->data);
+		charset = attr;
 	}
 	return 0;
 }
 
-static int handle_content_transfer_encoding(char *line)
+static int handle_content_transfer_encoding(const_bstring line)
 {
-	if (strcasestr(line, "base64"))
+	static struct tagbstring cmp_base64 = bsStatic("base64");
+	static struct tagbstring cmp_qp = bsStatic("quoted-printable");
+
+	if (binstrcaseless(line, 0, &cmp_base64) != BSTR_ERR)
 		transfer_encoding = TE_BASE64;
-	else if (strcasestr(line, "quoted-printable"))
+	else if (binstrcaseless(line, 0, &cmp_qp))
 		transfer_encoding = TE_QP;
 	else
 		transfer_encoding = TE_DONTCARE;
 	return 0;
 }
 
-static int is_multipart_boundary(const char *line)
-{
-	return (!memcmp(line, content_top->boundary, content_top->boundary_len));
-}
-
-static int eatspace(char *line)
+static int is_multipart_boundary(const_bstring line)
 {
-	int len = strlen(line);
-	while (len > 0 && isspace(line[len-1]))
-		line[--len] = 0;
-	return len;
+	return !bstrncmp(line, content_top->boundary, blength(content_top->boundary));
 }
 
-static char *cleanup_subject(char *subject)
+/*
+ * Removes (Re:|[ \[\t:]|\[.*\])* if prefixed and
+ * trims trailing whitespace.
+ */
+static void cleanup_subject(bstring subject)
 {
-	for (;;) {
-		char *p;
-		int len, remove;
-		switch (*subject) {
+	int pos;
+	while (blength(subject)) {
+		switch (bchar(subject, 0)) {
 		case 'r': case 'R':
-			if (!memcmp("e:", subject+1, 2)) {
-				subject += 3;
+			if (blength(subject) <= 3)
+				break;
+			if (!memcmp(bdata(subject) + 1, "e:", 2)) {
+				bdelete(subject, 0, 3);
 				continue;
 			}
 			break;
-		case ' ': case '\t': case ':':
-			subject++;
-			continue;
-
 		case '[':
-			p = strchr(subject, ']');
-			if (!p) {
-				subject++;
-				continue;
-			}
-			len = strlen(p);
-			remove = p - subject;
-			if (remove <= len *2) {
-				subject = p+1;
-				continue;
+			if ((pos = bstrchr(subject, ']')) != BSTR_ERR) {
+				/* Don't remove more than a third of the subject. */
+				if (pos <= blength(subject)/3) {
+					bdelete(subject, 0, pos + 1);
+					continue;
+				}
+				break;
 			}
-			break;
+		/* fall through */
+		case ' ': case '\t': case ':':
+			bdelete(subject, 0, 1);
+			continue;
 		}
-		eatspace(subject);
-		return subject;
+
+		btrimws(subject);
+		return;
 	}
 }
 
-static void cleanup_space(char *buf)
+static void cleanup_space(bstring buf)
 {
-	unsigned char c;
-	while ((c = *buf) != 0) {
-		buf++;
-		if (isspace(c)) {
-			buf[-1] = ' ';
-			c = *buf;
-			while (isspace(c)) {
-				int len = strlen(buf);
-				memmove(buf, buf+1, len);
-				c = *buf;
-			}
+	struct bstrList *tok;
+	static struct tagbstring whitespace = bsStatic(" \n\t\r\f\v");
+	int i;
+
+	tok = bsplitstr(buf, &whitespace);
+	btrunc(buf, 0);
+	for (i = 0; i < tok->qty; i++) {
+		if (blength(tok->entry[i])) {
+			bconcat(buf, tok->entry[i]);
+			bconchar(buf, ' ');
 		}
 	}
+	/* Remove the last ' ' */
+	btrunc(buf, blength(buf) - 1);
+	bstrListDestroy(tok);
+}
+
+static int handle_header(bstring line, bstring *data, int ofs)
+{
+	if (!line)
+		return 1;
+
+	bdestroy(*data);
+	*data = bmidstr(line, ofs, blength(line) - ofs);
+
+	return 0;
 }
 
-static void decode_header(char *it, unsigned itsize);
+static void decode_header(bstring line);
 static char *header[MAX_HDR_PARSED] = {
 	"From","Subject","Date",
 };
 
-static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
+static int check_header(bstring line, bstring hdr_data[], int overwrite)
 {
 	int i;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
 		int len = strlen(header[i]);
+
 		if ((!hdr_data[i] || overwrite) &&
-		    !strncasecmp(line, header[i], len) &&
-		    line[len] == ':' && isspace(line[len + 1])) {
+		    bisstemeqcaselessblk(line, header[i], len) &&
+		    bchar(line, len) == ':' && isspace(bchar(line, len + 1))) {
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			decode_header(line + len + 2, linesize - len - 2);
-			hdr_data[i] = xmalloc(1000 * sizeof(char));
-			if (! handle_header(line, hdr_data[i], len + 2)) {
+			decode_header(line);
+			if (!handle_header(line, &hdr_data[i], len + 2)) {
 				return 1;
 			}
 		}
 	}
 
 	/* Content stuff */
-	if (!strncasecmp(line, "Content-Type", 12) &&
-		line[12] == ':' && isspace(line[12 + 1])) {
-		decode_header(line + 12 + 2, linesize - 12 - 2);
+	if (!bisstemeqcaselessblk(line, bsStaticBlkParms("Content-Type")) &&
+		bchar(line, 12) == ':' && isspace(bchar(line, 12 + 1))) {
+		decode_header(line);
 		if (! handle_content_type(line)) {
 			return 1;
 		}
 	}
-	if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
-		line[25] == ':' && isspace(line[25 + 1])) {
-		decode_header(line + 25 + 2, linesize - 25 - 2);
+	if (!bisstemeqcaselessblk(line, bsStaticBlkParms("Content-Transfer-Encoding")) &&
+		bchar(line, 25) == ':' && isspace(bchar(line, 25 + 1))) {
+		decode_header(line);
 		if (! handle_content_transfer_encoding(line)) {
 			return 1;
 		}
 	}
 
 	/* for inbody stuff */
-	if (!memcmp(">From", line, 5) && isspace(line[5]))
+	if (bisstemeqblk(line, bsStaticBlkParms(">From")) && isspace(bchar(line, 5)))
 		return 1;
-	if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
+	if (bisstemeqblk(line, bsStaticBlkParms("[PATCH]")) && isspace(bchar(line, 7))) {
 		for (i = 0; header[i]; i++) {
-			if (!memcmp("Subject: ", header[i], 9)) {
-				if (! handle_header(line, hdr_data[i], 0)) {
+			if (!memcmp("Subject", header[i], 7)) {
+				if (!handle_header(line, &hdr_data[i], 0)) {
 					return 1;
 				}
 			}
@@ -346,7 +316,7 @@ static int check_header(char *line, unsigned linesize, char **hdr_data, int over
 	return 0;
 }
 
-static int is_rfc2822_header(char *line)
+static int is_rfc2822_header(const_bstring line)
 {
 	/*
 	 * The section that defines the loosest possible
@@ -357,15 +327,15 @@ static int is_rfc2822_header(char *line)
 	 * ftext = %d33-57 / %59-126
 	 */
 	int ch;
-	char *cp = line;
+	char *cp = bdata(line);
 
 	/* Count mbox From headers as headers */
-	if (!memcmp(line, "From ", 5) || !memcmp(line, ">From ", 6))
+	if (blength(line) >= 6 && (!memcmp(cp, "From ", 5) || !memcmp(cp, ">From ", 6)))
 		return 1;
 
 	while ((ch = *cp++)) {
 		if (ch == ':')
-			return cp != line;
+			return cp != bdata(line);
 		if ((33 <= ch && ch <= 57) ||
 		    (59 <= ch && ch <= 126))
 			continue;
@@ -375,34 +345,23 @@ static int is_rfc2822_header(char *line)
 }
 
 /*
- * sz is size of 'line' buffer in bytes.  Must be reasonably
- * long enough to hold one physical real-world e-mail line.
+ * 'line' must be a valid bstring
  */
-static int read_one_header_line(char *line, int sz, FILE *in)
+static int read_one_header_line(struct bStream *in, bstring line)
 {
-	int len;
-
-	/*
-	 * We will read at most (sz-1) bytes and then potentially
-	 * re-add NUL after it.  Accessing line[sz] after this is safe
-	 * and we can allow len to grow up to and including sz.
-	 */
-	sz--;
-
 	/* Get the first part of the line. */
-	if (!fgets(line, sz, in))
-		return 0;
+	if (bsreadln(line, in, '\n') != BSTR_OK)
+		goto unread_line;
 
 	/*
 	 * Is it an empty line or not a valid rfc2822 header?
 	 * If so, stop here, and return false ("not a header")
 	 */
-	len = eatspace(line);
-	if (!len || !is_rfc2822_header(line)) {
+	brtrimws(line);
+	if (!blength(line) || !is_rfc2822_header(line)) {
 		/* Re-add the newline */
-		line[len] = '\n';
-		line[len + 1] = '\0';
-		return 0;
+		bconchar(line, '\n');
+		goto unread_line;
 	}
 
 	/*
@@ -410,63 +369,57 @@ static int read_one_header_line(char *line, int sz, FILE *in)
 	 * Yuck, 2822 header "folding"
 	 */
 	for (;;) {
-		int peek, addlen;
-		static char continuation[1000];
+		bstring continuation;
+		continuation = bfromcstr("");
 
-		peek = fgetc(in); ungetc(peek, in);
-		if (peek != ' ' && peek != '\t')
+		if (bsreadln(continuation, in, '\n') != BSTR_OK)
 			break;
-		if (!fgets(continuation, sizeof(continuation), in))
+		if (bchar(continuation, 0) != ' ' && bchar(continuation, 0) != '\t') {
+			bsunread(in, continuation);
 			break;
-		addlen = eatspace(continuation);
-		if (len < sz - 1) {
-			if (addlen >= sz - len)
-				addlen = sz - len - 1;
-			memcpy(line + len, continuation, addlen);
-			line[len] = '\n';
-			len += addlen;
 		}
+
+		continuation->data[0] = '\n';
+		brtrimws(continuation);
+		bconcat(line, continuation);
 	}
-	line[len] = 0;
 
 	return 1;
+unread_line:
+	bsunread(in, line);
+	return 0;
 }
 
-static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
+static bstring decode_q_segment(bstring line, int rfc2047)
 {
-	char *otend = ot + otsize;
+	char *in = bdata(line);
 	int c;
-	while ((c = *in++) != 0 && (in <= ep)) {
-		if (ot == otend) {
-			*--ot = '\0';
-			return -1;
-		}
+	bstring out = bfromcstralloc(blength(line), "");
+
+	while ((c = *in++) != 0) {
 		if (c == '=') {
 			int d = *in++;
 			if (d == '\n' || !d)
 				break; /* drop trailing newline */
-			*ot++ = ((hexval(d) << 4) | hexval(*in++));
+			bconchar(out, (hexval(d) << 4) | hexval(*in++));
 			continue;
 		}
 		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
 			c = 0x20;
-		*ot++ = c;
+		bconchar(out, c);
 	}
-	*ot = 0;
-	return 0;
+	return out;
 }
 
-static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
+static bstring decode_b_segment(bstring line)
 {
 	/* Decode in..ep, possibly in-place to ot */
 	int c, pos = 0, acc = 0;
-	char *otend = ot + otsize;
+	char *in = bdata(line);
+	bstring out;
 
-	while ((c = *in++) != 0 && (in <= ep)) {
-		if (ot == otend) {
-			*--ot = '\0';
-			return -1;
-		}
+	out = bfromcstralloc(blength(line), "");
+	while ((c = *in++) != 0) {
 		if (c == '+')
 			c = 62;
 		else if (c == '/')
@@ -491,21 +444,20 @@ static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
 			acc = (c << 2);
 			break;
 		case 1:
-			*ot++ = (acc | (c >> 4));
+			bconchar(out, (acc | (c >> 4)));
 			acc = (c & 15) << 4;
 			break;
 		case 2:
-			*ot++ = (acc | (c >> 2));
+			bconchar(out, (acc | (c >> 2)));
 			acc = (c & 3) << 6;
 			break;
 		case 3:
-			*ot++ = (acc | c);
+			bconchar(out, (acc | c));
 			acc = pos = 0;
 			break;
 		}
 	}
-	*ot = 0;
-	return 0;
+	return out;
 }
 
 /*
@@ -519,147 +471,174 @@ static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
  * Otherwise, we default to assuming it is Latin1 for historical
  * reasons.
  */
-static const char *guess_charset(const char *line, const char *target_charset)
+static bstring guess_charset(bstring line, bstring target_charset)
 {
-	if (is_encoding_utf8(target_charset)) {
-		if (is_utf8(line))
+	//FIXME: convert utf8.c to bstring
+	if (is_encoding_utf8(bdata(target_charset))) {
+		if (is_utf8(bdata(line)))
 			return NULL;
 	}
-	return "latin1";
+	return bfromcstr("latin1");
 }
 
-static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
+static void convert_to_utf8(bstring line, bstring charset)
 {
-	char *out;
+	bstring out;
+	char *cout;
 
-	if (!charset || !*charset) {
-		charset = guess_charset(line, metainfo_charset);
-		if (!charset)
+	if (blength(charset) == 0) {
+		out = guess_charset(line, metainfo_charset);
+		if (!out) {
+			bdestroy(charset);
+			charset = NULL;
 			return;
+		}
+		bassign(charset, out);
+		bdestroy(out);
 	}
 
-	if (!strcmp(metainfo_charset, charset))
+	if (!bstrcmp(metainfo_charset, charset))
 		return;
-	out = reencode_string(line, metainfo_charset, charset);
-	if (!out)
+	//FIXME: convert utf8.c to use bstring
+	cout = reencode_string(bdata(line), bdata(metainfo_charset), bdata(charset));
+	if (!cout)
 		die("cannot convert from %s to %s\n",
-		    charset, metainfo_charset);
-	strlcpy(line, out, linesize);
-	free(out);
+		    bdata(charset), bdata(metainfo_charset));
+	bassigncstr(line, cout);
 }
 
-static int decode_header_bq(char *it, unsigned itsize)
+static int decode_header_bq(bstring line)
 {
-	char *in, *out, *ep, *cp, *sp;
-	char outbuf[1000];
+	bstring out;
+	bstring decoded = NULL, charset_q = NULL, tmp;
 	int rfc2047 = 0;
+	int in = 0, cp, ep;
 
-	in = it;
-	out = outbuf;
-	while ((ep = strstr(in, "=?")) != NULL) {
-		int sz, encoding;
-		char charset_q[256], piecebuf[256];
+	struct tagbstring cmp_eq_qst = bsStatic("=?");
+	struct tagbstring cmp_qst_eq = bsStatic("?=");
+
+	out = bfromcstralloc(blength(line), "");
+
+	while ((ep = binstr(line, 0, &cmp_eq_qst)) != BSTR_ERR) {
+		int encoding;
 		rfc2047 = 1;
 
-		if (in != ep) {
-			sz = ep - in;
-			memcpy(out, in, sz);
-			out += sz;
-			in += sz;
-		}
+		bcatblk(out, bdataofs(line, in), ep - in);
+		in += ep - in + 2;
 		/* E.g.
 		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
 		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
 		 */
-		ep += 2;
-		cp = strchr(ep, '?');
-		if (!cp)
-			return rfc2047; /* no munging */
-		for (sp = ep; sp < cp; sp++)
-			charset_q[sp - ep] = tolower(*sp);
-		charset_q[cp - ep] = 0;
-		encoding = cp[1];
-		if (!encoding || cp[2] != '?')
-			return rfc2047; /* no munging */
-		ep = strstr(cp + 3, "?=");
-		if (!ep)
-			return rfc2047; /* no munging */
+
+		cp = bstrchrp(line, in, '?');
+		if (cp == BSTR_ERR)
+			goto out0; /* no munging */
+
+		charset_q = bmidstr(line, in, cp - in);
+		if (charset_q->slen)
+			btolower(charset_q);
+
+		if (line->slen < cp + 2)
+			goto out1;
+			//die("Bad header: %s,", line->data);
+
+		encoding = bchar(line, cp + 1);
+		if (!encoding || bchar(line, cp + 2) != '?')
+			goto out1; /* no munging */
+		ep = binstr(line, cp + 3, &cmp_qst_eq);
+		if (ep == BSTR_ERR)
+			goto out1; /* no munging */
 		switch (tolower(encoding)) {
 		default:
-			return rfc2047; /* no munging */
+			goto out1; /* no munging */
 		case 'b':
-			sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
+			//FIXME: use bmid2tbstr ?
+			// Needs to change the decode function to not look for null
+			tmp = bmidstr(line, cp + 3, ep - cp -3);
+			decoded = decode_b_segment(tmp);
 			break;
 		case 'q':
-			sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
+			tmp = bmidstr(line, cp + 3, ep - cp -3);
+			decoded = decode_q_segment(tmp, 1);
 			break;
 		}
-		if (sz < 0)
-			return rfc2047;
+		bdestroy(tmp);
+		if (decoded == NULL)
+			goto out1;
 		if (metainfo_charset)
-			convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
+			convert_to_utf8(decoded, charset_q);
 
-		sz = strlen(piecebuf);
-		if (outbuf + sizeof(outbuf) <= out + sz)
-			return rfc2047; /* no munging */
-		strcpy(out, piecebuf);
-		out += sz;
+		bconcat(out, decoded);
 		in = ep + 2;
+
+		bdestroy(decoded);
+		bdestroy(charset_q);
 	}
-	strcpy(out, in);
-	strlcpy(it, outbuf, itsize);
+	/* Add the remainder of the line. */
+	bcatblk(out, bdataofs(line, in), blength(line) - in);
+
+	bassign(line, out);
+
+	bdestroy(decoded);
+out1:
+	bdestroy(charset_q);
+out0:
+	bdestroy(out);
 	return rfc2047;
 }
 
-static void decode_header(char *it, unsigned itsize)
+static void decode_header(bstring line)
 {
-
-	if (decode_header_bq(it, itsize))
+	if (decode_header_bq(line))
 		return;
 	/* otherwise "it" is a straight copy of the input.
 	 * This can be binary guck but there is no charset specified.
 	 */
 	if (metainfo_charset)
-		convert_to_utf8(it, itsize, "");
+		convert_to_utf8(line, NULL);
 }
 
-static void decode_transfer_encoding(char *line, unsigned linesize)
+static void decode_transfer_encoding(bstring line)
 {
-	char *ep;
+	bstring ret = NULL;
 
 	switch (transfer_encoding) {
 	case TE_QP:
-		ep = line + strlen(line);
-		decode_q_segment(line, line, linesize, ep, 0);
+		ret = decode_q_segment(line, 0);
 		break;
 	case TE_BASE64:
-		ep = line + strlen(line);
-		decode_b_segment(line, line, linesize, ep);
+		ret = decode_b_segment(line);
 		break;
 	case TE_DONTCARE:
 		break;
 	}
+	if (ret)
+		bassign(line, ret);
+	bdestroy(ret);
 }
 
-static int handle_filter(char *line, unsigned linesize);
+static int handle_filter(bstring line);
 
-static int find_boundary(void)
+static int find_boundary(struct bStream *in, bstring line)
 {
-	while(fgets(line, sizeof(line), fin) != NULL) {
+	while(bsreadln(line, in, '\n') != BSTR_ERR) {
 		if (is_multipart_boundary(line))
 			return 1;
 	}
 	return 0;
 }
 
-static int handle_boundary(void)
+static int handle_boundary(struct bStream *in, bstring line)
 {
-	char newline[]="\n";
+	struct tagbstring newline = bsStatic("\n");
+	char *c;
 again:
-	if (!memcmp(line+content_top->boundary_len, "--", 2)) {
+	if (blength(line) >= blength(content_top->boundary) + 2 &&
+	    (c = bdataofs(line, blength(content_top->boundary))) &&
+	    !memcmp(c, "--", 2)) {
 		/* we hit an end boundary */
 		/* pop the current boundary off the stack */
-		free(content_top->boundary);
+		bdestroy(content_top->boundary);
 
 		/* technically won't happen as is_multipart_boundary()
 		   will fail first.  But just in case..
@@ -669,49 +648,52 @@ again:
 					"can't recover\n");
 			exit(1);
 		}
-		handle_filter(newline, sizeof(newline));
+		handle_filter(&newline);
 
 		/* skip to the next boundary */
-		if (!find_boundary())
+		if (!find_boundary(in, line)) {
+			bsunread(in, line);
 			return 0;
+		}
 		goto again;
 	}
 
 	/* set some defaults */
 	transfer_encoding = TE_DONTCARE;
-	charset[0] = 0;
+	bassigncstr(charset, "");
 	message_type = TYPE_TEXT;
 
 	/* slurp in this section's info */
-	while (read_one_header_line(line, sizeof(line), fin))
-		check_header(line, sizeof(line), p_hdr_data, 0);
+	while (read_one_header_line(in, line))
+		check_header(line, p_hdr_data, 0);
 
 	/* eat the blank line after section info */
-	return (fgets(line, sizeof(line), fin) != NULL);
+	return (bsreadln(line, in, '\n') != BSTR_ERR);
 }
 
-static inline int patchbreak(const char *line)
+static inline int patchbreak(const_bstring line)
 {
+	int i;
+
 	/* Beginning of a "diff -" header? */
-	if (!memcmp("diff -", line, 6))
+	if (!bisstemeqblk(line, bsStaticBlkParms("diff -")))
 		return 1;
 
 	/* CVS "Index: " line? */
-	if (!memcmp("Index: ", line, 7))
+	if (!bisstemeqblk(line, bsStaticBlkParms("Index: ")))
 		return 1;
 
 	/*
 	 * "--- <filename>" starts patches without headers
 	 * "---<sp>*" is a manual separator
 	 */
-	if (!memcmp("---", line, 3)) {
-		line += 3;
+	if (!bisstemeqblk(line, bsStaticBlkParms("---"))) {
 		/* space followed by a filename? */
-		if (line[0] == ' ' && !isspace(line[1]))
+		if (bchar(line, 3) == ' ' && !isspace(bchar(line, 4)))
 			return 1;
 		/* Just whitespace? */
-		for (;;) {
-			unsigned char c = *line++;
+		for (i = 3; i < blength(line); i++) {
+			unsigned char c = bchar(line, i);
 			if (c == '\n')
 				return 1;
 			if (!isspace(c))
@@ -723,31 +705,25 @@ static inline int patchbreak(const char *line)
 }
 
 
-static int handle_commit_msg(char *line, unsigned linesize)
+static int handle_commit_msg(bstring line)
 {
 	static int still_looking = 1;
-	char *endline = line + linesize;
+	char *c;
 
 	if (!cmitmsg)
 		return 0;
 
 	if (still_looking) {
-		char *cp = line;
-		if (isspace(*line)) {
-			for (cp = line + 1; *cp; cp++) {
-				if (!isspace(*cp))
-					break;
-			}
-			if (!*cp)
-				return 0;
-		}
-		if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
+		brtrimws(line);
+		if (blength(line) == 0)
+			return 0;
+		if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
 			return 0;
 	}
 
 	/* normalize the log message to UTF-8. */
 	if (metainfo_charset)
-		convert_to_utf8(line, endline - line, charset);
+		convert_to_utf8(line, charset);
 
 	if (patchbreak(line)) {
 		fclose(cmitmsg);
@@ -755,18 +731,24 @@ static int handle_commit_msg(char *line, unsigned linesize)
 		return 1;
 	}
 
-	fputs(line, cmitmsg);
+	if ((c = bdata(line)) == NULL)
+		die("Programming error: line had no data\n");
+	fputs(c, cmitmsg);
 	return 0;
 }
 
-static int handle_patch(char *line)
+static int handle_patch(const_bstring line)
 {
-	fputs(line, patchfile);
+	char *c;
+
+	if ((c = bdata(line)) == NULL)
+		die("Programming error: patch line had no data\n");
+	fputs(c, patchfile);
 	patch_lines++;
 	return 0;
 }
 
-static int handle_filter(char *line, unsigned linesize)
+static int handle_filter(bstring line)
 {
 	static int filter = 0;
 
@@ -775,7 +757,7 @@ static int handle_filter(char *line, unsigned linesize)
 	 */
 	switch (filter) {
 	case 0:
-		if (!handle_commit_msg(line, linesize))
+		if (!handle_commit_msg(line))
 			break;
 		filter++;
 	case 1:
@@ -789,16 +771,19 @@ static int handle_filter(char *line, unsigned linesize)
 	return 0;
 }
 
-static void handle_body(void)
+static void handle_body(struct bStream *in)
 {
-	int rc = 0;
-	static char newline[2000];
-	static char *np = newline;
+	//FIXME: bdestroy line. unread line in more places?
+	bstring line;
+	int rc = 0, i, end;
 
+	line = bfromcstr("");
 	/* Skip up to the first boundary */
-	if (content_top->boundary) {
-		if (!find_boundary())
+	if (content_top->boundary) {//FIXME: ?
+		if (!find_boundary(in, line)) {
+			bsunread(in, line);
 			return;
+		}
 	}
 
 	do {
@@ -806,24 +791,24 @@ static void handle_body(void)
 		if (content_top->boundary && is_multipart_boundary(line)) {
 			/* flush any leftover */
 			if ((transfer_encoding == TE_BASE64)  &&
-			    (np != newline)) {
-				handle_filter(newline, sizeof(newline));
+			    (blength(line))) {
+				handle_filter(line);
 			}
-			if (!handle_boundary())
+			if (!handle_boundary(in, line))
 				return;
 		}
 
 		/* Unwrap transfer encoding */
-		decode_transfer_encoding(line, sizeof(line));
+		decode_transfer_encoding(line);
 
 		switch (transfer_encoding) {
 		case TE_BASE64:
 		{
-			char *op = line;
+			struct bstrList *lines;
 
 			/* binary data most likely doesn't have newlines */
 			if (message_type != TYPE_TEXT) {
-				rc = handle_filter(line, sizeof(newline));
+				rc = handle_filter(line);
 				break;
 			}
 
@@ -832,54 +817,55 @@ static void handle_body(void)
 			 * at a time to handle_filter()
 			 */
 
-			do {
-				while (*op != '\n' && *op != 0)
-					*np++ = *op++;
-				*np = *op;
-				if (*np != 0) {
-					/* should be sitting on a new line */
-					*(++np) = 0;
-					op++;
-					rc = handle_filter(newline, sizeof(newline));
-					np = newline;
-				}
-			} while (*op != 0);
-			/* the partial chunk is saved in newline and
+			lines = bsplit(line, '\n');
+			end = lines->qty - 1;
+			/* the partial chunk is saved in line and
 			 * will be appended by the next iteration of fgets
 			 */
+			if (bchar(line, blength(line) - 1) != '\n') {
+				bassign(line, lines->entry[end]);
+				end--;
+			} else
+				btrunc(line, 0);
+			for (i = 0; i <= end; i++)
+				rc = handle_filter(lines->entry[i]);
+
+			bstrListDestroy(lines);
 			break;
 		}
 		default:
-			rc = handle_filter(line, sizeof(newline));
+			rc = handle_filter(line);
+			btrunc(line, 0);
 		}
 		if (rc)
 			/* nothing left to filter */
 			break;
-	} while (fgets(line, sizeof(line), fin));
+	} while (bsreadlna(line, in, '\n') != BSTR_ERR);
 
 	return;
 }
 
-static void output_header_lines(FILE *fout, const char *hdr, char *data)
+static void output_header_lines(FILE *fout, const char *hdr, const_bstring data)
 {
+	char *sp;
+	sp = bdata(data);
 	while (1) {
-		char *ep = strchr(data, '\n');
+		char *ep = strchr(sp, '\n');
 		int len;
 		if (!ep)
-			len = strlen(data);
+			len = strlen(sp);
 		else
-			len = ep - data;
-		fprintf(fout, "%s: %.*s\n", hdr, len, data);
+			len = ep - sp;
+		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
 		if (!ep)
 			break;
-		data = ep + 1;
+		sp = ep + 1;
 	}
 }
 
 static void handle_info(void)
 {
-	char *sub;
-	char *hdr;
+	bstring hdr;
 	int i;
 
 	for (i = 0; header[i]; i++) {
@@ -893,32 +879,32 @@ static void handle_info(void)
 			continue;
 
 		if (!memcmp(header[i], "Subject", 7)) {
-			if (keep_subject)
-				sub = hdr;
-			else {
-				sub = cleanup_subject(hdr);
-				cleanup_space(sub);
+			if (!keep_subject) {
+				cleanup_subject(hdr);
+				cleanup_space(hdr);
 			}
-			output_header_lines(fout, "Subject", sub);
+			output_header_lines(fout, "Subject", hdr);
 		} else if (!memcmp(header[i], "From", 4)) {
 			handle_from(hdr);
-			fprintf(fout, "Author: %s\n", name);
-			fprintf(fout, "Email: %s\n", email);
+			fprintf(fout, "Author: %s\n", bdata(name));
+			fprintf(fout, "Email: %s\n", bdata(email));
 		} else {
 			cleanup_space(hdr);
-			fprintf(fout, "%s: %s\n", header[i], hdr);
+			fprintf(fout, "%s: %s\n", header[i], bdata(hdr));
 		}
 	}
 	fprintf(fout, "\n");
 }
 
-static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
+static int mailinfo(FILE *in, FILE *out, int ks, const_bstring encoding,
 		    const char *msg, const char *patch)
 {
 	keep_subject = ks;
-	metainfo_charset = encoding;
+	metainfo_charset = bstrcpy(encoding);
 	fin = in;
 	fout = out;
+	bstring line;
+	struct bStream *in_stream = bsopen((bNread) fread, in);
 
 	cmitmsg = fopen(msg, "w");
 	if (!cmitmsg) {
@@ -932,14 +918,17 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
 		return -1;
 	}
 
-	p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
-	s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
+	p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*p_hdr_data));
+	s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*s_hdr_data));
 
 	/* process the email header */
-	while (read_one_header_line(line, sizeof(line), fin))
-		check_header(line, sizeof(line), p_hdr_data, 1);
+	line = bfromcstr("");
+	while (read_one_header_line(in_stream, line))
+		check_header(line, p_hdr_data, 1);
 
-	handle_body();
+	bsunread(in_stream, line);
+	
+	handle_body(in_stream);
 	handle_info();
 
 	return 0;
@@ -958,17 +947,17 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 	git_config(git_default_config);
 
 	def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
-	metainfo_charset = def_charset;
+	metainfo_charset = bfromcstr(def_charset);
 
 	while (1 < argc && argv[1][0] == '-') {
 		if (!strcmp(argv[1], "-k"))
 			keep_subject = 1;
 		else if (!strcmp(argv[1], "-u"))
-			metainfo_charset = def_charset;
+			bassigncstr(metainfo_charset, def_charset);
 		else if (!strcmp(argv[1], "-n"))
 			metainfo_charset = NULL;
 		else if (!prefixcmp(argv[1], "--encoding="))
-			metainfo_charset = argv[1] + 11;
+			bassigncstr(metainfo_charset, argv[1] + 11);
 		else
 			usage(mailinfo_usage);
 		argc--; argv++;
-- 
1.5.3.rc7

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-04 20:50 [RFC] Convert builin-mailinfo.c to use The Better String Library Lukas Sandström
@ 2007-09-04 21:38 ` Alex Riesen
  2007-09-04 23:01   ` Pierre Habouzit
  2007-09-05 14:54 ` Kristian Høgsberg
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 102+ messages in thread
From: Alex Riesen @ 2007-09-04 21:38 UTC (permalink / raw)
  To: Lukas Sandström; +Cc: Git Mailing List, Junio C Hamano

Lukas Sandström, Tue, Sep 04, 2007 22:50:08 +0200:
> Hi.
> 
> This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
> 
> The patch doesn't pass all the tests in the testsuit yet, but I thought I'd
> send it out so people can decide if they like how the code looks.

It looks uglier, but what are measurable merits? Object code size,
performance hit/improvement, valgrind logs?

> -static int read_one_header_line(char *line, int sz, FILE *in)
> +static int read_one_header_line(struct bStream *in, bstring line)

Every coder has a time in his life when he writes a string library...
and a stream support for it.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-04 21:38 ` Alex Riesen
@ 2007-09-04 23:01   ` Pierre Habouzit
  0 siblings, 0 replies; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-04 23:01 UTC (permalink / raw)
  To: Alex Riesen; +Cc: Lukas Sandström, Git Mailing List, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 3505 bytes --]

On mar, sep 04, 2007 at 09:38:57 +0000, Alex Riesen wrote:
> Lukas Sandström, Tue, Sep 04, 2007 22:50:08 +0200:
> > Hi.
> > 
> > This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
> > 
> > The patch doesn't pass all the tests in the testsuit yet, but I thought I'd
> > send it out so people can decide if they like how the code looks.
> 
> It looks uglier, but what are measurable merits? Object code size,
> performance hit/improvement, valgrind logs?

  Well I honestly believe that putting strbufs/bstrings in mailinfo.c
adds no value. I was going to give it a try to see how strbufs
performed, but it's just useless.

  The main problem mailinfo has, according to Junio, is that it may
sometimes truncate some things in buffers at 1000 octets, without dying
loudly. That is bad.

  _but_ there is no point in using arbitrarily long string buffers to
parse a mail. Remember, a mail goes through SMTP, and SMTP is supposed
to limit its lines at 512 characters (without use of extensions at
least). Not to mention that an email address cannot be more than 64+256
chars long (or something around that). So using variable-length buffers is
just a waste.

  string buffers are not really (IMHO) supposed to help in parsing
tasks, and when you need to do some serious parsing, either do it by
hand or use lex, but nothing in between makes sense to me.

  OTOH, string buffers can be used in the many places where git has its
own implementations (at least 4 different ones by my current count, and
growing) of always slightly different kinds of buffers. I have more patches
pending here than the one I already sent, and well, here is the
diffstat:

$ git diff --stat origin/master.. ^strbuf*
 archive-tar.c         |   67 ++++++++++++------------------------------------
 builtin-apply.c       |   29 ++++++---------------
 builtin-blame.c       |   34 ++++++++-----------------
 builtin-commit-tree.c |   59 +++++++++---------------------------------
 builtin-rerere.c      |   53 +++++++++++---------------------------
 cache-tree.c          |   57 ++++++++++++++---------------------------
 diff.c                |   25 ++++++------------
 fast-import.c         |   38 +++++++++++----------------
 mktree.c              |   26 ++++++-------------
 9 files changed, 116 insertions(+), 272 deletions(-)

  I mean, there is not even a need to show the diff to understand what
the gain is. And that was possible, because strbufs are straightforward,
and give you the kind of control git needs (tweaking how memory will
be allocated to avoid reallocs is part of the answer).


  A French author once said: “Il semble que la perfection soit atteinte
non quand il n'y a plus rien à ajouter, mais quand il n'y a plus rien à
retrancher.” -- Antoine de Saint-Exupéry[0]. IMHO git will never need any
of the bstring splits, streaming functions, tokenization or whatever,
and supporting those has necessarily led the bstring library to make
some choices that may not fit git needs. I don't really like reinventing
the wheel, but OTOH buffers and strings are often on the critical path,
and having a nice fitting buffer API is priceless.


  [0] Perfection is achieved, not when there is nothing more to add, but
      when there is nothing left to take away.
-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-04 20:50 [RFC] Convert builin-mailinfo.c to use The Better String Library Lukas Sandström
  2007-09-04 21:38 ` Alex Riesen
@ 2007-09-05 14:54 ` Kristian Høgsberg
  2007-09-05 17:29   ` Matthieu Moy
  2007-09-05 15:27 ` Kristian Høgsberg
  2007-09-07 10:47 ` Lukas Sandström
  3 siblings, 1 reply; 102+ messages in thread
From: Kristian Høgsberg @ 2007-09-05 14:54 UTC (permalink / raw)
  To: Lukas Sandström; +Cc: Git Mailing List, Junio C Hamano

On Tue, 2007-09-04 at 22:50 +0200, Lukas Sandström wrote:
> Hi.
> 
> This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
> 
> The patch doesn't pass all the tests in the testsuit yet, but I thought I'd
> send it out so people can decide if they like how the code looks.
> 
> I'm not sending a patch to add the library files at this time. I'll send
> that patch when this patch is working.
> 
> The changes required to make it pass the tests shouldn't be very large.

Please, no.  Let's not pull in a dependency for something as simple as a
string library.  How many distros have bstring packaged?
The right version?  Does it work on Windows?  We already have strbuf.c,
let's just consolidate the string manipulation code already in git under
that interface.

Kristian

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-04 20:50 [RFC] Convert builin-mailinfo.c to use The Better String Library Lukas Sandström
  2007-09-04 21:38 ` Alex Riesen
  2007-09-05 14:54 ` Kristian Høgsberg
@ 2007-09-05 15:27 ` Kristian Høgsberg
  2007-09-07 10:47 ` Lukas Sandström
  3 siblings, 0 replies; 102+ messages in thread
From: Kristian Høgsberg @ 2007-09-05 15:27 UTC (permalink / raw)
  To: Git Mailing List

On Tue, 2007-09-04 at 22:50 +0200, Lukas Sandström wrote:
> Hi.
> 
> This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
> 
> The patch doesn't pass all the tests in the testsuit yet, but I thought I'd
> send it out so people can decide if they like how the code looks.
> 
> I'm not sending a patch to add the library files at this time. I'll send
> that patch when this patch is working.
> 
> The changes required to make it pass the tests shouldn't be very large.

Please, no.  Let's not pull in a dependency for something as simple as a
string library.  How many distros have bstring packaged?
The right version?  Does it work on Windows?  We already have strbuf.c,
let's just consolidate the string manipulation code already in git under
that interface.

Kristian

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-05 14:54 ` Kristian Høgsberg
@ 2007-09-05 17:29   ` Matthieu Moy
  2007-09-06  2:30     ` Miles Bader
  2007-09-06  4:48     ` Dmitry Kakurin
  0 siblings, 2 replies; 102+ messages in thread
From: Matthieu Moy @ 2007-09-05 17:29 UTC (permalink / raw)
  To: Kristian Høgsberg
  Cc: Lukas Sandström, Git Mailing List, Junio C Hamano

Kristian Høgsberg <krh@redhat.com> writes:

> On Tue, 2007-09-04 at 22:50 +0200, Lukas Sandström wrote:
>> Hi.
>> 
>> This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
>> 
>> The patch doesn't pass all the tests in the testsuit yet, but I thought I'd
>> send it out so people can decide if they like how the code looks.
>> 
>> I'm not sending a patch to add the library files at this time. I'll send
>> that patch when this patch is working.
>> 
>> The changes required to make it pass the tests shouldn't be very large.
>
> Please, no.  Let's not pull in a dependency for something as simple as a
> string library.  How many distros have bstring packaged?
> The right version?

That's not a good argument. If dependency is a problem, bstring can
easily be distributed as part of git. It's really small, so it won't
make git bloated:

$ wc -l *.c *.h
    82 bsafe.c
  3462 bstest.c
  1134 bstraux.c
  2964 bstrlib.c
   358 testaux.c
    43 bsafe.h
   112 bstraux.h
   302 bstrlib.h
   442 bstrwrap.h
  8899 total

> Does it work on Windows?

The library is totally stand alone, portable (known to work with
gcc/g++, MSVC++, Intel C++, WATCOM C/C++, Turbo C, Borland C++, IBM's
native CC compiler on Windows, Linux and Mac OS X)

> We already have strbuf.c, let's just consolidate the string
> manipulation code already in git under that interface.

The right question is: what does git need. One way to consolidate
strbuf would be to simply

$ rm strbuf.{c,h}
$ unzip bsstring.zip

and if people decide that git needs a non-trivial string library,
writing/testing more code in strbuf.c would probably be more work
than just reading what the bstring code does to become familiar enough
with it to even be able to maintain it later.

If people decide that git needs a really trivial string library, then
a few improvements to strbuf.c can be good.

I'd argue in favor of the first option. C strings are horrible, and I
think doing something pleasant to use and safe is not completely
trivial. But I'm not a big enough contributor to really decide in
spite of others ;-).

-- 
Matthieu

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-05 17:29   ` Matthieu Moy
@ 2007-09-06  2:30     ` Miles Bader
  2007-09-06  4:48     ` Dmitry Kakurin
  1 sibling, 0 replies; 102+ messages in thread
From: Miles Bader @ 2007-09-06  2:30 UTC (permalink / raw)
  To: Matthieu Moy
  Cc: Kristian Høgsberg, Lukas Sandström, Git Mailing List,
	Junio C Hamano

Matthieu Moy <Matthieu.Moy@imag.fr> writes:
> and if people decide that git needs a non-trivial string library,
> writing/testing more code in strbuf.c would probably be more work
> than just reading what the bstring code does to become familiar enough
> with it to even be able to maintain it later.

From what I've seen (by perusing the bstring website), bstring is kind
of ugly though....

-Miles

-- 
"Suppose we've chosen the wrong god. Every time we go to church we're
just making him madder and madder." -- Homer Simpson

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-05 17:29   ` Matthieu Moy
  2007-09-06  2:30     ` Miles Bader
@ 2007-09-06  4:48     ` Dmitry Kakurin
  2007-09-06  4:59       ` Shawn O. Pearce
                         ` (3 more replies)
  1 sibling, 4 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-06  4:48 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: Git

[ snip ]

When I first looked at Git source code two things struck me as odd:
1. Pure C as opposed to C++. No idea why. Please don't talk about 
portability, it's BS.
2. Brute-force, direct string manipulation. It's both verbose and 
error-prone. This makes it hard to follow high-level code logic.

- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  4:48     ` Dmitry Kakurin
@ 2007-09-06  4:59       ` Shawn O. Pearce
  2007-09-06  9:12         ` Andreas Ericsson
  2007-09-06  5:03       ` Miles Bader
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 102+ messages in thread
From: Shawn O. Pearce @ 2007-09-06  4:59 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git

Dmitry Kakurin <dmitry.kakurin@gmail.com> wrote:
> When I first looked at Git source code two things struck me as odd:
> 1. Pure C as opposed to C++. No idea why. Please don't talk about 
> portability, it's BS.

Git's creator (Linus) codes in C, not C++.  He has at various times
stated reasons why he does not use C++.  I'm sure one can find such
messages with a bit of searching on mailing lists that he frequents.
He has his reasons.  I also happen to agree with at least some
of them.  :)

Git evolved from that initial prototype that Linus created.  I'm not
sure how much code survives from those initial few versions that
Linus managed before Junio took over, but nobody wanted to rewrite
things that already work so it just stayed in C.
"If it works, don't fix it."

C works.  We (now) have 83,215 lines of it.  It's not going away
anytime soon in Git.  It is also a relatively simple language that
a large number of open source programmers know.  This makes it easy
for them to get involved in the project.  Instead of say Haskell,
which has a smaller community.  Or Tcl/Tk as we recently found out
in the Git User Survey.  :-\

-- 
Shawn.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  4:48     ` Dmitry Kakurin
  2007-09-06  4:59       ` Shawn O. Pearce
@ 2007-09-06  5:03       ` Miles Bader
  2007-09-06 12:08         ` Johannes Schindelin
  2007-09-06 17:50       ` Linus Torvalds
  2010-06-10 19:12       ` Ian Molton
  3 siblings, 1 reply; 102+ messages in thread
From: Miles Bader @ 2007-09-06  5:03 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git

Dmitry Kakurin <dmitry.kakurin@gmail.com> writes:
> When I first looked at Git source code two things struck me as odd:
> 1. Pure C as opposed to C++. No idea why. Please don't talk about
> portability, it's BS.

Just to piss you off.

-Miles

-- 
Love is a snowmobile racing across the tundra.  Suddenly it flips over,
pinning you underneath.  At night the ice weasels come.  --Nietzsche

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  4:59       ` Shawn O. Pearce
@ 2007-09-06  9:12         ` Andreas Ericsson
  2007-09-06  9:35           ` Junio C Hamano
  2007-09-06  9:52           ` David Kastrup
  0 siblings, 2 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-06  9:12 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Dmitry Kakurin, Matthieu Moy, Git

Shawn O. Pearce wrote:
> Dmitry Kakurin <dmitry.kakurin@gmail.com> wrote:
>> When I first looked at Git source code two things struck me as odd:
>> 1. Pure C as opposed to C++. No idea why. Please don't talk about 
>> portability, it's BS.
> 
> It is also a relatively simple language that
> a large number of open source programmers know.  This makes it easy
> for them to get involved in the project.


This is important. Git contains code from more than 300 people. I'm
guessing you could cut that number by 2/3 if it had been written in C++.

Git is cheating a bit though. Its primary audience was (and is) the
various integrators working on the Linux kernel, all of whom are fairly
competent C programmers.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  9:12         ` Andreas Ericsson
@ 2007-09-06  9:35           ` Junio C Hamano
  2007-09-06 10:21             ` Andreas Ericsson
  2007-09-06  9:52           ` David Kastrup
  1 sibling, 1 reply; 102+ messages in thread
From: Junio C Hamano @ 2007-09-06  9:35 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Shawn O. Pearce, Dmitry Kakurin, Matthieu Moy, Git

Andreas Ericsson <ae@op5.se> writes:

> Git is cheating a bit though. Its primary audience was (and is) the
> various integrators working on the Linux kernel, all of whom are fairly
> competent C programmers.

Do we still have a huge overlap with the kernel people?  I had
an impression that patches from the kernel folks, with notable
exception from a handful (you know who you are), have petered
out rapidly after the first several weeks.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  9:12         ` Andreas Ericsson
  2007-09-06  9:35           ` Junio C Hamano
@ 2007-09-06  9:52           ` David Kastrup
  1 sibling, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-06  9:52 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Shawn O. Pearce, Dmitry Kakurin, Matthieu Moy, Git

Andreas Ericsson <ae@op5.se> writes:

> Shawn O. Pearce wrote:
>> Dmitry Kakurin <dmitry.kakurin@gmail.com> wrote:
>>> When I first looked at Git source code two things struck me as odd:
>>> 1. Pure C as opposed to C++. No idea why. Please don't talk about
>>> portability, it's BS.
>>
>> It is also a relatively simple language that
>> a large number of open source programmers know.  This makes it easy
>> for them to get involved in the project.
>
>
> This is important. Git contains code from more than 300 people. I'm
> guessing you could cut that number by 2/3 if it had been written in
> C++.

C++ is a language without design discipline.  Its set of features and
syntactic elements is incontingent (for example, its templates started
as a ripoff of Ada generics which would have been ok except for the
completely braindead idea of taking the Ada angle bracket restriction
syntax along with it), and it is the task of each programmer to choose
a sane and manageable subset and style, and implement using that.  As
a consequence, every C++ programmer writes his own personal dialect of
C++, and we have about 20 different incompatible implementations of
multidimensional numeric arrays, making a complete mockery of the
"code reuse" mantra: C++ _projects_ can't actually usefully achieve
"multiple inheritance" on a design/meta level: once you start with one
non-trivial design, fitting other separately evolved components with a
different style causes retrofitting nightmares.

So going to C++ means cutting down the amount of people who find
themselves comfortable with the actual design and layout down to maybe
10% of those who would actually feel ok with the actual _algorithms_
employed.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  9:35           ` Junio C Hamano
@ 2007-09-06 10:21             ` Andreas Ericsson
  0 siblings, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-06 10:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, Dmitry Kakurin, Matthieu Moy, Git

Junio C Hamano wrote:
> Andreas Ericsson <ae@op5.se> writes:
> 
>> Git is cheating a bit though. Its primary audience was (and is) the
>> various integrators working on the Linux kernel, all of whom are fairly
>> competent C programmers.
> 
> Do we still have a huge overlap with the kernel people?  I had
> an impression that patches from the kernel folks, with notable
> exception from a handful (you know who you are), have petered
> out rapidly after the first several weeks.

True, but the point I was trying to make is that because git is written
in C, for an audience who are extremely at home with that particular
language, it quickly attracted contributors.

git log --pretty=short | sed -n 's/^Author: \([^<]*\)<.*$/\1/p' | \
	sort | uniq | wc -l

reports 355 unique lines, although some authors are mentioned twice
(Theodore Tso vs Theodore Ts'o). Cross-matching the kernel authors
with the git authors shows that git and linux have 111 developers
in common, again reporting some of them twice. A quick visual scan
shows the figure to be 106, assuming no two authors have the same
name (including email addresses produced more unique contributors as
people change email more often than they change name).

It's not unreasonable to say that git got at least 106 C-programmers
"for free" included in their userbase round about the same second
Linus went public with his intentions of managing the linux kernel
in git, all of which are obviously comfortable enough with C to
poke around in the kernel.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  5:03       ` Miles Bader
@ 2007-09-06 12:08         ` Johannes Schindelin
  0 siblings, 0 replies; 102+ messages in thread
From: Johannes Schindelin @ 2007-09-06 12:08 UTC (permalink / raw)
  To: Miles Bader; +Cc: Dmitry Kakurin, Matthieu Moy, Git

Hi,

On Thu, 6 Sep 2007, Miles Bader wrote:

> Dmitry Kakurin <dmitry.kakurin@gmail.com> writes:
> > When I first looked at Git source code two things struck me as odd:
> > 1. Pure C as opposed to C++. No idea why. Please don't talk about
> > portability, it's BS.
> 
> Just to piss you off.

Hehe.

FWIW I strongly disagree that it's BS.  As others have stated, the reasons 
are easily found, and they are no weak arguments.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  4:48     ` Dmitry Kakurin
  2007-09-06  4:59       ` Shawn O. Pearce
  2007-09-06  5:03       ` Miles Bader
@ 2007-09-06 17:50       ` Linus Torvalds
  2007-09-07  0:21         ` Dmitry Kakurin
                           ` (2 more replies)
  2010-06-10 19:12       ` Ian Molton
  3 siblings, 3 replies; 102+ messages in thread
From: Linus Torvalds @ 2007-09-06 17:50 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git



On Wed, 5 Sep 2007, Dmitry Kakurin wrote:
> 
> When I first looked at Git source code two things struck me as odd:
> 1. Pure C as opposed to C++. No idea why. Please don't talk about portability,
> it's BS.

*YOU* are full of bullshit.

C++ is a horrible language. It's made more horrible by the fact that a lot 
of substandard programmers use it, to the point where it's much much 
easier to generate total and utter crap with it. Quite frankly, even if 
the choice of C were to do *nothing* but keep the C++ programmers out, 
that in itself would be a huge reason to use C.

In other words: the choice of C is the only sane choice. I know Miles 
Bader jokingly said "to piss you off", but it's actually true. I've come 
to the conclusion that any programmer that would prefer the project to be 
in C++ over C is likely a programmer that I really *would* prefer to piss 
off, so that he doesn't come and screw up any project I'm involved with.

C++ leads to really really bad design choices. You invariably start using 
the "nice" library features of the language like STL and Boost and other 
total and utter crap, that may "help" you program, but causes:

 - infinite amounts of pain when they don't work (and anybody who tells me 
   that STL and especially Boost are stable and portable is just so full 
   of BS that it's not even funny)

 - inefficient abstracted programming models where two years down the road 
   you notice that some abstraction wasn't very efficient, but now all 
   your code depends on all the nice object models around it, and you 
   cannot fix it without rewriting your app.

In other words, the only way to do good, efficient, and system-level and 
portable C++ ends up to limit yourself to all the things that are 
basically available in C. And limiting your project to C means that people 
don't screw that up, and also means that you get a lot of programmers that 
do actually understand low-level issues and don't screw things up with any 
idiotic "object model" crap.

So I'm sorry, but for something like git, where efficiency was a primary 
objective, the "advantages" of C++ is just a huge mistake. The fact that 
we also piss off people who cannot see that is just a big additional 
advantage.

If you want a VCS that is written in C++, go play with Monotone. Really. 
They use a "real database". They use "nice object-oriented libraries". 
They use "nice C++ abstractions". And quite frankly, as a result of all 
these design decisions that sound so appealing to some CS people, the end 
result is a horrible and unmaintainable mess.

But I'm sure you'd like it more than git.

			Linus

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06 17:50       ` Linus Torvalds
@ 2007-09-07  0:21         ` Dmitry Kakurin
  2007-09-07  0:38           ` Linus Torvalds
                             ` (3 more replies)
  2007-09-24 13:41         ` figo
  2012-05-22 18:30         ` Syed M Raihan
  2 siblings, 4 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-07  0:21 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Matthieu Moy, Git

On 9/6/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Wed, 5 Sep 2007, Dmitry Kakurin wrote:
> >
> > When I first looked at Git source code two things struck me as odd:
> > 1. Pure C as opposed to C++. No idea why. Please don't talk about portability,
> > it's BS.
>
> *YOU* are full of bullshit.

nice

> C++ is a horrible language. It's made more horrible by the fact that a lot
> of substandard programmers use it, to the point where it's much much
> easier to generate total and utter crap with it. Quite frankly, even if
> the choice of C were to do *nothing* but keep the C++ programmers out,
> that in itself would be a huge reason to use C.
>
> In other words: the choice of C is the only sane choice. I know Miles
> Bader jokingly said "to piss you off", but it's actually true. I've come
> to the conclusion that any programmer that would prefer the project to be
> in C++ over C is likely a programmer that I really *would* prefer to piss
> off, so that he doesn't come and screw up any project I'm involved with.

As dinosaurs (who code exclusively in C) are becoming extinct, you
will soon find yourself alone with an attitude like this.

Measuring the number of people who contributed to Git is an incorrect metric.
Obviously C++ developers can contribute C code. But assuming that they
prefer it that way is wrong.

I was coding in Assembly when there was no C.
Then in C before C++ was created.
Nowadays it's C++ and C#, and I have never looked back.
Bad developers will write bad code in any language. But penalizing
good developers for this illusory reason of repelling bad contributors
is nonsense.

Anyway I don't mean to start a religious C vs. C++ war. It's a matter
of beliefs and as such pointless.
I just wanted to get a sense of how many people share this "Git should
be in pure C" doctrine.
-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:21         ` Dmitry Kakurin
@ 2007-09-07  0:38           ` Linus Torvalds
  2007-09-07  1:08             ` Dmitry Kakurin
  2007-09-07  1:12             ` Linus Torvalds
  2007-09-07  3:06           ` Wincent Colaiuta
                             ` (2 subsequent siblings)
  3 siblings, 2 replies; 102+ messages in thread
From: Linus Torvalds @ 2007-09-07  0:38 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git



On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> 
> As dinosaurs (who code exclusively in C) are becoming extinct, you
> will soon find yourself alone with an attitude like this.

Unlike you, I actually gave reasons for my dislike of C++, and pointed to 
examples of the kinds of failures that it leads to.

You, on the other hand, have given no sane reasons *for* using C++.

The fact is, git is better than the other SCM's. And good taste (and C) is 
one of the reasons for that.

It has nothing to do with dinosaurs. Good taste doesn't go out of style, 
and comparing C to assembler just shows that you don't have a friggin idea 
about what you're talking about.

			Linus

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:38           ` Linus Torvalds
@ 2007-09-07  1:08             ` Dmitry Kakurin
  2007-09-07  1:27               ` Linus Torvalds
  2007-09-07  6:50               ` David Kastrup
  2007-09-07  1:12             ` Linus Torvalds
  1 sibling, 2 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-07  1:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Matthieu Moy, Git

On 9/6/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
>
> On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> >
> > As dinosaurs (who code exclusively in C) are becoming extinct, you
> > will soon find yourself alone with attitude like this.
>
> Unlike you, I actually gave reasons for my dislike of C++, and pointed to
> examples of the kinds of failures that it leads to.

As I said, it's a matter of beliefs. As such, any reasoning and
arguing will be endless and pointless, as for any other religious
issue.

> You, on the other hand, have given no sane reasons *for* using C++.

I'll give you reasons why to use C++ for Git (not why C++ is better
for any project in general, as that again would be pointless):

1. Good String class will make code much more readable (and
significantly shorter)
2. Good Buffer class - same reason
3. Smart pointers and smart handles to manage memory and
file/socket/lock handles.

As it is right now, it's too hard to see the high-level logic thru
this endless-busy-work of micro-managing strings and memory.

> The fact is, git is better than the other SCM's. And good taste (and C) is
> one of the reasons for that.

IMHO Git has a brilliant high-level design (object database, using
hashes, simple and accessible storage for data and metadata). Kudos to
you!
The implementation: a mixture of C and shell scripts, command line
interface that has evolved bottom-up is so-so.

> and comparing C to assembler just shows that you don't have a friggin idea
> about what you're talking about.

I don't see myself comparing assembler to C anywhere.
I was pointing out that I've been programming in different languages
(many more actually) and observed bad developers writing bad code in
all of them. So this quality "bad developer" is actually
language-agnostic :-).
-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:38           ` Linus Torvalds
  2007-09-07  1:08             ` Dmitry Kakurin
@ 2007-09-07  1:12             ` Linus Torvalds
  2007-09-07  1:40               ` alan
                                 ` (3 more replies)
  1 sibling, 4 replies; 102+ messages in thread
From: Linus Torvalds @ 2007-09-07  1:12 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git



On Fri, 7 Sep 2007, Linus Torvalds wrote:
> 
> The fact is, git is better than the other SCM's. And good taste (and C) is 
> one of the reasons for that.

To be very specific:
 - simple and clear core datastructures, with *very* lean and aggressive 
   code to manage them that takes the whole approach of "simplicity over 
   fancy" to the extreme.
 - a willingness to not abstract away the data structures and algorithms, 
   because those are the *whole*point* of core git. 

And if you want a fancier language, C++ is absolutely the worst one to 
choose. If you want real high-level, pick one that has true high-level 
features like garbage collection or a good system integration, rather than 
something that lacks both the sparseness and straightforwardness of C, 
*and* doesn't even have the high-level bindings to important concepts. 

IOW, C++ is in that inconvenient spot where it doesn't help make things 
simple enough to be truly usable for prototyping or simple GUI 
programming, and yet isn't the lean system programming language that C is 
that actively encourages you to use simple and direct constructs.

				Linus

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:08             ` Dmitry Kakurin
@ 2007-09-07  1:27               ` Linus Torvalds
  2007-09-07  3:09                 ` Dmitry Kakurin
  2007-09-07 10:26                 ` Johannes Schindelin
  2007-09-07  6:50               ` David Kastrup
  1 sibling, 2 replies; 102+ messages in thread
From: Linus Torvalds @ 2007-09-07  1:27 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Matthieu Moy, Git



On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> 
> As it is right now, it's too hard to see the high-level logic thru
> this endless-busy-work of micro-managing strings and memory.

Total BS. The string/memory management is not at all relevant. Look at the 
code (I bet you didn't). This isn't the important, or complex part.

> IMHO Git has a brilliant high-level design (object database, using
> hashes, simple and accessible storage for data and metadata). Kudos to
> you!
> The implementation: a mixture of C and shell scripts, command line
> interface that has evolved bottom-up is so-so.

The only really important part is the *design*. The fact that some of it 
is in a "prototyping language" is exactly because it wasn't the core 
parts, and it's slowly getting replaced. C++ would in *no* way have been 
able to replace the shell scripts or perl parts.

And C++ would in no way have made the truly core parts better. 

> > and comparing C to assembler just shows that you don't have a friggin idea
> > about what you're talking about.
> 
> I don't see myself comparing assembler to C anywhere.

You made a very clear "assembler -> C -> C++/C#" progression in your 
life, comparing my staying with C as a "dinosaur", as if it was some 
inescapable evolution towards a better/more modern language.

With zero basis for it, since in many ways C is much superior to C++ (and 
even more so C#) in both its portability and in its availability of 
interfaces and low-level support.

> I was pointing out that I've been programming in different languages
> (many more actually) and observed bad developers writing bad code in
> all of them. So this quality "bad developer" is actually
> language-agnostic :-).

You can write bad code in any language. However, some languages, and 
especially some *mental* baggages that go with them are bad.

The very fact that you come in as a newbie, point to some absolutely 
*trivial* patches, and use that as an argument for a language that the 
original author doesn't like, is a sign of you being a person who should 
be disabused of any idiotic notions as soon as possible.

The things that actually *matter* for core git code is things like writing 
your own object allocator to make the footprint be as small as possible in 
order to be able to keep track of object flags for a million objects 
efficiently. It's writing a parser for the tree objects that is basically 
fairly optimal, because there *is* no abstraction. Absolutely all of it is 
at the raw memory byte level.

Can those kinds of things be written in other languages than C? Sure. But 
they can *not* be written by people who think the "high-level" 
capabilities of C++ string handling somehow matter.

The fact is, that is *exactly* the kinds of things that C excels at. Not 
just as a language, but as a required *mentality*. One of the great 
strengths of C is that it doesn't make you think of your program as 
anything high-level. It's what makes you apparently prefer other 
languages, but the thing is, from a git standpoint, "high level" is 
exactly the wrong thing. 

		Linus

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:12             ` Linus Torvalds
@ 2007-09-07  1:40               ` alan
  2007-09-07  5:09               ` Walter Bright
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 102+ messages in thread
From: alan @ 2007-09-07  1:40 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Dmitry Kakurin, Matthieu Moy, Git

On Fri, 7 Sep 2007, Linus Torvalds wrote:

> IOW, C++ is in that inconvenient spot where it doesn't help make things
> simple enough to be truly usable for prototyping or simple GUI
> programming, and yet isn't the lean system programming language that C is
> that actively encourags you to use simple and direct constructs.

Not to mention try finding two C++ compilers that support the same 
language features.  C is a known quantity. C++ depends on whose compiler 
you use and what class libraries you use.  Trying to make those things 
work crossplatform is not an easy task.  (Harder than it is in C at 
least.)

A number of years ago, a programmer who will not be named (and is not me), 
tried to port Perl to C++.  It was a disaster.  He found that every 
compiler handled something differently.

If you stuck to one compiler, it might work.  But trying to get GCC to 
work like MS C++ or Borland C++ or whatever is just asking for pain.

-- 
Refrigerator Rule #1: If you don't remember when you bought it, Don't eat it.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:21         ` Dmitry Kakurin
  2007-09-07  0:38           ` Linus Torvalds
@ 2007-09-07  3:06           ` Wincent Colaiuta
  2007-09-07  4:06             ` Paul Wankadia
                               ` (2 more replies)
  2007-09-07  6:47           ` David Kastrup
  2007-09-07 10:21           ` Johannes Schindelin
  3 siblings, 3 replies; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07  3:06 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:

> I just wanted to get a sense of how many people share this "Git should
> be in pure C" doctrine.

Count me as one of them. Git is all about speed, and C is the best  
choice for speed, especially in context of Git's workload.

Cheers,
Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:27               ` Linus Torvalds
@ 2007-09-07  3:09                 ` Dmitry Kakurin
  2007-09-07  5:48                   ` David Symonds
                                     ` (4 more replies)
  2007-09-07 10:26                 ` Johannes Schindelin
  1 sibling, 5 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-07  3:09 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Matthieu Moy, Git

On 9/6/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> >
> > As it is right now, it's too hard to see the high-level logic thru
> > this endless-busy-work of micro-managing strings and memory.
>
> Total BS. The string/memory management is not at all relevant. Look at the
> code (I bet you didn't). This isn't the important, or complex part.

Not only have I looked at the code, I've also debugged it quite a bit.
Granted most of my problems had to do with handling paths on Windows
(i.e. string manipulations).

Let me snip "C is better than C++" part ...
> [ snip ]
... and explain where I'm coming from:
My goal is to *use* Git. When something does not work *for me* I want
to be able to fix it (and contribute the fix) in *shortest time
possible* and with *minimal efforts*. As for me it's a diversion from
my main activities.
The fact that Git is written in C does not really contribute to that goal.
Suggestion to use C++ is the only alternative with existing C codebase.
So while C++ may not be the best choice "academically speaking" it's
pretty much the only practical choice.

"Democracy is the worst form of government except for all those others
that have been tried." - Winston Churchill

Now, I realize that I'm a very infrequent contributor to Git, but I
want my opinion to be heard.
People who carry the main weight of developing and maintaining Git
should make the call.
-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:06           ` Wincent Colaiuta
@ 2007-09-07  4:06             ` Paul Wankadia
  2007-09-07  4:30               ` Nicolas Pitre
  2007-09-07  9:19               ` Wincent Colaiuta
  2007-09-07  6:25             ` Andreas Ericsson
  2007-09-07  8:36             ` Walter Bright
  2 siblings, 2 replies; 102+ messages in thread
From: Paul Wankadia @ 2007-09-07  4:06 UTC (permalink / raw)
  To: git

Wincent Colaiuta <win <at> wincent.com> writes:

> > I just wanted to get a sense of how many people share this "Git should
> > be in pure C" doctrine.
> 
> Count me as one of them. Git is all about speed, and C is the best  
> choice for speed, especially in context of Git's workload.

I concur, but I also feel that D, Clean and OCaml are viable alternatives.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  4:06             ` Paul Wankadia
@ 2007-09-07  4:30               ` Nicolas Pitre
  2007-09-07  9:19               ` Wincent Colaiuta
  1 sibling, 0 replies; 102+ messages in thread
From: Nicolas Pitre @ 2007-09-07  4:30 UTC (permalink / raw)
  To: Paul Wankadia; +Cc: git

On Fri, 7 Sep 2007, Paul Wankadia wrote:

> Wincent Colaiuta <win <at> wincent.com> writes:
> 
> > > I just wanted to get a sense of how many people share this "Git should
> > > be in pure C" doctrine.
> > 
> > Count me as one of them. Git is all about speed, and C is the best  
> > choice for speed, especially in context of Git's workload.
> 
> I concur, but I also feel that D, Clean and OCaml are viable alternatives.

I happen to have zero experience with any of those, so if Git 
development was done with one of them, you'd have to count me out.

C is simply the lingua franca when it comes to programming, and it 
happens to be the fastest amongst portable languages too.


Nicolas

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  1:12             ` Linus Torvalds
  2007-09-07  1:40               ` alan
@ 2007-09-07  5:09               ` Walter Bright
  2007-09-07  7:40                 ` David Kastrup
  2007-09-07  9:41                 ` Pierre Habouzit
  2007-09-08  0:56               ` John 'Z-Bo' Zabroski
  2007-09-19 19:56               ` Steven Burns
  3 siblings, 2 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07  5:09 UTC (permalink / raw)
  To: git

Linus Torvalds wrote:
> And if you want a fancier language, C++ is absolutely the worst one to 
> choose. If you want real high-level, pick one that has true high-level 
> features like garbage collection or a good system integration, rather than 
> something that lacks both the sparseness and straightforwardness of C, 
> *and* doesn't even have the high-level bindings to important concepts. 
> 
> IOW, C++ is in that inconvenient spot where it doesn't help make things 
> simple enough to be truly usable for prototyping or simple GUI 
> programming, and yet isn't the lean system programming language that C is 
> that actively encourags you to use simple and direct constructs.

The D programming language is a different take than C++ has on growing 
C. I'm curious what your thoughts on that are (D has garbage collection, 
while still retaining the ability to directly manage memory). Can you 
enumerate what you feel are the important concepts?

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:09                 ` Dmitry Kakurin
@ 2007-09-07  5:48                   ` David Symonds
  2007-09-07  6:15                   ` Theodore Tso
                                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 102+ messages in thread
From: David Symonds @ 2007-09-07  5:48 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

On 07/09/07, Dmitry Kakurin <dmitry.kakurin@gmail.com> wrote:
> My goal is to *use* Git. When something does not work *for me* I want
> to be able to fix it (and contribute the fix) in *shortest time
> possible* and with *minimal efforts*. As for me it's a diversion from
> my main activities.
> The fact that Git is written in C does not really contribute to that goal.

That's just it -- Git's goal isn't to make it as easy as possible for
Git _users_ to fix it (though that is a nice thing to have). Git's
goal is to be a very good, very fast SCM. Bugs should be found and
fixed, but that can most effectively be done by the people who are
already knowledgeable about Git's codebase (i.e. its developers), not
its users.


Dave.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:09                 ` Dmitry Kakurin
  2007-09-07  5:48                   ` David Symonds
@ 2007-09-07  6:15                   ` Theodore Tso
  2007-09-20 14:06                     ` Steven Burns
  2007-09-07  6:31                   ` Andreas Ericsson
                                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Theodore Tso @ 2007-09-07  6:15 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

On Thu, Sep 06, 2007 at 08:09:23PM -0700, Dmitry Kakurin wrote:
> > Total BS. The string/memory management is not at all relevant. Look at the
> > code (I bet you didn't). This isn't the important, or complex part.
> 
> Not only have I looked at the code, I've also debugged it quite a bit.
> Granted most of my problems had to do with handling paths on Windows
> (i.e. string manipulations).

I consider string manipulation to be one of the places where C++ is a
total disaster.  It's way too easy for idiots to do something like this:

	a = b + "/share/" + c + serial_num;

where you can have absolutely no idea how many memory allocations are
done, due to type coercions, overloaded operators (good God, you can
overload the comma operator in C++!!!), and then when something like
that ends up in an inner loop, the result is a disaster from a
performance point of view, and it's not even obvious *why*!

> My goal is to *use* Git. When something does not work *for me* I want
> to be able to fix it (and contribute the fix) in *shortest time
> possible* and with *minimal efforts*. As for me it's a diversion from
> my main activities.

Yes, and if you contribute something the shortest time possible, and
it ends up being crap, who gets to rewrite it and fix it?  I've seen
too many C++ programs which get this kind of crap added, and it's not
noticed right away (because C++ is really good at hiding such
performance killers so they are not visible), and then later on, it's
even harder to find the performance problems and fix them.

> Now, I realize that I'm a very infrequent contributor to Git, but I
> want my opinion to be heard.

And if git were written in C++, it's precisely the infrequent
contributors (who are in a hurry, who only care about the quick hack
to get them going, and not about the long-term maintainability and
performance of the package) that are in the position to do the
most damage...

						- Ted

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:06           ` Wincent Colaiuta
  2007-09-07  4:06             ` Paul Wankadia
@ 2007-09-07  6:25             ` Andreas Ericsson
  2007-09-07 10:56               ` Johannes Schindelin
  2007-09-07 11:30               ` Wincent Colaiuta
  2007-09-07  8:36             ` Walter Bright
  2 siblings, 2 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-07  6:25 UTC (permalink / raw)
  To: Wincent Colaiuta; +Cc: Dmitry Kakurin, Linus Torvalds, Matthieu Moy, Git

Wincent Colaiuta wrote:
> El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
> 
>> I just wanted to get a sense of how many people share this "Git should
>> be in pure C" doctrine.
> 
> Count me as one of them. Git is all about speed, and C is the best 
> choice for speed, especially in context of Git's workload.
> 

Nono, hand-optimized assembly is the best choice for speed. C is just
a little more portable ;-)

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:09                 ` Dmitry Kakurin
  2007-09-07  5:48                   ` David Symonds
  2007-09-07  6:15                   ` Theodore Tso
@ 2007-09-07  6:31                   ` Andreas Ericsson
  2007-09-07 22:17                     ` Dmitry Kakurin
  2007-09-07  6:52                   ` David Kastrup
  2007-09-07 10:28                   ` Johannes Schindelin
  4 siblings, 1 reply; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-07  6:31 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

Dmitry Kakurin wrote:
> On 9/6/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
>> On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
>>> As it is right now, it's too hard to see the high-level logic thru
>>> this endless-busy-work of micro-managing strings and memory.
>> Total BS. The string/memory management is not at all relevant. Look at the
>> code (I bet you didn't). This isn't the important, or complex part.
> 
> Not only have I looked at the code, I've also debugged it quite a bit.
> Granted most of my problems had to do with handling paths on Windows
> (i.e. string manipulations).
> 
> Let me snip "C is better than C++" part ...
>> [ snip ]
> ... and explain where I'm coming from:
> My goal is to *use* Git. When something does not work *for me* I want
> to be able to fix it (and contribute the fix) in *shortest time
> possible* and with *minimal efforts*. As for me it's a diversion from
> my main activities.
> The fact that Git is written in C does not really contribute to that goal.


Coupled with what you said in an earlier mail, namely
---%<---%<---
> Obviously C++ developers can contribute C code. But assuming that they
> prefer it that way is wrong.
> 
> I was coding in Assembly when there was no C.
> Then in C before C++ was created.
> Now days it's C++ and C#, and I have never looked back.
---%<---%<---

Considering C appeared in 1972, and C++ appeared in 1985, you have been
writing C code for 13 years. And you're telling me that git being written
in C prevents you from contributing?

If you want to do something useful in C++ for git, make it easy for C++
programmers to write apps for it.

> 
> Now, I realize that I'm a very infrequent contributor to Git, but I
> want my opinion to be heard.
> People who carry the main weight of developing and maintaining Git
> should make the call.

They already have, but every now and then someone comes along and suggests
a complete rewrite in some other language. So far we've had Java (there's
always one...), Python and now C++.

It happens to all projects, sooner or later. The funny thing is that all those
people that want their favourite software to be rewritten in their favourite
programming language always want someone else to rewrite it for them.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:21         ` Dmitry Kakurin
  2007-09-07  0:38           ` Linus Torvalds
  2007-09-07  3:06           ` Wincent Colaiuta
@ 2007-09-07  6:47           ` David Kastrup
  2007-09-07  7:41             ` Andy Parkins
  2007-09-07 10:21           ` Johannes Schindelin
  3 siblings, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07  6:47 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> On 9/6/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
>>
>> In other words: the choice of C is the only sane choice. I know
>> Miles Bader jokingly said "to piss you off", but it's actually
>> true. I've come to the conclusion that any programmer that would
>> prefer the project to be in C++ over C is likely a programmer that
>> I really *would* prefer to piss off, so that he doesn't come and
>> screw up any project I'm involved with.
>
> As dinosaurs (who code exclusively in C) are becoming extinct, you
> will soon find yourself alone with attitude like this.

As long as TeX, Emacs and vi are around, I would not worry too much
about dinosaurs in general.  But C++ is a cancerous dinosaur.  It has
growths that just don't belong on a C body.

> I was coding in Assembly when there was no C.  Then in C before C++
> was created.  Now days it's C++ and C#, and I have never looked
> back.  Bad developers will write bad code in any language. But
> penalizing good developers for this illusive reason of repealing bad
> contributors is nonsense.

The problem with C++ is that every C++ developer has his own style,
and reuse is an illusion within that style.  Take a look at classes
implementing matrix arithmetic: there are as many around as the day is
long, and all of them are incompatible with one another.

With regard to programming styles, C++ does not support multiple
inheritance.  For a single project grown from a single start, you can
get reasonable solutions.  But combining stuff is creating maintenance
messes.

With C, the situation is not dissimilar, but you spend less time
fighting the illusion that you don't need to reimplement, anyway.

> I just wanted to get a sense of how many people share this "Git
> should be in pure C" doctrine.

What nonsense.  Large parts of git already are shell scripts, so
obviously there is no such doctrine.  Just because C++ is not a sane
proposition does not mean that others might not work.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:08             ` Dmitry Kakurin
  2007-09-07  1:27               ` Linus Torvalds
@ 2007-09-07  6:50               ` David Kastrup
  1 sibling, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07  6:50 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> I'll give you reasons why to use C++ for Git (not why C++ is better
> for any project in general, as that again would be pointless):
>
> 1. Good String class will make code much more readable (and
> significantly shorter)
> 2. Good Buffer class - same reason
> 3. Smart pointers and smart handles to manage memory and
> file/socket/lock handles.

But all of those are incompatible with one another and require major
headaches and/or interface code to get to run with one another.  And
then might use different interface styles, anyway.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:09                 ` Dmitry Kakurin
                                     ` (2 preceding siblings ...)
  2007-09-07  6:31                   ` Andreas Ericsson
@ 2007-09-07  6:52                   ` David Kastrup
  2007-09-07 10:28                   ` Johannes Schindelin
  4 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07  6:52 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> ... and explain where I'm coming from:
> My goal is to *use* Git. When something does not work *for me* I want
> to be able to fix it (and contribute the fix) in *shortest time
> possible* and with *minimal efforts*. As for me it's a diversion from
> my main activities.
> The fact that Git is written in C does not really contribute to that goal.
> Suggestion to use C++ is the only alternative with existing C codebase.
> So while C++ may not be the best choice "academically speaking" it's
> pretty much the only practical choice.

Sorry, but for fixing things in C, I can look and work locally.  For
fixing things in C++, I first need to understand the class
hierarchies used in the project.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  5:09               ` Walter Bright
@ 2007-09-07  7:40                 ` David Kastrup
  2007-09-07  8:15                   ` Walter Bright
  2007-09-07 11:36                   ` Wincent Colaiuta
  2007-09-07  9:41                 ` Pierre Habouzit
  1 sibling, 2 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07  7:40 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright <boost@digitalmars.com> writes:

> Linus Torvalds wrote:
>> And if you want a fancier language, C++ is absolutely the worst one
>> to choose. If you want real high-level, pick one that has true
>> high-level features like garbage collection or a good system
>> integration, rather than something that lacks both the sparseness
>> and straightforwardness of C, *and* doesn't even have the high-level
>> bindings to important concepts. 
>>
>> IOW, C++ is in that inconvenient spot where it doesn't help make
>> things simple enough to be truly usable for prototyping or simple
>> GUI programming, and yet isn't the lean system programming language
>> that C is that actively encourags you to use simple and direct
>> constructs.
>
> The D programming language is a different take than C++ has on growing
> C. I'm curious what your thoughts on that are (D has garbage
> collection, while still retaining the ability to directly manage
> memory). Can you enumerate what you feel are the important concepts?

A design is perfect not when there is no longer anything you can add
to it, but when there is no longer anything you can take away.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  6:47           ` David Kastrup
@ 2007-09-07  7:41             ` Andy Parkins
  2007-09-07  8:08               ` David Kastrup
  0 siblings, 1 reply; 102+ messages in thread
From: Andy Parkins @ 2007-09-07  7:41 UTC (permalink / raw)
  To: git; +Cc: David Kastrup, Dmitry Kakurin, Linus Torvalds, Matthieu Moy

On Friday 2007 September 07, David Kastrup wrote:

(Disclaimer: I'm certainly not joining the "C++ for git" chant; this reply is 
merely to the statements made about C++ in David's message).

> The problem with C++ is that every C++ developer has his own style,
> and reuse is an illusion within that style.  Take a look at classes
> implementing matrix arithmetic: there are as many around as the day is
> long, and all of them are incompatible with one another.

One could say the same about any API.  "Take a look at that C library libXYZ - 
it does exactly the same thing as libPQR but all the function calls and 
structures are different.  Conclusion: C is shit".  Obviously nonsense.

> With regard to programming styles, C++ does not support multiple
> inheritance.  For a single project grown from a single start, you can

Multiple inheritance is the spawn of the devil, but C++ _does_ support it.

Forgetting about the terrible STL, to me there really is no difference between 
C and C++; you can be object oriented in C.  Take a look at the Linux kernel, 
it should be printed out, rolled up and used to beat the ideas into students 
learning C++/Java/C#.   Object oriented design is a choice, and if you really 
wanted you could do it in assembly.

I would imagine the reason people often turn up wanting to rewrite Linux and 
git in C++ is because they are so object oriented in nature already and it's 
natural to think "wouldn't this be even better if I wrote it in an object 
oriented language"?  Maybe, maybe not, but why bother?



Andy

-- 
Dr Andy Parkins, M Eng (hons), MIET
andyparkins@gmail.com

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  7:41             ` Andy Parkins
@ 2007-09-07  8:08               ` David Kastrup
  0 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07  8:08 UTC (permalink / raw)
  To: Andy Parkins; +Cc: git, Dmitry Kakurin, Linus Torvalds, Matthieu Moy

Andy Parkins <andyparkins@gmail.com> writes:

> On Friday 2007 September 07, David Kastrup wrote:
>
> (Disclaimer: I'm certainly not joining the "C++ for git" chant; this reply is 
> merely to the statements made about C++ in David's message).
>
>> The problem with C++ is that every C++ developer has his own style,
>> and reuse is an illusion within that style.  Take a look at classes
>> implementing matrix arithmetic: there are as many around as the day is
>> long, and all of them are incompatible with one another.
>
> One could say the same about any API.  "Take a look at that C
> library libXYZ - it does exactly the same thing as libPQR but all
> the function calls and structures are different.  Conclusion: C is
> shit".  Obviously nonsense.

The difference is that you can pass structures from one library into
another with tolerable efficiency.  Because there are only basically 2
ways to lay out a two-dimensional array of floats.

>> With regard to programming styles, C++ does not support multiple
>> inheritance.  For a single project grown from a single start, you
>> can
>
> Multiple inheritance is the spawn of the devil, but C++ _does_
> support it.

What about "With regard to programming styles" did you not understand?
I was not talking about a technical feature at class level, but about
code merging from multiple sources.

> I would imagine the reason people often turn up wanting to rewrite
> Linux and git in C++ is because they are so object oriented in
> nature already and it's natural to think "wouldn't this be even
> better if I wrote it in an object oriented language"?  Maybe, maybe
> not, but why bother?

Maintainability and extensibility certainly are valid arguments for
rewrites.  But C++ does not really shine in that regard.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  7:40                 ` David Kastrup
@ 2007-09-07  8:15                   ` Walter Bright
  2007-09-07  8:26                     ` David Kastrup
  2007-09-07 11:36                   ` Wincent Colaiuta
  1 sibling, 1 reply; 102+ messages in thread
From: Walter Bright @ 2007-09-07  8:15 UTC (permalink / raw)
  To: git

David Kastrup wrote:
> Walter Bright <boost@digitalmars.com> writes:
>> The D programming language is a different take than C++ has on growing
>> C. I'm curious what your thoughts on that are (D has garbage
>> collection, while still retaining the ability to directly manage
>> memory). Can you enumerate what you feel are the important concepts?
> 
> A design is perfect not when there is no longer anything you can add
> to it, but if there is no longer anything you can take away.

I like to phrase that a slightly different way: anyone can make 
something complicated, but it takes genius to make something simple.

A very big goal for D is to make what should be simple code, simple. It 
turns out that what's simple for a computer is complex for a human. So 
to design a language that is simple for programmers is (unfortunately) a 
rather complex problem. Or perhaps I'm just not smart enough <g>.

A canonical example is that of a loop. Consider a simple C loop over an 
array:

void foo(int array[10])
{
     for (int i = 0; i < 10; i++)
     {   int value = array[i];
         ... do something ...
     }
}

It's simple, but it has a lot of problems:

1) i should be size_t, not int
2) array is not checked for overflow
3) 10 may not be the actual array dimension
4) may be more efficient to step through the array with pointers, rather 
than indices
5) type of array may change, but the type of value may not get updated
6) crashes if array is NULL
7) only works with arrays and pointers

Since this thread is talking about C++, let's look at the C++ version:

void foo(std::vector<int> array)
{
   for (std::vector<int>::const_iterator
        i = array.begin();
        i != array.end();
        i++)
   {
     int value = *i;
     ... do something ...
   }
}

It has fewer latent bugs, but still:

1) type of array may change, but the type of value may not get updated
2) too darned much typing
3) it's more complicated, not simpler

Frankly, I don't want to write loops that way. I want to write them like 
this:

void foo(int[] array)
{
   foreach (value; array)
   {
     ... do something ...
   }
}

As a programmer, I'm specifying exactly what I want to happen without 
much extra puffery. It's less typing, simpler, and more resistant to bugs.

1) correct loop index type is selected based on the type of array
2) arrays carry with them their dimension, so foreach is guaranteed to 
step through the loop the correct number of times
3) implementation decides if pointers will do a better job than indices, 
based on the compilation target
4) type of value is inferred automatically from the type of array, so no 
worries if the type changes
5) Null arrays have 0 length, so no crashing
6) works with any collection type

[This example is extracted from a presentation I've made.]

------
Walter Bright
http://www.digitalmars.com  C, C++, D programming language compilers
http://www.astoriaseminar.com  Extraordinary C++

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  8:15                   ` Walter Bright
@ 2007-09-07  8:26                     ` David Kastrup
  2007-09-07  9:14                       ` Walter Bright
  0 siblings, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07  8:26 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright <boost@digitalmars.com> writes:

> A canonical example is that of a loop. Consider a simple C loop over
> an array:
>
> void foo(int array[10])
> {
>     for (int i = 0; i < 10; i++)
>     {   int value = array[i];
>         ... do something ...
>     }
> }
>
> It's simple, but it has a lot of problems:
>
> 1) i should be size_t, not int

Wrong.  size_t is for holding the size of memory objects in bytes, not
in terms of indices.  For indices, the best variable is of the same
type as the declared index maximum size, so here it is typeof(10),
namely int.

> 2) array is not checked for overflow

Why should it?

> 3) 10 may not be the actual array dimension

Your point is?

> 4) may be more efficient to step through the array with pointers,
> rather than indices

No.  It is a beginners' and advanced users' mistake to think using
pointers for access is a good idea.  Trivial optimizations are what a
compiler is best at, not the user.  Using pointer manipulation will
more often than not break loop unrolling, loop reversal, strength
reduction and other things.

> 5) type of array may change, but the type of value may not get
> updated

Huh?

> 6) crashes if array is NULL

Certainly.  Your point being?

> 7) only works with arrays and pointers

Since there are only arrays and pointers in C, not really a restriction.

>
> Since this thread is talking about C++, let's look at the C++ version:
>
> void foo(std::vector<int> array)
> {
>   for (std::vector<int>::const_iterator
>        i = array.begin();
>        i != array.end();
>        i++)
>   {
>     int value = *i;
>     ... do something ...
>   }
> }

Where is my barf bag?

> Frankly, I don't want to write loops that way. I want to write them
> like this:
>
> void foo(int[] array)
> {
>   foreach (value; array)
>   {
>     ... do something ...
>   }
> }
>
> As a programmer, I'm specifying exactly what I want to happen without
> much extra puffery. It's less typing, simpler, and more resistant to
> bugs.
>
> 1) correct loop index type is selected based on the type of array
> 2) arrays carry with them their dimension, so foreach is guaranteed to
> step through the loop the correct number of times
> 3) implementation decides if pointers will do a better job than
> indices, based on the compilation target
> 4) type of value is inferred automatically from the type of array, so
> no worries if the type changes
> 5) Null arrays have 0 length, so no crashing
> 6) works with any collection type

Most of those are toy concerns.  They prevent problems that don't
actually occur much in practice.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:06           ` Wincent Colaiuta
  2007-09-07  4:06             ` Paul Wankadia
  2007-09-07  6:25             ` Andreas Ericsson
@ 2007-09-07  8:36             ` Walter Bright
  2007-09-07  9:41               ` Andreas Ericsson
                                 ` (2 more replies)
  2 siblings, 3 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07  8:36 UTC (permalink / raw)
  To: git

Wincent Colaiuta wrote:
> Git is all about speed, and C is the best 
> choice for speed, especially in context of Git's workload.

I can appreciate that. I originally got into writing compilers because 
my game (Empire) ran too slowly and I thought the existing compilers 
could be dramatically improved.

And technically, yes, you can write code in C that is >= the speed of 
any other language (other than asm). But practically, this isn't 
necessarily so, for the following reasons:

1) You wind up having to implement the complex, dirty details of things 
yourself. The consequences of this are:

    a) you pick a simpler algorithm (which is likely less efficient - I 
run across bubble sorts all the time in code)

    b) once you implement, tune, and squeeze all the bugs out of those 
complex, dirty details, you're reluctant to change it. You're reluctant 
to try a different algorithm to see if it's faster. I've seen this 
effect a lot in my own code. (I translated a large body of my own C++ 
code that I'd spent months tuning to D, and quickly managed to get 
significantly more speed out of it, because it was much simpler to try 
out different algorithms/data structures.)

2) Garbage collection has an interesting and counterintuitive 
consequence. If you compare n malloc/free's with n gcnew/collections, 
the malloc/free will come out faster, and you conclude that gc is slow. 
But that misses one huge speed advantage of gc - you can do FAR fewer 
allocations! For example, I've done a lot of string manipulating 
programs in C. The basic problem is keeping track of who owns each 
string. This is done by, when in doubt, making a copy of the string.

But if you have gc, you don't worry about who owns the string. You just 
make another pointer to it. D takes this a step further with the concept 
of array slicing, where one creates windows on existing arrays, or 
windows on windows on windows, and no allocations are ever done. It's 
just pointer fiddling.

------
Walter Bright
http://www.digitalmars.com  C, C++, D programming language compilers
http://www.astoriaseminar.com  Extraordinary C++

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  8:26                     ` David Kastrup
@ 2007-09-07  9:14                       ` Walter Bright
  2007-09-07  9:31                         ` David Kastrup
  0 siblings, 1 reply; 102+ messages in thread
From: Walter Bright @ 2007-09-07  9:14 UTC (permalink / raw)
  To: git

David Kastrup wrote:
> Walter Bright <boost@digitalmars.com> writes:
> 
>> A canonical example is that of a loop. Consider a simple C loop over
>> an array:
>>
>> void foo(int array[10])
>> {
>>     for (int i = 0; i < 10; i++)
>>     {   int value = array[i];
>>         ... do something ...
>>     }
>> }
>>
>> It's simple, but it has a lot of problems:
>>
>> 1) i should be size_t, not int
> 
> Wrong.  size_t is for holding the size of memory objects in bytes, not
> in terms of indices.  For indices, the best variable is of the same
> type as the declared index maximum size, so here it is typeof(10),
> namely int.

The easiest way to show the error is to consider the code being ported to a 
typical 64 bit C compiler. int's are still 32 bits, yet the array can be 
larger than 32 bits. You're right in that what we want to be able to do 
is typeof(array dimension), but there is no way to do that automatically 
in C, which is my point. If the array dimension changes, you have to 
carefully check to make sure every loop dependency on the type is 
updated, too.

size_t will always work, however, making it a better choice than int, at 
least for C.

>> 2) array is not checked for overflow
> 
> Why should it?

Because the 10 array dimension is not statically checked in C. I could 
pass it a pointer to 3 ints without the compiler complaining. This makes 
it a potential maintenance problem. Also, the maintenance programmer may 
change the array dimension in the function signature, but overlook 
changing it in the for loop. Again, a maintenance problem.


>> 3) 10 may not be the actual array dimension
> 
> Your point is?

Array buffer overflow errors are commonplace in C, because array 
dimensions are not automatically checked at either compile or run time. 
This is an expensive problem. Some C APIs try to deal with this by 
passing a second argument for arrays giving the dimension (snprintf, for 
example), but this tends to be sporadic, not conventional. It being 
extra work for the programmer inevitably means it doesn't get done.


>> 4) may be more efficient to step through the array with pointers,
>> rather than indices
> 
> No.  It is a beginners' and advanced users' mistake to think using
> pointers for access is a good idea.  Trivial optimizations are what a
> compiler is best at, not the user.  Using pointer manipulation will
> more often than not break loop unrolling, loop reversal, strength
> reduction and other things.

C compilers vary widely in the optimizations they'll do for simple 
loops. I see often enough attempts by programmers to take such matters 
into their own hands. I agree with you on that - and suggest the 
language should not tempt the user to do such optimizations.

>> 5) type of array may change, but the type of value may not get
>> updated
> 
> Huh?

Let's say our fearless maintenance programmer decides to make it an 
array of longs, not an array of ints. He overlooks changing the type of 
value in the loop. Suddenly, things subtly break because of overflows. 
Or maybe he changed the int to an unsigned, now the divides in the loop 
give different answers. Etc. There really isn't any compiler/language 
help in finding these kinds of problems.


>> 6) crashes if array is NULL
> 
> Certainly.  Your point being?

I consider an array that is NULL to have no members, so instead of 
crashing the loop should execute 0 times.


>> 7) only works with arrays and pointers
> 
> Since there are only arrays and pointers in C, not really a restriction.

C has structs, too, as well as more complicated user defined 
collections. Essentially, you cannot (simply) write generic algorithms 
in C, because you cannot (simply) generically express iteration. Of 
course, you can still express anything in C if you're willing to work 
hard enough to get it. Me, I'm too lazy <g>. It's like why I can't play 
chess - every time I try to play it instead I think about writing a 
program to do the hard work for me.


>> As a programmer, I'm specifying exactly what I want to happen without
>> much extra puffery. It's less typing, simpler, and more resistant to
>> bugs.
>>
>> 1) correct loop index type is selected based on the type of array
>> 2) arrays carry with them their dimension, so foreach is guaranteed to
>> step through the loop the correct number of times
>> 3) implementation decides if pointers will do a better job than
>> indices, based on the compilation target
>> 4) type of value is inferred automatically from the type of array, so
>> no worries if the type changes
>> 5) Null arrays have 0 length, so no crashing
>> 6) works with any collection type
> 
> Most of those are toy concerns.  They prevent problems that don't
> actually occur much in practice.

I beg to differ - buffer overflow bugs are common and expensive. The 
nice thing about the D loop is it is LESS typing than the C one - you 
get the extra robustness for free.

Let's look at the code gen for the inner loop for C:

L8:             push    [EBX*4][ESI]
                 call    near ptr _bar
                 inc     EBX
                 add     ESP,4
                 cmp     EBX,0Ah
                 jb      L8

and for D:

LE:            mov     EAX,[EBX]
                call    near ptr _D4test3barFiZv
                add     EBX,4
                cmp     EBX,ESI
                jb      LE

I think you can see that performance isn't an impediment.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  4:06             ` Paul Wankadia
  2007-09-07  4:30               ` Nicolas Pitre
@ 2007-09-07  9:19               ` Wincent Colaiuta
  1 sibling, 0 replies; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07  9:19 UTC (permalink / raw)
  To: Paul Wankadia; +Cc: git

El 7/9/2007, a las 6:06, Paul Wankadia escribió:

> Wincent Colaiuta <win <at> wincent.com> writes:
>
>>> I just wanted to get a sense of how many people share this "Git  
>>> should
>>> be in pure C" doctrine.
>>
>> Count me as one of them. Git is all about speed, and C is the best
>> choice for speed, especially in context of Git's workload.
>
> I concur, but I also feel that D, Clean and OCaml are viable  
> alternatives.

Yes, they have a reputation for speed[1], but also a smaller number of  
people know them[2].

[1] <http://shootout.alioth.debian.org/gp4/benchmark.php? 
test=all&lang=all>
[2] <http://www.tiobe.com/tpci.htm>

Cheers,
Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  9:14                       ` Walter Bright
@ 2007-09-07  9:31                         ` David Kastrup
  2007-09-07 20:22                           ` Walter Bright
  0 siblings, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07  9:31 UTC (permalink / raw)
  To: git

Walter Bright <boost@digitalmars.com> writes:

> David Kastrup wrote:
>> Walter Bright <boost@digitalmars.com> writes:
>>
>>> A canonical example is that of a loop. Consider a simple C loop over
>>> an array:
>>>
>>> void foo(int array[10])
>>> {
>>>     for (int i = 0; i < 10; i++)
>>>     {   int value = array[i];
>>>         ... do something ...
>>>     }
>>> }
>>>
>>> It's simple, but it has a lot of problems:
>>>
>>> 1) i should be size_t, not int
>>
>> Wrong.  size_t is for holding the size of memory objects in bytes, not
>> in terms of indices.  For indices, the best variable is of the same
>> type as the declared index maximum size, so here it is typeof(10),
>> namely int.
>
> The easiest way to show the error is consider the code being ported to
> a typical 64 bit C compiler. int's are still 32 bits, yet the array
> can be larger than 32 bits.

Not if it is an array declared of size 10.  And if it isn't, you have
no business stating so in the function prototype.

Willfully obfuscated programming does not prove anything.

>>> 2) array is not checked for overflow
>>
>> Why should it?
>
> Because the 10 array dimension is not statically checked in C. I
> could pass it a pointer to 3 ints without the compiler
> complaining. This makes it a potential maintenance problem.

Nonsense.  Again, C won't keep you from shooting yourself in the foot.

>>> 3) 10 may not be the actual array dimension
>>
>> Your point is?
>
> Array buffer overflow errors are commonplace in C, because array
> dimensions are not automatically checked at either compile or run
> time.

No, because programmers get things wrong.  You can tell C compilers to
check all array accesses, but that is a performance issue.  For gcc,
we have

`-fmudflap -fmudflapth -fmudflapir'
     For front-ends that support it (C and C++), instrument all risky
     pointer/array dereferencing operations, some standard library
     string/heap functions, and some other associated constructs with
     range/validity tests.  Modules so instrumented should be immune to
     buffer overflows, invalid heap use, and some other classes of C/C++
     programming errors.  The instrumentation relies on a separate
     runtime library (`libmudflap'), which will be linked into a
     program if `-fmudflap' is given at link time.  Run-time behavior
     of the instrumented program is controlled by the `MUDFLAP_OPTIONS'
     environment variable.  See `env MUDFLAP_OPTIONS=-help a.out' for
     its options.

Why isn't it the default?  Because it is a performance issue.

>>> 5) type of array may change, but the type of value may not get
>>> updated
>>
>> Huh?
>
> Let's say our fearless maintenance programmer decides to make it an
> array of longs, not an array of ints. He overlooks changing the type
> of value in the loop.

Again: C does not prevent you from shooting yourself in the foot.

>>> 6) crashes if array is NULL
>>
>> Certainly.  Your point being?
>
> I consider an array that is NULL to have no members,

Nobody else does that.

> so instead of crashing the loop should execute 0 times.

If the loop count is zero, this is what will happen.

>>> 7) only works with arrays and pointers
>>
>> Since there are only arrays and pointers in C, not really a
>> restriction.
>
> C has structs, too, as well as more complicated user defined
> collections. Essentially, you cannot (simply) write generic
> algorithms in C, because you cannot (simply) generically express
> iteration.

Of course you can.  Macros exist.

>> Most of those are toy concerns.  They prevent problems that don't
>> actually occur much in practice.
>
> I beg to differ - buffer overflow bugs are common and expensive.

Then compile your program with appropriate options.  The key word is
"option".  You don't have to take the performance hit if you don't
want or need it.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  5:09               ` Walter Bright
  2007-09-07  7:40                 ` David Kastrup
@ 2007-09-07  9:41                 ` Pierre Habouzit
  2007-09-07 19:03                   ` Walter Bright
  1 sibling, 1 reply; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-07  9:41 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 2814 bytes --]

On Fri, Sep 07, 2007 at 05:09:26AM +0000, Walter Bright wrote:
> Linus Torvalds wrote:
> >And if you want a fancier language, C++ is absolutely the worst one to 
> >choose. If you want real high-level, pick one that has true high-level 
> >features like garbage collection or a good system integration, rather 
> >than something that lacks both the sparseness and straightforwardness of 
> >C, *and* doesn't even have the high-level bindings to important 
> >concepts. IOW, C++ is in that inconvenient spot where it doesn't help 
> >make things simple enough to be truly usable for prototyping or simple 
> >GUI programming, and yet isn't the lean system programming language that 
> >C is that actively encourags you to use simple and direct constructs.
> 
> The D programming language is a different take than C++ has on growing C. 
> I'm curious what your thoughts on that are (D has garbage collection, 
> while still retaining the ability to directly manage memory). Can you 
> enumerate what you feel are the important concepts?

  Well, to me D has two significant drawbacks to be "ready to use". The
first one is that it doesn't have bit-fields. I often deal with bit-fields
on structures that have a _lot_ of instances in my program, and the
bit-field is chosen for code readability _and_ structure size efficiency.
I know you claim that using masks manually often generates better
code. But in my case, speed does not matter _that_ much. I mean it does,
but not at this micro-level, as access to the bit-field is not in my
inner-loop.

  The second issue I have is that there is no way to do:
  import (C) "foo.h"

  And this is a big no-go (maybe not for git, but as a general issue)
because it impedes the use of external libraries with a C interface a
_lot_. E.g. I'd really like to use it to use some GNU libc extensions,
but I can't because it has too many dependencies (some async getaddrinfo
interface, that need me to import all the signal events and so on
extensions in the libc, with bitfields, wich send us back to the first
point).


  I also have a third, but non critical issue, I absolutely don't like
phobos :) Though I'm obviously free to choose another library. D has
definitely many many many real advances over C (like the .init, .size,
... and so on fields, known types, and whatever portability nightmare
C imposes on us). In fact I like to use D like I code in C, using
modules and functions, and very few classes, as few as I can. And even
(under- ?) using D like this, it is a real pleasure to work with. I'm
really eager to see gdc be more stable.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  8:36             ` Walter Bright
@ 2007-09-07  9:41               ` Andreas Ericsson
  2007-09-07 19:23                 ` Walter Bright
  2007-09-07 11:52               ` Wincent Colaiuta
  2007-09-22 16:52               ` Steven Burns
  2 siblings, 1 reply; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-07  9:41 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright wrote:
> Wincent Colaiuta wrote:
>> Git is all about speed, and C is the best choice for speed, especially 
>> in context of Git's workload.
> 
> I can appreciate that. I originally got into writing compilers because 
> my game (Empire) ran too slowly and I thought the existing compilers 
> could be dramatically improved.
> 
> And technically, yes, you can write code in C that is >= the speed of 
> any other language (other than asm). But practically, this isn't 
> necessarily so, for the following reasons:
> 
> 1) You wind up having to implement the complex, dirty details of things 
> yourself. The consequences of this are:
> 
>    a) you pick a simpler algorithm (which is likely less efficient - I 
> run across bubble sorts all the time in code)
> 
>    b) once you implement, tune, and squeeze all the bugs out of those 
> complex, dirty details, you're reluctant to change it. You're reluctant 
> to try a different algorithm to see if it's faster. I've seen this 
> effect a lot in my own code. (I translated a large body of my own C++ 
> code that I'd spent months tuning to D, and quickly managed to get 
> significantly more speed out of it, because it was much simpler to try 
> out different algorithms/data structures.)
> 

I haven't seen this in the development of git, although to be fair, you
didn't mention the number of developers that were simultaneously working
on your project. If it was you alone, I can imagine you were reluctant to
change it just to see if something is faster.

Opensource projects with many contributors (git, linux) work differently,
since one or a few among the plethora of authors will almost always be
a true expert at the problem being solved.

The current pack-format and how it's read is one such example. It was
done once, by the combined efforts of Linus and Junio (this is all off
the top of my head and I cba to go looking up the details, so bear with
me if there are errors). Linus and Junio are both very good C-programmers,
but the handling of packfiles was not what you'd call their specialty.
Along came Nicolas Pitre, another excellent C programmer, who probably
has done some similar work before. He constructed a better algorithm,
eventually resulting in the ultimate performance win with a net gain
in both time and size (gj, Nicolas).

The point is that, given enough developers, *someone* is bound to
find an algorithm that works so well that it's no longer worth
investing time to even discuss if anything else would work better,
either because it moves the performance bottleneck to somewhere else
(where further speedups would no longer produce humanly measurable
improvements), or because the action seems instantaneous to the user
(further improvements simply aren't worth it, because no valuable
resource will be saved from it).

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  0:21         ` Dmitry Kakurin
                             ` (2 preceding siblings ...)
  2007-09-07  6:47           ` David Kastrup
@ 2007-09-07 10:21           ` Johannes Schindelin
  2007-09-08  0:32             ` Dmitry Kakurin
  3 siblings, 1 reply; 102+ messages in thread
From: Johannes Schindelin @ 2007-09-07 10:21 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

Hi,

On Thu, 6 Sep 2007, Dmitry Kakurin wrote:

> Anyway I don't mean to start a religious C vs. C++ war.

You have a very strange way of not meaning to start a C vs. C++ war.

> It's a matter of beliefs and as such pointless.

No, it's not.  As has been shown by some very good _arguments_.  Once you 
have facts to back up your claims, it is no longer a matter of belief.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:27               ` Linus Torvalds
  2007-09-07  3:09                 ` Dmitry Kakurin
@ 2007-09-07 10:26                 ` Johannes Schindelin
  1 sibling, 0 replies; 102+ messages in thread
From: Johannes Schindelin @ 2007-09-07 10:26 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Dmitry Kakurin, Matthieu Moy, Git

Hi,

On Fri, 7 Sep 2007, Linus Torvalds wrote:

> On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> 
> > I was pointing out that I've been programming in different languages 
> > (many more actually) and observed bad developers writing bad code in 
> > all of them. So this quality "bad developer" is actually 
> > language-agnostic :-).
> 
> You can write bad code in any language. However, some languages, and 
> especially some *mental* baggages that go with them are bad.

There is an important additional point: a language like C _holds_ you to a 
certain degree of diligence.

In my day-job I have to code in other languages, which make it "easy" to 
code.  As a result, the code I have to work with is sloppy, ugly and 
buggy.  By applying the same principles I am _forced_ to use in C, with 
Git, I produce better code.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  3:09                 ` Dmitry Kakurin
                                     ` (3 preceding siblings ...)
  2007-09-07  6:52                   ` David Kastrup
@ 2007-09-07 10:28                   ` Johannes Schindelin
  4 siblings, 0 replies; 102+ messages in thread
From: Johannes Schindelin @ 2007-09-07 10:28 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

Hi,

On Thu, 6 Sep 2007, Dmitry Kakurin wrote:

> Now, I realize that I'm a very infrequent contributor to Git, but I want 
> my opinion to be heard.

We are a happy little meritocracy here.  Once you proved that you're not 
full of shit (some seem to try the opposite, you know who you are), you 
can go all caps.  Before that, you'll have to show that you have earned the 
right to be heard.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-04 20:50 [RFC] Convert builin-mailinfo.c to use The Better String Library Lukas Sandström
                   ` (2 preceding siblings ...)
  2007-09-05 15:27 ` Kristian Høgsberg
@ 2007-09-07 10:47 ` Lukas Sandström
  3 siblings, 0 replies; 102+ messages in thread
From: Lukas Sandström @ 2007-09-07 10:47 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano

Lukas Sandström wrote:
> Hi.
> 
> This is an attempt to use "The Better String Library"[1] in builtin-mailinfo.c
> 
> The patch doesn't pass all the tests in the test suite yet, but I thought I'd
> send it out so people can decide if they like how the code looks.
> 
> I'm not sending a patch to add the library files at this time. I'll send
> that patch when this patch is working.
> 
> The changes required to make it pass the tests shouldn't be very large.
> 
> /Lukas
> 
> [1] http://bstring.sourceforge.net/
> 
> ---
>  builtin-mailinfo.c |  795 ++++++++++++++++++++++++++--------------------------
>  1 files changed, 392 insertions(+), 403 deletions(-)

Unfortunately, I haven't had any time in the last few days to code, nor read
mail. I'm assuming that there is no point in me finishing the patch and that git
will go with the strbuf solution?

/Lukas

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  6:25             ` Andreas Ericsson
@ 2007-09-07 10:56               ` Johannes Schindelin
  2007-09-07 11:54                 ` Andreas Ericsson
  2007-09-07 16:09                 ` David Kastrup
  2007-09-07 11:30               ` Wincent Colaiuta
  1 sibling, 2 replies; 102+ messages in thread
From: Johannes Schindelin @ 2007-09-07 10:56 UTC (permalink / raw)
  To: Andreas Ericsson
  Cc: Wincent Colaiuta, Dmitry Kakurin, Linus Torvalds, Matthieu Moy, Git

Hi,

On Fri, 7 Sep 2007, Andreas Ericsson wrote:

> Wincent Colaiuta wrote:
> > El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
> > 
> > > I just wanted to get a sense of how many people share this "Git should
> > > be in pure C" doctrine.
> > 
> > Count me as one of them. Git is all about speed, and C is the best choice
> > for speed, especially in context of Git's workload.
> > 
> 
> Nono, hand-optimized assembly is the best choice for speed. C is just
> a little more portable ;-)

I have a buck here that says that you cannot hand-optimise assembly (on 
modern processors at least) as good as even gcc.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  6:25             ` Andreas Ericsson
  2007-09-07 10:56               ` Johannes Schindelin
@ 2007-09-07 11:30               ` Wincent Colaiuta
  1 sibling, 0 replies; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07 11:30 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Dmitry Kakurin, Linus Torvalds, Matthieu Moy, Git

El 7/9/2007, a las 8:25, Andreas Ericsson escribió:

> Nono, hand-optimized assembly is the best choice for speed. C is just
> a little more portable ;-)

Funny thing is, GCC almost certainly produces better-optimized  
assembly than most programmers could... ;-)

Cheers,
Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  7:40                 ` David Kastrup
  2007-09-07  8:15                   ` Walter Bright
@ 2007-09-07 11:36                   ` Wincent Colaiuta
  1 sibling, 0 replies; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07 11:36 UTC (permalink / raw)
  To: David Kastrup; +Cc: Walter Bright, git

El 7/9/2007, a las 9:40, David Kastrup escribió:

> A design is perfect not when there is no longer anything you can add
> to it, but if there is no longer anything you can take away.

Il semble que la perfection soit atteinte non quand il n'y a plus  
rien à ajouter, mais quand il n'y a plus rien à retrancher.
Perfection is achieved, not when there is nothing more to add, but  
when there is nothing left to take away.
Ch. III: L'Avion, p. 60

<http://en.wikiquote.org/wiki/Exupery>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  8:36             ` Walter Bright
  2007-09-07  9:41               ` Andreas Ericsson
@ 2007-09-07 11:52               ` Wincent Colaiuta
  2007-09-07 19:25                 ` Walter Bright
  2007-09-22 16:52               ` Steven Burns
  2 siblings, 1 reply; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07 11:52 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

El 7/9/2007, a las 10:36, Walter Bright escribió:

> Wincent Colaiuta wrote:
>> Git is all about speed, and C is the best choice for speed,  
>> especially in context of Git's workload.
>
> I can appreciate that. I originally got into writing compilers  
> because my game (Empire) ran too slowly and I thought the existing  
> compilers could be dramatically improved.
>
> And technically, yes, you can write code in C that is >= the speed  
> of any other language (other than asm). But practically, this isn't  
> necessarily so, for the following reasons:
>
> 1) You wind up having to implement the complex, dirty details of  
> things yourself. The consequences of this are:
>
>    a) you pick a simpler algorithm (which is likely less efficient  
> - I run across bubble sorts all the time in code)
>
>    b) once you implement, tune, and squeeze all the bugs out of  
> those complex, dirty details, you're reluctant to change it. You're  
> reluctant to try a different algorithm to see if it's faster. I've  
> seen this effect a lot in my own code. (I translated a large body  
> of my own C++ code that I'd spent months tuning to D, and quickly  
> managed to get significantly more speed out of it, because it was  
> much simpler to try out different algorithms/data structures.)

While I accept that this is generally true, I think Git is somewhat  
of a special case. From a design perspective the data structures and  
algorithms are remarkably simple -- therein lies its elegance. I  
think it's precisely the kind of problem that can be tackled well  
with a close-to-the-metal language like C.

> 2) Garbage collection has an interesting and counterintuitive  
> consequence. If you compare n malloc/free's with n gcnew/ 
> collections, the malloc/free will come out faster, and you conclude  
> that gc is slow. But that misses one huge speed advantage of gc -  
> you can do FAR fewer allocations! For example, I've done a lot of  
> string manipulating programs in C. The basic problem is keeping  
> track of who owns each string. This is done by, when in doubt, make  
> a copy of the string.
>
> But if you have gc, you don't worry about who owns the string. You  
> just make another pointer to it. D takes this a step further with  
> the concept of array slicing, where one creates windows on existing  
> arrays, or windows on windows on windows, and no allocations are  
> ever done. It's just pointer fiddling.

This mirrors my experience in desktop application development.  
Despite GC being "slower" the app actually runs faster and a lot of  
nasty problems (shared resources, locking etc) just magically go  
away. Development is easier too.

But once again I think Git falls into a special category where the  
design makes the "hassle" of developing in C worth it.

Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 10:56               ` Johannes Schindelin
@ 2007-09-07 11:54                 ` Andreas Ericsson
  2007-09-07 12:33                   ` Wincent Colaiuta
  2007-09-07 16:09                 ` David Kastrup
  1 sibling, 1 reply; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-07 11:54 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Wincent Colaiuta, Dmitry Kakurin, Linus Torvalds, Matthieu Moy, Git

Johannes Schindelin wrote:
> Hi,
> 
> On Fri, 7 Sep 2007, Andreas Ericsson wrote:
> 
>> Wincent Colaiuta wrote:
>>> El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
>>>
>>>> I just wanted to get a sense of how many people share this "Git should
>>>> be in pure C" doctrine.
>>> Count me as one of them. Git is all about speed, and C is the best choice
>>> for speed, especially in context of Git's workload.
>>>
>> Nono, hand-optimized assembly is the best choice for speed. C is just
>> a little more portable ;-)
> 
> I have a buck here that says that you cannot hand-optimise assembly (on 
> modern processors at least) as good as even gcc.
> 


http://www.gelato.unsw.edu.au/archives/git/0504/1746.html

I win. Donate $1 to FSF next time you get the opportunity ;-)

Hand-optimized asm is faster because the optimizer in the compiler is a
general-purpose one that has to guess and make assumptions about the code
and its input to make the correct decisions. While it gets things right
in as many as 80% of the cases, there's still the 20% where it doesn't.
A human can, with sufficient research and effort, make the same optimizations
where they are correct but avoid the 20% erroneous ones.

If the compiler gets it wrong inside your innermost loop, it might be worth
shaving those extra 0.0001 seconds off of each iteration, because in the long
run, world-wide, it might save several weeks worth of CPU-time every day.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 11:54                 ` Andreas Ericsson
@ 2007-09-07 12:33                   ` Wincent Colaiuta
  2007-09-07 12:55                     ` Karl Hasselström
  2007-09-07 13:58                     ` Andreas Ericsson
  0 siblings, 2 replies; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07 12:33 UTC (permalink / raw)
  To: Andreas Ericsson
  Cc: Johannes Schindelin, Dmitry Kakurin, Linus Torvalds, Matthieu Moy, Git

El 7/9/2007, a las 13:54, Andreas Ericsson escribió:

> Johannes Schindelin wrote:
>> Hi,
>> On Fri, 7 Sep 2007, Andreas Ericsson wrote:
>>> Wincent Colaiuta wrote:
>>>> El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
>>>>
>>>>> I just wanted to get a sense of how many people share this "Git  
>>>>> should
>>>>> be in pure C" doctrine.
>>>> Count me as one of them. Git is all about speed, and C is the  
>>>> best choice
>>>> for speed, especially in context of Git's workload.
>>>>
>>> Nono, hand-optimized assembly is the best choice for speed. C is  
>>> just
>>> a little more portable ;-)
>> I have a buck here that says that you cannot hand-optimise  
>> assembly (on modern processors at least) as good as even gcc.
>
>
> http://www.gelato.unsw.edu.au/archives/git/0504/1746.html
>
> I win. Donate $1 to FSF next time you get the opportunity ;-)

Well, you picked a very specific algorithm amenable to that kind of  
optimization: small, manageable, with a minimal and well-defined  
performance critical section that could be written in assembly. Note  
how a good chunk of the implementation was still in C. At most I'd  
give you 75 cents for that one. ;-)

Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 12:33                   ` Wincent Colaiuta
@ 2007-09-07 12:55                     ` Karl Hasselström
  2007-09-07 13:58                     ` Andreas Ericsson
  1 sibling, 0 replies; 102+ messages in thread
From: Karl Hasselström @ 2007-09-07 12:55 UTC (permalink / raw)
  To: Wincent Colaiuta
  Cc: Andreas Ericsson, Johannes Schindelin, Dmitry Kakurin,
	Linus Torvalds, Matthieu Moy, Git

On 2007-09-07 14:33:42 +0200, Wincent Colaiuta wrote:

> Well, you picked a very specific algorithm amenable to that kind of
> optimization: small, manageable, with a minimal and well-defined
> performance critical section that could be written in assembly. Note
> how a good chunk of the implementation was still in C.

And this is of course exactly the kind of spot where you _would_ use
assembly in the real world. 99.99% of code is better written in C than
assembler, but there is that 0.01% where hand-coded assembler is a
better choice.

-- 
Karl Hasselström, kha@treskal.com
      www.treskal.com/kalle

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 12:33                   ` Wincent Colaiuta
  2007-09-07 12:55                     ` Karl Hasselström
@ 2007-09-07 13:58                     ` Andreas Ericsson
  2007-09-07 14:13                       ` Wincent Colaiuta
  1 sibling, 1 reply; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-07 13:58 UTC (permalink / raw)
  To: Wincent Colaiuta
  Cc: Johannes Schindelin, Dmitry Kakurin, Matthieu Moy, Git, Linus Torvalds

Wincent Colaiuta wrote:
> El 7/9/2007, a las 13:54, Andreas Ericsson escribió:
> 
>> Johannes Schindelin wrote:
>>> Hi,
>>> On Fri, 7 Sep 2007, Andreas Ericsson wrote:
>>>> Wincent Colaiuta wrote:
>>>>> El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
>>>>>
>>>>>> I just wanted to get a sense of how many people share this "Git 
>>>>>> should
>>>>>> be in pure C" doctrine.
>>>>> Count me as one of them. Git is all about speed, and C is the best 
>>>>> choice
>>>>> for speed, especially in context of Git's workload.
>>>>>
>>>> Nono, hand-optimized assembly is the best choice for speed. C is just
>>>> a little more portable ;-)
>>> I have a buck here that says that you cannot hand-optimise assembly 
>>> (on modern processors at least) as good as even gcc.
>>
>>
>> http://www.gelato.unsw.edu.au/archives/git/0504/1746.html
>>
>> I win. Donate $1 to FSF next time you get the opportunity ;-)
> 
> Well, you picked a very specific algorithm amenable to that kind of 
> optimization: small, manageable, with a minimal and well-defined 
> performance critical section that could be written in assembly. Note how 
> a good chunk of the implementation was still in C. At most I'd give you 
> 75 cents for that one. ;-)
> 

Yes, but that's what I said in the original email as well. C is just so
much more pleasant to write in that the only place you'd (sanely) use
asm is in exactly these tight loops, where the code is likely to be used
and reused until the algorithm it describes is no longer a viable option
for doing what it was originally designed to do.

It still proves the point though, as surely as n+1 > n for any value of n:
Hand-optimized assembly is faster than compiler-optimized C code.

It might be harder to do properly on some architectures than others (RISC
comes to mind), but it's still possible.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 13:58                     ` Andreas Ericsson
@ 2007-09-07 14:13                       ` Wincent Colaiuta
  2007-09-09  0:09                         ` Andreas Ericsson
  0 siblings, 1 reply; 102+ messages in thread
From: Wincent Colaiuta @ 2007-09-07 14:13 UTC (permalink / raw)
  To: Andreas Ericsson
  Cc: Johannes Schindelin, Dmitry Kakurin, Matthieu Moy, Git, Linus Torvalds

El 7/9/2007, a las 15:58, Andreas Ericsson escribió:

> Yes, but that's what I said in the original email as well. C is  
> just so
> much more pleasant to write in that the only place you'd (sanely) use
> asm is in exactly these tight loops, where the code is likely to be  
> used
> and reused until the algorithm it describes is no longer a viable  
> option
> for doing what it was originally designed to do.
>
> It still proves the point though, as surely as n+1 > n for any  
> value of n:
> Hand-optimized assembly is faster than compiler-optimized C code.

In a theoretical ideal world, yes; no one would argue that C is  
faster than fine-tuned assembly.

But in the *real world* rewriting Git in assembly would be like  
painting a house using a single horse hair instead of a paint brush  
or roller. Your SHA-1 example is a perfect example of where you  
benefit from doing a tiny embellished detail using the single hair  
(assembly) and leave all the rest in C.

In the real world and not the theoretical ideal world, it's not just  
about the diminishing returns you get from writing more and more of a  
code base in assembly instead of just the performance-critical  
bottlenecks; it's that you're more likely to make subtle mistakes or  
even make things slower. GCC does a remarkable job of optimizing in a  
huge number of use cases, and best of all, it does it for free.  
Personal opinion, of course, but that's the way I think it is.

Cheers,
Wincent

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 10:56               ` Johannes Schindelin
  2007-09-07 11:54                 ` Andreas Ericsson
@ 2007-09-07 16:09                 ` David Kastrup
  1 sibling, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07 16:09 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Andreas Ericsson, Wincent Colaiuta, Dmitry Kakurin,
	Linus Torvalds, Matthieu Moy, Git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> On Fri, 7 Sep 2007, Andreas Ericsson wrote:
>
>> Wincent Colaiuta wrote:
>> > El 7/9/2007, a las 2:21, Dmitry Kakurin escribió:
>> > 
>> > > I just wanted to get a sense of how many people share this "Git should
>> > > be in pure C" doctrine.
>> > 
>> > Count me as one of them. Git is all about speed, and C is the best choice
>> > for speed, especially in context of Git's workload.
>> > 
>> 
>> Nono, hand-optimized assembly is the best choice for speed. C is just
>> a little more portable ;-)
>
> I have a buck here that says that you cannot hand-optimise assembly
> (on modern processors at least) as good as even gcc.

That assumes that the original task can even be expressed well in C.
Multiple precision arithmetic, for example, requires access to the
carry bit.  You can code around this, for example by writing something
like

unsigned a,b,carry;

[...]

carry = (a+b) < a;

but the problem is that those are ad-hoc idioms with a variety of
possibilities, and thus the compilers are not made to recognize them.
Another thing is mixed-precision multiplications and divisions: those
are _natural_ operations on a normal CPU, but have no representation
in C.

As a consequence, most high performance multiple-precision packages
contain assembly language in some form or other.

gcc's assembly language templates are excellent in that they actually
cooperate nicely with the optimizer, so the optimizer can do all the
address calculations and register assignments and opcode reorderings,
and then the actual operations that are not expressible in C can be
done by the programmer.

But anyway, I have worked as a graphics driver programmer for some
amount of time, and bit-stuffing memory-mapped areas with data was
still something where hand assembly was best.

I have also done BIOS terminal emulators, and being able to write
something like

ld b,whatever
myloop:
push bc
push hl
call nextchar
pop hl
pop bc
ld (hl),a
inc hl
djnz myloop

in order to suspend the terminal driver until the application comes up
with the next `whatever' output characters in an escape sequence is
_wagonloads_ more maintainable than using a state machine or whatever
else for distributing material delivered into the driver.

But this requires that nextchar can do something like
nextchar: ld (driverstack),sp
  ld sp,(appstack)
  ret

and the entrypoint, in contrast, does

outchar: ld (appstack),sp
  ld sp,(driverstack)
  ret

Cheap and expedient.  You just need to set up a small stack, and
presto: coroutines, at absolutely negligible cost.  I know that there
are some "portable" coroutine implementations that use setjmp/longjmp
in a rather horrific way, but those are way more unnatural.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  9:41                 ` Pierre Habouzit
@ 2007-09-07 19:03                   ` Walter Bright
  2007-09-07 19:31                     ` David Kastrup
  2007-09-07 19:41                     ` Pierre Habouzit
  0 siblings, 2 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 19:03 UTC (permalink / raw)
  To: git

Pierre Habouzit wrote:
>   Well, to me D has two significant drawbacks to be "ready to use". The
> first one is that it doesn't have bit-fields. I often deal with bit-fields
> on structures that have a _lot_ of instances in my program, and the
> bit-field is chosen for code readability _and_ structure size efficiency.
> I know you pretend that using masks manually often generates better
> code. But in my case, speed does not matter _that_ much. I mean it does,
> but not at this micro-level, as access to the bit-field is not in my
> inner loop.

I'm surprised this is such an important issue. Others have mentioned it, 
but regard it as a minor thing. Interestingly, the htod program (which 
converts C .h files to D import files) will convert bit fields to inline 
functions, giving equivalent functionality.

>   The second issue I have is that there is no way to do:
>   import (C) "foo.h"
> 
>   And this is a big no-go (maybe not for git, but as a general issue)
> because it impedes the use of external libraries with a C interface a
> _lot_. E.g. I'd really like to use it to use some GNU libc extensions,
> but I can't because it has too many dependencies (some async getaddrinfo
> interface, that need me to import all the signal events and so on
> extensions in the libc, with bitfields, which sends us back to the first
> point).

D does come with htod, which converts C .h files to D files. It's not 
possible to do a perfect job (because of macros), but it comes pretty 
darned close. The reason htod gets so close is because it is actually a 
real C compiler front end, not a perl or regex string processing hack.

Because it (may) require a little hand tweaking of the results (again, 
because C headers may include awful things like:
	#define BEGIN {
	#define print printf(
), it's a separate program rather than built-in.


>   I also have a third, but non critical issue, I absolutely don't like
> phobos :)

You're not the only one <g>. But I'll add that access to the standard C 
runtime library *is* a part of D, so at some level it can't be worse 
than C. There's also another runtime library available, Tango, which is 
very popular.

> Though I'm obviously free to choose another library. D has
> definitely many many many real advances over C (like the .init, .size,
> ... and so on fields, known types, and whatever portability nightmare
> C imposes on us). In fact I like to use D like I code in C, using
> modules and functions, and very few classes, as few as I can. And even
> (under- ?) using D like this, it is a real pleasure to work with. I'm
> really eager to see gdc be more stable.

There are a lot of people hard at work on D to make it more stable and 
increase the breadth and depth of tools available. I am fully aware that 
there may be non-technical issues to using D in a project like git, like 
availability of other D programmers, tradition, etc., but in this thread 
I'm concerned mainly with technical issues.

P.S. I'm also NOT suggesting that git be converted to D. Translating a 
working, debugged, 80,000 line codebase from one language to another is 
usually a fool's errand.

Thanks for taking the time to post your thoughts.

-----------
Walter Bright
http://www.digitalmars.com  C, C++, D programming language compilers
http://www.astoriaseminar.com  Extraordinary C++

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  9:41               ` Andreas Ericsson
@ 2007-09-07 19:23                 ` Walter Bright
  2007-09-07 19:40                   ` David Kastrup
                                     ` (2 more replies)
  0 siblings, 3 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 19:23 UTC (permalink / raw)
  To: git

Andreas Ericsson wrote:
> Walter Bright wrote:
>> 1) You wind up having to implement the complex, dirty details of 
>> things yourself. The consequences of this are:
>>
>>    a) you pick a simpler algorithm (which is likely less efficient - I 
>> run across bubble sorts all the time in code)
>>
>>    b) once you implement, tune, and squeeze all the bugs out of those 
>> complex, dirty details, you're reluctant to change it. You're 
>> reluctant to try a different algorithm to see if it's faster. I've 
>> seen this effect a lot in my own code. (I translated a large body of 
>> my own C++ code that I'd spent months tuning to D, and quickly managed 
>> to get significantly more speed out of it, because it was much simpler 
>> to try out different algorithms/data structures.)
>>
> 
> I haven't seen this in the development of git, although to be fair, you
> didn't mention the number of developers that were simultaneously working
> on your project.

On my project, one. But I've seen this problem repeatedly in other 
projects that had multiple developers. For example, I used to use 
version 1 of an assembler. It was itself written entirely in assembler. 
It ran *incredibly* slowly on large asm files. But it was written in 
assembler, which is very fast, so how could that be?

Turns out, the symbol table used internally was a linear one. A linear 
symbol table is easy to implement, but doesn't scale well at all. A 
linear symbol table was implemented because it was just harder to do 
more advanced symbol table algorithms in assembler. In this case, a 
higher level language re-implementation made the assembler much faster, 
even though that implementation was SLOWER in every detail. It was 
faster overall, because it was easier to develop faster algorithms.


> If it was you alone, I can imagine you were reluctant to
> change it just to see if something is faster.

My point was that when I reimplemented it in D, the cost of changing the 
algorithms got much lower, so I was much more tempted to muck around 
trying out different ones. The result was I found faster ones.


> Opensource projects with many contributors (git, linux) work differently,
> since one or a few among the plethora of authors will almost always be
> a true expert at the problem being solved.

That is a nice advantage. I don't think many projects can rely on having 
the best in the business working on them, though <g>.


> The point is that, given enough developers, *someone* is bound to
> find an algorithm that works so well that it's no longer worth
> investing time to even discuss if anything else would work better,
> either because it moves the performance bottleneck to somewhere else
> (where further speedups would no longer produce humanly measurable
> improvements), or because the action seems instantaneous to the user
> (further improvements simply aren't worth it, because no valuable
> resource will be saved from it).

Sure, but I suggest that few projects reach this maximum. Case in point: 
ld, the gnu linker. It's terribly slow. To see how slow it is, compare 
it to optlink (the 15 years old one that comes with D for Windows). So I 
don't believe there is anything inherent about linking that should make 
ld so slow. There's some huge leverage possible in speeding up ld 
(spreading out that saved time among all the gnu developers).

So while git may have reached a maximum in performance, I don't think 
this principle is applicable in general, even for very widely used open 
source projects that would profit greatly from improved performance.

------
Walter Bright
http://www.digitalmars.com  C, C++, D programming language compilers
http://www.astoriaseminar.com  Extraordinary C++

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 11:52               ` Wincent Colaiuta
@ 2007-09-07 19:25                 ` Walter Bright
  0 siblings, 0 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 19:25 UTC (permalink / raw)
  To: git

Wincent Colaiuta wrote:
> But once again I think Git falls into a special category where the 
> design makes the "hassle" of developing in C worth it.

That may very well be true. I've never looked at the source code for 
git, so I'm not in any position to judge it. Nor do I suggest 
translating a debugged, working, 80,000 line project into another language.

My comments here are in more general terms.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:03                   ` Walter Bright
@ 2007-09-07 19:31                     ` David Kastrup
  2007-09-07 20:49                       ` Walter Bright
  2007-09-07 19:41                     ` Pierre Habouzit
  1 sibling, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07 19:31 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright <boost@digitalmars.com> writes:

> There are a lot of people hard at work on D to make it more stable
> and increase the breadth and depth of tools available. I am fully
> aware that there may be non-technical issues to using D in a project
> like git, like availability of other D programmers, tradition, etc.,
> but in this thread I'm concerned mainly with technical issues.
>
> P.S. I'm also NOT suggesting that git be converted to D. Translating
> a working, debugged, 80,000 line codebase from one language to
> another is usually a fool's errand.

In my opinion there is basically one area which C has botched up
seriously in order to be useful as a general purpose language, and
that is conflating pointers and arrays, and allowing pointer
arithmetic.  The consequences are absolutely awful with regard to
compilers being able to optimize, and it is pretty much the primary
reason that Fortran is still quite in use for numerical work.

C has no usable two-dimensional (never mind higher dimensions) array
concept that would allow passing multidimensional arrays of
runtime-determined size into functions.  Period.

Add to that the pointer aliasing problems affecting compilers, and C
is useless for serious portable readable numerical work.

Fortran libraries like blas and lapack are ubiquitous after decades
because the language can deal with multiple-dimension arrays sensibly,
and could do so in the sixties already.

C99 helps a bit.  But messing around with restrict pointers and
similar means that to wring equal performance out of some trivial code
piece (or permitting the compiler to do so without having to take
aliasing into account) is a lot of work and leads to ugly and
inscrutable code.

That's the one thing that has seriously hampered C: the lack of a true
array type on its own, decoupled from pointers.  It does not need to
carry its dimensions with it or other
hide-the-implementation-from-the-programmer niceties: C is, after all,
a low-level language, and Fortran did not suffer from not having array
dimensions packed into the arrays as well.

But that's water down the drawbridge.  This single major deficiency is
not anything that would hamper git development.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 19:23                 ` Walter Bright
@ 2007-09-07 19:40                   ` David Kastrup
  2007-09-09  0:25                   ` Andreas Ericsson
  2009-09-17 16:23                   ` Bernd Jendrissek
  2 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-07 19:40 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright <boost@digitalmars.com> writes:

> On my project, one. But I've seen this problem repeatedly in other
> projects that had multiple developers. For example, I used to use
> version 1 of an assembler. It was itself written entirely in
> assembler. It ran *incredibly* slowly on large asm files. But it was
> written in assembler, which is very fast, so how could that be?
>
> Turns out, the symbol table used internally was a linear one. A
> linear symbol table is easy to implement, but doesn't scale well at
> all.

Well, my first system was a Z80 computer with an editor/assembler in
ROM (4kb).  At one time I tried figuring out the size requirements of
symbols.  It was two bytes for each symbol.  Namely the value.  The
"symbol table" was located behind the source code.  Whenever this
marvel of technology encountered a label, it searched the source code
from the beginning for the definition of the label, keeping count of
all label definitions in between.  When it found the definition, the
count corresponded to the position in the symbol table.

So compilation times were O(ns), with n the number of symbol uses and
s the size of the source code.

Implementing in a higher language would not have helped: memory
efficiency was what dictated this layout.  Given that the whole
available memory was perhaps 50kB, assembly language modules could not
get so large that scale issues were deadly.  But the assembly times
did get annoying sometimes.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:03                   ` Walter Bright
  2007-09-07 19:31                     ` David Kastrup
@ 2007-09-07 19:41                     ` Pierre Habouzit
  2007-09-07 19:51                       ` David Kastrup
  2007-09-07 20:40                       ` Walter Bright
  1 sibling, 2 replies; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-07 19:41 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 5595 bytes --]

On Fri, Sep 07, 2007 at 07:03:24PM +0000, Walter Bright wrote:
> Pierre Habouzit wrote:
> >  Well, to me D has two significant drawbacks to be "ready to use". The
> >first one is that it doesn't has bit-fields. I often deal with 
> >bit-fields
> >on structures that have a _lot_ of instances in my program, and the
> >bit-field is chosen for code readability _and_ structure size 
> >efficiency.
> >I know you pretend that using masks manually often generates better
> >code. But in my case, speed does not matter _that_ much. I mean it does,
> >but not that this micro-level as access to the bit-field is not my
> >inner-loop.
> 
> I'm surprised this is such an important issue. Others have mentioned it, 
> but regard it as a minor thing. Interestingly, the htod program (which 
> converts C .h files to D import files) will convert bit fields to inline 
> functions, giving equivalent functionality.

  Well htod does that, but it's very impractical to write them from
scratch. Especially if you want to benefit from the fact that padding
and integer sizes are very well defined to map e.g. structs onto a raw
stream, avoiding deserialization and so on. And for that bit-fields are
a really really fast and simple way to describe things.

  I mean, take your classical example of the foreach loop. Your whole
point is that it's way shorter, and safer. And now you are saying that
people should instead of sth like:

  struct my_struct {
    unsigned some_field : 2;
    unsigned has_this_property : 1;
    unsigned is_in_this_state  : 1;
    unsigned priority_level    : 2;
    ...
  }

  people should write (IIRC it works since ->some_field = 2 calls
->some_field(2) if the member does not exist, or maybe it's
set_some_field, it's not very relevant anyway):

  struct my_struct {
    unsigned some_field() {
      return this->real_field >> 30;
    }

    void some_field(unsigned value) {
      this->real_field |= (value & 3) << 30;
    }

    ...

  private:
    unsigned real_field;
  }

  Please, it has to be a joke: there are 42 ways for people to write it
wrong (wrong shifts, wrong masks, and so on), it's horribly obfuscated,
hence needs a lot of comments, whereas the bitfield is 90% self
documented, and the syntax is _very_ clear, you cannot beat that. I
would be absolutely fine with it being syntactical sugar for some kind
of template call though.

  Not to mention that the usual C idiom:

  union {
    unsigned flags;
    struct {
      // many bitfields
    };
  };

  Would need an explicit copy_flags(const my_struct foo) function to
work. Not pretty, not straightforward.

  Really, I feel this is a big lack, for a language that aims at
simplicity, conciseness _and_ correctness.

  OK, maybe I'm biased, I work with networks protocols all day long, so
I often need bitfields, but still, a lot of people deal with network
protocols, it's not a niche.

> >  The other second issue I have, is that there is no way to do:
> >  import (C) "foo.h"
> >  And this is a big no-go (maybe not for git, but as a general issue)
> >because it impedes the use of external libraries with a C interface a
> >_lot_. E.g. I'd really like to use it to use some GNU libc extensions,
> >but I can't because it has too many dependencies (some async getaddrinfo
> >interface, that need me to import all the signal events and so on
> >extensions in the libc, with bitfields, wich send us back to the first
> >point).
> 
> D does come with htod, which converts C .h files to D files.

  Last time I checked it was only available on windows, and closed
source, both are an impediment for many people. It's definitely clear
that gcc being opensource and available on so many platforms helped to
make C what it is today. The lack of portable and free (as in speech) tools
is an impediment to the success of a language. Right now, for D, only
gdc exists, it lags behind dmd quite a lot afaict, and there are no other
toolchain helpers yet.

> It's not possible to do a perfect job (because of macros), but it
> comes pretty darned close. The reason htod gets so close is because it
> is actually a real C compiler front end, not a perl or regex string
> processing hack.
> 
> Because it (may) require a little hand tweaking of the results (again, 
> because C headers may include awful things like:
> 	#define BEGIN {
> 	#define print printf(
> ), it's a separate program rather than built-in.

  Yeah I'm fine with that, but sadly it's not available everywhere like
I said.

> >  I also have a third, but non critical issue, I absolutely don't like
> >phobos :)
> 
> You're not the only one <g>. But I'll add that access to the standard C 
> runtime library *is* a part of D, so at some level it can't be worse than 
> C. There's also another runtime library available, Tango, which is very 
> popular.

  I completely agree, and I knew about Tango, and anyways, I'm so used
to C, and D has so little to bring to my code style when I deal with low
level system functions, that I'm totally fine with std.c.* anyways :)

  For the record I wasn't suggesting to rewrite git in D at all. I just
happened to see your post, and being very interested in where D is going
because I feel it's an excellent language, and saw an opportunity to
mention a few quirks I feel it has, so, well, I answered :)

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:41                     ` Pierre Habouzit
@ 2007-09-07 19:51                       ` David Kastrup
  2007-09-07 19:59                         ` Pierre Habouzit
  2007-09-07 20:40                       ` Walter Bright
  1 sibling, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07 19:51 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: Walter Bright, git

Pierre Habouzit <madcoder@debian.org> writes:

[bit fields]

>   Really, I feel this is a big lack, for a language that aims at
> simplicity, conciseness _and_ correctness.
>
>   OK, maybe I'm biased, I work with networks protocols all day long, so
> I often need bitfields, but still, a lot of people deal with network
> protocols, it's not a niche.

And strictly speaking, C bitfields are completely useless for that
purpose since the compiler is free to use whatever method it wants for
allocating bit fields.  So if you want to write a portable program,
you are back to making the masks yourself.

Where bit fields work reliably is when you are not interchanging data
with other applications, but just laying out your internals.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:51                       ` David Kastrup
@ 2007-09-07 19:59                         ` Pierre Habouzit
  0 siblings, 0 replies; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-07 19:59 UTC (permalink / raw)
  To: David Kastrup; +Cc: Walter Bright, git

[-- Attachment #1: Type: text/plain, Size: 1435 bytes --]

On Fri, Sep 07, 2007 at 07:51:11PM +0000, David Kastrup wrote:
> Pierre Habouzit <madcoder@debian.org> writes:
> 
> [bit fields]
> 
> >   Really, I feel this is a big lack, for a language that aims at
> > simplicity, conciseness _and_ correctness.
> >
> >   OK, maybe I'm biased, I work with networks protocols all day long, so
> > I often need bitfields, but still, a lot of people deal with network
> > protocols, it's not a niche.
> 
> And strictly speaking, C bitfields are completely useless for that
> purpose since the compiler is free to use whatever method he wants for
> allocating bit fields.  So if you want to write a portable program,
> you are back to making the masks yourself.

  The point is (1) D is not C, (2) we all know that linux e.g. does that
in many places using the fact that it knows how the supported compilers
(gcc icc tcc maybe some other) do their packing.

  The discussion is about D. D solves the infamous problem with longs
not having the same size everywhere, I don't see why it couldn't solve
the bitfield issue either.

> Where bit fields work reliably is when you are not interchanging data
> with other applications, but just laying out your internals.

  Thank you for the _C_ lesson.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07  9:31                         ` David Kastrup
@ 2007-09-07 20:22                           ` Walter Bright
  2007-09-07 20:27                             ` David Kastrup
  2007-09-08 23:50                             ` Andreas Ericsson
  0 siblings, 2 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 20:22 UTC (permalink / raw)
  To: git

David Kastrup wrote:
> Again, C won't keep you from shooting yourself in the foot.

Right, it won't. A good systems language should do what it can to 
prevent the programmer from *inadvertently* shooting himself in the 
foot, while allowing him to *deliberately* shoot himself in the foot.

In the example loop, the cases I pointed out were ones where, if one 
changes part of the code (such as the array dimension, or the array 
type, etc.), then there are multiple places in the source that must be 
updated to reflect it. The reality of human programmers is we update one 
or two places, and overlook the third place, and we now have a bug. 
Saying that one should just be a better programmer and not make such 
mistakes is a pipe dream.

Ideally, each facet of the design of the code should have a single point 
where it can be changed, and then all dependencies on that design should 
be automatically updated. That way, nothing gets overlooked. Doing this 
in C, such as using a #define for the array dimension, involves extra work.

It's a truism that if it involves extra work, then it often gets omitted.

Doesn't "A design is perfect not when there is no longer anything you 
can add to it, but if there is no longer anything you can take away." 
apply here? Going from:

  void foo(int array[10])
  {
     for (int i = 0; i < 10; i++)
     {   int value = array[i];
         ... do something ...
     }
  }

to:

  void foo(int[] array)
  {
    foreach (value; array)
    {
      ... do something ...
    }
  }

takes a lot of frankly unnecessary things away, each of which is a 
potential source of error when maintaining the code.


> No, because programmers get things wrong.

Exactly. That goes back to my point that a good language should help 
prevent inadvertent errors, while still allowing deliberate choices. D 
approaches this by making the correct approach essentially be the one 
with minimal typing effort. To deliberately shoot yourself in the foot 
usually requires extra typing. For our loop example, we can still write 
the C style loop (with all its potential problems) in D, but it requires 
extra effort to put in those potential problems. The easier, simpler way 
doesn't have the problems.

(The issue I have with C++'s fixes to various problems is they require 
extra typing (like the loop example), so guess what, people being people 
tend to not use them. This results in endless attempts to try and push 
C++ programmers into using the more verbose forms, a strategy I suspect 
will be ultimately futile.)


> You can tell C compilers to
> check all array accesses, but that is a performance issue.

Runtime checking of arrays in D is a performance issue too, so it is 
selectable via a command line switch. But more importantly,

1) Static type checking of fixed size arrays works, so errors can be 
caught at compile time.

2) For dynamically sized arrays, the dimension of the array is carried 
with the array, so loops automatically loop the correct number of times. 
No runtime check is necessary, and it's easier for the code reviewer to 
visually check the code for correctness.

------
Walter Bright
http://www.digitalmars.com  C, C++, D programming language compilers
http://www.astoriaseminar.com  Extraordinary C++

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 20:22                           ` Walter Bright
@ 2007-09-07 20:27                             ` David Kastrup
  2007-09-07 23:16                               ` Walter Bright
  2007-09-08 23:50                             ` Andreas Ericsson
  1 sibling, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07 20:27 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright <boost@digitalmars.com> writes:

>  void foo(int array[10])
>  {
>     for (int i = 0; i < 10; i++)
>     {   int value = array[i];
>         ... do something ...
>     }
>  }
>
> to:
>
>  void foo(int[] array)
>  {
>    foreach (value; array)
>    {
>      ... do something ...
>    }
>  }
>
> takes a lot of frankly unnecessary things away, each of which is a
> potential source of error when maintaining the code.

The problem is a toy problem: in real applications, you'll need to
access several data structures using the same index, and you'll need
to be able to assign index values to temporary variables and so on.
So being able to hide the type of an index in one very specific
application (looping through a single array completely) at one place
is not going to buy you much.

Anyway, D is pretty much irrelevant as a perspective for git, so you
should take it to a language advocacy group.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:41                     ` Pierre Habouzit
  2007-09-07 19:51                       ` David Kastrup
@ 2007-09-07 20:40                       ` Walter Bright
  2007-09-07 20:56                         ` Pierre Habouzit
  1 sibling, 1 reply; 102+ messages in thread
From: Walter Bright @ 2007-09-07 20:40 UTC (permalink / raw)
  To: git

Pierre Habouzit wrote:
>   Well htod does that, but it's very impractical to write them from
> scratch.

True. I haven't tried yet (nobody else seems to care about it as much as 
you do!), but I think this could be automated fairly easily with a template.


> And for that bit-fields are
> a really really fast and simple way to describe things.

I should point out that inline functions are inlined, and there is no 
speed difference in the result.


>   Not to mention that the usual C idiom:
> 
>   union {
>     unsigned flags;
>     struct {
>       // many bitfields
>     };
>   };
> 
>   Would need an explicit copy_flags(const my_struct foo) function to
> work. Not pretty, not straightforward.

I'm not following this. To copy a union, you just copy it with the 
assignment operator:

	U a, b;
	a = b;		// copies all the bit fields, too!


>> D does come with htod, which converts C .h files to D files.
>   Last time I checked it was only available on windows, and closed
> source, both are an impediment for many people.

You're right on both counts. It's because htod is built out of a fork of 
the Digital Mars C compiler. Something similar could be done with gcc, 
but I'm not the person to do it. I should also get off my lazy tail and 
port htod to linux.


> Right now, for D, only
> gdc exists, it lags behind dmd quite a lot afaict, and there is no other
> toolchain helpers yet.

GDC was just released for D 1.020, which is behind D 1.021, but 1.021 
was released just a couple days ago <g>.


>   For the record I wasn't suggesting to rewrite git in D at all. I just
> happened to see your post, and being very interested in where D is going
> because I feel it's an excellent langage, and saw an opportunity to
> mention a few quirks I feel it has, so, well, I answered :)

And it's nice to hear your perspective, which is why I dropped by this 
thread.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 19:31                     ` David Kastrup
@ 2007-09-07 20:49                       ` Walter Bright
  0 siblings, 0 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 20:49 UTC (permalink / raw)
  To: git

David Kastrup wrote:
> In my opinion there is basically one area which C has botched up
> seriously in order to be useful as a general purpose language, and
> that is conflating pointers and arrays, and allowing pointer
> arithmetic.  The consequences are absolutely awful with regard to
> compilers being able to optimize, and it is pretty much the primary
> reason that Fortran is still quite in use for numerical work.

I agree. It's one of those things that probably sounded like a good idea 
at the time. The consequences were not foreseen. All languages have a 
few of these (C++ has the infamous use of < > for template arguments).

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 20:40                       ` Walter Bright
@ 2007-09-07 20:56                         ` Pierre Habouzit
  2007-09-07 22:54                           ` Walter Bright
  0 siblings, 1 reply; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-07 20:56 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 1836 bytes --]

On Fri, Sep 07, 2007 at 08:40:56PM +0000, Walter Bright wrote:
> Pierre Habouzit wrote:
> >And for that bit-fields are
> >a really really fast and simple way to describe things.
>
> I should point out that inline functions are inlined, and there is no 
> speed difference in the result.

  I know that, and that's why I said I was totally fine with the
bitfield notation being only syntactic sugar on a template thingy: if
that's the simplest way to have it, that's okay.

> >  Not to mention that the usual C idiom:
> >  union {
> >    unsigned flags;
> >    struct {
> >      // many bitfields
> >    };
> >  };
> >  Would need an explicit copy_flags(const my_struct foo) function to
> >work. Not pretty, not straightforward.
> 
> I'm not following this. To copy a union, you just copy it with the 
> assignment operator:
> 
> 	U a, b;
> 	a = b;		// copies all the bit fields, too!

  That was the point indeed. But if you don't have bitfields, you can't
do the union. And if the bitfield is just syntactic sugar, it may be
impossible to have such a union. But I may be wrong.

> >Right now, for D, only
> >gdc exists, it lags behind dmd quite a lot afaict, and there is no other
> >toolchain helpers yet.
> 
> GDC was just released for D 1.020, which is behind D 1.021, but 1.021 was 
> released just a couple days ago <g>.

  Sure, but it does not work on amd64 properly (and it's the
architecture I care about) and is not ready for the current gcc (4.2,
only 4.1 builds) and so on. It's not as stable as DMD is. It does not
lag too much version-wise, it lags in maturity. But well, youth has a
cure: time :)

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  6:31                   ` Andreas Ericsson
@ 2007-09-07 22:17                     ` Dmitry Kakurin
  2007-09-07 22:28                       ` David Kastrup
  2007-09-09  0:29                       ` Andreas Ericsson
  0 siblings, 2 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-07 22:17 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Linus Torvalds, Matthieu Moy, Git

On 9/6/07, Andreas Ericsson <ae@op5.se> wrote:
> They already have, but every now and then someone comes along and suggest
> a complete rewrite in some other language. So far we've had Java (there's
> always one...), Python and now C++.

Since this "complete rewrite" was mentioned in multiple emails I'd
like to rectify that:
What I'm offering (for Git) is to use C++ as a "better C".
Don't change any existing *working* code, but start introducing simple
C++ constructs in the new code.
Git is simple enough to not require any high-level abstractions. But
some utility classes could make code much simpler.

And BTW, I don't even like C++ that much :-), I just like it much
better than C.  I've been saying that C++ is a legacy language for
quite some time now. But we will use it for many years to come because
the size of this legacy code is huge, so there will be plenty of C++
developers available (to contribute to Git :-).
And C++ is the only way to move forward with an existing C codebase.
-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 22:17                     ` Dmitry Kakurin
@ 2007-09-07 22:28                       ` David Kastrup
  2007-09-08  0:37                         ` Dmitry Kakurin
  2007-09-09  0:29                       ` Andreas Ericsson
  1 sibling, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-07 22:28 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Andreas Ericsson, Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> On 9/6/07, Andreas Ericsson <ae@op5.se> wrote:
>> They already have, but every now and then someone comes along and suggest
>> a complete rewrite in some other language. So far we've had Java (there's
>> always one...), Python and now C++.
>
> Since this "complete rewrite" was mentioned in multiple emails I'd
> like to rectify that:
> What I'm offering (for Git) is to use C++ as a "better C".
> Don't change any existing *working* code, but start introducing simple
> C++ constructs in the new code.

You are aware that the Linux kernel was kept compilable under g++ for
a while in its history?  You'll need more than vague words to erase
the memories from that experiment...

Just compiling under C++, with no source changes, is likely to impact
performance and compile time rather badly, not to mention portability
(you need the C++ runtime, for one thing).

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 20:56                         ` Pierre Habouzit
@ 2007-09-07 22:54                           ` Walter Bright
  0 siblings, 0 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 22:54 UTC (permalink / raw)
  To: git

Pierre Habouzit wrote:
>   Sure, but it does not works on amd64 properly (and it's the
> architecture I care about) and is not ready for the current gcc (4.2,
> only 4.1 builds) and so on. It's not as stable as DMD is. It does not
> lags too much version-wise, it lags in maturity. But well, youth has a
> cure: time :)

Yes, and the more people use it, the better it will get. These are all 
environmental problems, not technical limitations of the language.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 20:27                             ` David Kastrup
@ 2007-09-07 23:16                               ` Walter Bright
  0 siblings, 0 replies; 102+ messages in thread
From: Walter Bright @ 2007-09-07 23:16 UTC (permalink / raw)
  To: git

David Kastrup wrote:
> The problem is a toy problem: in real applications,

Necessarily, to make an example suitable for a n.g. post, I ruthlessly 
cut down the size of it. This can have the inadvertent effect of making 
it appear trivial.

> you'll need to
> access several data structures using the same index, and you'll need
> to be able to assign index values to temporary variables and so on.

The index is available:

	foreach (index, value; array)
	{
		writefln("array[%s] = %s", index, value);
	}

and it isn't necessary to worry about what the correct type for index 
is, as it is inferred.

> So being able to hide the type of an index in one very specific
> application (looping through a single array completely)

  foreach'ing over a subset (i.e. slice) of an array:

	foreach (value; array[5 .. $])
		... loop from 5 to the end ...

> at one place is not going to buy you much.

Experience with foreach in real code shows that the for loop is what 
becomes a rarity. Simple as it is, foreach is one of the best liked 
improvements D has. And I speak as one who has written so many for loops 
that spewing out:

	for (int i = 0; i < 10; i++)

is a 'finger' macro for me, i.e. my fingers blit it out without even 
thinking about it.

 > Anyway, D is pretty much irrelevant as a perspective for git, so you
 > should take it to a language advocacy group.

I wished to answer your specific comments in this post.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 10:21           ` Johannes Schindelin
@ 2007-09-08  0:32             ` Dmitry Kakurin
  2007-09-08  6:24               ` David Kastrup
  2007-09-08 23:25               ` Alex Riesen
  0 siblings, 2 replies; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-08  0:32 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Linus Torvalds, Matthieu Moy, Git

On 9/7/07, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> Hi,
>
> On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
>
> > Anyway I don't mean to start a religious C vs. C++ war.
>
> You have a very strange way of not meaning to start a C vs. C++ war.

I honestly didn't. I didn't even think it's possible. In the
environment of mainstream commercial software development the last war
on this subj was over 8-10 years ago.
Even wars like "do we use exceptions/templates/stl" are pretty much
over. Nowadays it's "do we use Boost", or "do we use template
metaprogramming". But even more often it's Java/C# vs. C++.

That's why I was wondering how come C was chosen for Git.

> > It's a matter of beliefs and as such pointless.
>
> No, it's not.  As has been shown by some very good _arguments_.  Once you
> have facts to back up your claims, it is not any belief any longer.

Well I've heard *opinions* and anecdotal evidence. No facts though.
And it's not surprising. There could be no hard facts in such a
matter. It always boils down to "most of all, I want my software to be
X" where X is different for different people (fast,maintainable,quick
to market, scalable, beautiful, etc ... to name a few).
With different values of X any debate is pointless. And X is exactly
a matter of beliefs.

Anyway my curiosity is satisfied (thru the roof so to speak) and I
think it's enough on the subj. It has reminded me of good old times
though.

-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 22:28                       ` David Kastrup
@ 2007-09-08  0:37                         ` Dmitry Kakurin
  2007-09-08  6:25                           ` David Kastrup
  0 siblings, 1 reply; 102+ messages in thread
From: Dmitry Kakurin @ 2007-09-08  0:37 UTC (permalink / raw)
  To: David Kastrup; +Cc: Andreas Ericsson, Linus Torvalds, Matthieu Moy, Git

On 9/7/07, David Kastrup <dak@gnu.org> wrote:
> Just compiling under C++, with no source changes, is likely to impact
> performance and compile time rather badly

This in fact is a very specific statement. Would you care to back it
up with facts?

-- 
- Dmitry

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:12             ` Linus Torvalds
  2007-09-07  1:40               ` alan
  2007-09-07  5:09               ` Walter Bright
@ 2007-09-08  0:56               ` John 'Z-Bo' Zabroski
  2007-09-08  6:36                 ` David Kastrup
  2007-09-19 19:56               ` Steven Burns
  3 siblings, 1 reply; 102+ messages in thread
From: John 'Z-Bo' Zabroski @ 2007-09-08  0:56 UTC (permalink / raw)
  To: git

Linus Torvalds <torvalds <at> linux-foundation.org> writes:

> 
> And if you want a fancier language, C++ is absolutely the worst one to 
> choose. If you want real high-level, pick one that has true high-level 
> features like garbage collection or a good system integration, rather than 
> something that lacks both the sparseness and straightforwardness of C, 
> *and* doesn't even have the high-level bindings to important concepts. 
> 
> IOW, C++ is in that inconvenient spot where it doesn't help make things 
> simple enough to be truly usable for prototyping or simple GUI 
> programming, and yet isn't the lean system programming language that C is 
> that actively encourages you to use simple and direct constructs.
> 
> 				Linus
> 

I want code that is Correct, Explicit, Fast, and in that order.

I'm 23 years old and learned C++ when I was 13.  Back then, my compiler didn't
even support "bleeding edge" C++ language features like namespaces.  I'm not a
C++ expert, and I don't have the ego to call myself a superb programmer.  The
largest program I've written is 10K SLOC in C.  Yet, I'd like to participate in
this discussion, if that is OKay =)

I do think I am capable of an honest critique of the downside of C++:

_Problems_ _With_ _C++_

 *size*
    On my bookshelf, most recent editions of the canonical C++ _books_:
        Accelerated C++: Practical Programming by Example (336 pages)
        The C++ Standard Template Library: A Tutorial and Reference (832 pages)
        Effective C++: 50 Specific Ways to Improve Your Programs and Design (288
pages)
        More Effective C++: 35 New Ways to Improve Your Programs and Designs
(336 pages)
        Exceptional C++: 47 Engineering Puzzles, Programming Problems, and
Solutions (240 pages)
        More Exceptional C++: 40 New Engineering Puzzles, Programming Problems,
and Solutions (304 pages) 
        The C++ Programming Language (1030 pages)
        Modern C++ Design: Generic Programming and Design Patterns Applied (352
pages)
        C++ Templates: The Complete Guide (552 pages)

    Altogether, that is 3918 pages.  K&R, the canonical C _book_, is 272 pages.
 Becoming a C++ language lawyer is much harder than becoming a C language
lawyer.  Language lawyers know "how not to hang oneself" while programming in
the language.  I don't know how many of these titles are translated to other
languages, however, I am sure the *effort* required to translate all of them is
significant.  Open source is more successful if there is a lingua franca for
programming, and that is C.  Now, it may move away from C over time, but it will
*never* be C++ because it's encyclopedic.

 *hidden complexity*
    (1) it's hard to say what code will compile down to.  viz., constructors can
be elided, but there is no fitness warranty; profiling your compiler to find out
whether it is elided is tedious and "searching for secrets" that should be
_explicit_
    (2) people don't understand static polymorphism and compile-time dispatch;
people are used to objects sending messages dynamically (run-time dispatch)
    (3) coercion
    (4) networks of objects are not explicitly laid out, hiding quadratically
complex patterns of communication between objects
    (5) data structure and data flow come before algorithms.  Sometimes, data
structure dictates data flow (ad-hoc networks of objects); sometimes, data flow
dictates data structure (one of life's most disagreeable tasks - waiting in line
- is characterized as FIFO).  This, I feel, is the most important point, because
the first rule of programming is to figure out what you want to say before you
figure out how to say it.  In C++, ad-hoc networks of objects with cyclic
message paths are all too easy to create [see (4)] which means _code_ _is_ _not_
_explicit_ and as a result _code_ _is_ _not_ _fast_.

 *transfer semantics on objects are not robust*
    this ties into (1) in hidden complexity
    the code author needs to specify a lot of boilerplate to achieve desired
transfer semantics on objects.  Similarly, the code audience, be it reviewer,
maintainer or merger, needs to read a lot of boilerplate to understand how
objects get moved around in memory.  Moreover, most of these concepts are
intuitively declarative in nature, such as a parent object/child object relation.

 *poor re-use of effort*
    "code re-use" is a misnomer; when programmers speak of code-reuse they mean
re-use of effort.  There is no benefit to polymorphism if effort cannot be
consolidated easily.

 *C++ Standard iffy*
    Some things just disappear quickly for *frantic* reasons (strstream was
removed for aesthetics), indicating not enough foresight into what is important.
 I do not want to pick a language where I have to worry about features in its
"standard library" becoming deprecated mainly for aesthetics.  As Dijkstra
preached, programming is _not_ supposed to be a frantic exercise.

 *usually, better options*
    See C++??: A Critique of C++ and Programming and Language Trends in the 1990s
 by Ian Joyner http://web.mac.com/joynerian/iWeb/Ian%20Joyner/CPPCritique.pdf
(Somewhat outdated, but many of the points are intrinsic and will forever be
relevant).  You can add to the list of better options D 1.0.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-08  0:32             ` Dmitry Kakurin
@ 2007-09-08  6:24               ` David Kastrup
  2007-09-08 23:25               ` Alex Riesen
  1 sibling, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-08  6:24 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Johannes Schindelin, Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> On 9/7/07, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>
>> No, it's not.  As has been shown by some very good _arguments_.
>> Once you have facts to back up your claims, it is not any belief
>> any longer.
>
> Well I've heard *opinions* and anecdotal evidence. No facts though.

Anecdotal evidence _is_ hard facts.  That's what experience is all
about.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-08  0:37                         ` Dmitry Kakurin
@ 2007-09-08  6:25                           ` David Kastrup
  0 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-08  6:25 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Andreas Ericsson, Linus Torvalds, Matthieu Moy, Git

"Dmitry Kakurin" <dmitry.kakurin@gmail.com> writes:

> On 9/7/07, David Kastrup <dak@gnu.org> wrote:
>> Just compiling under C++, with no source changes, is likely to impact
>> performance and compile time rather badly
>
> This in fact is a very specific statement. Would you care to back it
> up with facts?

Read up on the Linux kernel history in the archives.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-08  0:56               ` John 'Z-Bo' Zabroski
@ 2007-09-08  6:36                 ` David Kastrup
  0 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-08  6:36 UTC (permalink / raw)
  To: John 'Z-Bo' Zabroski; +Cc: git

John 'Z-Bo' Zabroski <johnzabroski@yahoo.com> writes:

> Linus Torvalds <torvalds <at> linux-foundation.org> writes:
>
>> IOW, C++ is in that inconvenient spot where it doesn't help make
>> things simple enough to be truly usable for prototyping or simple
>> GUI programming, and yet isn't the lean system programming language
>> that C is that actively encourages you to use simple and direct
>> constructs.
>
> I want code that is Correct, Explicit, Fast, and in that order.

One beef I have with C++ is its automatic conversion rules.  They were
obviously designed with two goals:

a) behave as C when not using user-defined types.  That's ok.

b) behave like Fortran in mixed-type expressions involving "complex"
   when using C++ (with any arbitrary user-defined type taking the
   role of "complex").

And b is just madness.  Not every user-defined arithmetic type is
complex.  I did some work using modular arithmetic (GF(65521) and
similar) and it was some hard work to keep values going through the
wrong arithmetic conversions.  Basically trial and error and reading
the generated assembly code and head scratching and standard-reading.

In short: the automatic conversions made it hard to express what one
wanted to get done, both for compiler as well as programmer.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-08  0:32             ` Dmitry Kakurin
  2007-09-08  6:24               ` David Kastrup
@ 2007-09-08 23:25               ` Alex Riesen
  1 sibling, 0 replies; 102+ messages in thread
From: Alex Riesen @ 2007-09-08 23:25 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Johannes Schindelin, Linus Torvalds, Matthieu Moy, Git

Dmitry Kakurin, Sat, Sep 08, 2007 02:32:09 +0200:
> On 9/7/07, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > Hi,
> >
> > On Thu, 6 Sep 2007, Dmitry Kakurin wrote:
> >
> > > Anyway I don't mean to start a religious C vs. C++ war.
> >
> > You have a very strange way of not meaning to start a C vs. C++ war.
> 
> I honestly didn't. I didn't even think it's possible. In the
> environment of mainstream commercial software development the last war
> on this subj was over 8-10 years ago.

It is because the "environment of mainstream commercial software
development" is stuck "8-10" years back from now.

> Even wars like "do we use exceptions/templates/stl" are pretty much
> over. Nowadays it's "do we use Boost", or "do we use template
> metaprogramming". But even more often it's Java/C# vs. C++.

Now that's a stupid argument to bring up. Commercial software
development is where the most stupid mistakes are made and repeated.

> That's why I was wondering how come C was chosen for Git.

"Just to annoy mainstream commercial software developers" would be a
good reason.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-07 20:22                           ` Walter Bright
  2007-09-07 20:27                             ` David Kastrup
@ 2007-09-08 23:50                             ` Andreas Ericsson
  2007-09-09  0:37                               ` Pierre Habouzit
  1 sibling, 1 reply; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-08 23:50 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright wrote:
> David Kastrup wrote:
>> Again, C won't keep you from shooting yourself in the foot.
> 
> Right, it won't. A good systems language should do what it can to 
> prevent the programmer from *inadvertently* shooting himself in the 
> foot, while allowing him to *deliberately* shoot himself in the foot.
> 

No, a good systems language should do exactly what it's told. Supporting
tools should tell the programmer if he's risking shooting himself in the
foot.

> 
>> You can tell C compilers to
>> check all array accesses, but that is a performance issue.
> 
> Runtime checking of arrays in D is a performance issue too, so it is 
> selectable via a command line switch.

Same as in C then.

> But more importantly,
> 
> 2) For dynamically sized arrays, the dimension of the array is carried 
> with the array, so loops automatically loop the correct number of times. 
> No runtime check is necessary, and it's easier for the code reviewer to 
> visually check the code for correctness.
> 

But this introduces handy but, strictly speaking, unnecessary overhead as
well, meaning, in short; 'D is slower than C, but easier to write code in'.

So in essence, it's a bit like Python, but a teensy bit faster and a lot
easier to shoot yourself in the foot with.

What was the niche you were going for when you thought up D? It can't have
been systems programming, because *any* extra baggage is baggage one would
like to get rid of. If it was application programming I fail to see how one
more language would help, as there will be portability problems galore and
it's still considerably slower to develop in than fe Python, while at the
same time being considerably easier to mess up in.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 14:13                       ` Wincent Colaiuta
@ 2007-09-09  0:09                         ` Andreas Ericsson
  0 siblings, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-09  0:09 UTC (permalink / raw)
  To: Wincent Colaiuta
  Cc: Johannes Schindelin, Dmitry Kakurin, Matthieu Moy, Git, Linus Torvalds

Wincent Colaiuta wrote:
> El 7/9/2007, a las 15:58, Andreas Ericsson escribió:
> 
>> Yes, but that's what I said in the original email as well. C is just so
>> much more pleasant to write in that the only place you'd (sanely) use
>> asm is in exactly these tight loops, where the code is likely to be used
>> and reused until the algorithm it describes is no longer a viable option
>> for doing what it was originally designed to do.
>>
>> It still proves the point though, as surely as n+1 > n for any value 
>> of n:
>> Hand-optimized assembly is faster than compiler-optimized C code.
> 
> In a theoretical ideal world, yes; no one would argue that C is faster 
> than fine-tuned assembly.
> 
> But in the *real world* rewriting Git in assembly would be like painting 
> a house using a single horse hair instead of a paint brush or roller. 
> Your SHA-1 example is a perfect example of where you benefit from doing 
> a tiny embellished detail using the single hair (assembly) and leave all 
> the rest in C.
> 
> In the real world and not the theoretical ideal world, it's not just 
> about the diminishing returns you get from writing more and more of a 
> code base in assembly instead of just the performance-critical 
> bottlenecks; it's that you're more likely to make subtle mistakes or 
> even make things slower. GCC does a remarkable job of optimizing in a 
> huge number of use cases, and best of all, it does it for free. Personal 
> opinion, of course, but that's the way I think it is.
> 

The discussion was theoretical from the beginning. Nobody's arguing that
git should be rewritten in asm, and you've been preaching to the choir far
too long now. I'll just drop this thread.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 19:23                 ` Walter Bright
  2007-09-07 19:40                   ` David Kastrup
@ 2007-09-09  0:25                   ` Andreas Ericsson
  2009-09-17 16:23                   ` Bernd Jendrissek
  2 siblings, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-09  0:25 UTC (permalink / raw)
  To: Walter Bright; +Cc: git

Walter Bright wrote:
> Andreas Ericsson wrote:
>> Walter Bright wrote:
>>> 1) You wind up having to implement the complex, dirty details of 
>>> things yourself. The consequences of this are:
>>>
>>>    a) you pick a simpler algorithm (which is likely less efficient - 
>>> I run across bubble sorts all the time in code)
>>>
>>>    b) once you implement, tune, and squeeze all the bugs out of those 
>>> complex, dirty details, you're reluctant to change it. You're 
>>> reluctant to try a different algorithm to see if it's faster. I've 
>>> seen this effect a lot in my own code. (I translated a large body of 
>>> my own C++ code that I'd spent months tuning to D, and quickly 
>>> managed to get significantly more speed out of it, because it was 
>>> much simpler to try out different algorithms/data structures.)
>>>
>>
>> I haven't seen this in the development of git, although to be fair, you
>> didn't mention the number of developers that were simultaneously working
>> on your project.
> 
> On my project, one. But I've seen this problem repeatedly in other 
> projects that had multiple developers. For example, I used to use 
> version 1 of an assembler. It was itself written entirely in assembler. 
> It ran *incredibly* slowly on large asm files. But it was written in 
> assembler, which is very fast, so how could that be?
> 
> Turns out, the symbol table used internally was a linear one. A linear 
> symbol table is easy to implement, but doesn't scale well at all. A 
> linear symbol table was implemented because it was just harder to do 
> more advanced symbol table algorithms in assembler. In this case, a 
> higher level language re-implementation made the assembler much faster, 
> even though that implementation was SLOWER in every detail. It was 
> faster overall, because it was easier to develop faster algorithms.
> 

Well, when the ease-of-coding vs the exec-speed of D vs C is that of
C vs asm, C will be dead fairly soon. However, since C is so ingrained
in every language designer's head, I find that unlikely to happen any
time soon.

> 
>> Opensource projects with many contributors (git, linux) work differently,
>> since one or a few among the plethora of authors will almost always be
>> a true expert at the problem being solved.
> 
> That is a nice advantage. I don't think many projects can rely on having 
> the best in the business working on them, though <g>.
> 

True that. I know a fair few projects that could have done with borrowing
one or two proper gurus, but even opensource programmers are selfish in
that we usually only work for something that benefits ourselves.

> 
>> The point is that, given enough developers, *someone* is bound to
>> find an algorithm that works so well that it's no longer worth
>> investing time to even discuss if anything else would work better,
>> either because it moves the performance bottleneck to somewhere else
>> (where further speedups would no longer produce humanly measurable
>> improvements), or because the action seems instantaneous to the user
>> (further improvements simply aren't worth it, because no valuable
>> resource will be saved from it).
> 
> Sure, but I suggest that few projects reach this maximum.

True again, but given what I said above holds, it would be madness to
move from the lingua franca of oss hacking to a less common one, as it
would mean fewer eyes on the code.

> Case in point: 
> ld, the gnu linker. It's terribly slow. To see how slow it is, compare 
> it to optlink (the 15 years old one that comes with D for Windows). So I 
> don't believe there is anything inherent about linking that should make 
> ld so slow. There's some huge leverage possible in speeding up ld 
> (spreading out that saved time among all the gnu developers).
> 
> So while git may have reached a maximum in performance, I don't think 
> this principle is applicable in general, even for very widely used open 
> source projects that would profit greatly from improved performance.
> 

Interesting. I recently did a spot of work comparing various string-hashing
algorithms. Perhaps I should head over to the ld camp and see if I can help.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 22:17                     ` Dmitry Kakurin
  2007-09-07 22:28                       ` David Kastrup
@ 2007-09-09  0:29                       ` Andreas Ericsson
  1 sibling, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-09  0:29 UTC (permalink / raw)
  To: Dmitry Kakurin; +Cc: Linus Torvalds, Matthieu Moy, Git

Dmitry Kakurin wrote:
> On 9/6/07, Andreas Ericsson <ae@op5.se> wrote:
>> They already have, but every now and then someone comes along and suggest
>> a complete rewrite in some other language. So far we've had Java (there's
>> always one...), Python and now C++.
> 
> Since this "complete rewrite" was mentioned in multiple emails I'd
> like to rectify that:
> What I'm offering (for Git) is to use C++ as a "better C".
> Don't change any existing *working* code, but start introducing simple
> C++ constructs in the new code.
> Git is simple enough to not require any high-level abstractions. But
> some utility classes could make code much simpler.
> 

There are far too many highly valuable contributors that have spoken
against C++ for me to believe that C++ and C will ever co-exist in the
official git repo. Good thing utility classes can be developed on top
of the existing C-code, but in a separate repo, and packed into a
library. That way, you get some hacking ground for your beloved C++
coders while the current git contributors can keep contributing in the
language they like best.


> And BTW, I don't even like C++ that much :-), I just like it much
> better than C.  I've been saying that C++ is a legacy language for
> quite some time now. But we will use it for many years to come because
> the size of this legacy code is huge, so there will be plenty of C++
> developers available (to contribute to Git :-).

The C code base is a lot larger and C++ will drop dead pretty fast if it's
ever removed or left unmaintained. So much for dinosaurs...

> And C++ is the only way to move with existing C codebase.

Complete and utter BS. It can also stay in C, or get language bindings for
Python/Perl/PHP/LUA(?)/whatever, or both.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-08 23:50                             ` Andreas Ericsson
@ 2007-09-09  0:37                               ` Pierre Habouzit
  2007-09-09  1:36                                 ` Andreas Ericsson
  0 siblings, 1 reply; 102+ messages in thread
From: Pierre Habouzit @ 2007-09-09  0:37 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Walter Bright, git

[-- Attachment #1: Type: text/plain, Size: 4510 bytes --]

On Sat, Sep 08, 2007 at 11:50:34PM +0000, Andreas Ericsson wrote:
> Walter Bright wrote:
> > David Kastrup wrote:
> > > Again, C won't keep you from shooting yourself in the foot.
> > Right, it won't. A good systems language should do what it can to 
> > prevent the programmer from *inadvertently* shooting himself in the 
> > foot, while allowing him to *deliberately* shoot himself in the foot.
>
> No, a good systems language should do exactly what it's told.
> Supporting tools should tell the programmer if he's risking shooting
> himself in the foot.

  I beg to differ. I mean, knowing enough of D, I think that what Walter
tries to say is that a good language should provide constructions that
when used prevent the programmer from shooting himself in both feet at the
same time.

  D supports most of the C constructions, so when you want to juggle
with razor blades, you're free to do so in D. Though, the language
provides idioms that prevent you from making stupid mistakes when used. And
that is great.

  D is not Java, you have pointers, you can deal with memory
explicitly, you can do whatever you can do in C with no or very little
overhead. Or you can use higher level D, at your own discretion.

> > > You can tell C compilers to
> > > check all array accesses, but that is a performance issue.
> > Runtime checking of arrays in D is a performance issue too, so it is 
> > selectable via a command line switch.
>
> Same as in C then.

  HAHAHAHAHAHA. Please, who do you try to convince here ? Except in the
local scope, there are few differences between a foo* and a foo[] in C.

> > But more importantly,
> > 2) For dynamically sized arrays, the dimension of the array is carried
> > with the array, so loops automatically loop the correct number of times.
> > No runtime check is necessary, and it's easier for the code reviewer to
> > visually check the code for correctness.
>
> But this introduces handy but, strictly speaking, unnecessary overhead
> as well, meaning, in short; 'D is slower than C, but easier to write
> code in'.

  That's BS. See the strbuf API I've been pushing recently ? It has
simplified git's code a lot, because each time git had to deal with a
growing string, it had to deal with at least three variables: the buffer
pointer, the current occupied length, and its allocated size. That was
three things to have variable names for, and to pass to functions.

  Now instead, it's just one struct. D gives that gratis. There is no
performance loss because you _need_ to do the same. How do you deal with
dynamic arrays if you don't store their length and size somewhere? Or
are you the kind of programmer that writes:

  /* 640kb should be enough for everyone… */
  some_type *array = malloc(640 << 10);


> So in essence, it's a bit like Python, but a teensy bit faster and a
> lot easier to shoot yourself in the foot with.

> What was the niche you were going for when you thought up D? It can't
> have been systems programming, because *any* extra baggage is baggage
> one would like to get rid of. If it was application programming I fail
> to see how one more language would help, as there will be portability
> problems galore and it's still considerably slower to develop in than
> fe Python, while at the same time being considerably easier to mess up
> in.

  Right now I'm just laughing. There are for sure overheads in some
places of D, but the example you take, and what you try to attack in D
is definitely not where you lose any kind of performance. You could have
attacked the GC instead (which is after all an easy classical target).

  Just to evaluate the silliness of your arguments:
  * http://www.digitalmars.com/d/comparison.html so that you can tell
    what the D features really are,
  * http://shootout.alioth.debian.org/gp4/benchmark.php?test=all&lang=all
    so that you can know what the D performance really is about. Of
    course those are only micro benchmarks, but well, python is "just"
    15 times slower than D, and D seems to be 10% slower. Well then I'm
    okay with D, I'm ready to buy 10% faster CPUs and avoid a lot of
    painful debugging time. In my world, 10% faster hardware is cheaper
    by many orders of magnitude than skilled programmers, but YMMV.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String   Library.
  2007-09-09  0:37                               ` Pierre Habouzit
@ 2007-09-09  1:36                                 ` Andreas Ericsson
  0 siblings, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-09  1:36 UTC (permalink / raw)
  To: Pierre Habouzit, Andreas Ericsson, Walter Bright, git

Pierre Habouzit wrote:
> On Sat, Sep 08, 2007 at 11:50:34PM +0000, Andreas Ericsson wrote:
> 
>>>> You can tell C compilers to
>>>> check all array accesses, but that is a performance issue.
>>> Runtime checking of arrays in D is a performance issue too, so it is 
>>> selectable via a command line switch.
>> Same as in C then.
> 
>   HAHAHAHAHAHA. Please, who do you try to convince here ? Except in the
> local scope, there are few differences between a foo* and a foo[] in C.
> 

"Runtime checking of arrays is a performance issue." It's true whether it's
done manually by the coder or by the compiler. The difference is that in C,
you get to choose where it should be done.


>>> But more importantly,
>>> 2) For dynamically sized arrays, the dimension of the array is carried
>>> with the array, so loops automatically loop the correct number of times.
>>> No runtime check is necessary, and it's easier for the code reviewer to
>>> visually check the code for correctness.
>> But this introduces handy but, strictly speaking, unnecessary overhead
>> as well, meaning, in short; 'D is slower than C, but easier to write
>> code in'.
> 
>   That's BS. See the strbuf API I've been pushing recently ? It has
> simplified git's code a lot, because each time git had to deal with a
> growing string, it had to deal with at least three variables: the buffer
> pointer, the current occupied length, and its allocated size. That was
> three things to have variable names for, and to pass to functions.
> 

Yup. I applaud your efforts, but it does come with a slight overhead,
except where it replaces faulty code. In practice, it's probably better
to use the api for all the string-handling, as none of it is performance-
critical.


>   Now instead, it's just one struct. D gives that gratis. There is no
> performance loss because you _need_ to do the same. How do you deal with
> dynamic arrays if you don't store their length and size somewhere? Or
> are you the kind of programmer that writes:
> 
>   /* 640kb should be enough for everyone… */
>   some_type *array = malloc(640 << 10);
> 

No, but it would depend on what I am to do with it.

> 
>> So in essence, it's a bit like Python, but a teensy bit faster and a
>> lot easier to shoot yourself in the foot with.
> 
>> What was the niche you were going for when you thought up D? It can't
>> have been systems programming, because *any* extra baggage is baggage
>> one would like to get rid of. If it was application programming I fail
>> to see how one more language would help, as there will be portability
>> problems galore and it's still considerably slower to develop in than
>> fe Python, while at the same time being considerably easier to mess up
>> in.
> 
>   Right now I'm just laughing. There are for sure overheads in some
> places of D, but the example you take, and what you try to attack in D
> is definitely not where you lose any kind of performance. You could have
> attacked the GC instead (which is after all an easy classical target).
> 

I was asking what role D was designed to fill. I didn't mean it as an
attack, but re-reading what I wrote earlier I see it came off a bit harsh.


>   Just to evaluate the silliness of your arguments:
>   * http://www.digitalmars.com/d/comparison.html so that you can tell
>     what the D features really are,

You may notice that the feature-list is being provided by the creators
and marketeers of the D language. Walter Bright certainly seems like a
nice enough person, but it's possible it's a tad biased.


>   * http://shootout.alioth.debian.org/gp4/benchmark.php?test=all&lang=all
>     so that you can know what the D performance really is about. Of
>     course those are only micro benchmarks, but well, python is "just"
>     15 times slower than D, and D seems to be 10% slower.


I get it to 7.7xC and 1.2xC, respectively, but whatever. It still means
performance-critical apps will be written in C, while
insert-script-language-of-choice will still be used for prototyping and
not-so performance-critical apps.


> Well then I'm
>     okay with D, I'm ready to buy 10% faster CPUs and avoid a lot of
>     painful debugging time. In my world, 10% faster hardware is cheaper
>     by many orders of magnitude than skilled programmers, but YMMV.
> 

I'm curious as to how many fewer bugs D developers write compared to C
programmers. I guess it's hard to do a fair test given the comparatively
shallow pool of D gurus around, but it'd still be interesting to see a
practical test. 20% increase in runtime is certainly acceptable for
never having to see a bug again, but is it acceptable for 10% fewer bugs?
Or 20% fewer?

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  1:12             ` Linus Torvalds
                                 ` (2 preceding siblings ...)
  2007-09-08  0:56               ` John 'Z-Bo' Zabroski
@ 2007-09-19 19:56               ` Steven Burns
  3 siblings, 0 replies; 102+ messages in thread
From: Steven Burns @ 2007-09-19 19:56 UTC (permalink / raw)
  To: git

To me, the only thing that C++ has that all other mentioned languages lack 
is the power you get from the templates and generic programming.
Sorting will always be faster if you can call the comparison function 
directly without using a function pointer, and the only way you can create a 
generic sorting algorithm is that way.

Thinking about it with a cold head, most things to hate about C++ are not in 
the language but in its libraries.
The only feature I hate from the language itself is the preprocessor 
(macros), which you get in C too.

And maybe I also hate the fact that C++ allows inexperienced programmers
to create a bunch of classes and hierarchies that make sense to nobody but
them. Or even worse, inexperienced programmers start writing their own
frameworks, wrapping and re-wrapping the same good old C function one
thousand times.

I guess that is why most C++ based projects out there have a strict list of 
rules and conventions, you cannot have a stable project without them.

But, nothing prevents anybody from programming in C++ the way you describe, 
using simple and clear core structures with some basic methods that 
complement them (not obscure them) and make it easier to write the 
algorithms.
Sadly, once you start using std::string, their overly complicated and fancy 
iostreams, and bulky classes that hide too much from you, I have no other 
choice than to agree and call the whole thing a mess.

Steven Burns


"Linus Torvalds" <torvalds@linux-foundation.org> wrote in message 
news:alpine.LFD.0.999.0709070203200.5626@evo.linux-foundation.org...
>
>
> On Fri, 7 Sep 2007, Linus Torvalds wrote:
>>
>> The fact is, git is better than the other SCM's. And good taste (and C) 
>> is
>> one of the reasons for that.
>
> To be very specific:
> - simple and clear core datastructures, with *very* lean and aggressive
>   code to manage them that takes the whole approach of "simplicity over
>   fancy" to the extreme.
> - a willingness to not abstract away the data structures and algorithms,
>   because those are the *whole*point* of core git.
>
> And if you want a fancier language, C++ is absolutely the worst one to
> choose. If you want real high-level, pick one that has true high-level
> features like garbage collection or a good system integration, rather than
> something that lacks both the sparseness and straightforwardness of C,
> *and* doesn't even have the high-level bindings to important concepts.
>
> IOW, C++ is in that inconvenient spot where it doesn't help make things
> simple enough to be truly usable for prototyping or simple GUI
> programming, and yet isn't the lean system programming language that C is
> that actively encourages you to use simple and direct constructs.
>
> Linus 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  6:15                   ` Theodore Tso
@ 2007-09-20 14:06                     ` Steven Burns
  2007-09-20 14:56                       ` Andreas Ericsson
  0 siblings, 1 reply; 102+ messages in thread
From: Steven Burns @ 2007-09-20 14:06 UTC (permalink / raw)
  To: git

> a = b + "/share/" + c + serial_num;
>
> where you can have absolutely no idea how many memory allocations are
> done, due to type coercions, overloaded operators

You are assuming (incorrectly) everybody will use dumb string classes like 
that.

It is very possible to create a string class that instead of allocating all
those strings simply concatenates tiny temporary objects and performs one
single operation in the end. Not to mention those temporaries are optimized
away by any decent compiler and you end up with code that runs at the same
speed as your C code.
I've done it, many other programmers have. As a reference, I'd like to
mention Matthew Wilson's chapter on efficient string concatenation in his
book "Imperfect C++". He uses expression templates (that's the technique I
just described) and gets impressive results.

With that said, your point is valid. 90% of C++ programmers will use string
classes that are very inefficient for concatenation, starting with
std::string which I hate for that reason (and many other reasons, e.g. you 
have to
resort to Boost for mundane things like trimming)

Steven Burns

"Theodore Tso" <tytso@mit.edu> wrote in message 
news:20070907061554.GB30161@thunk.org...
> On Thu, Sep 06, 2007 at 08:09:23PM -0700, Dmitry Kakurin wrote:
>> > Total BS. The string/memory management is not at all relevant. Look at 
>> > the
>> > code (I bet you didn't). This isn't the important, or complex part.
>>
>> Not only have I looked at the code, I've also debugged it quite a bit.
>> Granted most of my problems had to do with handling paths on Windows
>> (i.e. string manipulations).
>
> I consider string manipulation to be one of the places where C++ is a
> total disaster.  It's way too easy for idiots to do something like this:
>
> a = b + "/share/" + c + serial_num;
>
> where you can have absolutely no idea how many memory allocations are
> done, due to type coercions, overloaded operators (good God, you can
> overload the comma operator in C++!!!), and then when something like
> that ends up in an inner loop, the result is a disaster from a
> performance point of view, and it's not even obvious *why*!
>
>> My goal is to *use* Git. When something does not work *for me* I want
>> to be able to fix it (and contribute the fix) in *shortest time
>> possible* and with *minimal efforts*. As for me it's a diversion from
>> my main activities.
>
> Yes, and if you contribute something the shortest time possible, and
> it ends up being crap, who gets to rewrite it and fix it?  I've seen
> too many C++ programs which get this kind of crap added, and it's not
> noticed right away (because C++ is really good at hiding such
> performance killers so they are not visible), and then later on, it's
> even harder to find the performance problems and fix them.
>
>> Now, I realize that I'm a very infrequent contributor to Git, but I
>> want my opinion to be heard.
>
> And if git were written in C++, it's precisely the infrequent
> contributors (who are in a hurry, who only care about the quick hack
> to get them going, and not about the long-term maintainability and
> performance of the package) that are in the position to do the
> most damage...
>
> - Ted 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-20 14:06                     ` Steven Burns
@ 2007-09-20 14:56                       ` Andreas Ericsson
  0 siblings, 0 replies; 102+ messages in thread
From: Andreas Ericsson @ 2007-09-20 14:56 UTC (permalink / raw)
  To: Steven Burns; +Cc: git

Steven Burns wrote:
>> a = b + "/share/" + c + serial_num;
>>
>> where you can have absolutely no idea how many memory allocations are
>> done, due to type coercions, overloaded operators
> 
> You are assuming (incorrectly) everybody will use dumb string classes like 
> that.
> 

Not really. He said "It's way to easy for idiots to do something like this:"
just prior to the line you quoted. I wholeheartedly agree, but in no way
does anyone assume that everybody will use dumb string classes.

I'm sure it's perfectly possible to write properly functioning programs in
C++. I know I use a few of them myself. That doesn't change the fact that
it's an idiot-friendly language to write code in that's extremely annoying
for competent programmers to fix up later.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07  8:36             ` Walter Bright
  2007-09-07  9:41               ` Andreas Ericsson
  2007-09-07 11:52               ` Wincent Colaiuta
@ 2007-09-22 16:52               ` Steven Burns
  2 siblings, 0 replies; 102+ messages in thread
From: Steven Burns @ 2007-09-22 16:52 UTC (permalink / raw)
  To: git

Another reason GC is sometimes surprisingly faster is not only that you end up
allocating fewer times, as you mention, but also that some GCs are compacting
garbage collectors, which simplifies allocation dramatically because
allocating memory is just incrementing a pointer. Compare that to the way most
C++ heaps get implemented.
I don't know if that's the case with D's GC though.

I completely understand what you say about strings and who owns them; I've
run into the same situation a hundred times, not only with strings but with
vectors, matrices, lists, etc.

After reading your post, I think I will have to revisit D sometime.
I read about it a few years ago and I got the impression some syntax 
decisions had been made to ease the writing of the compiler as opposed to 
favoring the end user/programmer, but it's been a while and maybe I was too 
quick to judge.

Steven


"Walter Bright" <boost@digitalmars.com> wrote in message 
news:fbr2iv$ugg$1@sea.gmane.org...
> Wincent Colaiuta wrote:
>> Git is all about speed, and C is the best choice for speed, especially in 
>> context of Git's workload.
>
> I can appreciate that. I originally got into writing compilers because my 
> game (Empire) ran too slowly and I thought the existing compilers could be 
> dramatically improved.
>
> And technically, yes, you can write code in C that is >= the speed of any 
> other language (other than asm). But practically, this isn't necessarily 
> so, for the following reasons:
>
> 1) You wind up having to implement the complex, dirty details of things 
> yourself. The consequences of this are:
>
>    a) you pick a simpler algorithm (which is likely less efficient - I run 
> across bubble sorts all the time in code)
>
>    b) once you implement, tune, and squeeze all the bugs out of those 
> complex, dirty details, you're reluctant to change it. You're reluctant to 
> try a different algorithm to see if it's faster. I've seen this effect a 
> lot in my own code. (I translated a large body of my own C++ code that I'd 
> spent months tuning to D, and quickly managed to get significantly more 
> speed out of it, because it was much simpler to try out different 
> algorithms/data structures.)
>
> 2) Garbage collection has an interesting and counterintuitive consequence. 
> If you compare n malloc/free's with n gcnew/collections, the malloc/free 
> will come out faster, and you conclude that gc is slow. But that misses 
> one huge speed advantage of gc - you can do FAR fewer allocations! For 
> example, I've done a lot of string manipulating programs in C. The basic 
> problem is keeping track of who owns each string. This is done by, when in 
> doubt, make a copy of the string.
>
> But if you have gc, you don't worry about who owns the string. You just 
> make another pointer to it. D takes this a step further with the concept 
> of array slicing, where one creates windows on existing arrays, or windows 
> on windows on windows, and no allocations are ever done. It's just pointer 
> fiddling.
>
> ------
> Walter Bright
> http://www.digitalmars.com  C, C++, D programming language compilers
> http://www.astoriaseminar.com  Extraordinary C++
> 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06 17:50       ` Linus Torvalds
  2007-09-07  0:21         ` Dmitry Kakurin
@ 2007-09-24 13:41         ` figo
  2007-09-24 13:57           ` David Kastrup
  2012-05-22 18:30         ` Syed M Raihan
  2 siblings, 1 reply; 102+ messages in thread
From: figo @ 2007-09-24 13:41 UTC (permalink / raw)
  To: git

http://www.research.att.com/~bs/applications.html

Just as Bjarne once wrote in his TC++PL, it's hard to teach an old dog new
tricks. It's even harder to give quality education about how to use something
to someone who doesn't want to learn.

If you hate high level, then continue programming operating systems; please NEVER
DO something else. C++ was designed to give programmers high-level tools while
still being able to take care of performance.

Portability won't be possible until after a standard is published and a couple of
years are given to the compiler developers. C++ had its standard in 1998, and add
two or three years for compiler development = 2002. "Quite recently", way more
recently than your last use of C++ I can bet.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-24 13:41         ` figo
@ 2007-09-24 13:57           ` David Kastrup
  2007-09-25 19:19             ` Steven Burns
  0 siblings, 1 reply; 102+ messages in thread
From: David Kastrup @ 2007-09-24 13:57 UTC (permalink / raw)
  To: git

figo <rcc_dark@hotmail.com> writes:

> http://www.research.att.com/~bs/applications.html
>
> just as Bjarne once wrote in his TC++PL, its hard to teach an old dog new 
> tricks. Its even harder to give quality education about how to use something 
> to someone who doesnt want to learn.
>
> you hate high level, then continue programming operative systems,
> please NEVER DO something else. C++ was designed to give programmers
> high level tools and still being able to take care about
> performance.
>
> portability wont be possible after a standard is published and some
>couple of years given to the compiler developers. C++ had its
>standard in 1998, and add two or three years for compiler development
>= 2002. "Quite recently", way more recently that your last use of C++
>I can bet.

Care to explain why there are still not two numerical C++ libraries
with compatible matrix classes?

What use is talking about portability and high level when a basic
interoperability feature that has been available since the sixties
(more than 4 decades ago) in Fortran has not yet managed to make it
into C++?  C++ by now more or less offers a (somewhat deficient)
standardized way to work with complex numbers, but matrices are still
not standardized in any manner, and libraries won't interoperate.

So C++ should get its head wrapped around the _low_ level problems
first.  It is a bloody shame that it still has not caught up with
Fortran IV (or even Fortran II) with regard to usefulness for
numerical libraries.

It is not a matter of "hating high level" to see that C++ is mostly
focused about addressing the wrong kinds of problems in the wrong
ways.  The pain/gain ratio is just bad.

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-24 13:57           ` David Kastrup
@ 2007-09-25 19:19             ` Steven Burns
  2007-09-25 19:55               ` David Kastrup
  0 siblings, 1 reply; 102+ messages in thread
From: Steven Burns @ 2007-09-25 19:19 UTC (permalink / raw)
  To: git

The C++ community in general suffers a lot from the NIH Syndrome.
Matrices, Strings, Vectors: everybody creates their own, which are always, of
course, superior to what's already available.

Again, it is not the language's fault; a language is just a language.
It's the way it has been driven.

My two cents.


"David Kastrup" <dak@gnu.org> wrote in message 
news:86odfstbc6.fsf@lola.quinscape.zz...
> figo <rcc_dark@hotmail.com> writes:
>
>> http://www.research.att.com/~bs/applications.html
>>
>> just as Bjarne once wrote in his TC++PL, its hard to teach an old dog new
>> tricks. Its even harder to give quality education about how to use 
>> something
>> to someone who doesnt want to learn.
>>
>> you hate high level, then continue programming operative systems,
>> please NEVER DO something else. C++ was designed to give programmers
>> high level tools and still being able to take care about
>> performance.
>>
>> portability wont be possible after a standard is published and some
>>couple of years given to the compiler developers. C++ had its
>>standard in 1998, and add two or three years for compiler development
>>= 2002. "Quite recently", way more recently that your last use of C++
>>I can bet.
>
> Care to explain why there are still not two numerical C++ libraries
> with compatible matrix classes?
>
> What use is talking about portability and high level when a basic
> interoperability feature that has been available since the sixties
> (more than 4 decades ago) in Fortran has not yet managed to make it
> into C++?  C++ by now more or less offers a (somewhat deficient)
> standardized way to work with complex numbers, but matrices are still
> not standardized in any manner, and libraries won't interoperate.
>
> So C++ should get its head wrapped around the _low_ level problems
> first.  It is a bloody shame that it still has not caught up with
> Fortran IV (or even Fortran II) with regard to usefulness for
> numerical libraries.
>
> It is not a matter of "hating high level" to see that C++ is mostly
> focused about addressing the wrong kinds of problems in the wrong
> ways.  The pain/gain ratio is just bad.
>
> -- 
> David Kastrup
> 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-25 19:19             ` Steven Burns
@ 2007-09-25 19:55               ` David Kastrup
  0 siblings, 0 replies; 102+ messages in thread
From: David Kastrup @ 2007-09-25 19:55 UTC (permalink / raw)
  To: git

"Steven Burns" <royalstream@hotmail.com> writes:

> The C++ community in general suffers a lot from the NIH Syndrome.
> Matrixes, Strings, Vectors, everybody creates their own which are always, or 
> course, superior to what's already available.
>
> Again, is not the language's fault, a language is just a language.
> It's the way it has been driven.

Having loose wires instead of a brake pedal in a car because the user
might prefer to brake with his teeth or by wiggling his backside or
building any other contraption of his own invention is a design
mistake.  Especially when we are talking about public transportation
with changing drivers.

Making a language huge and bloated in order to be able to use the
language itself for defining a set of basic data types is just
masturbation.  C++ has the most complicated set of implicit
conversions from any language in the world, and what for?  It is
modeled for being able to create a user-defined "complex" type which
behaves almost as well as Fortran's.  Too bad that this mostly means
everybody will define his own type (well, at least we have seen two or
three different library "standards" by now), and that the implicit
conversion rules and chains are appallingly wrong for a number of
other possible user-defined arithmetic types.

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-07 19:23                 ` Walter Bright
  2007-09-07 19:40                   ` David Kastrup
  2007-09-09  0:25                   ` Andreas Ericsson
@ 2009-09-17 16:23                   ` Bernd Jendrissek
  2 siblings, 0 replies; 102+ messages in thread
From: Bernd Jendrissek @ 2009-09-17 16:23 UTC (permalink / raw)
  To: git

Walter Bright <boost <at> digitalmars.com> writes:
> Sure, but I suggest that few projects reach this maxima. Case in point: 
> ld, the gnu linker. It's terribly slow. To see how slow it is, compare 
> it to optlink (the 15 years old one that comes with D for Windows). So I 
> don't believe there is anything inherent about linking that should make 
> ld so slow. There's some huge leverage possible in speeding up ld 
> (spreading out that saved time among all the gnu developers).

http://en.wikipedia.org/wiki/Gold_(linker)

Note that gold is written in C++; the wikipedia quasi-stub article doesn't make
this clear.  Normally that wouldn't be relevant, but in this branch of the
thread it is.  Its C++-ness seems to be making an argument, but I don't know on
which side!

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06  4:48     ` Dmitry Kakurin
                         ` (2 preceding siblings ...)
  2007-09-06 17:50       ` Linus Torvalds
@ 2010-06-10 19:12       ` Ian Molton
  2010-06-11 12:23         ` Jakub Narebski
  3 siblings, 1 reply; 102+ messages in thread
From: Ian Molton @ 2010-06-10 19:12 UTC (permalink / raw)
  To: git

Dmitry Kakurin <dmitry.kakurin <at> gmail.com> writes:

> 
> [ snip ]
> 
> When I first looked at Git source code two things struck me as odd:
> 1. Pure C as opposed to C++. No idea why. Please don't talk about 
> portability, it's BS.

Word to the wise... you effectively just told one of *the* best known
programmers of all time that they are talking BS... nice one. Hope you've got
some flameproof undies. Whats that? no? ah well...

I smell a troll, but since everyone else has had a go...

Heres some comments picked out from the thread, in no particular order...

> > You have a very strange way of not meaning to start a C vs. C++ war.

> I honestly didn't. I didn't even think it's possible. In the
> environment of mainstream commercial software development the last war
> on this subj was over 8-10 years ago.

Really? I don't know what planet you're from, but this 'war' has been raging for
decades, and will probably continue until one side or the other gets round to
using tactical nukes.

And besides, this *isn't* the commercial (closed) software world - we've moved
on. We no longer depend on closed companies handing out features like orphans in
the Victorian times...

>>> [bitfields in D]

>>   Really, I feel this is a big lack, for a language that aims at
>> simplicity, conciseness _and_ correctness.
>>
>>   OK, maybe I'm biased, I work with networks protocols all day long, so
>> I often need bitfields, but still, a lot of people deal with network
>> protocols, it's not a niche.
>
> And strictly speaking, C bitfields are completely useless for that
> purpose since the compiler is free to use whatever method he wants for
> allocating bit fields.  So if you want to write a portable program,
> you are back to making the masks yourself.

Sadly. That's always been one of the things I found annoying in C. There are
times when you want access to the types the hardware itself uses, and there are
times when you want to know your int is 32 bits long, and there isn't really a
standardised way of doing that. Of course, it's worked around in practice, but it
all seems so unnecessary.

> in the *real world* rewriting Git in assembly would be like  
> painting a house using a single horse hair instead of a paint brush  
> or roller. Your SHA-1 example is a perfect example of where you  
> benefit from doing a tiny embellished detail using the single hair  
> (assembly) and leave all the rest in C.

The above comment is pure epic win :-)

On another note, some people talked about code reuse...

IMHO Sourcecode reuse is something of a myth in any language. Sure, some small
algorithms get reused, but thats really not a language dependent characteristic.
As soon as you build something much bigger than an algorithm, it starts to need
an interface, and at that point you may as well turn it into a library. Thats
where the REAL code reuse happens. And as it happens at runtime, its good for
users - bugfixes help everyone.

On to language choice...

I have NEVER understood why people seem to think theres some kind of hierarchy
in either ease of coding or speed. You see it all the time, people think that:

assembler is faster than c is faster than c++ is faster than perl etc.

WHY? I've seen some truly brain-damaged assembler that could be outperformed by
BASIC on a BBC micro. I've seen 'handcrafted' C and C++ that looked like it was
written during a skydive whilst on crack.

languages are *tools*. Pick the most appropriate. Use two. Embrace the power of
and...

Linux makes good use of C and assembler, both compiled/assembled separately and
inline. Some stuff like accessing weird registers with oddball opcodes is
actually impossible under C. But (say) write a filesystem in assembler? No
thanks! (Not that it hasn't been done, but for the love of god, why?)

So, anyway, why do these kinds of threads never go away? Because opinions are
like arseholes. Everyone's got one. As you grow older, you learn stuff. You
hopefully don't repeat the mistakes of one's youth. (There's at least one
ill-conceived C string library out there which I'm embarrassed to admit is my
fault; hopefully it'll never leave the company I was at when I wrote it...)
These threads are where the n00bs meet the pros. Usually, the n00bs just need to
suck it up and admit it when they've been dumb. It's a very rare day when
something truly radical comes along, and it's even rarer when it's born of total
inexperience.

Nothing to see here...

All the best,

-Ian

PS. ironically, in order to post this, gmane required me to enter a word. That
word was "restraint". Gotta love karma.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2010-06-10 19:12       ` Ian Molton
@ 2010-06-11 12:23         ` Jakub Narebski
  2010-06-11 13:33           ` Dario Rodriguez
  0 siblings, 1 reply; 102+ messages in thread
From: Jakub Narebski @ 2010-06-11 12:23 UTC (permalink / raw)
  To: Ian Molton; +Cc: git, Dmitry Kakurin

Ian Molton <ian.molton@collabora.co.uk> writes:
> Dmitry Kakurin <dmitry.kakurin <at> gmail.com> writes:
>
> > [ snip ]
> > 
> > When I first looked at Git source code two things struck me as odd:
> > 1. Pure C as opposed to C++. No idea why. Please don't talk about 
> > portability, it's BS.

No gain from C++.

Also, I don't know when Dmitri wrote his post, but git has used its own
string manipulation mini-library, named strbuf, at least since the end of 2007
(Documentation/technical/api-strbuf.txt was added as a stub on 2007-11-24).
 
> > in the *real world* rewriting Git in assembly would be like  
> > painting a house using a single horse hair instead of a paint brush  
> > or roller. Your SHA-1 example is a perfect example of where you  
> > benefit from doing a tiny embellished detail using the single hair  
> > (assembly) and leave all the rest in C.

Sidenote: the block-sha1 implementation is C plus a smidgeon of assembly via
'asm'.

-- 
Jakub Narebski
Poland
ShadeHawk on #git

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2010-06-11 12:23         ` Jakub Narebski
@ 2010-06-11 13:33           ` Dario Rodriguez
  0 siblings, 0 replies; 102+ messages in thread
From: Dario Rodriguez @ 2010-06-11 13:33 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: Ian Molton, git, Dmitry Kakurin

On Fri, Jun 11, 2010 at 9:23 AM, Jakub Narebski <jnareb@gmail.com> wrote:
> Also, I don't know when Dmitri written his post, but git uses its own
> string manipulation mini-library, named strbuf, at least since end of 2007
> (Documentation/technical/api-strbuf.txt was added as stub on 2007-11-24).

An interesting point: Dmitri wrote this post in September 2007...
Why the flamewar continuation? Just curious...

Now, as a resume of flames and concepts, the 'better' things in the
string library (also the first excuse for C++ apologists, as a
repetitive piece of youknowwhat) are pure algorithms and data
structures, easy to code in C. The goal isn't in the OO design or
abstraction itself, or in the language...

Personally, I use C++ almost every day at work and I found it stupid.
I love pure C.

Cheers,
Dario (argentina)

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RFC] Convert builin-mailinfo.c to use The Better String Library.
  2007-09-06 17:50       ` Linus Torvalds
  2007-09-07  0:21         ` Dmitry Kakurin
  2007-09-24 13:41         ` figo
@ 2012-05-22 18:30         ` Syed M Raihan
  2 siblings, 0 replies; 102+ messages in thread
From: Syed M Raihan @ 2012-05-22 18:30 UTC (permalink / raw)
  To: git

Linus Torvalds <torvalds <at> linux-foundation.org> writes:

> 
> 
> On Wed, 5 Sep 2007, Dmitry Kakurin wrote:
> > 
> > When I first looked at Git source code two things struck me as odd:
> > 1. Pure C as opposed to C++. No idea why. Please don't talk about port,
> > it's BS.
> 
> *YOU* are full of bullshit.
> 
> C++ is a horrible language. It's made more horrible by the fact that a lot 
> of substandard programmers use it, to the point where it's much much 
> easier to generate total and utter crap with it. Quite frankly, even if 
> the choice of C were to do *nothing* but keep the C++ programmers out, 
> that in itself would be a huge reason to use C.
> 			Linus
> 

C++ has one weakness: ABI compatibility among compilers.
Other than that, the object model does not make things horrible.
I have seen a 15-year-old C++ application library whose old
implementation is still used to implement new enhancements/features,
and it just works seamlessly; even an old dog can learn
this old library written in C++.

I have seen C programmers constantly trying to mimic
polymorphism, inheritance and encapsulation in their C.
This is just *BS* - if you don't want C++ then don't use polymorphism,
inheritance and encapsulation in your C code!
Or else just use C++ or Java or C#.

Regards
Please note: I have always been a fan of Linus Torvalds :)
Syed Raihan

^ permalink raw reply	[flat|nested] 102+ messages in thread

end of thread, other threads:[~2012-05-22 18:35 UTC | newest]

Thread overview: 102+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-04 20:50 [RFC] Convert builin-mailinfo.c to use The Better String Library Lukas Sandström
2007-09-04 21:38 ` Alex Riesen
2007-09-04 23:01   ` Pierre Habouzit
2007-09-05 14:54 ` Kristian Høgsberg
2007-09-05 17:29   ` Matthieu Moy
2007-09-06  2:30     ` Miles Bader
2007-09-06  4:48     ` Dmitry Kakurin
2007-09-06  4:59       ` Shawn O. Pearce
2007-09-06  9:12         ` Andreas Ericsson
2007-09-06  9:35           ` Junio C Hamano
2007-09-06 10:21             ` Andreas Ericsson
2007-09-06  9:52           ` David Kastrup
2007-09-06  5:03       ` Miles Bader
2007-09-06 12:08         ` Johannes Schindelin
2007-09-06 17:50       ` Linus Torvalds
2007-09-07  0:21         ` Dmitry Kakurin
2007-09-07  0:38           ` Linus Torvalds
2007-09-07  1:08             ` Dmitry Kakurin
2007-09-07  1:27               ` Linus Torvalds
2007-09-07  3:09                 ` Dmitry Kakurin
2007-09-07  5:48                   ` David Symonds
2007-09-07  6:15                   ` Theodore Tso
2007-09-20 14:06                     ` Steven Burns
2007-09-20 14:56                       ` Andreas Ericsson
2007-09-07  6:31                   ` Andreas Ericsson
2007-09-07 22:17                     ` Dmitry Kakurin
2007-09-07 22:28                       ` David Kastrup
2007-09-08  0:37                         ` Dmitry Kakurin
2007-09-08  6:25                           ` David Kastrup
2007-09-09  0:29                       ` Andreas Ericsson
2007-09-07  6:52                   ` David Kastrup
2007-09-07 10:28                   ` Johannes Schindelin
2007-09-07 10:26                 ` Johannes Schindelin
2007-09-07  6:50               ` David Kastrup
2007-09-07  1:12             ` Linus Torvalds
2007-09-07  1:40               ` alan
2007-09-07  5:09               ` Walter Bright
2007-09-07  7:40                 ` David Kastrup
2007-09-07  8:15                   ` Walter Bright
2007-09-07  8:26                     ` David Kastrup
2007-09-07  9:14                       ` Walter Bright
2007-09-07  9:31                         ` David Kastrup
2007-09-07 20:22                           ` Walter Bright
2007-09-07 20:27                             ` David Kastrup
2007-09-07 23:16                               ` Walter Bright
2007-09-08 23:50                             ` Andreas Ericsson
2007-09-09  0:37                               ` Pierre Habouzit
2007-09-09  1:36                                 ` Andreas Ericsson
2007-09-07 11:36                   ` Wincent Colaiuta
2007-09-07  9:41                 ` Pierre Habouzit
2007-09-07 19:03                   ` Walter Bright
2007-09-07 19:31                     ` David Kastrup
2007-09-07 20:49                       ` Walter Bright
2007-09-07 19:41                     ` Pierre Habouzit
2007-09-07 19:51                       ` David Kastrup
2007-09-07 19:59                         ` Pierre Habouzit
2007-09-07 20:40                       ` Walter Bright
2007-09-07 20:56                         ` Pierre Habouzit
2007-09-07 22:54                           ` Walter Bright
2007-09-08  0:56               ` John 'Z-Bo' Zabroski
2007-09-08  6:36                 ` David Kastrup
2007-09-19 19:56               ` Steven Burns
2007-09-07  3:06           ` Wincent Colaiuta
2007-09-07  4:06             ` Paul Wankadia
2007-09-07  4:30               ` Nicolas Pitre
2007-09-07  9:19               ` Wincent Colaiuta
2007-09-07  6:25             ` Andreas Ericsson
2007-09-07 10:56               ` Johannes Schindelin
2007-09-07 11:54                 ` Andreas Ericsson
2007-09-07 12:33                   ` Wincent Colaiuta
2007-09-07 12:55                     ` Karl Hasselström
2007-09-07 13:58                     ` Andreas Ericsson
2007-09-07 14:13                       ` Wincent Colaiuta
2007-09-09  0:09                         ` Andreas Ericsson
2007-09-07 16:09                 ` David Kastrup
2007-09-07 11:30               ` Wincent Colaiuta
2007-09-07  8:36             ` Walter Bright
2007-09-07  9:41               ` Andreas Ericsson
2007-09-07 19:23                 ` Walter Bright
2007-09-07 19:40                   ` David Kastrup
2007-09-09  0:25                   ` Andreas Ericsson
2009-09-17 16:23                   ` Bernd Jendrissek
2007-09-07 11:52               ` Wincent Colaiuta
2007-09-07 19:25                 ` Walter Bright
2007-09-22 16:52               ` Steven Burns
2007-09-07  6:47           ` David Kastrup
2007-09-07  7:41             ` Andy Parkins
2007-09-07  8:08               ` David Kastrup
2007-09-07 10:21           ` Johannes Schindelin
2007-09-08  0:32             ` Dmitry Kakurin
2007-09-08  6:24               ` David Kastrup
2007-09-08 23:25               ` Alex Riesen
2007-09-24 13:41         ` figo
2007-09-24 13:57           ` David Kastrup
2007-09-25 19:19             ` Steven Burns
2007-09-25 19:55               ` David Kastrup
2012-05-22 18:30         ` Syed M Raihan
2010-06-10 19:12       ` Ian Molton
2010-06-11 12:23         ` Jakub Narebski
2010-06-11 13:33           ` Dario Rodriguez
2007-09-05 15:27 ` Kristian Høgsberg
2007-09-07 10:47 ` Lukas Sandström

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).