git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
@ 2021-06-17 11:21 Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
                   ` (3 more replies)
  0 siblings, 4 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17 11:21 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin,
	Ævar Arnfjörð Bjarmason

Converts the bundle.c code to use the string-list.c API, getting rid
of some duplication in the codebase, while doing that stop the bundle
command-line tool and its API from leaking memory in some common
cases.

Ævar Arnfjörð Bjarmason (3):
  bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  bundle.c: use a temporary variable for OIDs and names
  bundle: remove "ref_list" in favor of string-list.c API

 builtin/bundle.c | 91 ++++++++++++++++++++++++++++++++----------------
 bundle.c         | 72 +++++++++++++++++++++-----------------
 bundle.h         | 20 +++++------
 transport.c      | 11 ++++--
 4 files changed, 119 insertions(+), 75 deletions(-)

-- 
2.32.0.571.gdba276db2c


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-17 11:21 [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-17 11:21 ` Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17 11:21 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin,
	Ævar Arnfjörð Bjarmason

Fix a memory leak from the prefix_filename() function introduced with
its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
2017-03-20).

As noted in that commit the leak was intentional as a part of being
sloppy about freeing resources just before we exit, I'm changing this
because I'll be fixing other memory leaks in the bundle API (including
the library version) in subsequent commits. It's easier to reason
about those fixes if valgrind runs cleanly at the end without any
leaks whatsoever.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 79 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 24 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index ea6948110b..7778297277 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -46,7 +46,7 @@ static int parse_options_cmd_bundle(int argc,
 		const char* prefix,
 		const char * const usagestr[],
 		const struct option options[],
-		const char **bundle_file) {
+		char **bundle_file) {
 	int newargc;
 	newargc = parse_options(argc, argv, NULL, options, usagestr,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -61,7 +61,8 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	int progress = isatty(STDERR_FILENO);
 	struct strvec pack_opts;
 	int version = -1;
-
+	int die_no_repo = 0;
+	int ret;
 	struct option options[] = {
 		OPT_SET_INT('q', "quiet", &progress,
 			    N_("do not show progress meter"), 0),
@@ -76,7 +77,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 			    N_("specify bundle format version")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_create_usage, options, &bundle_file);
@@ -92,77 +93,107 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	if (progress && all_progress_implied)
 		strvec_push(&pack_opts, "--all-progress-implied");
 
-	if (!startup_info->have_repository)
+	if (!startup_info->have_repository) {
+		die_no_repo = 1;
+		goto cleanup;
+	}
+	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+cleanup:
+	free(bundle_file);
+	if (die_no_repo)
 		die(_("Need a repository to create a bundle."));
-	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	return ret;
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
 	int quiet = 0;
-
+	int ret;
 	struct option options[] = {
 		OPT_BOOL('q', "quiet", &quiet,
 			    N_("do not show bundle details")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	if (verify_bundle(the_repository, &header, !quiet))
-		return 1;
+	if (verify_bundle(the_repository, &header, !quiet)) {
+		ret = 1;
+		goto cleanup;
+	}
+
 	fprintf(stderr, _("%s is okay\n"), bundle_file);
-	return 0;
+	ret = 0;
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	return !!list_bundle_refs(&header, argc, argv);
+	ret = !!list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int die_no_repo = 0;
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
-	if (!startup_info->have_repository)
-		die(_("Need a repository to unbundle."));
-	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
+	if (!startup_info->have_repository) {
+		die_no_repo = 1;
+		goto cleanup;
+	}
+	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+cleanup:
+	if (die_no_repo)
+		die(_("Need a repository to unbundle."));
+	free(bundle_file);
+	return ret;
 }
 
 int cmd_bundle(int argc, const char **argv, const char *prefix)
-- 
2.32.0.571.gdba276db2c


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 2/3] bundle.c: use a temporary variable for OIDs and names
  2021-06-17 11:21 [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-06-17 11:21 ` Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
  3 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17 11:21 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin,
	Ævar Arnfjörð Bjarmason

In preparation for moving away from accessing the OID and name via the
"oid" and "name" slots in a subsequent commit, change the code that
accesses it to use named variables. This makes the subsequent change
smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 bundle.c    | 22 +++++++++++++++-------
 transport.c |  3 ++-
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/bundle.c b/bundle.c
index 693d619551..621708f40e 100644
--- a/bundle.c
+++ b/bundle.c
@@ -156,6 +156,9 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 	int i;
 
 	for (i = 0; i < r->nr; i++) {
+		struct object_id *oid;
+		const char *name;
+
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
@@ -164,8 +167,10 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 			if (j == argc)
 				continue;
 		}
-		printf("%s %s\n", oid_to_hex(&r->list[i].oid),
-				r->list[i].name);
+
+		oid = &r->list[i].oid;
+		name = r->list[i].name;
+		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
 }
@@ -194,7 +199,8 @@ int verify_bundle(struct repository *r,
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
 			add_pending_object(&revs, o, e->name);
@@ -202,7 +208,7 @@ int verify_bundle(struct repository *r,
 		}
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), e->name);
 	}
 	if (revs.pending.nr != p->nr)
 		return ret;
@@ -219,19 +225,21 @@ int verify_bundle(struct repository *r,
 
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), e->name);
 	}
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		commit = lookup_commit_reference_gently(r, &e->oid, 1);
+		struct object_id *oid = &e->oid;
+		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
diff --git a/transport.c b/transport.c
index 50f5830eb6..9d601c8c95 100644
--- a/transport.c
+++ b/transport.c
@@ -149,7 +149,8 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 	for (i = 0; i < data->header.references.nr; i++) {
 		struct ref_list_entry *e = data->header.references.list + i;
 		struct ref *ref = alloc_ref(e->name);
-		oidcpy(&ref->old_oid, &e->oid);
+		struct object_id *oid = &e->oid;
+		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
-- 
2.32.0.571.gdba276db2c


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-17 11:21 [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
  2021-06-17 11:21 ` [PATCH 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
@ 2021-06-17 11:21 ` Ævar Arnfjörð Bjarmason
  2021-06-19  2:12   ` Andrei Rybak
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
  3 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17 11:21 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin,
	Ævar Arnfjörð Bjarmason

Move away from the "struct ref_list" in bundle.c in favor of the
almost identical string-list.c API.

That API fits this use-case perfectly, but did not exist in its
current form when this code was added in 2e0afafebd (Add git-bundle:
move objects and references by archive, 2007-02-22), with hindsight we
could have used the path-list API, which later got renamed to
string-list. See 8fd2cb4069 (Extract helper bits from
c-merge-recursive work, 2006-07-25)

We need to change "name" to "string" and "oid" to "util" to make this
conversion, but other than that the APIs are pretty much identical for
what bundle.c made use of.

Let's also replace the memset(..,0,...) pattern with a more idiomatic
"INIT" macro, and finally add a *_release() function so to free the
allocated memory.

Before this the add_to_ref_list() would leak memory, now e.g. "bundle
list-heads" reports no memory leaks at all under valgrind.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 12 ++++-----
 bundle.c         | 64 ++++++++++++++++++++++++------------------------
 bundle.h         | 20 +++++++--------
 transport.c      | 10 +++++---
 4 files changed, 55 insertions(+), 51 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 7778297277..8d82255280 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -106,7 +106,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int quiet = 0;
 	int ret;
@@ -121,7 +121,6 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -136,11 +135,12 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	ret = 0;
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -152,7 +152,6 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -161,11 +160,12 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 	ret = !!list_bundle_refs(&header, argc, argv);
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int die_no_repo = 0;
 	int ret;
@@ -178,7 +178,6 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -189,6 +188,7 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 	}
 	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+	bundle_header_release(&header);
 cleanup:
 	if (die_no_repo)
 		die(_("Need a repository to unbundle."));
diff --git a/bundle.c b/bundle.c
index 621708f40e..d36eeee1a5 100644
--- a/bundle.c
+++ b/bundle.c
@@ -23,15 +23,6 @@ static struct {
 	{ 3, v3_bundle_signature },
 };
 
-static void add_to_ref_list(const struct object_id *oid, const char *name,
-		struct ref_list *list)
-{
-	ALLOC_GROW(list->list, list->nr + 1, list->alloc);
-	oidcpy(&list->list[list->nr].oid, oid);
-	list->list[list->nr].name = xstrdup(name);
-	list->nr++;
-}
-
 static int parse_capability(struct bundle_header *header, const char *capability)
 {
 	const char *arg;
@@ -79,7 +70,7 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 	/* The bundle header ends with an empty line */
 	while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
 	       buf.len && buf.buf[0] != '\n') {
-		struct object_id oid;
+		struct object_id *oid;
 		int is_prereq = 0;
 		const char *p;
 
@@ -103,19 +94,22 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 		 * Prerequisites have object name that is optionally
 		 * followed by SP and subject line.
 		 */
-		if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) ||
+		oid = xmalloc(sizeof(struct object_id));
+		if (parse_oid_hex_algop(buf.buf, oid, &p, header->hash_algo) ||
 		    (*p && !isspace(*p)) ||
 		    (!is_prereq && !*p)) {
 			if (report_path)
 				error(_("unrecognized header: %s%s (%d)"),
 				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
 			status = -1;
+			free(oid);
 			break;
 		} else {
-			if (is_prereq)
-				add_to_ref_list(&oid, "", &header->prerequisites);
-			else
-				add_to_ref_list(&oid, p + 1, &header->references);
+			const char *string = is_prereq ? "" : p + 1;
+			struct string_list *list = is_prereq
+				? &header->prerequisites
+				: &header->references;
+			string_list_append(list, string)->util = oid;
 		}
 	}
 
@@ -139,19 +133,19 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 
 int is_bundle(const char *path, int quiet)
 {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int fd = open(path, O_RDONLY);
 
 	if (fd < 0)
 		return 0;
-	memset(&header, 0, sizeof(header));
 	fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
 	if (fd >= 0)
 		close(fd);
+	bundle_header_release(&header);
 	return (fd >= 0);
 }
 
-static int list_refs(struct ref_list *r, int argc, const char **argv)
+static int list_refs(struct string_list *r, int argc, const char **argv)
 {
 	int i;
 
@@ -162,14 +156,14 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
-				if (!strcmp(r->list[i].name, argv[j]))
+				if (!strcmp(r->items[i].string, argv[j]))
 					break;
 			if (j == argc)
 				continue;
 		}
 
-		oid = &r->list[i].oid;
-		name = r->list[i].name;
+		oid = r->items[i].util;
+		name = r->items[i].string;
 		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
@@ -186,7 +180,7 @@ int verify_bundle(struct repository *r,
 	 * Do fast check, then if any prereqs are missing then go line by line
 	 * to be verbose about the errors
 	 */
-	struct ref_list *p = &header->prerequisites;
+	struct string_list *p = &header->prerequisites;
 	struct rev_info revs;
 	const char *argv[] = {NULL, "--all", NULL};
 	struct commit *commit;
@@ -198,17 +192,17 @@ int verify_bundle(struct repository *r,
 
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
-			add_pending_object(&revs, o, e->name);
+			add_pending_object(&revs, o, e->string);
 			continue;
 		}
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(oid), e->name);
+		error("%s %s", oid_to_hex(oid), e->string);
 	}
 	if (revs.pending.nr != p->nr)
 		return ret;
@@ -224,28 +218,28 @@ int verify_bundle(struct repository *r,
 			i--;
 
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(oid), e->name);
+		error("%s %s", oid_to_hex(oid), e->string);
 	}
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
 		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
 
 	if (verbose) {
-		struct ref_list *r;
+		struct string_list *r;
 
 		r = &header->references;
 		printf_ln(Q_("The bundle contains this ref:",
@@ -582,3 +576,9 @@ int unbundle(struct repository *r, struct bundle_header *header,
 		return error(_("index-pack died"));
 	return 0;
 }
+
+void bundle_header_release(struct bundle_header *header)
+{
+	string_list_clear(&header->prerequisites, 1);
+	string_list_clear(&header->references, 1);
+}
diff --git a/bundle.h b/bundle.h
index f9e2d1c8ef..f98c1d24d9 100644
--- a/bundle.h
+++ b/bundle.h
@@ -3,22 +3,21 @@
 
 #include "strvec.h"
 #include "cache.h"
-
-struct ref_list {
-	unsigned int nr, alloc;
-	struct ref_list_entry {
-		struct object_id oid;
-		char *name;
-	} *list;
-};
+#include "string-list.h"
 
 struct bundle_header {
 	unsigned version;
-	struct ref_list prerequisites;
-	struct ref_list references;
+	struct string_list prerequisites;
+	struct string_list references;
 	const struct git_hash_algo *hash_algo;
 };
 
+#define BUNDLE_HEADER_INIT \
+{ \
+	.prerequisites = STRING_LIST_INIT_DUP, \
+	.references = STRING_LIST_INIT_DUP, \
+}
+
 int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct repository *r, const char *path,
@@ -30,5 +29,6 @@ int unbundle(struct repository *r, struct bundle_header *header,
 	     int bundle_fd, int flags);
 int list_bundle_refs(struct bundle_header *header,
 		int argc, const char **argv);
+void bundle_header_release(struct bundle_header *header);
 
 #endif
diff --git a/transport.c b/transport.c
index 9d601c8c95..667c4e0cc6 100644
--- a/transport.c
+++ b/transport.c
@@ -147,13 +147,14 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 	transport->hash_algo = data->header.hash_algo;
 
 	for (i = 0; i < data->header.references.nr; i++) {
-		struct ref_list_entry *e = data->header.references.list + i;
-		struct ref *ref = alloc_ref(e->name);
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = data->header.references.items + i;
+		struct ref *ref = alloc_ref(e->string);
+		const struct object_id *oid = e->util;
 		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
+	string_list_clear(&data->header.references, 1);
 	return result;
 }
 
@@ -176,6 +177,7 @@ static int close_bundle(struct transport *transport)
 	struct bundle_transport_data *data = transport->data;
 	if (data->fd > 0)
 		close(data->fd);
+	bundle_header_release(&data->header);
 	free(data);
 	return 0;
 }
@@ -1082,6 +1084,8 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		die(_("git-over-rsync is no longer supported"));
 	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
+		string_list_init(&data->header.prerequisites, 1);
+		string_list_init(&data->header.references, 1);
 		transport_check_allowed("file");
 		ret->data = data;
 		ret->vtable = &bundle_vtable;
-- 
2.32.0.571.gdba276db2c


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-17 11:21 ` [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-19  2:12   ` Andrei Rybak
  0 siblings, 0 replies; 33+ messages in thread
From: Andrei Rybak @ 2021-06-19  2:12 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason, git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin

On 17/06/2021 13:21, Ævar Arnfjörð Bjarmason wrote:
> ---
>   builtin/bundle.c | 12 ++++-----
>   bundle.c         | 64 ++++++++++++++++++++++++------------------------
>   bundle.h         | 20 +++++++--------
>   transport.c      | 10 +++++---
>   4 files changed, 55 insertions(+), 51 deletions(-)
> 

[snip]

> diff --git a/bundle.c b/bundle.c
> index 621708f40e..d36eeee1a5 100644
> --- a/bundle.c
> +++ b/bundle.c
> @@ -162,14 +156,14 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
>   		if (argc > 1) {
>   			int j;
>   			for (j = 1; j < argc; j++)
> -				if (!strcmp(r->list[i].name, argv[j]))
> +				if (!strcmp(r->items[i].string, argv[j]))
>   					break;
>   			if (j == argc)
>   				continue;
>   		}
>   
> -		oid = &r->list[i].oid;
> -		name = r->list[i].name;
> +		oid = r->items[i].util;
> +		name = r->items[i].string;
>   		printf("%s %s\n", oid_to_hex(oid), name);

In function `list_refs` variable `name` that is used in a call to printf
has been extracted by the previous patch.

>   	}
>   	return 0;
> @@ -186,7 +180,7 @@ int verify_bundle(struct repository *r,
>   	 * Do fast check, then if any prereqs are missing then go line by line
>   	 * to be verbose about the errors
>   	 */
> -	struct ref_list *p = &header->prerequisites;
> +	struct string_list *p = &header->prerequisites;
>   	struct rev_info revs;
>   	const char *argv[] = {NULL, "--all", NULL};
>   	struct commit *commit;
> @@ -198,17 +192,17 @@ int verify_bundle(struct repository *r,
>   
>   	repo_init_revisions(r, &revs, NULL);
>   	for (i = 0; i < p->nr; i++) {
> -		struct ref_list_entry *e = p->list + i;
> -		struct object_id *oid = &e->oid;
> +		struct string_list_item *e = p->items + i;
> +		struct object_id *oid = e->util;
>   		struct object *o = parse_object(r, oid);
>   		if (o) {
>   			o->flags |= PREREQ_MARK;
> -			add_pending_object(&revs, o, e->name);
> +			add_pending_object(&revs, o, e->string);
>   			continue;
>   		}
>   		if (++ret == 1)
>   			error("%s", message);
> -		error("%s %s", oid_to_hex(oid), e->name);
> +		error("%s %s", oid_to_hex(oid), e->string);
>   	}
>   	if (revs.pending.nr != p->nr)
>   		return ret;
> @@ -224,28 +218,28 @@ int verify_bundle(struct repository *r,
>   			i--;
>   
>   	for (i = 0; i < p->nr; i++) {
> -		struct ref_list_entry *e = p->list + i;
> -		struct object_id *oid = &e->oid;
> +		struct string_list_item *e = p->items + i;
> +		const struct object_id *oid = e->util;
>   		struct object *o = parse_object(r, oid);
>   		assert(o); /* otherwise we'd have returned early */
>   		if (o->flags & SHOWN)
>   			continue;
>   		if (++ret == 1)
>   			error("%s", message);
> -		error("%s %s", oid_to_hex(oid), e->name);
> +		error("%s %s", oid_to_hex(oid), e->string);

However, `e->name` in three places in function `verify_bundle` for two
different instances of `struct ref_list_entry *` wasn't extracted into
a variable by the previous patch. Could you please clarify this
discrepancy?

[snip]

> diff --git a/transport.c b/transport.c
> index 9d601c8c95..667c4e0cc6 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -147,13 +147,14 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
>   	transport->hash_algo = data->header.hash_algo;
>   
>   	for (i = 0; i < data->header.references.nr; i++) {
> -		struct ref_list_entry *e = data->header.references.list + i;
> -		struct ref *ref = alloc_ref(e->name);
> -		struct object_id *oid = &e->oid;
> +		struct string_list_item *e = data->header.references.items + i;
> +		struct ref *ref = alloc_ref(e->string);

Similar question for `e->name` here.

> +		const struct object_id *oid = e->util;
>   		oidcpy(&ref->old_oid, oid);
>   		ref->next = result;
>   		result = ref;
>   	}
> +	string_list_clear(&data->header.references, 1);
>   	return result;
>   }
>   
> @@ -176,6 +177,7 @@ static int close_bundle(struct transport *transport)
>   	struct bundle_transport_data *data = transport->data;
>   	if (data->fd > 0)
>   		close(data->fd);
> +	bundle_header_release(&data->header);
>   	free(data);
>   	return 0;
>   }
> @@ -1082,6 +1084,8 @@ struct transport *transport_get(struct remote *remote, const char *url)
>   		die(_("git-over-rsync is no longer supported"));
>   	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
>   		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
> +		string_list_init(&data->header.prerequisites, 1);
> +		string_list_init(&data->header.references, 1);
>   		transport_check_allowed("file");
>   		ret->data = data;
>   		ret->vtable = &bundle_vtable;
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v2 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
  2021-06-17 11:21 [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
                   ` (2 preceding siblings ...)
  2021-06-17 11:21 ` [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-21 15:16 ` Ævar Arnfjörð Bjarmason
  2021-06-21 15:16   ` [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
                     ` (4 more replies)
  3 siblings, 5 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-21 15:16 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

This v2 addresses an omission noted by Andrei Rybak[1]. I didn't
factor out the "name" into a variable in 2/3 for ease of reading
3/3. That's now done.

1. https://lore.kernel.org/git/23c4ce5f-7769-1d2c-3a97-ac9733f73a25@gmail.com/

Ævar Arnfjörð Bjarmason (3):
  bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  bundle.c: use a temporary variable for OIDs and names
  bundle: remove "ref_list" in favor of string-list.c API

 builtin/bundle.c | 91 ++++++++++++++++++++++++++++++++----------------
 bundle.c         | 74 ++++++++++++++++++++++-----------------
 bundle.h         | 20 +++++------
 transport.c      | 12 +++++--
 4 files changed, 122 insertions(+), 75 deletions(-)

Range-diff against v1:
1:  f4191088ac3 = 1:  932c0883ce0 bundle cmd: stop leaking memory from parse_options_cmd_bundle()
2:  f297fd0432a ! 2:  7e0d57951e5 bundle.c: use a temporary variable for OIDs and names
    @@ bundle.c: int verify_bundle(struct repository *r,
      	for (i = 0; i < p->nr; i++) {
      		struct ref_list_entry *e = p->list + i;
     -		struct object *o = parse_object(r, &e->oid);
    ++		const char *name = e->name;
     +		struct object_id *oid = &e->oid;
     +		struct object *o = parse_object(r, oid);
      		if (o) {
      			o->flags |= PREREQ_MARK;
    - 			add_pending_object(&revs, o, e->name);
    -@@ bundle.c: int verify_bundle(struct repository *r,
    +-			add_pending_object(&revs, o, e->name);
    ++			add_pending_object(&revs, o, name);
    + 			continue;
      		}
      		if (++ret == 1)
      			error("%s", message);
     -		error("%s %s", oid_to_hex(&e->oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->name);
    ++		error("%s %s", oid_to_hex(oid), name);
      	}
      	if (revs.pending.nr != p->nr)
      		return ret;
    @@ bundle.c: int verify_bundle(struct repository *r,
      	for (i = 0; i < p->nr; i++) {
      		struct ref_list_entry *e = p->list + i;
     -		struct object *o = parse_object(r, &e->oid);
    ++		const char *name = e->name;
     +		struct object_id *oid = &e->oid;
     +		struct object *o = parse_object(r, oid);
      		assert(o); /* otherwise we'd have returned early */
    @@ bundle.c: int verify_bundle(struct repository *r,
      		if (++ret == 1)
      			error("%s", message);
     -		error("%s %s", oid_to_hex(&e->oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->name);
    ++		error("%s %s", oid_to_hex(oid), name);
      	}
      
      	/* Clean up objects used, as they will be reused. */
    @@ bundle.c: int verify_bundle(struct repository *r,
     
      ## transport.c ##
     @@ transport.c: static struct ref *get_refs_from_bundle(struct transport *transport,
    + 
      	for (i = 0; i < data->header.references.nr; i++) {
      		struct ref_list_entry *e = data->header.references.list + i;
    - 		struct ref *ref = alloc_ref(e->name);
    +-		struct ref *ref = alloc_ref(e->name);
     -		oidcpy(&ref->old_oid, &e->oid);
    ++		const char *name = e->name;
    ++		struct ref *ref = alloc_ref(name);
     +		struct object_id *oid = &e->oid;
     +		oidcpy(&ref->old_oid, oid);
      		ref->next = result;
3:  887313d3b02 ! 3:  9d9cb5aaf9e bundle: remove "ref_list" in favor of string-list.c API
    @@ bundle.c: int verify_bundle(struct repository *r,
      	repo_init_revisions(r, &revs, NULL);
      	for (i = 0; i < p->nr; i++) {
     -		struct ref_list_entry *e = p->list + i;
    +-		const char *name = e->name;
     -		struct object_id *oid = &e->oid;
     +		struct string_list_item *e = p->items + i;
    ++		const char *name = e->string;
     +		struct object_id *oid = e->util;
      		struct object *o = parse_object(r, oid);
      		if (o) {
      			o->flags |= PREREQ_MARK;
    --			add_pending_object(&revs, o, e->name);
    -+			add_pending_object(&revs, o, e->string);
    - 			continue;
    - 		}
    - 		if (++ret == 1)
    - 			error("%s", message);
    --		error("%s %s", oid_to_hex(oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->string);
    - 	}
    - 	if (revs.pending.nr != p->nr)
    - 		return ret;
     @@ bundle.c: int verify_bundle(struct repository *r,
      			i--;
      
      	for (i = 0; i < p->nr; i++) {
     -		struct ref_list_entry *e = p->list + i;
    +-		const char *name = e->name;
     -		struct object_id *oid = &e->oid;
     +		struct string_list_item *e = p->items + i;
    ++		const char *name = e->string;
     +		const struct object_id *oid = e->util;
      		struct object *o = parse_object(r, oid);
      		assert(o); /* otherwise we'd have returned early */
      		if (o->flags & SHOWN)
    - 			continue;
    - 		if (++ret == 1)
    - 			error("%s", message);
    --		error("%s %s", oid_to_hex(oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->string);
    - 	}
    +@@ bundle.c: int verify_bundle(struct repository *r,
      
      	/* Clean up objects used, as they will be reused. */
      	for (i = 0; i < p->nr; i++) {
    @@ transport.c: static struct ref *get_refs_from_bundle(struct transport *transport
      
      	for (i = 0; i < data->header.references.nr; i++) {
     -		struct ref_list_entry *e = data->header.references.list + i;
    --		struct ref *ref = alloc_ref(e->name);
    --		struct object_id *oid = &e->oid;
    +-		const char *name = e->name;
     +		struct string_list_item *e = data->header.references.items + i;
    -+		struct ref *ref = alloc_ref(e->string);
    -+		const struct object_id *oid = e->util;
    ++		const char *name = e->string;
    + 		struct ref *ref = alloc_ref(name);
    +-		struct object_id *oid = &e->oid;
    ++		struct object_id *oid = e->util;
      		oidcpy(&ref->old_oid, oid);
      		ref->next = result;
      		result = ref;
-- 
2.32.0.599.g3967b4fa4ac


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
@ 2021-06-21 15:16   ` Ævar Arnfjörð Bjarmason
  2021-06-24 16:54     ` Jeff King
  2021-06-21 15:16   ` [PATCH v2 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-21 15:16 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Fix a memory leak from the prefix_filename() function introduced with
its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
2017-03-20).

As noted in that commit the leak was intentional as a part of being
sloppy about freeing resources just before we exit, I'm changing this
because I'll be fixing other memory leaks in the bundle API (including
the library version) in subsequent commits. It's easier to reason
about those fixes if valgrind runs cleanly at the end without any
leaks whatsoever.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 79 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 24 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index ea6948110b0..7778297277a 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -46,7 +46,7 @@ static int parse_options_cmd_bundle(int argc,
 		const char* prefix,
 		const char * const usagestr[],
 		const struct option options[],
-		const char **bundle_file) {
+		char **bundle_file) {
 	int newargc;
 	newargc = parse_options(argc, argv, NULL, options, usagestr,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -61,7 +61,8 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	int progress = isatty(STDERR_FILENO);
 	struct strvec pack_opts;
 	int version = -1;
-
+	int die_no_repo = 0;
+	int ret;
 	struct option options[] = {
 		OPT_SET_INT('q', "quiet", &progress,
 			    N_("do not show progress meter"), 0),
@@ -76,7 +77,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 			    N_("specify bundle format version")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_create_usage, options, &bundle_file);
@@ -92,77 +93,107 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	if (progress && all_progress_implied)
 		strvec_push(&pack_opts, "--all-progress-implied");
 
-	if (!startup_info->have_repository)
+	if (!startup_info->have_repository) {
+		die_no_repo = 1;
+		goto cleanup;
+	}
+	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+cleanup:
+	free(bundle_file);
+	if (die_no_repo)
 		die(_("Need a repository to create a bundle."));
-	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	return ret;
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
 	int quiet = 0;
-
+	int ret;
 	struct option options[] = {
 		OPT_BOOL('q', "quiet", &quiet,
 			    N_("do not show bundle details")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	if (verify_bundle(the_repository, &header, !quiet))
-		return 1;
+	if (verify_bundle(the_repository, &header, !quiet)) {
+		ret = 1;
+		goto cleanup;
+	}
+
 	fprintf(stderr, _("%s is okay\n"), bundle_file);
-	return 0;
+	ret = 0;
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	return !!list_bundle_refs(&header, argc, argv);
+	ret = !!list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int die_no_repo = 0;
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
-	if (!startup_info->have_repository)
-		die(_("Need a repository to unbundle."));
-	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
+	if (!startup_info->have_repository) {
+		die_no_repo = 1;
+		goto cleanup;
+	}
+	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+cleanup:
+	if (die_no_repo)
+		die(_("Need a repository to unbundle."));
+	free(bundle_file);
+	return ret;
 }
 
 int cmd_bundle(int argc, const char **argv, const char *prefix)
-- 
2.32.0.599.g3967b4fa4ac


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 2/3] bundle.c: use a temporary variable for OIDs and names
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
  2021-06-21 15:16   ` [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-06-21 15:16   ` Ævar Arnfjörð Bjarmason
  2021-06-21 15:16   ` [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-21 15:16 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

In preparation for moving away from accessing the OID and name via the
"oid" and "name" slots in a subsequent commit, change the code that
accesses it to use named variables. This makes the subsequent change
smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 bundle.c    | 26 ++++++++++++++++++--------
 transport.c |  6 ++++--
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/bundle.c b/bundle.c
index 693d6195514..7210e5e7105 100644
--- a/bundle.c
+++ b/bundle.c
@@ -156,6 +156,9 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 	int i;
 
 	for (i = 0; i < r->nr; i++) {
+		struct object_id *oid;
+		const char *name;
+
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
@@ -164,8 +167,10 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 			if (j == argc)
 				continue;
 		}
-		printf("%s %s\n", oid_to_hex(&r->list[i].oid),
-				r->list[i].name);
+
+		oid = &r->list[i].oid;
+		name = r->list[i].name;
+		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
 }
@@ -194,15 +199,17 @@ int verify_bundle(struct repository *r,
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
-			add_pending_object(&revs, o, e->name);
+			add_pending_object(&revs, o, name);
 			continue;
 		}
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 	if (revs.pending.nr != p->nr)
 		return ret;
@@ -219,19 +226,22 @@ int verify_bundle(struct repository *r,
 
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		commit = lookup_commit_reference_gently(r, &e->oid, 1);
+		struct object_id *oid = &e->oid;
+		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
diff --git a/transport.c b/transport.c
index 50f5830eb6b..95c1138e9ae 100644
--- a/transport.c
+++ b/transport.c
@@ -148,8 +148,10 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 
 	for (i = 0; i < data->header.references.nr; i++) {
 		struct ref_list_entry *e = data->header.references.list + i;
-		struct ref *ref = alloc_ref(e->name);
-		oidcpy(&ref->old_oid, &e->oid);
+		const char *name = e->name;
+		struct ref *ref = alloc_ref(name);
+		struct object_id *oid = &e->oid;
+		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
-- 
2.32.0.599.g3967b4fa4ac


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
  2021-06-21 15:16   ` [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
  2021-06-21 15:16   ` [PATCH v2 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
@ 2021-06-21 15:16   ` Ævar Arnfjörð Bjarmason
  2021-06-24 17:11     ` Jeff King
  2021-06-24 17:14   ` [PATCH v2 0/3] bundle.c: " Jeff King
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
  4 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-21 15:16 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Move away from the "struct ref_list" in bundle.c in favor of the
almost identical string-list.c API.

That API fits this use-case perfectly, but did not exist in its
current form when this code was added in 2e0afafebd (Add git-bundle:
move objects and references by archive, 2007-02-22), with hindsight we
could have used the path-list API, which later got renamed to
string-list. See 8fd2cb4069 (Extract helper bits from
c-merge-recursive work, 2006-07-25)

We need to change "name" to "string" and "oid" to "util" to make this
conversion, but other than that the APIs are pretty much identical for
what bundle.c made use of.

Let's also replace the memset(..,0,...) pattern with a more idiomatic
"INIT" macro, and finally add a *_release() function so to free the
allocated memory.

Before this the add_to_ref_list() would leak memory, now e.g. "bundle
list-heads" reports no memory leaks at all under valgrind.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 12 +++++-----
 bundle.c         | 62 ++++++++++++++++++++++++------------------------
 bundle.h         | 20 ++++++++--------
 transport.c      | 10 +++++---
 4 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 7778297277a..8d822552808 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -106,7 +106,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int quiet = 0;
 	int ret;
@@ -121,7 +121,6 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -136,11 +135,12 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	ret = 0;
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -152,7 +152,6 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -161,11 +160,12 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 	ret = !!list_bundle_refs(&header, argc, argv);
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int die_no_repo = 0;
 	int ret;
@@ -178,7 +178,6 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -189,6 +188,7 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 	}
 	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+	bundle_header_release(&header);
 cleanup:
 	if (die_no_repo)
 		die(_("Need a repository to unbundle."));
diff --git a/bundle.c b/bundle.c
index 7210e5e7105..1c3ae9e02ea 100644
--- a/bundle.c
+++ b/bundle.c
@@ -23,15 +23,6 @@ static struct {
 	{ 3, v3_bundle_signature },
 };
 
-static void add_to_ref_list(const struct object_id *oid, const char *name,
-		struct ref_list *list)
-{
-	ALLOC_GROW(list->list, list->nr + 1, list->alloc);
-	oidcpy(&list->list[list->nr].oid, oid);
-	list->list[list->nr].name = xstrdup(name);
-	list->nr++;
-}
-
 static int parse_capability(struct bundle_header *header, const char *capability)
 {
 	const char *arg;
@@ -79,7 +70,7 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 	/* The bundle header ends with an empty line */
 	while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
 	       buf.len && buf.buf[0] != '\n') {
-		struct object_id oid;
+		struct object_id *oid;
 		int is_prereq = 0;
 		const char *p;
 
@@ -103,19 +94,22 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 		 * Prerequisites have object name that is optionally
 		 * followed by SP and subject line.
 		 */
-		if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) ||
+		oid = xmalloc(sizeof(struct object_id));
+		if (parse_oid_hex_algop(buf.buf, oid, &p, header->hash_algo) ||
 		    (*p && !isspace(*p)) ||
 		    (!is_prereq && !*p)) {
 			if (report_path)
 				error(_("unrecognized header: %s%s (%d)"),
 				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
 			status = -1;
+			free(oid);
 			break;
 		} else {
-			if (is_prereq)
-				add_to_ref_list(&oid, "", &header->prerequisites);
-			else
-				add_to_ref_list(&oid, p + 1, &header->references);
+			const char *string = is_prereq ? "" : p + 1;
+			struct string_list *list = is_prereq
+				? &header->prerequisites
+				: &header->references;
+			string_list_append(list, string)->util = oid;
 		}
 	}
 
@@ -139,19 +133,19 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 
 int is_bundle(const char *path, int quiet)
 {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int fd = open(path, O_RDONLY);
 
 	if (fd < 0)
 		return 0;
-	memset(&header, 0, sizeof(header));
 	fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
 	if (fd >= 0)
 		close(fd);
+	bundle_header_release(&header);
 	return (fd >= 0);
 }
 
-static int list_refs(struct ref_list *r, int argc, const char **argv)
+static int list_refs(struct string_list *r, int argc, const char **argv)
 {
 	int i;
 
@@ -162,14 +156,14 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
-				if (!strcmp(r->list[i].name, argv[j]))
+				if (!strcmp(r->items[i].string, argv[j]))
 					break;
 			if (j == argc)
 				continue;
 		}
 
-		oid = &r->list[i].oid;
-		name = r->list[i].name;
+		oid = r->items[i].util;
+		name = r->items[i].string;
 		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
@@ -186,7 +180,7 @@ int verify_bundle(struct repository *r,
 	 * Do fast check, then if any prereqs are missing then go line by line
 	 * to be verbose about the errors
 	 */
-	struct ref_list *p = &header->prerequisites;
+	struct string_list *p = &header->prerequisites;
 	struct rev_info revs;
 	const char *argv[] = {NULL, "--all", NULL};
 	struct commit *commit;
@@ -198,9 +192,9 @@ int verify_bundle(struct repository *r,
 
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
@@ -225,9 +219,9 @@ int verify_bundle(struct repository *r,
 			i--;
 
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		const struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
@@ -239,15 +233,15 @@ int verify_bundle(struct repository *r,
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
 		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
 
 	if (verbose) {
-		struct ref_list *r;
+		struct string_list *r;
 
 		r = &header->references;
 		printf_ln(Q_("The bundle contains this ref:",
@@ -584,3 +578,9 @@ int unbundle(struct repository *r, struct bundle_header *header,
 		return error(_("index-pack died"));
 	return 0;
 }
+
+void bundle_header_release(struct bundle_header *header)
+{
+	string_list_clear(&header->prerequisites, 1);
+	string_list_clear(&header->references, 1);
+}
diff --git a/bundle.h b/bundle.h
index f9e2d1c8ef5..f98c1d24d9a 100644
--- a/bundle.h
+++ b/bundle.h
@@ -3,22 +3,21 @@
 
 #include "strvec.h"
 #include "cache.h"
-
-struct ref_list {
-	unsigned int nr, alloc;
-	struct ref_list_entry {
-		struct object_id oid;
-		char *name;
-	} *list;
-};
+#include "string-list.h"
 
 struct bundle_header {
 	unsigned version;
-	struct ref_list prerequisites;
-	struct ref_list references;
+	struct string_list prerequisites;
+	struct string_list references;
 	const struct git_hash_algo *hash_algo;
 };
 
+#define BUNDLE_HEADER_INIT \
+{ \
+	.prerequisites = STRING_LIST_INIT_DUP, \
+	.references = STRING_LIST_INIT_DUP, \
+}
+
 int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct repository *r, const char *path,
@@ -30,5 +29,6 @@ int unbundle(struct repository *r, struct bundle_header *header,
 	     int bundle_fd, int flags);
 int list_bundle_refs(struct bundle_header *header,
 		int argc, const char **argv);
+void bundle_header_release(struct bundle_header *header);
 
 #endif
diff --git a/transport.c b/transport.c
index 95c1138e9ae..963655c772c 100644
--- a/transport.c
+++ b/transport.c
@@ -147,14 +147,15 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 	transport->hash_algo = data->header.hash_algo;
 
 	for (i = 0; i < data->header.references.nr; i++) {
-		struct ref_list_entry *e = data->header.references.list + i;
-		const char *name = e->name;
+		struct string_list_item *e = data->header.references.items + i;
+		const char *name = e->string;
 		struct ref *ref = alloc_ref(name);
-		struct object_id *oid = &e->oid;
+		struct object_id *oid = e->util;
 		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
+	string_list_clear(&data->header.references, 1);
 	return result;
 }
 
@@ -177,6 +178,7 @@ static int close_bundle(struct transport *transport)
 	struct bundle_transport_data *data = transport->data;
 	if (data->fd > 0)
 		close(data->fd);
+	bundle_header_release(&data->header);
 	free(data);
 	return 0;
 }
@@ -1083,6 +1085,8 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		die(_("git-over-rsync is no longer supported"));
 	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
+		string_list_init(&data->header.prerequisites, 1);
+		string_list_init(&data->header.references, 1);
 		transport_check_allowed("file");
 		ret->data = data;
 		ret->vtable = &bundle_vtable;
-- 
2.32.0.599.g3967b4fa4ac


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-21 15:16   ` [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-06-24 16:54     ` Jeff King
  2021-06-24 19:28       ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff King @ 2021-06-24 16:54 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Mon, Jun 21, 2021 at 05:16:12PM +0200, Ævar Arnfjörð Bjarmason wrote:

> Fix a memory leak from the prefix_filename() function introduced with
> its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
> 2017-03-20).
> 
> As noted in that commit the leak was intentional as a part of being
> sloppy about freeing resources just before we exit, I'm changing this
> because I'll be fixing other memory leaks in the bundle API (including
> the library version) in subsequent commits. It's easier to reason
> about those fixes if valgrind runs cleanly at the end without any
> leaks whatsoever.

Looking at that old commit, it seems like this is a good candidate for
just inserting a single UNLEAK(bundle_file) into cmd_bundle(). But it
looks like the allocation has now migrated into all of the individual
sub-command functions, so we have to deal with it multiple times. They
could still use UNLEAK() if you want to avoid the "ret = foo(); free();
return ret" dance in each one, though.

We should avoid UNLEAK() in library-ish functions, but sub-commands that
are just one step away from cmd_bundle() returning are OK uses, IMHO.

> @@ -92,77 +93,107 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
>  	if (progress && all_progress_implied)
>  		strvec_push(&pack_opts, "--all-progress-implied");
>  
> -	if (!startup_info->have_repository)
> +	if (!startup_info->have_repository) {
> +		die_no_repo = 1;
> +		goto cleanup;
> +	}
> +	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
> +cleanup:
> +	free(bundle_file);
> +	if (die_no_repo)
>  		die(_("Need a repository to create a bundle."));
> -	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
> +	return ret;
>  }

This die_no_repo stuff confused me at first. But I think you are trying
to make sure we call free(bundle_file) before die? There is no point in
spending any effort on that, I think. When we exit() via die(), the
variable is still on the stack, and hence not leaked. And there are
probably a zillion other places we can hit a die() inside
create_bundle() anyway, which would produce the same effect. There's not
much point treating this one specially.

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-21 15:16   ` [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-24 17:11     ` Jeff King
  2021-06-24 19:31       ` Ævar Arnfjörð Bjarmason
  2021-06-29  1:02       ` Junio C Hamano
  0 siblings, 2 replies; 33+ messages in thread
From: Jeff King @ 2021-06-24 17:11 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Mon, Jun 21, 2021 at 05:16:14PM +0200, Ævar Arnfjörð Bjarmason wrote:

> Move away from the "struct ref_list" in bundle.c in favor of the
> almost identical string-list.c API.
> 
> That API fits this use-case perfectly, but did not exist in its
> current form when this code was added in 2e0afafebd (Add git-bundle:
> move objects and references by archive, 2007-02-22), with hindsight we
> could have used the path-list API, which later got renamed to
> string-list. See 8fd2cb4069 (Extract helper bits from
> c-merge-recursive work, 2006-07-25)

I think this is a good direction, and I didn't see any errors in the
code. It's slightly sad that we end up with more lines than we started
with, but I think that's mostly because you're actually freeing the
memory now.

Two small nitpicks:

> @@ -103,19 +94,22 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
>  		 * Prerequisites have object name that is optionally
>  		 * followed by SP and subject line.
>  		 */
> -		if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) ||
> +		oid = xmalloc(sizeof(struct object_id));
> +		if (parse_oid_hex_algop(buf.buf, oid, &p, header->hash_algo) ||
>  		    (*p && !isspace(*p)) ||
>  		    (!is_prereq && !*p)) {
>  			if (report_path)
>  				error(_("unrecognized header: %s%s (%d)"),
>  				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
>  			status = -1;
> +			free(oid);
>  			break;
>  		} else {

This would be slightly simpler if you kept a local "struct object_id",
and then called:

  string_list_append(list, string)->util = oiddup(&oid);

later when you know you want to save it. And then you don't have to
worry about the extra cleanup here. That does require an extra oidcpy()
under the hood, but I suspect that is lost in the noise.

I'm OK with it either way.

> -			if (is_prereq)
> -				add_to_ref_list(&oid, "", &header->prerequisites);
> -			else
> -				add_to_ref_list(&oid, p + 1, &header->references);
> +			const char *string = is_prereq ? "" : p + 1;
> +			struct string_list *list = is_prereq
> +				? &header->prerequisites
> +				: &header->references;
> +			string_list_append(list, string)->util = oid;

I'm usually a big fan of the ternary operator, and using variable
indirection to make it clear that we always call a function.  But here I
think it makes things more confusing.  The two sides of the if/else are
sufficiently simple that it's easy to see they both make the same
function call. And because there are two variables, we check is_prereq
twice, making it much harder to see the two cases.

I.e., I think:

  if (is_prereq)
          string_list_append(&header->prerequisites, "")->util = oid;
  else
          string_list_append(&header->references, p + 1)->util = oid;

is much more obvious.

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
                     ` (2 preceding siblings ...)
  2021-06-21 15:16   ` [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-24 17:14   ` Jeff King
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
  4 siblings, 0 replies; 33+ messages in thread
From: Jeff King @ 2021-06-24 17:14 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Mon, Jun 21, 2021 at 05:16:11PM +0200, Ævar Arnfjörð Bjarmason wrote:

> This v2 addresses an omission noted by Andrei Rybak[1]. I didn't
> factor out the "name" into a variable in 2/3 for ease of reading
> 3/3. That's now done.

This all looks OK to me. I left a few small comments on the patches
themselves.

The UNLEAK() thing I suggested for patch 1 does make that patch much
smaller and easier to read, but I suspect makes patch 3 harder (i.e.,
you are reusing the "cleanup" sections there to do the bundle header
release. That could _also_ get UNLEAKed, but at some point it becomes
more clear to actually clean up after ourselves, and I think patch 3
probably crosses that point). So I'm OK to ignore that.

I would prefer to see the "die()" thing I mentioned there addressed, as
well as the ternary thing from patch 3. But neither of them is incorrect
as-is; it's just a style/preference thing.

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-24 16:54     ` Jeff King
@ 2021-06-24 19:28       ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-24 19:28 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Thu, Jun 24 2021, Jeff King wrote:

> On Mon, Jun 21, 2021 at 05:16:12PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> Fix a memory leak from the prefix_filename() function introduced with
>> its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
>> 2017-03-20).
>> 
>> As noted in that commit the leak was intentional as a part of being
>> sloppy about freeing resources just before we exit, I'm changing this
>> because I'll be fixing other memory leaks in the bundle API (including
>> the library version) in subsequent commits. It's easier to reason
>> about those fixes if valgrind runs cleanly at the end without any
>> leaks whatsoever.
>
> Looking at that old commit, it seems like this is a good candidate for
> just inserting a single UNLEAK(bundle_file) into cmd_bundle(). But it
> looks like the allocation has now migrated into all of the individual
> sub-command functions, so we have to deal with it multiple times. They
> could still use UNLEAK() if you want to avoid the "ret = foo(); free();
> return ret" dance in each one, though.
>
> We should avoid UNLEAK() in library-ish functions, but sub-commands that
> are just one step away from cmd_bundle() returning are OK uses, IMHO.

I'll change it if you feel strongly about it, but I always read UNLEAK()
as "ok, this is too hard, we won't bother, it's just a one-off
built-in", and not necessarily a recommendation for a desired pattern.

I think it's nice to have e.g. valgrind be able to report no leaks in
the binaries we build by default, not just if you compile with
-DSUPPRESS_ANNOTATED_LEAKS.

>> @@ -92,77 +93,107 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
>>  	if (progress && all_progress_implied)
>>  		strvec_push(&pack_opts, "--all-progress-implied");
>>  
>> -	if (!startup_info->have_repository)
>> +	if (!startup_info->have_repository) {
>> +		die_no_repo = 1;
>> +		goto cleanup;
>> +	}
>> +	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
>> +cleanup:
>> +	free(bundle_file);
>> +	if (die_no_repo)
>>  		die(_("Need a repository to create a bundle."));
>> -	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
>> +	return ret;
>>  }
>
> This die_no_repo stuff confused me at first. But I think you are trying
> to make sure we call free(bundle_file) before die? There is no point in
> spending any effort on that, I think. When we exit() via die(), the
> variable is still on the stack, and hence not leaked. And there are
> probably a zillion other places we can hit a die() inside
> create_bundle() anyway, which would produce the same effect. There's not
> much point treating this one specially.

Right, it's there just for the free(), and yeah, there's a bunch of
places we'll leak anyway.

I do think per the above with valgrind that there's value in making the
common non-dying codepaths not leak though.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-24 17:11     ` Jeff King
@ 2021-06-24 19:31       ` Ævar Arnfjörð Bjarmason
  2021-06-29  1:02       ` Junio C Hamano
  1 sibling, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-24 19:31 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Thu, Jun 24 2021, Jeff King wrote:

> On Mon, Jun 21, 2021 at 05:16:14PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> Move away from the "struct ref_list" in bundle.c in favor of the
>> almost identical string-list.c API.
>> 
>> That API fits this use-case perfectly, but did not exist in its
>> current form when this code was added in 2e0afafebd (Add git-bundle:
>> move objects and references by archive, 2007-02-22), with hindsight we
>> could have used the path-list API, which later got renamed to
>> string-list. See 8fd2cb4069 (Extract helper bits from
>> c-merge-recursive work, 2006-07-25)
>
> I think this is a good direction, and I didn't see any errors in the
> code. It's slightly sad that we end up with more lines than we started
> with, but I think that's mostly because you're actually freeing the
> memory now.
>
> Two small nitpicks:
>
>> @@ -103,19 +94,22 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
>>  		 * Prerequisites have object name that is optionally
>>  		 * followed by SP and subject line.
>>  		 */
>> -		if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) ||
>> +		oid = xmalloc(sizeof(struct object_id));
>> +		if (parse_oid_hex_algop(buf.buf, oid, &p, header->hash_algo) ||
>>  		    (*p && !isspace(*p)) ||
>>  		    (!is_prereq && !*p)) {
>>  			if (report_path)
>>  				error(_("unrecognized header: %s%s (%d)"),
>>  				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
>>  			status = -1;
>> +			free(oid);
>>  			break;
>>  		} else {
>
> This would be slightly simpler if you kept a local "struct object_id",
> and then called:
>
>   string_list_append(list, string)->util = oiddup(&oid);
>
> later when you know you want to save it. And then you don't have to
> worry about the extra cleanup here. That does require an extra oidcpy()
> under the hood, but I suspect that is lost in the noise.
>
> I'm OK with it either way.

That sounds simpler indeed, thanks.

>> -			if (is_prereq)
>> -				add_to_ref_list(&oid, "", &header->prerequisites);
>> -			else
>> -				add_to_ref_list(&oid, p + 1, &header->references);
>> +			const char *string = is_prereq ? "" : p + 1;
>> +			struct string_list *list = is_prereq
>> +				? &header->prerequisites
>> +				: &header->references;
>> +			string_list_append(list, string)->util = oid;
>
> I'm usually a big fan of the ternary operator, and using variable
> indirection to make it clear that we always call a function.  But here I
> think it makes things more confusing.  The two sides of the if/else are
> sufficiently simple that it's easy to see they both make the same
> function call. And because there are two variables, we check is_prereq
> twice, making it much harder to see the two cases.
>
> I.e., I think:
>
>   if (is_prereq)
>           string_list_append(&header->prerequisites, "")->util = oid;
>   else
>           string_list_append(&header->references, p + 1)->util = oid;
>
> is much more obvious.

Hah, that's actually the exact code I wrote to begin with, before
thinking "hrm, someone will probably say I should just use a ternary
here". I'll change it back :)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-24 17:11     ` Jeff King
  2021-06-24 19:31       ` Ævar Arnfjörð Bjarmason
@ 2021-06-29  1:02       ` Junio C Hamano
  1 sibling, 0 replies; 33+ messages in thread
From: Junio C Hamano @ 2021-06-29  1:02 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason, Jeff King
  Cc: git, Johannes Schindelin, Andrei Rybak

Jeff King <peff@peff.net> writes:

> I think this is a good direction, and I didn't see any errors in the
> code. It's slightly sad that we end up with more lines than we started
> with, but I think that's mostly because you're actually freeing the
> memory now.
> ...
> I.e., I think:
>
>   if (is_prereq)
>           string_list_append(&header->prerequisites, "")->util = oid;
>   else
>           string_list_append(&header->references, p + 1)->util = oid;
>
> is much more obvious.

Nicely done and nicely reviewed and improved.

Together with the "no point in freeing just before dying" on the
earlier step, polishing this topic to incorporate the suggested
changes should be fairly an easy task.  Let's not leave too many
loose ends hanging around and close this one with the last final
reroll (hopefully without "I did this too while at it" that meets
"oh, well, that is a bit controversi8al" to drag it unnecessarily
out).

Thanks.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v3 0/3]
  2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
                     ` (3 preceding siblings ...)
  2021-06-24 17:14   ` [PATCH v2 0/3] bundle.c: " Jeff King
@ 2021-06-30 14:06   ` Ævar Arnfjörð Bjarmason
  2021-06-30 14:06     ` [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
                       ` (4 more replies)
  4 siblings, 5 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 14:06 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Refactor the bundle API to use the string_list API instead of its own
version of a similar API. See [1] for v2.

Addresses comments by Jeff King about us being too overzelous in
trying not to leak memory (the 'die_no_repo' is gone), and other
flow/style comments of his.

I also added a bundle_header_init() function for use in transport.c,
and noticed a redundant call to string_list_clear() there.

1. https://lore.kernel.org/git/cover-0.3-00000000000-20210621T151357Z-avarab@gmail.com/

Ævar Arnfjörð Bjarmason (3):
  bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  bundle.c: use a temporary variable for OIDs and names
  bundle: remove "ref_list" in favor of string-list.c API

 builtin/bundle.c | 74 ++++++++++++++++++++++++++++++------------------
 bundle.c         | 65 ++++++++++++++++++++++++++----------------
 bundle.h         | 21 +++++++-------
 transport.c      | 10 +++++--
 4 files changed, 105 insertions(+), 65 deletions(-)

Range-diff against v2:
1:  6a8b20a7cf3 < -:  ----------- upload-pack: run is_repository_shallow() before setup_revisions()
2:  d88b2c04102 < -:  ----------- revision.h: unify "disable_stdin" and "read_from_stdin"
3:  d433d7b24a3 < -:  ----------- pack-objects.c: do stdin parsing via revision.c's API
4:  e59a06c3148 < -:  ----------- pack-objects.c: make use of REV_INFO_STDIN_LINE_PROCESS
5:  f4191088ac3 ! 1:  3d0d7a8e8b5 bundle cmd: stop leaking memory from parse_options_cmd_bundle()
    @@ Commit message
         about those fixes if valgrind runs cleanly at the end without any
         leaks whatsoever.
     
    +    An earlier version of this change went out of its way to not leak
    +    memory on the die() codepaths here, but that was deemed too verbose to
    +    worry about in a built-in that's dying anyway. The only reason we'd
    +    need that is to appease a mode like SANITIZE=leak within the scope of
    +    an entire test file.
    +
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
      ## builtin/bundle.c ##
    @@ builtin/bundle.c: static int cmd_bundle_create(int argc, const char **argv, cons
      	struct strvec pack_opts;
      	int version = -1;
     -
    -+	int die_no_repo = 0;
     +	int ret;
      	struct option options[] = {
      		OPT_SET_INT('q', "quiet", &progress,
    @@ builtin/bundle.c: static int cmd_bundle_create(int argc, const char **argv, cons
      	argc = parse_options_cmd_bundle(argc, argv, prefix,
      			builtin_bundle_create_usage, options, &bundle_file);
     @@ builtin/bundle.c: static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
    - 	if (progress && all_progress_implied)
    - 		strvec_push(&pack_opts, "--all-progress-implied");
      
    --	if (!startup_info->have_repository)
    -+	if (!startup_info->have_repository) {
    -+		die_no_repo = 1;
    -+		goto cleanup;
    -+	}
    -+	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
    -+cleanup:
    -+	free(bundle_file);
    -+	if (die_no_repo)
    + 	if (!startup_info->have_repository)
      		die(_("Need a repository to create a bundle."));
     -	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
    ++	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
    ++	free(bundle_file);
     +	return ret;
      }
      
    @@ builtin/bundle.c: static int cmd_bundle_create(int argc, const char **argv, cons
      	struct bundle_header header;
      	int bundle_fd = -1;
     -
    -+	int die_no_repo = 0;
     +	int ret;
      	struct option options[] = {
      		OPT_END()
    @@ builtin/bundle.c: static int cmd_bundle_create(int argc, const char **argv, cons
      	memset(&header, 0, sizeof(header));
     -	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
     -		return 1;
    --	if (!startup_info->have_repository)
    --		die(_("Need a repository to unbundle."));
    --	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
     +	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
     +		ret = 1;
     +		goto cleanup;
     +	}
    -+	if (!startup_info->have_repository) {
    -+		die_no_repo = 1;
    -+		goto cleanup;
    -+	}
    + 	if (!startup_info->have_repository)
    + 		die(_("Need a repository to unbundle."));
    +-	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
     +	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
      		list_bundle_refs(&header, argc, argv);
     +cleanup:
    -+	if (die_no_repo)
    -+		die(_("Need a repository to unbundle."));
     +	free(bundle_file);
     +	return ret;
      }
6:  f297fd0432a ! 2:  e47646d3a98 bundle.c: use a temporary variable for OIDs and names
    @@ bundle.c: int verify_bundle(struct repository *r,
      	for (i = 0; i < p->nr; i++) {
      		struct ref_list_entry *e = p->list + i;
     -		struct object *o = parse_object(r, &e->oid);
    ++		const char *name = e->name;
     +		struct object_id *oid = &e->oid;
     +		struct object *o = parse_object(r, oid);
      		if (o) {
      			o->flags |= PREREQ_MARK;
    - 			add_pending_object(&revs, o, e->name);
    -@@ bundle.c: int verify_bundle(struct repository *r,
    +-			add_pending_object(&revs, o, e->name);
    ++			add_pending_object(&revs, o, name);
    + 			continue;
      		}
      		if (++ret == 1)
      			error("%s", message);
     -		error("%s %s", oid_to_hex(&e->oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->name);
    ++		error("%s %s", oid_to_hex(oid), name);
      	}
      	if (revs.pending.nr != p->nr)
      		return ret;
    @@ bundle.c: int verify_bundle(struct repository *r,
      	for (i = 0; i < p->nr; i++) {
      		struct ref_list_entry *e = p->list + i;
     -		struct object *o = parse_object(r, &e->oid);
    ++		const char *name = e->name;
     +		struct object_id *oid = &e->oid;
     +		struct object *o = parse_object(r, oid);
      		assert(o); /* otherwise we'd have returned early */
    @@ bundle.c: int verify_bundle(struct repository *r,
      		if (++ret == 1)
      			error("%s", message);
     -		error("%s %s", oid_to_hex(&e->oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->name);
    ++		error("%s %s", oid_to_hex(oid), name);
      	}
      
      	/* Clean up objects used, as they will be reused. */
    @@ bundle.c: int verify_bundle(struct repository *r,
     
      ## transport.c ##
     @@ transport.c: static struct ref *get_refs_from_bundle(struct transport *transport,
    + 
      	for (i = 0; i < data->header.references.nr; i++) {
      		struct ref_list_entry *e = data->header.references.list + i;
    - 		struct ref *ref = alloc_ref(e->name);
    +-		struct ref *ref = alloc_ref(e->name);
     -		oidcpy(&ref->old_oid, &e->oid);
    ++		const char *name = e->name;
    ++		struct ref *ref = alloc_ref(name);
     +		struct object_id *oid = &e->oid;
     +		oidcpy(&ref->old_oid, oid);
      		ref->next = result;
7:  887313d3b02 ! 3:  f1066ee1b9a bundle: remove "ref_list" in favor of string-list.c API
    @@ builtin/bundle.c: static int cmd_bundle_list_heads(int argc, const char **argv,
     -	struct bundle_header header;
     +	struct bundle_header header = BUNDLE_HEADER_INIT;
      	int bundle_fd = -1;
    - 	int die_no_repo = 0;
      	int ret;
    + 	struct option options[] = {
     @@ builtin/bundle.c: static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
      			builtin_bundle_unbundle_usage, options, &bundle_file);
      	/* bundle internals use argv[1] as further parameters */
    @@ builtin/bundle.c: static int cmd_bundle_unbundle(int argc, const char **argv, co
      		ret = 1;
      		goto cleanup;
     @@ builtin/bundle.c: static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
    - 	}
    + 		die(_("Need a repository to unbundle."));
      	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
      		list_bundle_refs(&header, argc, argv);
     +	bundle_header_release(&header);
      cleanup:
    - 	if (die_no_repo)
    - 		die(_("Need a repository to unbundle."));
    + 	free(bundle_file);
    + 	return ret;
     
      ## bundle.c ##
     @@ bundle.c: static struct {
    @@ bundle.c: static struct {
      
     -static void add_to_ref_list(const struct object_id *oid, const char *name,
     -		struct ref_list *list)
    --{
    ++void bundle_header_init(struct bundle_header *header)
    + {
     -	ALLOC_GROW(list->list, list->nr + 1, list->alloc);
     -	oidcpy(&list->list[list->nr].oid, oid);
     -	list->list[list->nr].name = xstrdup(name);
     -	list->nr++;
    --}
    --
    - static int parse_capability(struct bundle_header *header, const char *capability)
    - {
    - 	const char *arg;
    -@@ bundle.c: static int parse_bundle_header(int fd, struct bundle_header *header,
    - 	/* The bundle header ends with an empty line */
    - 	while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
    - 	       buf.len && buf.buf[0] != '\n') {
    --		struct object_id oid;
    -+		struct object_id *oid;
    - 		int is_prereq = 0;
    - 		const char *p;
    ++	memset(header, 0, sizeof(*header));
    ++	string_list_init(&header->prerequisites, 1);
    ++	string_list_init(&header->references, 1);
    ++}
    ++
    ++void bundle_header_release(struct bundle_header *header)
    ++{
    ++	string_list_clear(&header->prerequisites, 1);
    ++	string_list_clear(&header->references, 1);
    + }
      
    + static int parse_capability(struct bundle_header *header, const char *capability)
     @@ bundle.c: static int parse_bundle_header(int fd, struct bundle_header *header,
    - 		 * Prerequisites have object name that is optionally
    - 		 * followed by SP and subject line.
    - 		 */
    --		if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) ||
    -+		oid = xmalloc(sizeof(struct object_id));
    -+		if (parse_oid_hex_algop(buf.buf, oid, &p, header->hash_algo) ||
    - 		    (*p && !isspace(*p)) ||
    - 		    (!is_prereq && !*p)) {
    - 			if (report_path)
    - 				error(_("unrecognized header: %s%s (%d)"),
    - 				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
      			status = -1;
    -+			free(oid);
      			break;
      		} else {
    --			if (is_prereq)
    ++			struct object_id *dup = oiddup(&oid);
    + 			if (is_prereq)
     -				add_to_ref_list(&oid, "", &header->prerequisites);
    --			else
    ++				string_list_append(&header->prerequisites, "")->util = dup;
    + 			else
     -				add_to_ref_list(&oid, p + 1, &header->references);
    -+			const char *string = is_prereq ? "" : p + 1;
    -+			struct string_list *list = is_prereq
    -+				? &header->prerequisites
    -+				: &header->references;
    -+			string_list_append(list, string)->util = oid;
    ++				string_list_append(&header->references, p + 1)->util = dup;
      		}
      	}
      
    @@ bundle.c: int verify_bundle(struct repository *r,
      	repo_init_revisions(r, &revs, NULL);
      	for (i = 0; i < p->nr; i++) {
     -		struct ref_list_entry *e = p->list + i;
    +-		const char *name = e->name;
     -		struct object_id *oid = &e->oid;
     +		struct string_list_item *e = p->items + i;
    ++		const char *name = e->string;
     +		struct object_id *oid = e->util;
      		struct object *o = parse_object(r, oid);
      		if (o) {
      			o->flags |= PREREQ_MARK;
    --			add_pending_object(&revs, o, e->name);
    -+			add_pending_object(&revs, o, e->string);
    - 			continue;
    - 		}
    - 		if (++ret == 1)
    - 			error("%s", message);
    --		error("%s %s", oid_to_hex(oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->string);
    - 	}
    - 	if (revs.pending.nr != p->nr)
    - 		return ret;
     @@ bundle.c: int verify_bundle(struct repository *r,
      			i--;
      
      	for (i = 0; i < p->nr; i++) {
     -		struct ref_list_entry *e = p->list + i;
    +-		const char *name = e->name;
     -		struct object_id *oid = &e->oid;
     +		struct string_list_item *e = p->items + i;
    ++		const char *name = e->string;
     +		const struct object_id *oid = e->util;
      		struct object *o = parse_object(r, oid);
      		assert(o); /* otherwise we'd have returned early */
      		if (o->flags & SHOWN)
    - 			continue;
    - 		if (++ret == 1)
    - 			error("%s", message);
    --		error("%s %s", oid_to_hex(oid), e->name);
    -+		error("%s %s", oid_to_hex(oid), e->string);
    - 	}
    +@@ bundle.c: int verify_bundle(struct repository *r,
      
      	/* Clean up objects used, as they will be reused. */
      	for (i = 0; i < p->nr; i++) {
    @@ bundle.c: int verify_bundle(struct repository *r,
      
      		r = &header->references;
      		printf_ln(Q_("The bundle contains this ref:",
    -@@ bundle.c: int unbundle(struct repository *r, struct bundle_header *header,
    - 		return error(_("index-pack died"));
    - 	return 0;
    - }
    -+
    -+void bundle_header_release(struct bundle_header *header)
    -+{
    -+	string_list_clear(&header->prerequisites, 1);
    -+	string_list_clear(&header->references, 1);
    -+}
     
      ## bundle.h ##
     @@
    @@ bundle.h
     +	.prerequisites = STRING_LIST_INIT_DUP, \
     +	.references = STRING_LIST_INIT_DUP, \
     +}
    ++void bundle_header_init(struct bundle_header *header);
    ++void bundle_header_release(struct bundle_header *header);
     +
      int is_bundle(const char *path, int quiet);
      int read_bundle_header(const char *path, struct bundle_header *header);
      int create_bundle(struct repository *r, const char *path,
    -@@ bundle.h: int unbundle(struct repository *r, struct bundle_header *header,
    - 	     int bundle_fd, int flags);
    - int list_bundle_refs(struct bundle_header *header,
    - 		int argc, const char **argv);
    -+void bundle_header_release(struct bundle_header *header);
    - 
    - #endif
     
      ## transport.c ##
     @@ transport.c: static struct ref *get_refs_from_bundle(struct transport *transport,
    @@ transport.c: static struct ref *get_refs_from_bundle(struct transport *transport
      
      	for (i = 0; i < data->header.references.nr; i++) {
     -		struct ref_list_entry *e = data->header.references.list + i;
    --		struct ref *ref = alloc_ref(e->name);
    --		struct object_id *oid = &e->oid;
    +-		const char *name = e->name;
     +		struct string_list_item *e = data->header.references.items + i;
    -+		struct ref *ref = alloc_ref(e->string);
    -+		const struct object_id *oid = e->util;
    ++		const char *name = e->string;
    + 		struct ref *ref = alloc_ref(name);
    +-		struct object_id *oid = &e->oid;
    ++		struct object_id *oid = e->util;
      		oidcpy(&ref->old_oid, oid);
      		ref->next = result;
      		result = ref;
    - 	}
    -+	string_list_clear(&data->header.references, 1);
    - 	return result;
    - }
    - 
     @@ transport.c: static int close_bundle(struct transport *transport)
      	struct bundle_transport_data *data = transport->data;
      	if (data->fd > 0)
    @@ transport.c: struct transport *transport_get(struct remote *remote, const char *
      		die(_("git-over-rsync is no longer supported"));
      	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
      		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
    -+		string_list_init(&data->header.prerequisites, 1);
    -+		string_list_init(&data->header.references, 1);
    ++		bundle_header_init(&data->header);
      		transport_check_allowed("file");
      		ret->data = data;
      		ret->vtable = &bundle_vtable;
-- 
2.32.0.613.g8e17abc2eb


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
@ 2021-06-30 14:06     ` Ævar Arnfjörð Bjarmason
  2021-06-30 17:26       ` Jeff King
  2021-06-30 14:06     ` [PATCH v3 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 14:06 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Fix a memory leak from the prefix_filename() function introduced with
its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
2017-03-20).

As noted in that commit the leak was intentional as a part of being
sloppy about freeing resources just before we exit, I'm changing this
because I'll be fixing other memory leaks in the bundle API (including
the library version) in subsequent commits. It's easier to reason
about those fixes if valgrind runs cleanly at the end without any
leaks whatsoever.

An earlier version of this change went out of its way to not leak
memory on the die() codepaths here, but that was deemed too verbose to
worry about in a built-in that's dying anyway. The only reason we'd
need that is to appease a mode like SANITIZE=leak within the scope of
an entire test file.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 62 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index ea6948110b0..15e2bd61965 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -46,7 +46,7 @@ static int parse_options_cmd_bundle(int argc,
 		const char* prefix,
 		const char * const usagestr[],
 		const struct option options[],
-		const char **bundle_file) {
+		char **bundle_file) {
 	int newargc;
 	newargc = parse_options(argc, argv, NULL, options, usagestr,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -61,7 +61,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	int progress = isatty(STDERR_FILENO);
 	struct strvec pack_opts;
 	int version = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_SET_INT('q', "quiet", &progress,
 			    N_("do not show progress meter"), 0),
@@ -76,7 +76,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 			    N_("specify bundle format version")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_create_usage, options, &bundle_file);
@@ -94,75 +94,95 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 
 	if (!startup_info->have_repository)
 		die(_("Need a repository to create a bundle."));
-	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
 	int quiet = 0;
-
+	int ret;
 	struct option options[] = {
 		OPT_BOOL('q', "quiet", &quiet,
 			    N_("do not show bundle details")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	if (verify_bundle(the_repository, &header, !quiet))
-		return 1;
+	if (verify_bundle(the_repository, &header, !quiet)) {
+		ret = 1;
+		goto cleanup;
+	}
+
 	fprintf(stderr, _("%s is okay\n"), bundle_file);
-	return 0;
+	ret = 0;
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	return !!list_bundle_refs(&header, argc, argv);
+	ret = !!list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	if (!startup_info->have_repository)
 		die(_("Need a repository to unbundle."));
-	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
+	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 int cmd_bundle(int argc, const char **argv, const char *prefix)
-- 
2.32.0.613.g8e17abc2eb


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v3 2/3] bundle.c: use a temporary variable for OIDs and names
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
  2021-06-30 14:06     ` [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-06-30 14:06     ` Ævar Arnfjörð Bjarmason
  2021-06-30 14:06     ` [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 14:06 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

In preparation for moving away from accessing the OID and name via the
"oid" and "name" slots in a subsequent commit, change the code that
accesses it to use named variables. This makes the subsequent change
smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 bundle.c    | 26 ++++++++++++++++++--------
 transport.c |  6 ++++--
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/bundle.c b/bundle.c
index 693d6195514..7210e5e7105 100644
--- a/bundle.c
+++ b/bundle.c
@@ -156,6 +156,9 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 	int i;
 
 	for (i = 0; i < r->nr; i++) {
+		struct object_id *oid;
+		const char *name;
+
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
@@ -164,8 +167,10 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 			if (j == argc)
 				continue;
 		}
-		printf("%s %s\n", oid_to_hex(&r->list[i].oid),
-				r->list[i].name);
+
+		oid = &r->list[i].oid;
+		name = r->list[i].name;
+		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
 }
@@ -194,15 +199,17 @@ int verify_bundle(struct repository *r,
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
-			add_pending_object(&revs, o, e->name);
+			add_pending_object(&revs, o, name);
 			continue;
 		}
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 	if (revs.pending.nr != p->nr)
 		return ret;
@@ -219,19 +226,22 @@ int verify_bundle(struct repository *r,
 
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		commit = lookup_commit_reference_gently(r, &e->oid, 1);
+		struct object_id *oid = &e->oid;
+		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
diff --git a/transport.c b/transport.c
index 50f5830eb6b..95c1138e9ae 100644
--- a/transport.c
+++ b/transport.c
@@ -148,8 +148,10 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 
 	for (i = 0; i < data->header.references.nr; i++) {
 		struct ref_list_entry *e = data->header.references.list + i;
-		struct ref *ref = alloc_ref(e->name);
-		oidcpy(&ref->old_oid, &e->oid);
+		const char *name = e->name;
+		struct ref *ref = alloc_ref(name);
+		struct object_id *oid = &e->oid;
+		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
-- 
2.32.0.613.g8e17abc2eb


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
  2021-06-30 14:06     ` [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
  2021-06-30 14:06     ` [PATCH v3 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
@ 2021-06-30 14:06     ` Ævar Arnfjörð Bjarmason
  2021-06-30 21:18       ` Junio C Hamano
  2021-06-30 17:34     ` [PATCH v3 0/3] Jeff King
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  4 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 14:06 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Move away from the "struct ref_list" in bundle.c in favor of the
almost identical string-list.c API.

That API fits this use-case perfectly, but did not exist in its
current form when this code was added in 2e0afafebd (Add git-bundle:
move objects and references by archive, 2007-02-22), with hindsight we
could have used the path-list API, which later got renamed to
string-list. See 8fd2cb4069 (Extract helper bits from
c-merge-recursive work, 2006-07-25)

We need to change "name" to "string" and "oid" to "util" to make this
conversion, but other than that the APIs are pretty much identical for
what bundle.c made use of.

Let's also replace the memset(..,0,...) pattern with a more idiomatic
"INIT" macro, and finally add a *_release() function so to free the
allocated memory.

Before this the add_to_ref_list() would leak memory, now e.g. "bundle
list-heads" reports no memory leaks at all under valgrind.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 12 +++++------
 bundle.c         | 53 ++++++++++++++++++++++++++----------------------
 bundle.h         | 21 ++++++++++---------
 transport.c      |  8 +++++---
 4 files changed, 51 insertions(+), 43 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 15e2bd61965..053a51bea1b 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -100,7 +100,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int quiet = 0;
 	int ret;
@@ -115,7 +115,6 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -130,11 +129,12 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	ret = 0;
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -146,7 +146,6 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -155,11 +154,12 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 	ret = !!list_bundle_refs(&header, argc, argv);
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -171,7 +171,6 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -180,6 +179,7 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 		die(_("Need a repository to unbundle."));
 	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+	bundle_header_release(&header);
 cleanup:
 	free(bundle_file);
 	return ret;
diff --git a/bundle.c b/bundle.c
index 7210e5e7105..cd5356a66f3 100644
--- a/bundle.c
+++ b/bundle.c
@@ -23,13 +23,17 @@ static struct {
 	{ 3, v3_bundle_signature },
 };
 
-static void add_to_ref_list(const struct object_id *oid, const char *name,
-		struct ref_list *list)
+void bundle_header_init(struct bundle_header *header)
 {
-	ALLOC_GROW(list->list, list->nr + 1, list->alloc);
-	oidcpy(&list->list[list->nr].oid, oid);
-	list->list[list->nr].name = xstrdup(name);
-	list->nr++;
+	memset(header, 0, sizeof(*header));
+	string_list_init(&header->prerequisites, 1);
+	string_list_init(&header->references, 1);
+}
+
+void bundle_header_release(struct bundle_header *header)
+{
+	string_list_clear(&header->prerequisites, 1);
+	string_list_clear(&header->references, 1);
 }
 
 static int parse_capability(struct bundle_header *header, const char *capability)
@@ -112,10 +116,11 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 			status = -1;
 			break;
 		} else {
+			struct object_id *dup = oiddup(&oid);
 			if (is_prereq)
-				add_to_ref_list(&oid, "", &header->prerequisites);
+				string_list_append(&header->prerequisites, "")->util = dup;
 			else
-				add_to_ref_list(&oid, p + 1, &header->references);
+				string_list_append(&header->references, p + 1)->util = dup;
 		}
 	}
 
@@ -139,19 +144,19 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 
 int is_bundle(const char *path, int quiet)
 {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int fd = open(path, O_RDONLY);
 
 	if (fd < 0)
 		return 0;
-	memset(&header, 0, sizeof(header));
 	fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
 	if (fd >= 0)
 		close(fd);
+	bundle_header_release(&header);
 	return (fd >= 0);
 }
 
-static int list_refs(struct ref_list *r, int argc, const char **argv)
+static int list_refs(struct string_list *r, int argc, const char **argv)
 {
 	int i;
 
@@ -162,14 +167,14 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
-				if (!strcmp(r->list[i].name, argv[j]))
+				if (!strcmp(r->items[i].string, argv[j]))
 					break;
 			if (j == argc)
 				continue;
 		}
 
-		oid = &r->list[i].oid;
-		name = r->list[i].name;
+		oid = r->items[i].util;
+		name = r->items[i].string;
 		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
@@ -186,7 +191,7 @@ int verify_bundle(struct repository *r,
 	 * Do fast check, then if any prereqs are missing then go line by line
 	 * to be verbose about the errors
 	 */
-	struct ref_list *p = &header->prerequisites;
+	struct string_list *p = &header->prerequisites;
 	struct rev_info revs;
 	const char *argv[] = {NULL, "--all", NULL};
 	struct commit *commit;
@@ -198,9 +203,9 @@ int verify_bundle(struct repository *r,
 
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
@@ -225,9 +230,9 @@ int verify_bundle(struct repository *r,
 			i--;
 
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		const struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
@@ -239,15 +244,15 @@ int verify_bundle(struct repository *r,
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
 		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
 
 	if (verbose) {
-		struct ref_list *r;
+		struct string_list *r;
 
 		r = &header->references;
 		printf_ln(Q_("The bundle contains this ref:",
diff --git a/bundle.h b/bundle.h
index f9e2d1c8ef5..1927d8cd6a4 100644
--- a/bundle.h
+++ b/bundle.h
@@ -3,22 +3,23 @@
 
 #include "strvec.h"
 #include "cache.h"
-
-struct ref_list {
-	unsigned int nr, alloc;
-	struct ref_list_entry {
-		struct object_id oid;
-		char *name;
-	} *list;
-};
+#include "string-list.h"
 
 struct bundle_header {
 	unsigned version;
-	struct ref_list prerequisites;
-	struct ref_list references;
+	struct string_list prerequisites;
+	struct string_list references;
 	const struct git_hash_algo *hash_algo;
 };
 
+#define BUNDLE_HEADER_INIT \
+{ \
+	.prerequisites = STRING_LIST_INIT_DUP, \
+	.references = STRING_LIST_INIT_DUP, \
+}
+void bundle_header_init(struct bundle_header *header);
+void bundle_header_release(struct bundle_header *header);
+
 int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct repository *r, const char *path,
diff --git a/transport.c b/transport.c
index 95c1138e9ae..745ffa22474 100644
--- a/transport.c
+++ b/transport.c
@@ -147,10 +147,10 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 	transport->hash_algo = data->header.hash_algo;
 
 	for (i = 0; i < data->header.references.nr; i++) {
-		struct ref_list_entry *e = data->header.references.list + i;
-		const char *name = e->name;
+		struct string_list_item *e = data->header.references.items + i;
+		const char *name = e->string;
 		struct ref *ref = alloc_ref(name);
-		struct object_id *oid = &e->oid;
+		struct object_id *oid = e->util;
 		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
@@ -177,6 +177,7 @@ static int close_bundle(struct transport *transport)
 	struct bundle_transport_data *data = transport->data;
 	if (data->fd > 0)
 		close(data->fd);
+	bundle_header_release(&data->header);
 	free(data);
 	return 0;
 }
@@ -1083,6 +1084,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		die(_("git-over-rsync is no longer supported"));
 	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
+		bundle_header_init(&data->header);
 		transport_check_allowed("file");
 		ret->data = data;
 		ret->vtable = &bundle_vtable;
-- 
2.32.0.613.g8e17abc2eb


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-30 14:06     ` [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-06-30 17:26       ` Jeff King
  2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff King @ 2021-06-30 17:26 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Wed, Jun 30, 2021 at 04:06:14PM +0200, Ævar Arnfjörð Bjarmason wrote:

> Fix a memory leak from the prefix_filename() function introduced with
> its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
> 2017-03-20).
> 
> As noted in that commit the leak was intentional as a part of being
> sloppy about freeing resources just before we exit, I'm changing this
> because I'll be fixing other memory leaks in the bundle API (including
> the library version) in subsequent commits. It's easier to reason
> about those fixes if valgrind runs cleanly at the end without any
> leaks whatsoever.

Thanks, this looks good to me.

One thing, though...

> An earlier version of this change went out of its way to not leak
> memory on the die() codepaths here, but that was deemed too verbose to
> worry about in a built-in that's dying anyway. The only reason we'd
> need that is to appease a mode like SANITIZE=leak within the scope of
> an entire test file.

Obviously you changed this as I asked, but this final sentence makes me
think we're not on the same page with respect to die(). I don't think
any kind of mode matters here. When we call die(), whatever we have on
the stack is _not_ a leak, by LSan's or valgrind's standards. Because we
still have access to those bytes. And nor can we ever get rid of such
cases. If we ever do:

  void foo(const char *str)
  {
	char *x = xstrdup(str);
	bar(x);
	free(x);
  }

  void bar(const char *x)
  {
	if (!strcmp(x, "foo"))
		die("whatever");
  }

Then "x" will always still be allocated when we die(). We cannot free it
in bar(), where it is read-only. We cannot free it in foo() before we
call bar(), because it is needed there. But control never returns to the
free() statement.

So this code is perfectly fine and unavoidable. In the case you were
touching it was foo() that was calling die() directly, so we could work
around it with some conditionals. But from the leak-checker's
perspective the two cases are the same.

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 0/3]
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
                       ` (2 preceding siblings ...)
  2021-06-30 14:06     ` [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-30 17:34     ` Jeff King
  2021-06-30 17:45       ` Jeff King
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  4 siblings, 1 reply; 33+ messages in thread
From: Jeff King @ 2021-06-30 17:34 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Wed, Jun 30, 2021 at 04:06:13PM +0200, Ævar Arnfjörð Bjarmason wrote:

> Refactor the bundle API to use the string_list API instead of its own
> version of a similar API. See [1] for v2.
> 
> Addresses comments by Jeff King about us being too overzelous in
> trying not to leak memory (the 'die_no_repo' is gone), and other
> flow/style comments of his.
> 
> I also added a bundle_header_init() function for use in transport.c,
> and noticed a redundant call to string_list_clear() there.

Thanks, all three look good to me.

As an aside, having to have a separate bundle_header_init() and
BUNDLE_HEADER_INIT is annoying (because they both must be kept up to
date with each other), but quite common in our code base. I wonder if
writing:

  void bundle_header_init(struct bundle_header *header)
  {
	struct bundle_header blank = BUNDLE_HEADER_INIT;
	memcpy(header, &blank, sizeof(*header));
  }

would let a smart enough compiler just init "header" in place without
the extra copy (the performance of a single bundle_header almost
certainly doesn't matter, but it might for other types).

Just musing. ;)

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 0/3]
  2021-06-30 17:34     ` [PATCH v3 0/3] Jeff King
@ 2021-06-30 17:45       ` Jeff King
  2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff King @ 2021-06-30 17:45 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Wed, Jun 30, 2021 at 01:34:07PM -0400, Jeff King wrote:

> As an aside, having to have a separate bundle_header_init() and
> BUNDLE_HEADER_INIT is annoying (because they both must be kept up to
> date with each other), but quite common in our code base. I wonder if
> writing:
> 
>   void bundle_header_init(struct bundle_header *header)
>   {
> 	struct bundle_header blank = BUNDLE_HEADER_INIT;
> 	memcpy(header, &blank, sizeof(*header));
>   }
> 
> would let a smart enough compiler just init "header" in place without
> the extra copy (the performance of a single bundle_header almost
> certainly doesn't matter, but it might for other types).
> 
> Just musing. ;)

For my own curiosity, the answer is yes: https://godbolt.org/z/s54dc6ss9

With "gcc -O2" the memcpy goes away and we init "header" directly.

If we want to start using this technique widely, I don't think it should
be part of your series, though. This probably applies to quite a few
data structures, so it would make more sense to have a series which
converts several.

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 0/3]
  2021-06-30 17:45       ` Jeff King
@ 2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
  2021-07-01 10:53           ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 18:00 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Wed, Jun 30 2021, Jeff King wrote:

> On Wed, Jun 30, 2021 at 01:34:07PM -0400, Jeff King wrote:
>
>> As an aside, having to have a separate bundle_header_init() and
>> BUNDLE_HEADER_INIT is annoying (because they both must be kept up to
>> date with each other), but quite common in our code base. I wonder if
>> writing:
>> 
>>   void bundle_header_init(struct bundle_header *header)
>>   {
>> 	struct bundle_header blank = BUNDLE_HEADER_INIT;
>> 	memcpy(header, &blank, sizeof(*header));
>>   }
>> 
>> would let a smart enough compiler just init "header" in place without
>> the extra copy (the performance of a single bundle_header almost
>> certainly doesn't matter, but it might for other types).
>> 
>> Just musing. ;)
>
> For my own curiosity, the answer is yes: https://godbolt.org/z/s54dc6ss9
>
> With "gcc -O2" the memcpy goes away and we init "header" directly.
>
> If we want to start using this technique widely, I don't think it should
> be part of your series, though. This probably applies to quite a few
> data structures, so it would make more sense to have a series which
> converts several.

That's cool, yeah that would make quite a lot of code better. Thanks!

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-30 17:26       ` Jeff King
@ 2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
  2021-07-01 15:41           ` Jeff King
  0 siblings, 1 reply; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-30 18:00 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Wed, Jun 30 2021, Jeff King wrote:

> On Wed, Jun 30, 2021 at 04:06:14PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> Fix a memory leak from the prefix_filename() function introduced with
>> its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
>> 2017-03-20).
>> 
>> As noted in that commit the leak was intentional as a part of being
>> sloppy about freeing resources just before we exit, I'm changing this
>> because I'll be fixing other memory leaks in the bundle API (including
>> the library version) in subsequent commits. It's easier to reason
>> about those fixes if valgrind runs cleanly at the end without any
>> leaks whatsoever.
>
> Thanks, this looks good to me.
>
> One thing, though...
>
>> An earlier version of this change went out of its way to not leak
>> memory on the die() codepaths here, but that was deemed too verbose to
>> worry about in a built-in that's dying anyway. The only reason we'd
>> need that is to appease a mode like SANITIZE=leak within the scope of
>> an entire test file.
>
> Obviously you changed this as I asked, but this final sentence makes me
> think we're not on the same page with respect to die(). I don't think
> any kind of mode matters here. When we call die(), whatever we have on
> the stack is _not_ a leak, by LSan's or valgrind's standards. Because we
> still have access to those bytes. And nor can we ever get rid of such
> cases. If we ever do:
>
>   void foo(const char *str)
>   {
> 	char *x = xstrdup(str);
> 	bar(x);
> 	free(x);
>   }
>
>   void bar(const char *x)
>   {
> 	if (!strcmp(x, "foo"))
> 		die("whatever");
>   }
>
> Then "x" will always still be allocated when we die(). We cannot free it
> in bar(), where it is read-only. We cannot free it in foo() before we
> call bar(), because it is needed there. But control never returns to the
> free() statement.
>
> So this code is perfectly fine and unavoidable. In the case you were
> touching it was foo() that was calling die() directly, so we could work
> around it with some conditionals. But from the leak-checker's
> perspective the two cases are the same.

I've got you to blame for introducing SANITIZE=*. Now I've got these
leak checkers all mixed up :)

Yes you're right, FWIW I (re-)wrote this commit message just before
sending and should have said "a heap leak checker" instead of
"SANITIZE=leak", I really meant valgrind.

I originally ended with the "we are about to die" tracking because I was
tracing things with valgrind, which does complain about this sort of
thing. I.e.:
    
     24 bytes in 1 blocks are still reachable in loss record 8 of 21
        at 0x48356AF: malloc (vg_replace_malloc.c:298)
        by 0x4837DE7: realloc (vg_replace_malloc.c:826)
        by 0x3C06F1: xrealloc (wrapper.c:126)
        by 0x380EC9: strbuf_grow (strbuf.c:98)
        by 0x381A14: strbuf_add (strbuf.c:297)
        by 0x20ADC5: strbuf_addstr (strbuf.h:304)
        by 0x20B66D: prefix_filename (abspath.c:277)
        by 0x13CDC6: parse_options_cmd_bundle (bundle.c:55)
        by 0x13D565: cmd_bundle_unbundle (bundle.c:170)
        by 0x13D829: cmd_bundle (bundle.c:214)
        by 0x1279F4: run_builtin (git.c:461)
        by 0x127DFB: handle_builtin (git.c:714)

Re what I mentioned/asked in
https://lore.kernel.org/git/87czsv2idy.fsf@evledraar.gmail.com/ I was
experimenting with doing leak checking in the tests.

In this case we have 21 in total under --show-leak-kinds=all (and it was
20 in v2 of this series).

I've found that only including the file tho builtin is in cuts down on
it to a meaningful set of leaks. I.e. to throw out everything not
including /\bbundle\.c:/. We leak in a lot of things we call from
common-main.c, git.c, exec-cmd.c etc.

Maybe if we do end up with a test mode for this we shouldn't care about
checkers like valgrind and only cater to the SANITIZE=leak mode.

I'm still partial to the idea that we'll get the most win out of
something that we can run in the tests by default, i.e. we'll only need
to have a valgrind on the system & have someone run "make test" to run a
(limited set of) tests with this.

Whereas SANITIZE=leak is always a dev-only feature people may not have
on all the time.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-06-30 14:06     ` [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-06-30 21:18       ` Junio C Hamano
  0 siblings, 0 replies; 33+ messages in thread
From: Junio C Hamano @ 2021-06-30 21:18 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Jeff King, Johannes Schindelin, Andrei Rybak

Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> Move away from the "struct ref_list" in bundle.c in favor of the
> almost identical string-list.c API.

Nice.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 0/3]
  2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
@ 2021-07-01 10:53           ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-01 10:53 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Wed, Jun 30 2021, Ævar Arnfjörð Bjarmason wrote:

> On Wed, Jun 30 2021, Jeff King wrote:
>
>> On Wed, Jun 30, 2021 at 01:34:07PM -0400, Jeff King wrote:
>>
>>> As an aside, having to have a separate bundle_header_init() and
>>> BUNDLE_HEADER_INIT is annoying (because they both must be kept up to
>>> date with each other), but quite common in our code base. I wonder if
>>> writing:
>>> 
>>>   void bundle_header_init(struct bundle_header *header)
>>>   {
>>> 	struct bundle_header blank = BUNDLE_HEADER_INIT;
>>> 	memcpy(header, &blank, sizeof(*header));
>>>   }
>>> 
>>> would let a smart enough compiler just init "header" in place without
>>> the extra copy (the performance of a single bundle_header almost
>>> certainly doesn't matter, but it might for other types).
>>> 
>>> Just musing. ;)
>>
>> For my own curiosity, the answer is yes: https://godbolt.org/z/s54dc6ss9
>>
>> With "gcc -O2" the memcpy goes away and we init "header" directly.
>>
>> If we want to start using this technique widely, I don't think it should
>> be part of your series, though. This probably applies to quite a few
>> data structures, so it would make more sense to have a series which
>> converts several.
>
> That's cool, yeah that would make quite a lot of code better. Thanks!

Just for future reference: I submitted a small series to make use of
this suggested idiom:
https://lore.kernel.org/git/cover-0.5-00000000000-20210701T104855Z-avarab@gmail.com/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
@ 2021-07-01 15:41           ` Jeff King
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff King @ 2021-07-01 15:41 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Wed, Jun 30, 2021 at 08:00:50PM +0200, Ævar Arnfjörð Bjarmason wrote:

> > So this code is perfectly fine and unavoidable. In the case you were
> > touching it was foo() that was calling die() directly, so we could work
> > around it with some conditionals. But from the leak-checker's
> > perspective the two cases are the same.
> 
> I've got you to blame for introducing SANITIZE=*. Now I've got these
> leak checkers all mixed up :)
> 
> Yes you're right, FWIW I (re-)wrote this commit message just before
> sending and should have said "a heap leak checker" instead of
> "SANITIZE=leak", I really meant valgrind.

Ah, OK, that makes more sense.

I think the distinction here isn't "heap leak checker" (since valgrind
does still look at the whole memory space to find references), but
rather the treatment of "still reachable" items. I think LSan/ASan
ignore these entirely.

> I originally ended with the "we are about to die" tracking because I was
> tracing things with valgrind, which does complain about this sort of
> thing. I.e.:
>     
>      24 bytes in 1 blocks are still reachable in loss record 8 of 21
>         at 0x48356AF: malloc (vg_replace_malloc.c:298)
>         by 0x4837DE7: realloc (vg_replace_malloc.c:826)
>         by 0x3C06F1: xrealloc (wrapper.c:126)
>         by 0x380EC9: strbuf_grow (strbuf.c:98)
>         by 0x381A14: strbuf_add (strbuf.c:297)
>         by 0x20ADC5: strbuf_addstr (strbuf.h:304)
>         by 0x20B66D: prefix_filename (abspath.c:277)
>         by 0x13CDC6: parse_options_cmd_bundle (bundle.c:55)
>         by 0x13D565: cmd_bundle_unbundle (bundle.c:170)
>         by 0x13D829: cmd_bundle (bundle.c:214)
>         by 0x1279F4: run_builtin (git.c:461)
>         by 0x127DFB: handle_builtin (git.c:714)
> 
> Re what I mentioned/asked in
> https://lore.kernel.org/git/87czsv2idy.fsf@evledraar.gmail.com/ I was
> experimenting with doing leak checking in the tests.
> 
> In this case we have 21 in total under --show-leak-kinds=all (and it was
> 20 in v2 of this series).

IMHO these "still reachable" leaks are not interesting. They'll fall
into one of two categories:

  - allocations still on the stack when we exit() without unwinding.
    These are always uninteresting, since by definition we are exiting
    the process.

  - globals that hang on to allocations (which will still be present
    even if we exit the program by returning up through main()). Most of
    these will be items we intend to last for the program length (e.g.,
    fields in the_repository).

    It's _possible_ that some of these could be interesting. E.g., a
    program might have two phases: in the first, we rely on some
    subsystem which uses global variables to cache some data (say,
    parsed config values in remote.c or userdiff.c), and in the second
    phase we no longer use that subsystem. We could be instructing the
    subsystem to free up the memory after the first phase. But in
    practice this is awkward, because the program-level code doesn't
    know about subsystem allocations (or even which subsystems might
    have been recursively triggered).

    And while still-reachable tracking could be used to find
    opportunities like this, I think there's a better approach to
    finding these. If subsystems avoid global-variable caches and
    instead stick their allocations into context structs, then the
    memory is associated with those structs. So for example, if
    userdiff attached its storage to a diff_options, which is attached
    to a rev_info, then any "leaking" is all predicated on the rev_info
    (which the main program either cleans up, or annotates with UNLEAK).

> Maybe if we do end up with a test mode for this we shouldn't care about
> checkers like valgrind and only cater to the SANITIZE=leak mode.

I do find SANITIZE=leak to be a more useful tool in general, but I'm not
at all against using valgrind if people find it convenient. I just think
"reachable" leaks are not interesting enough to be part of what we're
searching for when we run the test suite.

> I'm still partial to the idea that we'll get the most win out of
> something that we can run in the tests by default, i.e. we'll only need
> to have a valgrind on the system & have someone run "make test" to run a
> (limited set of) tests with this.
> 
> Whereas SANITIZE=leak is always a dev-only feature people may not have
> on all the time.

That is exactly why I think "SANITIZE=leak" is better: it is easier for
people to run. valgrind is _extremely_ slow. It's also not available
everywhere; ASan/LSan aren't either, but they're pretty standard in
clang and gcc these days.

It is nice that valgrind can run on an un-instrumented binary, but I
sort of assume that anybody running "make test" will be able to build
the binary, since "make" is going to want to do that. I.e., I think
"make SANITIZE=leak test" is already doing what we want (we just need to
further annotate known-failures).

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
  2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
                       ` (3 preceding siblings ...)
  2021-06-30 17:34     ` [PATCH v3 0/3] Jeff King
@ 2021-07-02  9:57     ` Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
                         ` (3 more replies)
  4 siblings, 4 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-02  9:57 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

This re-roll of v3 changes the discussion in the 1/3 commit message,
it incorrectly referred to SANITIZE=leak when I meant valgrind.

I also changed the bundle_header_init() pattern to use the same
"memcpy() a blank" as in my parallel series to do that more generally.

v3 at:
https://lore.kernel.org/git/cover-0.3-00000000000-20210630T140339Z-avarab@gmail.com/

Ævar Arnfjörð Bjarmason (3):
  bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  bundle.c: use a temporary variable for OIDs and names
  bundle: remove "ref_list" in favor of string-list.c API

 builtin/bundle.c | 74 ++++++++++++++++++++++++++++++------------------
 bundle.c         | 64 +++++++++++++++++++++++++----------------
 bundle.h         | 21 +++++++-------
 transport.c      | 10 +++++--
 4 files changed, 104 insertions(+), 65 deletions(-)

Range-diff against v3:
1:  3d0d7a8e8b5 ! 1:  8e1d08113e5 bundle cmd: stop leaking memory from parse_options_cmd_bundle()
    @@ Commit message
         about those fixes if valgrind runs cleanly at the end without any
         leaks whatsoever.
     
    -    An earlier version of this change went out of its way to not leak
    -    memory on the die() codepaths here, but that was deemed too verbose to
    -    worry about in a built-in that's dying anyway. The only reason we'd
    -    need that is to appease a mode like SANITIZE=leak within the scope of
    -    an entire test file.
    +    An earlier version of this change[1] went out of its way to not leak
    +    memory on the die() codepaths here, but doing so will only avoid
    +    reports of potential leaks under heap-only leak trackers such as
    +    valgrind, not the SANITIZE=leak mode.
    +
    +    Avoiding those leaks as well might be useful to enable us to run
    +    cleanly under the likes of valgrind in the future. But for now the
    +    relative verbosity of the resulting code, and the fact that we don't
    +    have some valgrind or SANITIZE=leak mode as part of our CI (it's only
    +    run ad-hoc, see [2]), means we're not worrying about that for now.
    +
    +    1. https://lore.kernel.org/git/87v95vdxrc.fsf@evledraar.gmail.com/
    +    2. https://lore.kernel.org/git/87czsv2idy.fsf@evledraar.gmail.com/
     
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
2:  e47646d3a98 = 2:  5ce376682b3 bundle.c: use a temporary variable for OIDs and names
3:  f1066ee1b9a ! 3:  3e5972e4184 bundle: remove "ref_list" in favor of string-list.c API
    @@ Commit message
         Before this the add_to_ref_list() would leak memory, now e.g. "bundle
         list-heads" reports no memory leaks at all under valgrind.
     
    +    In the bundle_header_init() function we're using a clever trick to
    +    memcpy() what we'd get from the corresponding
    +    BUNDLE_HEADER_INIT. There is a concurrent series to make use of that
    +    pattern more generally, see [1].
    +
    +    1. https://lore.kernel.org/git/cover-0.5-00000000000-20210701T104855Z-avarab@gmail.com/
    +
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
      ## builtin/bundle.c ##
    @@ bundle.c: static struct {
     -	oidcpy(&list->list[list->nr].oid, oid);
     -	list->list[list->nr].name = xstrdup(name);
     -	list->nr++;
    -+	memset(header, 0, sizeof(*header));
    -+	string_list_init(&header->prerequisites, 1);
    -+	string_list_init(&header->references, 1);
    ++	struct bundle_header blank = BUNDLE_HEADER_INIT;
    ++	memcpy(header, &blank, sizeof(*header));
     +}
     +
     +void bundle_header_release(struct bundle_header *header)
-- 
2.32.0.632.g49a94b9226d


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v4 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle()
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-07-02  9:57       ` Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-02  9:57 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Fix a memory leak from the prefix_filename() function introduced with
its use in 3b754eedd5 (bundle: use prefix_filename with bundle path,
2017-03-20).

As noted in that commit the leak was intentional as a part of being
sloppy about freeing resources just before we exit, I'm changing this
because I'll be fixing other memory leaks in the bundle API (including
the library version) in subsequent commits. It's easier to reason
about those fixes if valgrind runs cleanly at the end without any
leaks whatsoever.

An earlier version of this change[1] went out of its way to not leak
memory on the die() codepaths here, but doing so will only avoid
reports of potential leaks under heap-only leak trackers such as
valgrind, not the SANITIZE=leak mode.

Avoiding those leaks as well might be useful to enable us to run
cleanly under the likes of valgrind in the future. But for now the
relative verbosity of the resulting code, and the fact that we don't
have some valgrind or SANITIZE=leak mode as part of our CI (it's only
run ad-hoc, see [2]), means we're not worrying about that for now.

1. https://lore.kernel.org/git/87v95vdxrc.fsf@evledraar.gmail.com/
2. https://lore.kernel.org/git/87czsv2idy.fsf@evledraar.gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 62 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index ea6948110b0..15e2bd61965 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -46,7 +46,7 @@ static int parse_options_cmd_bundle(int argc,
 		const char* prefix,
 		const char * const usagestr[],
 		const struct option options[],
-		const char **bundle_file) {
+		char **bundle_file) {
 	int newargc;
 	newargc = parse_options(argc, argv, NULL, options, usagestr,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -61,7 +61,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	int progress = isatty(STDERR_FILENO);
 	struct strvec pack_opts;
 	int version = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_SET_INT('q', "quiet", &progress,
 			    N_("do not show progress meter"), 0),
@@ -76,7 +76,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 			    N_("specify bundle format version")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_create_usage, options, &bundle_file);
@@ -94,75 +94,95 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 
 	if (!startup_info->have_repository)
 		die(_("Need a repository to create a bundle."));
-	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	ret = !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
 	int quiet = 0;
-
+	int ret;
 	struct option options[] = {
 		OPT_BOOL('q', "quiet", &quiet,
 			    N_("do not show bundle details")),
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	if (verify_bundle(the_repository, &header, !quiet))
-		return 1;
+	if (verify_bundle(the_repository, &header, !quiet)) {
+		ret = 1;
+		goto cleanup;
+	}
+
 	fprintf(stderr, _("%s is okay\n"), bundle_file);
-	return 0;
+	ret = 0;
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	close(bundle_fd);
-	return !!list_bundle_refs(&header, argc, argv);
+	ret = !!list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
 	struct bundle_header header;
 	int bundle_fd = -1;
-
+	int ret;
 	struct option options[] = {
 		OPT_END()
 	};
-	const char* bundle_file;
+	char *bundle_file;
 
 	argc = parse_options_cmd_bundle(argc, argv, prefix,
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
 	memset(&header, 0, sizeof(header));
-	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0)
-		return 1;
+	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
+		ret = 1;
+		goto cleanup;
+	}
 	if (!startup_info->have_repository)
 		die(_("Need a repository to unbundle."));
-	return !!unbundle(the_repository, &header, bundle_fd, 0) ||
+	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+cleanup:
+	free(bundle_file);
+	return ret;
 }
 
 int cmd_bundle(int argc, const char **argv, const char *prefix)
-- 
2.32.0.632.g49a94b9226d


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v4 2/3] bundle.c: use a temporary variable for OIDs and names
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
@ 2021-07-02  9:57       ` Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-07-03 10:52       ` [PATCH v4 0/3] bundle.c: " Jeff King
  3 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-02  9:57 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

In preparation for moving away from accessing the OID and name via the
"oid" and "name" slots in a subsequent commit, change the code that
accesses it to use named variables. This makes the subsequent change
smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 bundle.c    | 26 ++++++++++++++++++--------
 transport.c |  6 ++++--
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/bundle.c b/bundle.c
index 693d6195514..7210e5e7105 100644
--- a/bundle.c
+++ b/bundle.c
@@ -156,6 +156,9 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 	int i;
 
 	for (i = 0; i < r->nr; i++) {
+		struct object_id *oid;
+		const char *name;
+
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
@@ -164,8 +167,10 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 			if (j == argc)
 				continue;
 		}
-		printf("%s %s\n", oid_to_hex(&r->list[i].oid),
-				r->list[i].name);
+
+		oid = &r->list[i].oid;
+		name = r->list[i].name;
+		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
 }
@@ -194,15 +199,17 @@ int verify_bundle(struct repository *r,
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
-			add_pending_object(&revs, o, e->name);
+			add_pending_object(&revs, o, name);
 			continue;
 		}
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 	if (revs.pending.nr != p->nr)
 		return ret;
@@ -219,19 +226,22 @@ int verify_bundle(struct repository *r,
 
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		struct object *o = parse_object(r, &e->oid);
+		const char *name = e->name;
+		struct object_id *oid = &e->oid;
+		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
 		if (++ret == 1)
 			error("%s", message);
-		error("%s %s", oid_to_hex(&e->oid), e->name);
+		error("%s %s", oid_to_hex(oid), name);
 	}
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
 		struct ref_list_entry *e = p->list + i;
-		commit = lookup_commit_reference_gently(r, &e->oid, 1);
+		struct object_id *oid = &e->oid;
+		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
diff --git a/transport.c b/transport.c
index 50f5830eb6b..95c1138e9ae 100644
--- a/transport.c
+++ b/transport.c
@@ -148,8 +148,10 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 
 	for (i = 0; i < data->header.references.nr; i++) {
 		struct ref_list_entry *e = data->header.references.list + i;
-		struct ref *ref = alloc_ref(e->name);
-		oidcpy(&ref->old_oid, &e->oid);
+		const char *name = e->name;
+		struct ref *ref = alloc_ref(name);
+		struct object_id *oid = &e->oid;
+		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
 	}
-- 
2.32.0.632.g49a94b9226d


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v4 3/3] bundle: remove "ref_list" in favor of string-list.c API
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
  2021-07-02  9:57       ` [PATCH v4 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
@ 2021-07-02  9:57       ` Ævar Arnfjörð Bjarmason
  2021-07-03 10:52       ` [PATCH v4 0/3] bundle.c: " Jeff King
  3 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-02  9:57 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, Andrei Rybak,
	Ævar Arnfjörð Bjarmason

Move away from the "struct ref_list" in bundle.c in favor of the
almost identical string-list.c API.

That API fits this use-case perfectly, but did not exist in its
current form when this code was added in 2e0afafebd (Add git-bundle:
move objects and references by archive, 2007-02-22), with hindsight we
could have used the path-list API, which later got renamed to
string-list. See 8fd2cb4069 (Extract helper bits from
c-merge-recursive work, 2006-07-25)

We need to change "name" to "string" and "oid" to "util" to make this
conversion, but other than that the APIs are pretty much identical for
what bundle.c made use of.

Let's also replace the memset(..,0,...) pattern with a more idiomatic
"INIT" macro, and finally add a *_release() function so to free the
allocated memory.

Before this the add_to_ref_list() would leak memory, now e.g. "bundle
list-heads" reports no memory leaks at all under valgrind.

In the bundle_header_init() function we're using a clever trick to
memcpy() what we'd get from the corresponding
BUNDLE_HEADER_INIT. There is a concurrent series to make use of that
pattern more generally, see [1].

1. https://lore.kernel.org/git/cover-0.5-00000000000-20210701T104855Z-avarab@gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/bundle.c | 12 +++++------
 bundle.c         | 52 ++++++++++++++++++++++++++----------------------
 bundle.h         | 21 +++++++++----------
 transport.c      |  8 +++++---
 4 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 15e2bd61965..053a51bea1b 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -100,7 +100,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int quiet = 0;
 	int ret;
@@ -115,7 +115,6 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 			builtin_bundle_verify_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -130,11 +129,12 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	ret = 0;
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -146,7 +146,6 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 			builtin_bundle_list_heads_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -155,11 +154,12 @@ static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix
 	ret = !!list_bundle_refs(&header, argc, argv);
 cleanup:
 	free(bundle_file);
+	bundle_header_release(&header);
 	return ret;
 }
 
 static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix) {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int bundle_fd = -1;
 	int ret;
 	struct option options[] = {
@@ -171,7 +171,6 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 			builtin_bundle_unbundle_usage, options, &bundle_file);
 	/* bundle internals use argv[1] as further parameters */
 
-	memset(&header, 0, sizeof(header));
 	if ((bundle_fd = read_bundle_header(bundle_file, &header)) < 0) {
 		ret = 1;
 		goto cleanup;
@@ -180,6 +179,7 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 		die(_("Need a repository to unbundle."));
 	ret = !!unbundle(the_repository, &header, bundle_fd, 0) ||
 		list_bundle_refs(&header, argc, argv);
+	bundle_header_release(&header);
 cleanup:
 	free(bundle_file);
 	return ret;
diff --git a/bundle.c b/bundle.c
index 7210e5e7105..ab63f402261 100644
--- a/bundle.c
+++ b/bundle.c
@@ -23,13 +23,16 @@ static struct {
 	{ 3, v3_bundle_signature },
 };
 
-static void add_to_ref_list(const struct object_id *oid, const char *name,
-		struct ref_list *list)
+void bundle_header_init(struct bundle_header *header)
 {
-	ALLOC_GROW(list->list, list->nr + 1, list->alloc);
-	oidcpy(&list->list[list->nr].oid, oid);
-	list->list[list->nr].name = xstrdup(name);
-	list->nr++;
+	struct bundle_header blank = BUNDLE_HEADER_INIT;
+	memcpy(header, &blank, sizeof(*header));
+}
+
+void bundle_header_release(struct bundle_header *header)
+{
+	string_list_clear(&header->prerequisites, 1);
+	string_list_clear(&header->references, 1);
 }
 
 static int parse_capability(struct bundle_header *header, const char *capability)
@@ -112,10 +115,11 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
 			status = -1;
 			break;
 		} else {
+			struct object_id *dup = oiddup(&oid);
 			if (is_prereq)
-				add_to_ref_list(&oid, "", &header->prerequisites);
+				string_list_append(&header->prerequisites, "")->util = dup;
 			else
-				add_to_ref_list(&oid, p + 1, &header->references);
+				string_list_append(&header->references, p + 1)->util = dup;
 		}
 	}
 
@@ -139,19 +143,19 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 
 int is_bundle(const char *path, int quiet)
 {
-	struct bundle_header header;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
 	int fd = open(path, O_RDONLY);
 
 	if (fd < 0)
 		return 0;
-	memset(&header, 0, sizeof(header));
 	fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
 	if (fd >= 0)
 		close(fd);
+	bundle_header_release(&header);
 	return (fd >= 0);
 }
 
-static int list_refs(struct ref_list *r, int argc, const char **argv)
+static int list_refs(struct string_list *r, int argc, const char **argv)
 {
 	int i;
 
@@ -162,14 +166,14 @@ static int list_refs(struct ref_list *r, int argc, const char **argv)
 		if (argc > 1) {
 			int j;
 			for (j = 1; j < argc; j++)
-				if (!strcmp(r->list[i].name, argv[j]))
+				if (!strcmp(r->items[i].string, argv[j]))
 					break;
 			if (j == argc)
 				continue;
 		}
 
-		oid = &r->list[i].oid;
-		name = r->list[i].name;
+		oid = r->items[i].util;
+		name = r->items[i].string;
 		printf("%s %s\n", oid_to_hex(oid), name);
 	}
 	return 0;
@@ -186,7 +190,7 @@ int verify_bundle(struct repository *r,
 	 * Do fast check, then if any prereqs are missing then go line by line
 	 * to be verbose about the errors
 	 */
-	struct ref_list *p = &header->prerequisites;
+	struct string_list *p = &header->prerequisites;
 	struct rev_info revs;
 	const char *argv[] = {NULL, "--all", NULL};
 	struct commit *commit;
@@ -198,9 +202,9 @@ int verify_bundle(struct repository *r,
 
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		if (o) {
 			o->flags |= PREREQ_MARK;
@@ -225,9 +229,9 @@ int verify_bundle(struct repository *r,
 			i--;
 
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		const char *name = e->name;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		const char *name = e->string;
+		const struct object_id *oid = e->util;
 		struct object *o = parse_object(r, oid);
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
@@ -239,15 +243,15 @@ int verify_bundle(struct repository *r,
 
 	/* Clean up objects used, as they will be reused. */
 	for (i = 0; i < p->nr; i++) {
-		struct ref_list_entry *e = p->list + i;
-		struct object_id *oid = &e->oid;
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
 		commit = lookup_commit_reference_gently(r, oid, 1);
 		if (commit)
 			clear_commit_marks(commit, ALL_REV_FLAGS);
 	}
 
 	if (verbose) {
-		struct ref_list *r;
+		struct string_list *r;
 
 		r = &header->references;
 		printf_ln(Q_("The bundle contains this ref:",
diff --git a/bundle.h b/bundle.h
index f9e2d1c8ef5..1927d8cd6a4 100644
--- a/bundle.h
+++ b/bundle.h
@@ -3,22 +3,23 @@
 
 #include "strvec.h"
 #include "cache.h"
-
-struct ref_list {
-	unsigned int nr, alloc;
-	struct ref_list_entry {
-		struct object_id oid;
-		char *name;
-	} *list;
-};
+#include "string-list.h"
 
 struct bundle_header {
 	unsigned version;
-	struct ref_list prerequisites;
-	struct ref_list references;
+	struct string_list prerequisites;
+	struct string_list references;
 	const struct git_hash_algo *hash_algo;
 };
 
+#define BUNDLE_HEADER_INIT \
+{ \
+	.prerequisites = STRING_LIST_INIT_DUP, \
+	.references = STRING_LIST_INIT_DUP, \
+}
+void bundle_header_init(struct bundle_header *header);
+void bundle_header_release(struct bundle_header *header);
+
 int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct repository *r, const char *path,
diff --git a/transport.c b/transport.c
index 95c1138e9ae..745ffa22474 100644
--- a/transport.c
+++ b/transport.c
@@ -147,10 +147,10 @@ static struct ref *get_refs_from_bundle(struct transport *transport,
 	transport->hash_algo = data->header.hash_algo;
 
 	for (i = 0; i < data->header.references.nr; i++) {
-		struct ref_list_entry *e = data->header.references.list + i;
-		const char *name = e->name;
+		struct string_list_item *e = data->header.references.items + i;
+		const char *name = e->string;
 		struct ref *ref = alloc_ref(name);
-		struct object_id *oid = &e->oid;
+		struct object_id *oid = e->util;
 		oidcpy(&ref->old_oid, oid);
 		ref->next = result;
 		result = ref;
@@ -177,6 +177,7 @@ static int close_bundle(struct transport *transport)
 	struct bundle_transport_data *data = transport->data;
 	if (data->fd > 0)
 		close(data->fd);
+	bundle_header_release(&data->header);
 	free(data);
 	return 0;
 }
@@ -1083,6 +1084,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		die(_("git-over-rsync is no longer supported"));
 	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
+		bundle_header_init(&data->header);
 		transport_check_allowed("file");
 		ret->data = data;
 		ret->vtable = &bundle_vtable;
-- 
2.32.0.632.g49a94b9226d


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
  2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
                         ` (2 preceding siblings ...)
  2021-07-02  9:57       ` [PATCH v4 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
@ 2021-07-03 10:52       ` Jeff King
  2021-07-03 11:28         ` Ævar Arnfjörð Bjarmason
  3 siblings, 1 reply; 33+ messages in thread
From: Jeff King @ 2021-07-03 10:52 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak

On Fri, Jul 02, 2021 at 11:57:29AM +0200, Ævar Arnfjörð Bjarmason wrote:

> This re-roll of v3 changes the discussion in the 1/3 commit message,
> it incorrectly referred to SANITIZE=leak when I meant valgrind.
> 
> I also changed the bundle_header_init() pattern to use the same
> "memcpy() a blank" as in my parallel series to do that more generally.

Thanks, this looks good to me.

I'd probably word the discussion about die() a bit differently, but you've
already seen my expositions on leak-checking, and it's all tangent here.
So let's move forward with this, and we can let leak-checking
philosophies iron themselves out as we fix more cases. :)

-Peff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API
  2021-07-03 10:52       ` [PATCH v4 0/3] bundle.c: " Jeff King
@ 2021-07-03 11:28         ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 33+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-03 11:28 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Johannes Schindelin, Andrei Rybak


On Sat, Jul 03 2021, Jeff King wrote:

> On Fri, Jul 02, 2021 at 11:57:29AM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> This re-roll of v3 changes the discussion in the 1/3 commit message,
>> it incorrectly referred to SANITIZE=leak when I meant valgrind.
>> 
>> I also changed the bundle_header_init() pattern to use the same
>> "memcpy() a blank" as in my parallel series to do that more generally.
>
> Thanks, this looks good to me.
>
> I'd probably word the discussion about die() a bit differently, but you've
> already seen my expositions on leak-checking, and it's all tangent here.
> So let's move forward with this, and we can let leak-checking
> philosophies iron themselves out as we fix more cases. :)

Thanks for the detailed review over multiple rounds.

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2021-07-03 11:28 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-17 11:21 [PATCH 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-06-17 11:21 ` [PATCH 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
2021-06-17 11:21 ` [PATCH 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
2021-06-17 11:21 ` [PATCH 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-06-19  2:12   ` Andrei Rybak
2021-06-21 15:16 ` [PATCH v2 0/3] bundle.c: " Ævar Arnfjörð Bjarmason
2021-06-21 15:16   ` [PATCH v2 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
2021-06-24 16:54     ` Jeff King
2021-06-24 19:28       ` Ævar Arnfjörð Bjarmason
2021-06-21 15:16   ` [PATCH v2 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
2021-06-21 15:16   ` [PATCH v2 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-06-24 17:11     ` Jeff King
2021-06-24 19:31       ` Ævar Arnfjörð Bjarmason
2021-06-29  1:02       ` Junio C Hamano
2021-06-24 17:14   ` [PATCH v2 0/3] bundle.c: " Jeff King
2021-06-30 14:06   ` [PATCH v3 0/3] Ævar Arnfjörð Bjarmason
2021-06-30 14:06     ` [PATCH v3 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
2021-06-30 17:26       ` Jeff King
2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
2021-07-01 15:41           ` Jeff King
2021-06-30 14:06     ` [PATCH v3 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
2021-06-30 14:06     ` [PATCH v3 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-06-30 21:18       ` Junio C Hamano
2021-06-30 17:34     ` [PATCH v3 0/3] Jeff King
2021-06-30 17:45       ` Jeff King
2021-06-30 18:00         ` Ævar Arnfjörð Bjarmason
2021-07-01 10:53           ` Ævar Arnfjörð Bjarmason
2021-07-02  9:57     ` [PATCH v4 0/3] bundle.c: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-07-02  9:57       ` [PATCH v4 1/3] bundle cmd: stop leaking memory from parse_options_cmd_bundle() Ævar Arnfjörð Bjarmason
2021-07-02  9:57       ` [PATCH v4 2/3] bundle.c: use a temporary variable for OIDs and names Ævar Arnfjörð Bjarmason
2021-07-02  9:57       ` [PATCH v4 3/3] bundle: remove "ref_list" in favor of string-list.c API Ævar Arnfjörð Bjarmason
2021-07-03 10:52       ` [PATCH v4 0/3] bundle.c: " Jeff King
2021-07-03 11:28         ` Ævar Arnfjörð Bjarmason

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).