git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: "Eric Sunshine" <sunshine@sunshineco.com>,
	"Junio C Hamano" <gitster@pobox.com>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"SZEDER Gábor" <szeder.dev@gmail.com>
Subject: [PATCH v2 04/11] fast-export: tighten anonymize_mem() interface to handle only strings
Date: Thu, 25 Jun 2020 15:48:21 -0400	[thread overview]
Message-ID: <20200625194821.GD4029374@coredump.intra.peff.net> (raw)
In-Reply-To: <20200625194802.GA4028913@coredump.intra.peff.net>

While the anonymize_mem() interface _can_ store arbitrary byte
sequences, none of the callers uses this feature (as of the previous
commit). We'd like to keep it that way, as we'll be exposing the
string-like nature of the anonymization routines to the user. So let's
tighten up the interface a bit:

  - don't treat "len" as an out-parameter from anonymize_mem(); this
    ensures callers treat the pointer result as a NUL-terminated string

  - likewise, don't treat "len" as an out-parameter from generator
    functions

  - swap out "void *" for "char *" as appropriate to signal that we
    don't handle arbitrary memory

  - rename the function to anonymize_str()

This will also open up some optimization opportunities in a future
patch.

Note that we can't drop the "len" parameter entirely. Some callers do
pass in partial strings (e.g., "foo/bar", len=3) to avoid copying, and
we need to handle those still.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/fast-export.c | 53 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 4a3a4c933e..d8ea067630 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -145,31 +145,30 @@ static int anonymized_entry_cmp(const void *unused_cmp_data,
  * the same anonymized string with another. The actual generation
  * is farmed out to the generate function.
  */
-static const void *anonymize_mem(struct hashmap *map,
-				 void *(*generate)(const void *, size_t *),
-				 const void *orig, size_t *len)
+static const char *anonymize_str(struct hashmap *map,
+				 char *(*generate)(const char *, size_t),
+				 const char *orig, size_t len)
 {
 	struct anonymized_entry key, *ret;
 
 	if (!map->cmpfn)
 		hashmap_init(map, anonymized_entry_cmp, NULL, 0);
 
-	hashmap_entry_init(&key.hash, memhash(orig, *len));
+	hashmap_entry_init(&key.hash, memhash(orig, len));
 	key.orig = orig;
-	key.orig_len = *len;
+	key.orig_len = len;
 	ret = hashmap_get_entry(map, &key, hash, NULL);
 
 	if (!ret) {
 		ret = xmalloc(sizeof(*ret));
 		hashmap_entry_init(&ret->hash, key.hash.hash);
-		ret->orig = xmemdupz(orig, *len);
-		ret->orig_len = *len;
+		ret->orig = xmemdupz(orig, len);
+		ret->orig_len = len;
 		ret->anon = generate(orig, len);
-		ret->anon_len = *len;
+		ret->anon_len = strlen(ret->anon);
 		hashmap_put(map, &ret->hash);
 	}
 
-	*len = ret->anon_len;
 	return ret->anon;
 }
 
@@ -181,13 +180,13 @@ static const void *anonymize_mem(struct hashmap *map,
  */
 static void anonymize_path(struct strbuf *out, const char *path,
 			   struct hashmap *map,
-			   void *(*generate)(const void *, size_t *))
+			   char *(*generate)(const char *, size_t))
 {
 	while (*path) {
 		const char *end_of_component = strchrnul(path, '/');
 		size_t len = end_of_component - path;
-		const char *c = anonymize_mem(map, generate, path, &len);
-		strbuf_add(out, c, len);
+		const char *c = anonymize_str(map, generate, path, len);
+		strbuf_addstr(out, c);
 		path = end_of_component;
 		if (*path)
 			strbuf_addch(out, *path++);
@@ -361,12 +360,12 @@ static void print_path_1(const char *path)
 		printf("%s", path);
 }
 
-static void *anonymize_path_component(const void *path, size_t *len)
+static char *anonymize_path_component(const char *path, size_t len)
 {
 	static int counter;
 	struct strbuf out = STRBUF_INIT;
 	strbuf_addf(&out, "path%d", counter++);
-	return strbuf_detach(&out, len);
+	return strbuf_detach(&out, NULL);
 }
 
 static void print_path(const char *path)
@@ -383,7 +382,7 @@ static void print_path(const char *path)
 	}
 }
 
-static void *generate_fake_oid(const void *old, size_t *len)
+static char *generate_fake_oid(const char *old, size_t len)
 {
 	static uint32_t counter = 1; /* avoid null oid */
 	const unsigned hashsz = the_hash_algo->rawsz;
@@ -399,7 +398,7 @@ static const char *anonymize_oid(const char *oid_hex)
 {
 	static struct hashmap objs;
 	size_t len = strlen(oid_hex);
-	return anonymize_mem(&objs, generate_fake_oid, oid_hex, &len);
+	return anonymize_str(&objs, generate_fake_oid, oid_hex, len);
 }
 
 static void show_filemodify(struct diff_queue_struct *q,
@@ -496,12 +495,12 @@ static const char *find_encoding(const char *begin, const char *end)
 	return bol;
 }
 
-static void *anonymize_ref_component(const void *old, size_t *len)
+static char *anonymize_ref_component(const char *old, size_t len)
 {
 	static int counter;
 	struct strbuf out = STRBUF_INIT;
 	strbuf_addf(&out, "ref%d", counter++);
-	return strbuf_detach(&out, len);
+	return strbuf_detach(&out, NULL);
 }
 
 static const char *anonymize_refname(const char *refname)
@@ -550,13 +549,13 @@ static char *anonymize_commit_message(const char *old)
 }
 
 static struct hashmap idents;
-static void *anonymize_ident(const void *old, size_t *len)
+static char *anonymize_ident(const char *old, size_t len)
 {
 	static int counter;
 	struct strbuf out = STRBUF_INIT;
 	strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
 	counter++;
-	return strbuf_detach(&out, len);
+	return strbuf_detach(&out, NULL);
 }
 
 /*
@@ -591,9 +590,9 @@ static void anonymize_ident_line(const char **beg, const char **end)
 		size_t len;
 
 		len = split.mail_end - split.name_begin;
-		ident = anonymize_mem(&idents, anonymize_ident,
-				      split.name_begin, &len);
-		strbuf_add(out, ident, len);
+		ident = anonymize_str(&idents, anonymize_ident,
+				      split.name_begin, len);
+		strbuf_addstr(out, ident);
 		strbuf_addch(out, ' ');
 		strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
 	} else {
@@ -733,12 +732,12 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	show_progress();
 }
 
-static void *anonymize_tag(const void *old, size_t *len)
+static char *anonymize_tag(const char *old, size_t len)
 {
 	static int counter;
 	struct strbuf out = STRBUF_INIT;
 	strbuf_addf(&out, "tag message %d", counter++);
-	return strbuf_detach(&out, len);
+	return strbuf_detach(&out, NULL);
 }
 
 static void handle_tail(struct object_array *commits, struct rev_info *revs,
@@ -808,8 +807,8 @@ static void handle_tag(const char *name, struct tag *tag)
 		name = anonymize_refname(name);
 		if (message) {
 			static struct hashmap tags;
-			message = anonymize_mem(&tags, anonymize_tag,
-						message, &message_size);
+			message = anonymize_str(&tags, anonymize_tag,
+						message, message_size);
 		}
 	}
 
-- 
2.27.0.593.gb3082a2aaf


  parent reply	other threads:[~2020-06-25 19:48 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-19 13:23 [PATCH 0/3] fast-export: allow dumping anonymization mappings Jeff King
2020-06-19 13:25 ` [PATCH 1/3] fast-export: allow dumping the refname mapping Jeff King
2020-06-19 15:51   ` Eric Sunshine
2020-06-19 16:01     ` Jeff King
2020-06-19 16:18       ` Eric Sunshine
2020-06-19 17:45         ` Jeff King
2020-06-19 18:00           ` Eric Sunshine
2020-06-22 21:30             ` Jeff King
2020-06-19 19:20         ` Junio C Hamano
2020-06-22 21:32           ` Jeff King
2020-06-19 13:26 ` [PATCH 2/3] fast-export: anonymize "master" refname Jeff King
2020-06-19 13:29 ` [PATCH 3/3] fast-export: allow dumping the path mapping Jeff King
2020-06-19 16:00   ` Eric Sunshine
2020-06-19 19:24   ` Junio C Hamano
2020-06-22 21:38     ` Jeff King
2020-06-19 13:51 ` [PATCH 0/3] fast-export: allow dumping anonymization mappings Johannes Schindelin
2020-06-22 16:35   ` Junio C Hamano
2020-06-22 21:47 ` [PATCH v2 0/4] " Jeff King
2020-06-22 21:47   ` [PATCH v2 1/4] fast-export: allow dumping the refname mapping Jeff King
2020-06-22 21:48   ` [PATCH v2 2/4] fast-export: anonymize "master" refname Jeff King
2020-06-22 21:48   ` [PATCH v2 3/4] fast-export: refactor path printing to not rely on stdout Jeff King
2020-06-22 21:48   ` [PATCH v2 4/4] fast-export: allow dumping the path mapping Jeff King
2020-06-23 15:24   ` [alternative 0/10] fast-export: allow seeding the anonymized mapping Jeff King
2020-06-23 15:24     ` [PATCH 01/10] t9351: derive anonymized tree checks from original repo Jeff King
2020-06-23 15:24     ` [PATCH 02/10] fast-export: use xmemdupz() for anonymizing oids Jeff King
2020-06-23 15:24     ` [PATCH 03/10] fast-export: store anonymized oids as hex strings Jeff King
2020-06-24 11:43       ` SZEDER Gábor
2020-06-24 15:54         ` Jeff King
2020-06-25 15:49           ` Jeff King
2020-06-25 20:45             ` SZEDER Gábor
2020-06-25 21:15               ` Jeff King
2020-06-29 13:17                 ` Johannes Schindelin
2020-06-30 19:35                   ` Jeff King
2020-06-23 15:24     ` [PATCH 04/10] fast-export: tighten anonymize_mem() interface to handle only strings Jeff King
2020-06-23 15:24     ` [PATCH 05/10] fast-export: stop storing lengths in anonymized hashmaps Jeff King
2020-06-23 15:24     ` [PATCH 06/10] fast-export: use a flex array to store anonymized entries Jeff King
2020-06-23 15:25     ` [PATCH 07/10] fast-export: move global "idents" anonymize hashmap into function Jeff King
2020-06-23 15:25     ` [PATCH 08/10] fast-export: add a "data" callback parameter to anonymize_str() Jeff King
2020-06-24 19:58       ` Junio C Hamano
2020-06-23 15:25     ` [PATCH 09/10] fast-export: allow seeding the anonymized mapping Jeff King
2020-06-23 17:16       ` Eric Sunshine
2020-06-23 18:30         ` Jeff King
2020-06-23 20:30           ` Eric Sunshine
2020-06-24 15:47             ` Jeff King
2020-06-23 18:11       ` Eric Sunshine
2020-06-23 18:35         ` Jeff King
2020-06-23 20:35           ` Eric Sunshine
2020-06-24 15:48             ` Jeff King
2020-06-23 15:25     ` [PATCH 10/10] fast-export: anonymize "master" refname Jeff King
2020-06-23 19:34     ` [alternative 0/10] fast-export: allow seeding the anonymized mapping Junio C Hamano
2020-06-23 19:44       ` Jeff King
2020-06-25 19:48     ` [PATCH v2 0/11] " Jeff King
2020-06-25 19:48       ` [PATCH v2 01/11] t9351: derive anonymized tree checks from original repo Jeff King
2020-06-25 19:48       ` [PATCH v2 02/11] fast-export: use xmemdupz() for anonymizing oids Jeff King
2020-06-25 19:48       ` [PATCH v2 03/11] fast-export: store anonymized oids as hex strings Jeff King
2020-06-25 19:48       ` Jeff King [this message]
2020-06-25 19:48       ` [PATCH v2 05/11] fast-export: stop storing lengths in anonymized hashmaps Jeff King
2020-06-25 19:48       ` [PATCH v2 06/11] fast-export: use a flex array to store anonymized entries Jeff King
2020-06-25 19:48       ` [PATCH v2 07/11] fast-export: move global "idents" anonymize hashmap into function Jeff King
2020-06-25 19:48       ` [PATCH v2 08/11] fast-export: add a "data" callback parameter to anonymize_str() Jeff King
2020-06-25 19:48       ` [PATCH v2 09/11] fast-export: allow seeding the anonymized mapping Jeff King
2020-06-25 19:48       ` [PATCH v2 10/11] fast-export: anonymize "master" refname Jeff King
2020-06-25 19:48       ` [PATCH v2 11/11] fast-export: use local array to store anonymized oid Jeff King
2020-06-25 21:22       ` [PATCH v2 0/11] fast-export: allow seeding the anonymized mapping Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200625194821.GD4029374@coredump.intra.peff.net \
    --to=peff@peff.net \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sunshine@sunshineco.com \
    --cc=szeder.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).