git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [PATCH 0/3] Test oidmap
@ 2019-06-09  4:49 Christian Couder
  2019-06-09  4:49 ` [PATCH 1/3] t/helper: add test-oidmap.c Christian Couder
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Christian Couder @ 2019-06-09  4:49 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder,
	Christian Couder

From: Christian Couder <christian.couder@gmail.com>

Unlike hashmap that has t/helper/test-hashmap.c and t/t0011-hashmap.sh
oidmap has no specific test. The goal of this small patch series is to
change that and also improve oidmap a bit while at it.

Christian Couder (3):
  t/helper: add test-oidmap.c
  t: add t0016-oidmap.sh
  oidmap: use sha1hash() instead of static hash() function

 Makefile               |   1 +
 oidmap.c               |  13 +---
 t/helper/test-oidmap.c | 134 +++++++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c   |   1 +
 t/helper/test-tool.h   |   1 +
 t/t0016-oidmap.sh      | 100 ++++++++++++++++++++++++++++++
 6 files changed, 240 insertions(+), 10 deletions(-)
 create mode 100644 t/helper/test-oidmap.c
 create mode 100755 t/t0016-oidmap.sh

-- 
2.22.0.14.g9023ccb50a


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] t/helper: add test-oidmap.c
  2019-06-09  4:49 [PATCH 0/3] Test oidmap Christian Couder
@ 2019-06-09  4:49 ` Christian Couder
  2019-06-09  4:49 ` [PATCH 2/3] t: add t0016-oidmap.sh Christian Couder
  2019-06-09  4:49 ` [PATCH 3/3] oidmap: use sha1hash() instead of static hash() function Christian Couder
  2 siblings, 0 replies; 14+ messages in thread
From: Christian Couder @ 2019-06-09  4:49 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

This new helper is very similar to "test-hashmap.c" and will help
test how `struct oidmap` from oidmap.{c,h} can be used.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Makefile               |   1 +
 t/helper/test-oidmap.c | 134 +++++++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c   |   1 +
 t/helper/test-tool.h   |   1 +
 4 files changed, 137 insertions(+)
 create mode 100644 t/helper/test-oidmap.c

diff --git a/Makefile b/Makefile
index 8a7e235352..5efc7700ed 100644
--- a/Makefile
+++ b/Makefile
@@ -727,6 +727,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
 TEST_BUILTINS_OBJS += test-match-trees.o
 TEST_BUILTINS_OBJS += test-mergesort.o
 TEST_BUILTINS_OBJS += test-mktemp.o
+TEST_BUILTINS_OBJS += test-oidmap.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
 TEST_BUILTINS_OBJS += test-parse-options.o
 TEST_BUILTINS_OBJS += test-path-utils.o
diff --git a/t/helper/test-oidmap.c b/t/helper/test-oidmap.c
new file mode 100644
index 0000000000..0ba122a264
--- /dev/null
+++ b/t/helper/test-oidmap.c
@@ -0,0 +1,134 @@
+#include "test-tool.h"
+#include "cache.h"
+#include "oidmap.h"
+#include "strbuf.h"
+
+/* key is an oid and value is a name (could be a refname for example) */
+struct test_entry {
+	struct oidmap_entry entry;
+	char name[FLEX_ARRAY];
+};
+
+#define DELIM " \t\r\n"
+
+/*
+ * Read stdin line by line and print result of commands to stdout:
+ *
+ * hash oidkey -> sha1hash(oidkey)
+ * put oidkey namevalue -> NULL / old namevalue
+ * get oidkey -> NULL / namevalue
+ * remove oidkey -> NULL / old namevalue
+ * iterate -> oidkey1 namevalue1\noidkey2 namevalue2\n...
+ *
+ */
+int cmd__oidmap(int argc, const char **argv)
+{
+	struct strbuf line = STRBUF_INIT;
+	struct oidmap map = OIDMAP_INIT;
+
+	setup_git_directory();
+
+	/* init oidmap */
+	oidmap_init(&map, 0);
+
+	/* process commands from stdin */
+	while (strbuf_getline(&line, stdin) != EOF) {
+		char *cmd, *p1 = NULL, *p2 = NULL;
+		struct test_entry *entry;
+		struct object_id oid;
+
+		/* break line into command and up to two parameters */
+		cmd = strtok(line.buf, DELIM);
+		/* ignore empty lines */
+		if (!cmd || *cmd == '#')
+			continue;
+
+		p1 = strtok(NULL, DELIM);
+		if (p1)
+			p2 = strtok(NULL, DELIM);
+
+		if (!strcmp("hash", cmd) && p1) {
+
+			/* print hash of oid */
+			if (!get_oid(p1, &oid))
+				printf("%u\n", sha1hash(oid.hash));
+			else
+				printf("Unknown oid: %s\n", p1);
+
+		} else if (!strcmp("add", cmd) && p1 && p2) {
+
+			if (get_oid(p1, &oid)) {
+				printf("Unknown oid: %s\n", p1);
+				continue;
+			}
+
+			/* create entry with oidkey from p1, value = p2 */
+			FLEX_ALLOC_STR(entry, name, p2);
+			oidcpy(&entry->entry.oid, &oid);
+
+			/* add to oidmap */
+			oidmap_put(&map, entry);
+
+		} else if (!strcmp("put", cmd) && p1 && p2) {
+
+			if (get_oid(p1, &oid)) {
+				printf("Unknown oid: %s\n", p1);
+				continue;
+			}
+
+			/* create entry with oid_key = p1, name_value = p2 */
+			FLEX_ALLOC_STR(entry, name, p2);
+			oidcpy(&entry->entry.oid, &oid);
+
+			/* add / replace entry */
+			entry = oidmap_put(&map, entry);
+
+			/* print and free replaced entry, if any */
+			puts(entry ? entry->name : "NULL");
+			free(entry);
+
+		} else if (!strcmp("get", cmd) && p1) {
+
+			if (get_oid(p1, &oid)) {
+				printf("Unknown oid: %s\n", p1);
+				continue;
+			}
+
+			/* lookup entry in oidmap */
+			entry = oidmap_get(&map, &oid);
+
+			/* print result */
+			puts(entry ? entry->name : "NULL");
+
+		} else if (!strcmp("remove", cmd) && p1) {
+
+			if (get_oid(p1, &oid)) {
+				printf("Unknown oid: %s\n", p1);
+				continue;
+			}
+
+			/* remove entry from oidmap */
+			entry = oidmap_remove(&map, &oid);
+
+			/* print result and free entry*/
+			puts(entry ? entry->name : "NULL");
+			free(entry);
+
+		} else if (!strcmp("iterate", cmd)) {
+
+			struct oidmap_iter iter;
+			oidmap_iter_init(&map, &iter);
+			while ((entry = oidmap_iter_next(&iter)))
+				printf("%s %s\n", oid_to_hex(&entry->entry.oid), entry->name);
+
+		} else {
+
+			printf("Unknown command %s\n", cmd);
+
+		}
+	}
+
+	strbuf_release(&line);
+	oidmap_free(&map, 1);
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 087a8c0cc9..1eac25233f 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -35,6 +35,7 @@ static struct test_cmd cmds[] = {
 	{ "match-trees", cmd__match_trees },
 	{ "mergesort", cmd__mergesort },
 	{ "mktemp", cmd__mktemp },
+	{ "oidmap", cmd__oidmap },
 	{ "online-cpus", cmd__online_cpus },
 	{ "parse-options", cmd__parse_options },
 	{ "path-utils", cmd__path_utils },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 7e703f3038..c7a46dc320 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -25,6 +25,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
 int cmd__match_trees(int argc, const char **argv);
 int cmd__mergesort(int argc, const char **argv);
 int cmd__mktemp(int argc, const char **argv);
+int cmd__oidmap(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
 int cmd__parse_options(int argc, const char **argv);
 int cmd__path_utils(int argc, const char **argv);
-- 
2.22.0.14.g9023ccb50a


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09  4:49 [PATCH 0/3] Test oidmap Christian Couder
  2019-06-09  4:49 ` [PATCH 1/3] t/helper: add test-oidmap.c Christian Couder
@ 2019-06-09  4:49 ` Christian Couder
  2019-06-09  9:22   ` SZEDER Gábor
  2019-06-09  4:49 ` [PATCH 3/3] oidmap: use sha1hash() instead of static hash() function Christian Couder
  2 siblings, 1 reply; 14+ messages in thread
From: Christian Couder @ 2019-06-09  4:49 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder,
	Christian Couder

From: Christian Couder <christian.couder@gmail.com>

Add actual tests for operations using `struct oidmap` from oidmap.{c,h}.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 t/t0016-oidmap.sh | 100 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100755 t/t0016-oidmap.sh

diff --git a/t/t0016-oidmap.sh b/t/t0016-oidmap.sh
new file mode 100755
index 0000000000..3a8e8bdb3d
--- /dev/null
+++ b/t/t0016-oidmap.sh
@@ -0,0 +1,100 @@
+#!/bin/sh
+
+test_description='test oidmap'
+. ./test-lib.sh
+
+# This purposefully is very similar to t0011-hashmap.sh
+
+test_oidmap() {
+	echo "$1" | test-tool oidmap $3 > actual &&
+	echo "$2" > expect &&
+	test_cmp expect actual
+}
+
+
+test_expect_success 'setup' '
+
+	test_commit one &&
+	test_commit two &&
+	test_commit three &&
+	test_commit four
+
+'
+
+test_oidhash() {
+	git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
+}
+
+test_expect_success PERL 'hash' '
+
+test_oidmap "hash one
+hash two
+hash invalidOid
+hash three" "$(test_oidhash one)
+$(test_oidhash two)
+Unknown oid: invalidOid
+$(test_oidhash three)"
+
+'
+
+test_expect_success 'put' '
+
+test_oidmap "put one 1
+put two 2
+put invalidOid 4
+put three 3" "NULL
+NULL
+Unknown oid: invalidOid
+NULL"
+
+'
+
+test_expect_success 'replace' '
+
+test_oidmap "put one 1
+put two 2
+put three 3
+put invalidOid 4
+put two deux
+put one un" "NULL
+NULL
+NULL
+Unknown oid: invalidOid
+2
+1"
+
+'
+
+test_expect_success 'get' '
+
+test_oidmap "put one 1
+put two 2
+put three 3
+get two
+get four
+get invalidOid
+get one" "NULL
+NULL
+NULL
+2
+NULL
+Unknown oid: invalidOid
+1"
+
+'
+
+test_expect_success 'iterate' '
+
+test_oidmap "put one 1
+put two 2
+put three 3
+iterate" "NULL
+NULL
+NULL
+$(git rev-parse two) 2
+$(git rev-parse one) 1
+$(git rev-parse three) 3"
+
+'
+
+test_done
-- 
2.22.0.14.g9023ccb50a


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] oidmap: use sha1hash() instead of static hash() function
  2019-06-09  4:49 [PATCH 0/3] Test oidmap Christian Couder
  2019-06-09  4:49 ` [PATCH 1/3] t/helper: add test-oidmap.c Christian Couder
  2019-06-09  4:49 ` [PATCH 2/3] t: add t0016-oidmap.sh Christian Couder
@ 2019-06-09  4:49 ` Christian Couder
  2 siblings, 0 replies; 14+ messages in thread
From: Christian Couder @ 2019-06-09  4:49 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder,
	Christian Couder

From: Christian Couder <christian.couder@gmail.com>

Get rid of the static hash() function in oidmap.c which is redundant
with sha1hash(). Use sha1hash() directly instead.

Let's be more consistent and not use several hash functions doing
nearly exactly the same thing.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 oidmap.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/oidmap.c b/oidmap.c
index b0841a0f58..01c206aaef 100644
--- a/oidmap.c
+++ b/oidmap.c
@@ -12,13 +12,6 @@ static int oidmap_neq(const void *hashmap_cmp_fn_data,
 		      &((const struct oidmap_entry *) entry_or_key)->oid);
 }
 
-static int hash(const struct object_id *oid)
-{
-	int hash;
-	memcpy(&hash, oid->hash, sizeof(hash));
-	return hash;
-}
-
 void oidmap_init(struct oidmap *map, size_t initial_size)
 {
 	hashmap_init(&map->map, oidmap_neq, NULL, initial_size);
@@ -36,7 +29,7 @@ void *oidmap_get(const struct oidmap *map, const struct object_id *key)
 	if (!map->map.cmpfn)
 		return NULL;
 
-	return hashmap_get_from_hash(&map->map, hash(key), key);
+	return hashmap_get_from_hash(&map->map, sha1hash(key->hash), key);
 }
 
 void *oidmap_remove(struct oidmap *map, const struct object_id *key)
@@ -46,7 +39,7 @@ void *oidmap_remove(struct oidmap *map, const struct object_id *key)
 	if (!map->map.cmpfn)
 		oidmap_init(map, 0);
 
-	hashmap_entry_init(&entry, hash(key));
+	hashmap_entry_init(&entry, sha1hash(key->hash));
 	return hashmap_remove(&map->map, &entry, key);
 }
 
@@ -57,6 +50,6 @@ void *oidmap_put(struct oidmap *map, void *entry)
 	if (!map->map.cmpfn)
 		oidmap_init(map, 0);
 
-	hashmap_entry_init(&to_put->internal_entry, hash(&to_put->oid));
+	hashmap_entry_init(&to_put->internal_entry, sha1hash(to_put->oid.hash));
 	return hashmap_put(&map->map, to_put);
 }
-- 
2.22.0.14.g9023ccb50a


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09  4:49 ` [PATCH 2/3] t: add t0016-oidmap.sh Christian Couder
@ 2019-06-09  9:22   ` SZEDER Gábor
  2019-06-09 20:24     ` Christian Couder
                       ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: SZEDER Gábor @ 2019-06-09  9:22 UTC (permalink / raw)
  To: Christian Couder
  Cc: git, Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

On Sun, Jun 09, 2019 at 06:49:06AM +0200, Christian Couder wrote:
> From: Christian Couder <christian.couder@gmail.com>
> 
> Add actual tests for operations using `struct oidmap` from oidmap.{c,h}.
> 
> Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
> ---
>  t/t0016-oidmap.sh | 100 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 100 insertions(+)
>  create mode 100755 t/t0016-oidmap.sh
> 
> diff --git a/t/t0016-oidmap.sh b/t/t0016-oidmap.sh
> new file mode 100755
> index 0000000000..3a8e8bdb3d
> --- /dev/null
> +++ b/t/t0016-oidmap.sh
> @@ -0,0 +1,100 @@
> +#!/bin/sh
> +
> +test_description='test oidmap'
> +. ./test-lib.sh
> +
> +# This purposefully is very similar to t0011-hashmap.sh
> +
> +test_oidmap() {
> +	echo "$1" | test-tool oidmap $3 > actual &&
> +	echo "$2" > expect &&

Style nit: space between redirection op and filename.

> +	test_cmp expect actual
> +}
> +
> +
> +test_expect_success 'setup' '
> +
> +	test_commit one &&
> +	test_commit two &&
> +	test_commit three &&
> +	test_commit four
> +
> +'
> +
> +test_oidhash() {
> +	git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'

New Perl dependencies always make Dscho sad... :)

So, 'test oidmap' from the previous patch prints the value we want to
check with:

    printf("%u\n", sha1hash(oid.hash));

First, since object ids inherently make more sense as hex values, it
would be more appropriate to print that hash with the '%x' format
specifier, and then we wouldn't need Perl's hex() anymore, and thus
could swap the order of the first four bytes in oidmap's hash without
relying on Perl, e.g. with:

  sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'

Second, and more importantly, the need for swapping the byte order
indicates that this test would fail on big-endian systems, I'm afraid.
So I think we need an additional bswap32() on the printing side, and
then could further simplify 'test_oidhash':


diff --git a/t/helper/test-oidmap.c b/t/helper/test-oidmap.c
index 0ba122a264..4177912f9a 100644
--- a/t/helper/test-oidmap.c
+++ b/t/helper/test-oidmap.c
@@ -51,7 +51,7 @@ int cmd__oidmap(int argc, const char **argv)
 
 			/* print hash of oid */
 			if (!get_oid(p1, &oid))
-				printf("%u\n", sha1hash(oid.hash));
+				printf("%x\n", bswap32(sha1hash(oid.hash)));
 			else
 				printf("Unknown oid: %s\n", p1);
 
diff --git a/t/t0016-oidmap.sh b/t/t0016-oidmap.sh
index 3a8e8bdb3d..9c0d88a316 100755
--- a/t/t0016-oidmap.sh
+++ b/t/t0016-oidmap.sh
@@ -22,10 +22,10 @@ test_expect_success 'setup' '
 '
 
 test_oidhash() {
-	git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
+	git rev-parse "$1" | cut -c1-8
 }
 
-test_expect_success PERL 'hash' '
+test_expect_success 'hash' '
 
 test_oidmap "hash one
 hash two


> +}
> +
> +test_expect_success PERL 'hash' '
> +
> +test_oidmap "hash one
> +hash two
> +hash invalidOid
> +hash three" "$(test_oidhash one)
> +$(test_oidhash two)
> +Unknown oid: invalidOid
> +$(test_oidhash three)"
> +
> +'
> +
> +test_expect_success 'put' '
> +
> +test_oidmap "put one 1
> +put two 2
> +put invalidOid 4
> +put three 3" "NULL
> +NULL
> +Unknown oid: invalidOid
> +NULL"
> +
> +'
> +
> +test_expect_success 'replace' '
> +
> +test_oidmap "put one 1
> +put two 2
> +put three 3
> +put invalidOid 4
> +put two deux
> +put one un" "NULL
> +NULL
> +NULL
> +Unknown oid: invalidOid
> +2
> +1"
> +
> +'
> +
> +test_expect_success 'get' '
> +
> +test_oidmap "put one 1
> +put two 2
> +put three 3
> +get two
> +get four
> +get invalidOid
> +get one" "NULL
> +NULL
> +NULL
> +2
> +NULL
> +Unknown oid: invalidOid
> +1"
> +
> +'
> +
> +test_expect_success 'iterate' '
> +
> +test_oidmap "put one 1
> +put two 2
> +put three 3
> +iterate" "NULL
> +NULL
> +NULL
> +$(git rev-parse two) 2
> +$(git rev-parse one) 1
> +$(git rev-parse three) 3"
> +
> +'
> +
> +test_done
> -- 
> 2.22.0.14.g9023ccb50a
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09  9:22   ` SZEDER Gábor
@ 2019-06-09 20:24     ` Christian Couder
  2019-06-09 21:21       ` SZEDER Gábor
  2019-06-10 16:46     ` Junio C Hamano
  2019-06-13 17:19     ` Jeff King
  2 siblings, 1 reply; 14+ messages in thread
From: Christian Couder @ 2019-06-09 20:24 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: git, Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

On Sun, Jun 9, 2019 at 11:23 AM SZEDER Gábor <szeder.dev@gmail.com> wrote:
>
> On Sun, Jun 09, 2019 at 06:49:06AM +0200, Christian Couder wrote:
> > +
> > +test_oidmap() {
> > +     echo "$1" | test-tool oidmap $3 > actual &&
> > +     echo "$2" > expect &&
>
> Style nit: space between redirection op and filename.

Thanks for spotting this. It's fixed in my current version.

> > +test_oidhash() {
> > +     git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
>
> New Perl dependencies always make Dscho sad... :)

Yeah, I was not sure how to do it properly in shell so I was hoping I
would get suggestions about this. Thanks for looking at this!

I could have hardcoded the values as it is done in t0011-hashmap.sh,
but I thought it was better to find a function that does he job.

> So, 'test oidmap' from the previous patch prints the value we want to
> check with:
>
>     printf("%u\n", sha1hash(oid.hash));

Yeah, I did it this way because "test-hashmap.c" does the same kind of
thing to print hashes:

            printf("%u %u %u %u\n",
                   strhash(p1), memhash(p1, strlen(p1)),
                   strihash(p1), memihash(p1, strlen(p1)));

> First, since object ids inherently make more sense as hex values, it
> would be more appropriate to print that hash with the '%x' format
> specifier,

I would be ok with that, but then I think it would make sense to also
print hex values in "test-hashmap.c".

> and then we wouldn't need Perl's hex() anymore, and thus
> could swap the order of the first four bytes in oidmap's hash without
> relying on Perl, e.g. with:
>
>   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
>
> Second, and more importantly, the need for swapping the byte order
> indicates that this test would fail on big-endian systems, I'm afraid.
> So I think we need an additional bswap32() on the printing side,

Ok, but then shouldn't we also use bswap32() in "test-hashmap.c"?

By the way it seems that we use ntohl() or htonl() instead of
bswap32() in the source code.

> and then could further simplify 'test_oidhash':
>
> diff --git a/t/helper/test-oidmap.c b/t/helper/test-oidmap.c
> index 0ba122a264..4177912f9a 100644
> --- a/t/helper/test-oidmap.c
> +++ b/t/helper/test-oidmap.c
> @@ -51,7 +51,7 @@ int cmd__oidmap(int argc, const char **argv)
>
>                         /* print hash of oid */
>                         if (!get_oid(p1, &oid))
> -                               printf("%u\n", sha1hash(oid.hash));
> +                               printf("%x\n", bswap32(sha1hash(oid.hash)));
>                         else
>                                 printf("Unknown oid: %s\n", p1);
>
> diff --git a/t/t0016-oidmap.sh b/t/t0016-oidmap.sh
> index 3a8e8bdb3d..9c0d88a316 100755
> --- a/t/t0016-oidmap.sh
> +++ b/t/t0016-oidmap.sh
> @@ -22,10 +22,10 @@ test_expect_success 'setup' '
>  '
>
>  test_oidhash() {
> -       git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
> +       git rev-parse "$1" | cut -c1-8
>  }
>
> -test_expect_success PERL 'hash' '
> +test_expect_success 'hash' '

Yeah, I agree that it seems better to me this way.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09 20:24     ` Christian Couder
@ 2019-06-09 21:21       ` SZEDER Gábor
  2019-06-09 21:51         ` Christian Couder
  0 siblings, 1 reply; 14+ messages in thread
From: SZEDER Gábor @ 2019-06-09 21:21 UTC (permalink / raw)
  To: Christian Couder
  Cc: git, Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

On Sun, Jun 09, 2019 at 10:24:55PM +0200, Christian Couder wrote:
> On Sun, Jun 9, 2019 at 11:23 AM SZEDER Gábor <szeder.dev@gmail.com> wrote:
> >
> > On Sun, Jun 09, 2019 at 06:49:06AM +0200, Christian Couder wrote:
> > > +
> > > +test_oidmap() {
> > > +     echo "$1" | test-tool oidmap $3 > actual &&
> > > +     echo "$2" > expect &&
> >
> > Style nit: space between redirection op and filename.
> 
> Thanks for spotting this. It's fixed in my current version.
> 
> > > +test_oidhash() {
> > > +     git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
> >
> > New Perl dependencies always make Dscho sad... :)
> 
> Yeah, I was not sure how to do it properly in shell so I was hoping I
> would get suggestions about this. Thanks for looking at this!
> 
> I could have hardcoded the values as it is done in t0011-hashmap.sh,
> but I thought it was better to find a function that does he job.

Well, I'm fine with hardcoding the expected hash values (in network
byte order) as well, because then we won't add another git process
upstream of a pipe that would pop up during audit later...

> > So, 'test oidmap' from the previous patch prints the value we want to
> > check with:
> >
> >     printf("%u\n", sha1hash(oid.hash));
> 
> Yeah, I did it this way because "test-hashmap.c" does the same kind of
> thing to print hashes:
> 
>             printf("%u %u %u %u\n",
>                    strhash(p1), memhash(p1, strlen(p1)),
>                    strihash(p1), memihash(p1, strlen(p1)));
> 
> > First, since object ids inherently make more sense as hex values, it
> > would be more appropriate to print that hash with the '%x' format
> > specifier,
> 
> I would be ok with that, but then I think it would make sense to also
> print hex values in "test-hashmap.c".
> 
> > and then we wouldn't need Perl's hex() anymore, and thus
> > could swap the order of the first four bytes in oidmap's hash without
> > relying on Perl, e.g. with:
> >
> >   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
> >
> > Second, and more importantly, the need for swapping the byte order
> > indicates that this test would fail on big-endian systems, I'm afraid.
> > So I think we need an additional bswap32() on the printing side,
> 
> Ok, but then shouldn't we also use bswap32() in "test-hashmap.c"?

No.  The two test scripts/helpers work with different hashes.  t0011
and 'test-hashmap.c' uses the various FNV-1-based hash functions
(strhash(), memhash(), ...) to calculate an unsigned int hash of the
items stored in the hashmap, therefore their hashes will be the same
regardless of endianness.  In an oidmap, however, the hash is simply
the first four bytes of the object id as an unsigned int as is, and
look at how sha1hash() does it, and indeed at the last sentence of the
comment in front of it:

 * [...] Note that
 * the results will be different on big-endian and little-endian
 * platforms, so they should not be stored or transferred over the net.
 */
static inline unsigned int sha1hash(const unsigned char *sha1)
{
        /*
         * Equivalent to 'return *(unsigned int *)sha1;', but safe on
         * platforms that don't support unaligned reads.
         */
        unsigned int hash;
        memcpy(&hash, sha1, sizeof(hash));
        return hash;
}

> By the way it seems that we use ntohl() or htonl() instead of
> bswap32() in the source code.

OK.

> > and then could further simplify 'test_oidhash':
> >
> > diff --git a/t/helper/test-oidmap.c b/t/helper/test-oidmap.c
> > index 0ba122a264..4177912f9a 100644
> > --- a/t/helper/test-oidmap.c
> > +++ b/t/helper/test-oidmap.c
> > @@ -51,7 +51,7 @@ int cmd__oidmap(int argc, const char **argv)
> >
> >                         /* print hash of oid */
> >                         if (!get_oid(p1, &oid))
> > -                               printf("%u\n", sha1hash(oid.hash));
> > +                               printf("%x\n", bswap32(sha1hash(oid.hash)));
> >                         else
> >                                 printf("Unknown oid: %s\n", p1);
> >
> > diff --git a/t/t0016-oidmap.sh b/t/t0016-oidmap.sh
> > index 3a8e8bdb3d..9c0d88a316 100755
> > --- a/t/t0016-oidmap.sh
> > +++ b/t/t0016-oidmap.sh
> > @@ -22,10 +22,10 @@ test_expect_success 'setup' '
> >  '
> >
> >  test_oidhash() {
> > -       git rev-parse "$1" | perl -ne 'print hex("$4$3$2$1") . "\n" if m/^(..)(..)(..)(..).*/;'
> > +       git rev-parse "$1" | cut -c1-8
> >  }
> >
> > -test_expect_success PERL 'hash' '
> > +test_expect_success 'hash' '
> 
> Yeah, I agree that it seems better to me this way.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09 21:21       ` SZEDER Gábor
@ 2019-06-09 21:51         ` Christian Couder
  0 siblings, 0 replies; 14+ messages in thread
From: Christian Couder @ 2019-06-09 21:51 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: git, Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

On Sun, Jun 9, 2019 at 11:21 PM SZEDER Gábor <szeder.dev@gmail.com> wrote:
>
> On Sun, Jun 09, 2019 at 10:24:55PM +0200, Christian Couder wrote:
> > On Sun, Jun 9, 2019 at 11:23 AM SZEDER Gábor <szeder.dev@gmail.com> wrote:
> > >
> > > New Perl dependencies always make Dscho sad... :)
> >
> > Yeah, I was not sure how to do it properly in shell so I was hoping I
> > would get suggestions about this. Thanks for looking at this!
> >
> > I could have hardcoded the values as it is done in t0011-hashmap.sh,
> > but I thought it was better to find a function that does he job.
>
> Well, I'm fine with hardcoding the expected hash values (in network
> byte order) as well, because then we won't add another git process
> upstream of a pipe that would pop up during audit later...

Ok, I think I will do that then.

> > > So, 'test oidmap' from the previous patch prints the value we want to
> > > check with:
> > >
> > >     printf("%u\n", sha1hash(oid.hash));
> >
> > Yeah, I did it this way because "test-hashmap.c" does the same kind of
> > thing to print hashes:
> >
> >             printf("%u %u %u %u\n",
> >                    strhash(p1), memhash(p1, strlen(p1)),
> >                    strihash(p1), memihash(p1, strlen(p1)));
> >
> > > First, since object ids inherently make more sense as hex values, it
> > > would be more appropriate to print that hash with the '%x' format
> > > specifier,
> >
> > I would be ok with that, but then I think it would make sense to also
> > print hex values in "test-hashmap.c".
> >
> > > and then we wouldn't need Perl's hex() anymore, and thus
> > > could swap the order of the first four bytes in oidmap's hash without
> > > relying on Perl, e.g. with:
> > >
> > >   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
> > >
> > > Second, and more importantly, the need for swapping the byte order
> > > indicates that this test would fail on big-endian systems, I'm afraid.
> > > So I think we need an additional bswap32() on the printing side,
> >
> > Ok, but then shouldn't we also use bswap32() in "test-hashmap.c"?
>
> No.  The two test scripts/helpers work with different hashes.  t0011
> and 'test-hashmap.c' uses the various FNV-1-based hash functions
> (strhash(), memhash(), ...) to calculate an unsigned int hash of the
> items stored in the hashmap, therefore their hashes will be the same
> regardless of endianness.

I see. Thanks for explaining that.

>  In an oidmap, however, the hash is simply
> the first four bytes of the object id as an unsigned int as is,

Yeah, I had realized that.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09  9:22   ` SZEDER Gábor
  2019-06-09 20:24     ` Christian Couder
@ 2019-06-10 16:46     ` Junio C Hamano
  2019-06-13 17:19     ` Jeff King
  2 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2019-06-10 16:46 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: Christian Couder, git, Ævar Arnfjörð Bjarmason,
	Jonathan Tan, Brandon Williams, Christian Couder

SZEDER Gábor <szeder.dev@gmail.com> writes:

> So, 'test oidmap' from the previous patch prints the value we want to
> check with:
>
>     printf("%u\n", sha1hash(oid.hash));
>
> First, since object ids inherently make more sense as hex values, it
> would be more appropriate to print that hash with the '%x' format
> specifier, and then we wouldn't need Perl's hex() anymore, and thus
> could swap the order of the first four bytes in oidmap's hash without
> relying on Perl, e.g. with:
>
>   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
>
> Second, and more importantly, the need for swapping the byte order
> indicates that this test would fail on big-endian systems, I'm afraid.
> So I think we need an additional bswap32() on the printing side, and
> then could further simplify 'test_oidhash':

Yup, if we are doing an ad-hoc t/helper/ command, we should strive
to make it help the driving scripts around it to become simpler, and
your suggestion to do s/%u/%x/ is a good example of doing so.

Thanks for a dose of sanity.  The goal of the series may be
worthwhile, and helping hands in improving its execution is very
much appreciated.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-09  9:22   ` SZEDER Gábor
  2019-06-09 20:24     ` Christian Couder
  2019-06-10 16:46     ` Junio C Hamano
@ 2019-06-13 17:19     ` Jeff King
  2019-06-13 17:52       ` SZEDER Gábor
  2 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2019-06-13 17:19 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: Christian Couder, git, Junio C Hamano,
	Ævar Arnfjörð Bjarmason, Jonathan Tan,
	Brandon Williams, Christian Couder

On Sun, Jun 09, 2019 at 11:22:59AM +0200, SZEDER Gábor wrote:

> So, 'test oidmap' from the previous patch prints the value we want to
> check with:
> 
>     printf("%u\n", sha1hash(oid.hash));
> 
> First, since object ids inherently make more sense as hex values, it
> would be more appropriate to print that hash with the '%x' format
> specifier, and then we wouldn't need Perl's hex() anymore, and thus
> could swap the order of the first four bytes in oidmap's hash without
> relying on Perl, e.g. with:
> 
>   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
> 
> Second, and more importantly, the need for swapping the byte order
> indicates that this test would fail on big-endian systems, I'm afraid.
> So I think we need an additional bswap32() on the printing side, and
> then could further simplify 'test_oidhash':

I agree with all your points about using hex and pushing the logic into
test-oidmap.c. BUT.

At the point where we are normalizing byte order of the hashes, I have
to wonder: why do we care about testing the hash value in the first
place? We care that oidmap can store and retrieve values, and that it
performs well. But as long as it does those things, I don't think
anybody cares if it uses the first 4 bytes of the sha1 or the last 4.

I know there are testing philosophies that go to this level of white-box
testing, but I don't think we usually do in Git. A unit test of oidmap's
externally visible behavior seems like the right level to me.

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-13 17:19     ` Jeff King
@ 2019-06-13 17:52       ` SZEDER Gábor
  2019-06-13 19:02         ` Jeff King
  0 siblings, 1 reply; 14+ messages in thread
From: SZEDER Gábor @ 2019-06-13 17:52 UTC (permalink / raw)
  To: Jeff King
  Cc: Christian Couder, git, Junio C Hamano,
	Ævar Arnfjörð Bjarmason, Jonathan Tan,
	Brandon Williams, Christian Couder

On Thu, Jun 13, 2019 at 01:19:13PM -0400, Jeff King wrote:
> On Sun, Jun 09, 2019 at 11:22:59AM +0200, SZEDER Gábor wrote:
> 
> > So, 'test oidmap' from the previous patch prints the value we want to
> > check with:
> > 
> >     printf("%u\n", sha1hash(oid.hash));
> > 
> > First, since object ids inherently make more sense as hex values, it
> > would be more appropriate to print that hash with the '%x' format
> > specifier, and then we wouldn't need Perl's hex() anymore, and thus
> > could swap the order of the first four bytes in oidmap's hash without
> > relying on Perl, e.g. with:
> > 
> >   sed -e 's/^\(..\)\(..\)\(..\)\(..\).*/\4\3\2\1/'
> > 
> > Second, and more importantly, the need for swapping the byte order
> > indicates that this test would fail on big-endian systems, I'm afraid.
> > So I think we need an additional bswap32() on the printing side, and
> > then could further simplify 'test_oidhash':
> 
> I agree with all your points about using hex and pushing the logic into
> test-oidmap.c. BUT.
> 
> At the point where we are normalizing byte order of the hashes, I have
> to wonder: why do we care about testing the hash value in the first
> place? We care that oidmap can store and retrieve values, and that it
> performs well. But as long as it does those things, I don't think
> anybody cares if it uses the first 4 bytes of the sha1 or the last 4.
> 
> I know there are testing philosophies that go to this level of
> white-box testing, but I don't think we usually do in Git. A unit
> test of oidmap's externally visible behavior seems like the right
> level to me.

That's a good point...  but then why does 't0011-hashmap.sh' do it in
the first place?  As far as I understood this t0016 mainly follows
suit of t0011.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-13 17:52       ` SZEDER Gábor
@ 2019-06-13 19:02         ` Jeff King
  2019-06-13 22:22           ` Junio C Hamano
  0 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2019-06-13 19:02 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: Christian Couder, git, Junio C Hamano,
	Ævar Arnfjörð Bjarmason, Jonathan Tan,
	Brandon Williams, Christian Couder

On Thu, Jun 13, 2019 at 07:52:36PM +0200, SZEDER Gábor wrote:

> > At the point where we are normalizing byte order of the hashes, I have
> > to wonder: why do we care about testing the hash value in the first
> > place? We care that oidmap can store and retrieve values, and that it
> > performs well. But as long as it does those things, I don't think
> > anybody cares if it uses the first 4 bytes of the sha1 or the last 4.
> > 
> > I know there are testing philosophies that go to this level of
> > white-box testing, but I don't think we usually do in Git. A unit
> > test of oidmap's externally visible behavior seems like the right
> > level to me.
> 
> That's a good point...  but then why does 't0011-hashmap.sh' do it in
> the first place?  As far as I understood this t0016 mainly follows
> suit of t0011.

I'd make the same argument against t0011. :)

I think there it at least made a little more sense because we truly are
hashing ourselves, rather than just copying out some sha1 bytes. But I
think I'd still argue that if I updated strhash() to use a different
hash, I should not have to be updating t0011 to change out the hashes.

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-13 19:02         ` Jeff King
@ 2019-06-13 22:22           ` Junio C Hamano
  2019-06-14 10:36             ` Christian Couder
  0 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2019-06-13 22:22 UTC (permalink / raw)
  To: Jeff King
  Cc: SZEDER Gábor, Christian Couder, git,
	Ævar Arnfjörð Bjarmason, Jonathan Tan,
	Brandon Williams, Christian Couder

Jeff King <peff@peff.net> writes:

>> > I know there are testing philosophies that go to this level of
>> > white-box testing, but I don't think we usually do in Git. A unit
>> > test of oidmap's externally visible behavior seems like the right
>> > level to me.
>> 
>> That's a good point...  but then why does 't0011-hashmap.sh' do it in
>> the first place?  As far as I understood this t0016 mainly follows
>> suit of t0011.
>
> I'd make the same argument against t0011. :)

Yeah, I tend to agree.  It is not a good excuse that somebody else
alerady has made a mistake.

> I think there it at least made a little more sense because we truly are
> hashing ourselves, rather than just copying out some sha1 bytes. But I
> think I'd still argue that if I updated strhash() to use a different
> hash, I should not have to be updating t0011 to change out the hashes.

True, too.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] t: add t0016-oidmap.sh
  2019-06-13 22:22           ` Junio C Hamano
@ 2019-06-14 10:36             ` Christian Couder
  0 siblings, 0 replies; 14+ messages in thread
From: Christian Couder @ 2019-06-14 10:36 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jeff King, SZEDER Gábor, git,
	Ævar Arnfjörð Bjarmason, Jonathan Tan,
	Brandon Williams, Christian Couder

On Fri, Jun 14, 2019 at 12:22 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Jeff King <peff@peff.net> writes:
>
> >> > I know there are testing philosophies that go to this level of
> >> > white-box testing, but I don't think we usually do in Git. A unit
> >> > test of oidmap's externally visible behavior seems like the right
> >> > level to me.
> >>
> >> That's a good point...  but then why does 't0011-hashmap.sh' do it in
> >> the first place?  As far as I understood this t0016 mainly follows
> >> suit of t0011.
> >
> > I'd make the same argument against t0011. :)
>
> Yeah, I tend to agree.  It is not a good excuse that somebody else
> alerady has made a mistake.

Ok, I will remove the "hash" test in t0016 and the corresponding code
in test-oidmap.c.

> > I think there it at least made a little more sense because we truly are
> > hashing ourselves, rather than just copying out some sha1 bytes. But I
> > think I'd still argue that if I updated strhash() to use a different
> > hash, I should not have to be updating t0011 to change out the hashes.
>
> True, too.

I will also send an additional patch to remove similar code in t00161
and test-hashmap.c.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-09  4:49 [PATCH 0/3] Test oidmap Christian Couder
2019-06-09  4:49 ` [PATCH 1/3] t/helper: add test-oidmap.c Christian Couder
2019-06-09  4:49 ` [PATCH 2/3] t: add t0016-oidmap.sh Christian Couder
2019-06-09  9:22   ` SZEDER Gábor
2019-06-09 20:24     ` Christian Couder
2019-06-09 21:21       ` SZEDER Gábor
2019-06-09 21:51         ` Christian Couder
2019-06-10 16:46     ` Junio C Hamano
2019-06-13 17:19     ` Jeff King
2019-06-13 17:52       ` SZEDER Gábor
2019-06-13 19:02         ` Jeff King
2019-06-13 22:22           ` Junio C Hamano
2019-06-14 10:36             ` Christian Couder
2019-06-09  4:49 ` [PATCH 3/3] oidmap: use sha1hash() instead of static hash() function Christian Couder

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox