git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH 08/15] cache: compare the entire buffer for struct object_id
Date: Sat, 10 Apr 2021 15:21:33 +0000	[thread overview]
Message-ID: <20210410152140.3525040-9-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20210410152140.3525040-1-sandals@crustytoothpaste.net>

Currently, when we compare two object IDs, we have to take a branch to
determine what the hash size is supposed to be.  The compiler can
optimize well for a single length, but has trouble when there are two
possible lengths.

There is, however, an alternative: we can ensure that we always compare
the full length of the hash buffer, but in turn we must zero the
remainder of the buffer when using SHA-1; otherwise, we'll end up with
incompatible junk at the end of otherwise equivalent object IDs that
will prevent them from matching.  This is an acceptable tradeoff,
because we generally read an object ID in once, but then compare it
against others multiple times.

This latter approach also has some benefits as well: since we will have
annotated every location in which we load an object ID into an instance
of struct object_id, if we want to set the hash algorithm for the object
ID, we can do so at the same time.

Adopt this latter approach, since it provides us greater flexibility and
lets us read and store object IDs for multiple algorithms at once.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 hash.h        | 13 ++++++++++---
 hex.c         |  9 ++++++---
 notes.c       |  3 +++
 object-file.c |  1 +
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/hash.h b/hash.h
index c8f03d8aee..04eba5c56b 100644
--- a/hash.h
+++ b/hash.h
@@ -205,7 +205,7 @@ static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
 
 static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
 {
-	return hashcmp(oid1->hash, oid2->hash);
+	return memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
 }
 
 static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
@@ -221,7 +221,7 @@ static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
 
 static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
 {
-	return hasheq(oid1->hash, oid2->hash);
+	return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
 }
 
 static inline int is_null_oid(const struct object_id *oid)
@@ -258,7 +258,9 @@ static inline void oidclr(struct object_id *oid)
 
 static inline void oidread(struct object_id *oid, const unsigned char *hash)
 {
-	memcpy(oid->hash, hash, the_hash_algo->rawsz);
+	size_t rawsz = the_hash_algo->rawsz;
+	memcpy(oid->hash, hash, rawsz);
+	memset(oid->hash + rawsz, 0, GIT_MAX_RAWSZ - rawsz);
 }
 
 static inline int is_empty_blob_sha1(const unsigned char *sha1)
@@ -281,6 +283,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid)
 	return oideq(oid, the_hash_algo->empty_tree);
 }
 
+static inline void oid_pad_buffer(struct object_id *oid, const struct git_hash_algo *algop)
+{
+	memset(oid->hash + algop->rawsz, 0, GIT_MAX_RAWSZ - algop->rawsz);
+}
+
 const char *empty_tree_oid_hex(void);
 const char *empty_blob_oid_hex(void);
 
diff --git a/hex.c b/hex.c
index da51e64929..5fa3e71cb9 100644
--- a/hex.c
+++ b/hex.c
@@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
 int get_oid_hex_algop(const char *hex, struct object_id *oid,
 		      const struct git_hash_algo *algop)
 {
-	return get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	if (!ret)
+		oid_pad_buffer(oid, algop);
+	return ret;
 }
 
 /*
@@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid)
 {
 	int i;
 	for (i = GIT_HASH_NALGOS - 1; i > 0; i--) {
-		if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i]))
+		if (!get_oid_hex_algop(hex, oid, &hash_algos[i]))
 			return i;
 	}
 	return GIT_HASH_UNKNOWN;
@@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid,
 			const char **end,
 			const struct git_hash_algo *algop)
 {
-	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_oid_hex_algop(hex, oid, algop);
 	if (!ret)
 		*end = hex + algop->hexsz;
 	return ret;
diff --git a/notes.c b/notes.c
index a44b25858f..1dfe9e2b9f 100644
--- a/notes.c
+++ b/notes.c
@@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 		CALLOC_ARRAY(l, 1);
 		oidcpy(&l->key_oid, &object_oid);
 		oidcpy(&l->val_oid, &entry.oid);
+		oid_pad_buffer(&l->key_oid, the_hash_algo);
+		oid_pad_buffer(&l->val_oid, the_hash_algo);
 		if (note_tree_insert(t, node, n, l, type,
 				     combine_notes_concatenate))
 			die("Failed to load %s %s into notes tree "
@@ -484,6 +486,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 				strbuf_addch(&non_note_path, '/');
 			}
 			strbuf_addstr(&non_note_path, entry.path);
+			oid_pad_buffer(&entry.oid, the_hash_algo);
 			add_non_note(t, strbuf_detach(&non_note_path, NULL),
 				     entry.mode, entry.oid.hash);
 		}
diff --git a/object-file.c b/object-file.c
index 3f43c376e7..8e338247cc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2352,6 +2352,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 		if (namelen == the_hash_algo->hexsz - 2 &&
 		    !hex_to_bytes(oid.hash + 1, de->d_name,
 				  the_hash_algo->rawsz - 1)) {
+			oid_pad_buffer(&oid, the_hash_algo);
 			if (obj_cb) {
 				r = obj_cb(&oid, path->buf, data);
 				if (r)

  parent reply	other threads:[~2021-04-10 15:22 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-10 15:21 [PATCH 00/15] SHA-256 / SHA-1 interop, part 1 brian m. carlson
2021-04-10 15:21 ` [PATCH 01/15] sha1-file: allow hashing objects literally with any algorithm brian m. carlson
2021-04-15  8:55   ` Denton Liu
2021-04-15 23:03     ` brian m. carlson
2021-04-16 15:04   ` Ævar Arnfjörð Bjarmason
2021-04-16 18:55     ` Junio C Hamano
2021-04-10 15:21 ` [PATCH 02/15] builtin/hash-object: allow literally hashing with a given algorithm brian m. carlson
2021-04-11  8:52   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:07     ` brian m. carlson
2021-04-16 15:21   ` Ævar Arnfjörð Bjarmason
2021-04-16 17:27   ` Ævar Arnfjörð Bjarmason
2021-04-10 15:21 ` [PATCH 03/15] cache: add an algo member to struct object_id brian m. carlson
2021-04-11 11:55   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:37     ` brian m. carlson
2021-04-13 12:12   ` Derrick Stolee
2021-04-14  1:08     ` brian m. carlson
2021-04-15  8:47       ` Ævar Arnfjörð Bjarmason
2021-04-15 23:51         ` brian m. carlson
2021-04-10 15:21 ` [PATCH 04/15] Always use oidread to read into " brian m. carlson
2021-04-10 15:21 ` [PATCH 05/15] hash: add a function to finalize object IDs brian m. carlson
2021-04-10 15:21 ` [PATCH 06/15] Use the final_oid_fn to finalize hashing of " brian m. carlson
2021-04-10 15:21 ` [PATCH 07/15] builtin/pack-redundant: avoid casting buffers to struct object_id brian m. carlson
2021-04-10 15:21 ` brian m. carlson [this message]
2021-04-11  8:17   ` [PATCH 08/15] cache: compare the entire buffer for " Chris Torek
2021-04-11 11:36   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:05     ` brian m. carlson
2021-04-10 15:21 ` [PATCH 09/15] hash: set and copy algo field in " brian m. carlson
2021-04-11 11:57   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:48     ` brian m. carlson
2021-04-11 22:12       ` Ævar Arnfjörð Bjarmason
2021-04-11 23:52         ` brian m. carlson
2021-04-12 11:02           ` [PATCH 0/2] C99: harder dependency on variadic macros Ævar Arnfjörð Bjarmason
2021-04-12 11:02             ` [PATCH 1/2] git-compat-util.h: clarify comment on GCC-specific code Ævar Arnfjörð Bjarmason
2021-04-13  7:57               ` Jeff King
2021-04-13 21:07                 ` Junio C Hamano
2021-04-14  5:21                   ` Jeff King
2021-04-14  6:12                     ` Ævar Arnfjörð Bjarmason
2021-04-14  7:31                       ` Jeff King
2021-05-21  2:06               ` Jonathan Nieder
2021-04-12 11:02             ` [PATCH 2/2] C99 support: remove non-HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2021-04-12 17:58               ` Junio C Hamano
2021-04-13  8:00                 ` Jeff King
2021-05-21  2:50               ` Jonathan Nieder
2021-04-12 12:14             ` [PATCH 0/2] C99: harder dependency on variadic macros Bagas Sanjaya
2021-04-12 12:41               ` Ævar Arnfjörð Bjarmason
2021-04-12 22:57                 ` brian m. carlson
2021-04-12 23:19                   ` Junio C Hamano
2022-01-28 11:11             ` [PATCH v2 0/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-01-28 11:11               ` [PATCH v2 1/2] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-01-28 11:11               ` [PATCH v2 2/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-01-28 22:40                 ` Junio C Hamano
2022-02-19 10:41               ` [PATCH v3 0/3] C99: remove dead " Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 1/3] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 2/3] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 3/3] trace.h: remove never-used TRACE_CONTEXT Ævar Arnfjörð Bjarmason
2022-02-20 12:02                   ` Junio C Hamano
2022-02-20 12:38                     ` Ævar Arnfjörð Bjarmason
2022-02-20 20:12                       ` Junio C Hamano
2022-02-21 16:05                 ` [PATCH v4 0/2] C99: remove dead !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-02-21 16:05                   ` [PATCH v4 1/2] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-02-21 16:05                   ` [PATCH v4 2/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2021-04-12 10:53         ` [PATCH 09/15] hash: set and copy algo field in struct object_id Junio C Hamano
2021-04-12 11:13           ` Ævar Arnfjörð Bjarmason
2021-04-10 15:21 ` [PATCH 10/15] hash: provide per-algorithm null OIDs brian m. carlson
2021-04-11 14:03   ` Junio C Hamano
2021-04-11 21:51     ` brian m. carlson
2021-04-10 15:21 ` [PATCH 11/15] builtin/show-index: set the algorithm for object IDs brian m. carlson
2021-04-10 15:21 ` [PATCH 12/15] commit-graph: don't store file hashes as struct object_id brian m. carlson
2021-04-10 15:21 ` [PATCH 13/15] builtin/pack-objects: avoid using struct object_id for pack hash brian m. carlson
2021-04-10 15:21 ` [PATCH 14/15] hex: default to the_hash_algo on zero algorithm value brian m. carlson
2021-04-10 15:21 ` [PATCH 15/15] hex: print objects using the hash algorithm member brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210410152140.3525040-9-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).