git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: "Derrick Stolee" <dstolee@microsoft.com>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
Subject: [PATCH v2 04/13] hash: add a function to finalize object IDs
Date: Mon, 26 Apr 2021 01:02:52 +0000	[thread overview]
Message-ID: <20210426010301.1093562-5-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20210426010301.1093562-1-sandals@crustytoothpaste.net>

To avoid the penalty of having to branch in hash comparison functions,
we'll want to always compare the full hash member in a struct object_id,
which will require that SHA-1 object IDs be zero-padded.  To do so, add
a function which finalizes a hash context and writes it into an object
ID that performs this padding.

Move the definition of struct object_id and the constant definitions
higher up so we they are available for us to use.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 hash.h        | 50 +++++++++++++++++++++++++++-----------------------
 object-file.c | 25 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/hash.h b/hash.h
index dafdcb3335..c8f03d8aee 100644
--- a/hash.h
+++ b/hash.h
@@ -95,6 +95,29 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s
 /* Number of algorithms supported (including unknown). */
 #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
 
+/* The length in bytes and in hex digits of an object name (SHA-1 value). */
+#define GIT_SHA1_RAWSZ 20
+#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
+/* The block size of SHA-1. */
+#define GIT_SHA1_BLKSZ 64
+
+/* The length in bytes and in hex digits of an object name (SHA-256 value). */
+#define GIT_SHA256_RAWSZ 32
+#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
+/* The block size of SHA-256. */
+#define GIT_SHA256_BLKSZ 64
+
+/* The length in byte and in hex digits of the largest possible hash value. */
+#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
+#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
+/* The largest possible block size for any supported hash. */
+#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
+
+struct object_id {
+	unsigned char hash[GIT_MAX_RAWSZ];
+	int algo;
+};
+
 /* A suitably aligned type for stack allocations of hash contexts. */
 union git_hash_ctx {
 	git_SHA_CTX sha1;
@@ -106,6 +129,7 @@ typedef void (*git_hash_init_fn)(git_hash_ctx *ctx);
 typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src);
 typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len);
 typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx);
+typedef void (*git_hash_final_oid_fn)(struct object_id *oid, git_hash_ctx *ctx);
 
 struct git_hash_algo {
 	/*
@@ -138,6 +162,9 @@ struct git_hash_algo {
 	/* The hash finalization function. */
 	git_hash_final_fn final_fn;
 
+	/* The hash finalization function for object IDs. */
+	git_hash_final_oid_fn final_oid_fn;
+
 	/* The OID of the empty tree. */
 	const struct object_id *empty_tree;
 
@@ -161,29 +188,6 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
 	return p - hash_algos;
 }
 
-/* The length in bytes and in hex digits of an object name (SHA-1 value). */
-#define GIT_SHA1_RAWSZ 20
-#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
-/* The block size of SHA-1. */
-#define GIT_SHA1_BLKSZ 64
-
-/* The length in bytes and in hex digits of an object name (SHA-256 value). */
-#define GIT_SHA256_RAWSZ 32
-#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
-/* The block size of SHA-256. */
-#define GIT_SHA256_BLKSZ 64
-
-/* The length in byte and in hex digits of the largest possible hash value. */
-#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
-#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
-/* The largest possible block size for any supported hash. */
-#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
-
-struct object_id {
-	unsigned char hash[GIT_MAX_RAWSZ];
-	int algo;
-};
-
 #define the_hash_algo the_repository->hash_algo
 
 extern const struct object_id null_oid;
diff --git a/object-file.c b/object-file.c
index 624af408cd..ef1eb984c8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -89,6 +89,12 @@ static void git_hash_sha1_final(unsigned char *hash, git_hash_ctx *ctx)
 	git_SHA1_Final(hash, &ctx->sha1);
 }
 
+static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	git_SHA1_Final(oid->hash, &ctx->sha1);
+	memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ);
+}
+
 
 static void git_hash_sha256_init(git_hash_ctx *ctx)
 {
@@ -110,6 +116,16 @@ static void git_hash_sha256_final(unsigned char *hash, git_hash_ctx *ctx)
 	git_SHA256_Final(hash, &ctx->sha256);
 }
 
+static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	git_SHA256_Final(oid->hash, &ctx->sha256);
+	/*
+	 * This currently does nothing, so the compiler should optimize it out,
+	 * but keep it in case we extend the hash size again.
+	 */
+	memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ);
+}
+
 static void git_hash_unknown_init(git_hash_ctx *ctx)
 {
 	BUG("trying to init unknown hash");
@@ -130,6 +146,12 @@ static void git_hash_unknown_final(unsigned char *hash, git_hash_ctx *ctx)
 	BUG("trying to finalize unknown hash");
 }
 
+static void git_hash_unknown_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	BUG("trying to finalize unknown hash");
+}
+
+
 const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 	{
 		NULL,
@@ -141,6 +163,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_unknown_clone,
 		git_hash_unknown_update,
 		git_hash_unknown_final,
+		git_hash_unknown_final_oid,
 		NULL,
 		NULL,
 	},
@@ -155,6 +178,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_sha1_clone,
 		git_hash_sha1_update,
 		git_hash_sha1_final,
+		git_hash_sha1_final_oid,
 		&empty_tree_oid,
 		&empty_blob_oid,
 	},
@@ -169,6 +193,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_sha256_clone,
 		git_hash_sha256_update,
 		git_hash_sha256_final,
+		git_hash_sha256_final_oid,
 		&empty_tree_oid_sha256,
 		&empty_blob_oid_sha256,
 	}

  parent reply	other threads:[~2021-04-26  1:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-26  1:02 [PATCH v2 00/13] SHA-256 / SHA-1 interop, part 1 brian m. carlson
2021-04-26  1:02 ` [PATCH v2 01/13] hash: add an algo member to struct object_id brian m. carlson
2021-05-07 13:58   ` Matheus Tavares Bernardino
2021-05-07 20:07     ` brian m. carlson
2021-04-26  1:02 ` [PATCH v2 02/13] Always use oidread to read into " brian m. carlson
2021-04-26  1:02 ` [PATCH v2 03/13] http-push: set algorithm when reading object ID brian m. carlson
2021-04-26  1:02 ` brian m. carlson [this message]
2021-04-26  1:02 ` [PATCH v2 05/13] Use the final_oid_fn to finalize hashing of object IDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 06/13] builtin/pack-redundant: avoid casting buffers to struct object_id brian m. carlson
2021-04-26  1:02 ` [PATCH v2 07/13] hash: set, copy, and use algo field in " brian m. carlson
2021-04-26  1:02 ` [PATCH v2 08/13] hash: provide per-algorithm null OIDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 09/13] builtin/show-index: set the algorithm for object IDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 10/13] commit-graph: don't store file hashes as struct object_id brian m. carlson
2021-04-26  1:02 ` [PATCH v2 11/13] builtin/pack-objects: avoid using struct object_id for pack hash brian m. carlson
2021-04-26  1:03 ` [PATCH v2 12/13] hex: default to the_hash_algo on zero algorithm value brian m. carlson
2021-04-26  1:03 ` [PATCH v2 13/13] hex: print objects using the hash algorithm member brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210426010301.1093562-5-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=avarab@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).