git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Eric W. Biederman" <ebiederm@gmail.com>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org,
	"brian m. carlson" <sandals@crustytoothpaste.net>,
	Eric Sunshine <sunshine@sunshineco.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>
Subject: [PATCH v2 03/30] object-names: support input of oids in any supported hash
Date: Sun,  1 Oct 2023 21:40:07 -0500	[thread overview]
Message-ID: <20231002024034.2611-3-ebiederm@gmail.com> (raw)
In-Reply-To: <878r8l929e.fsf@gmail.froward.int.ebiederm.org>

From: "Eric W. Biederman" <ebiederm@xmission.com>

Support short oids encoded in any algorithm, while ensuring enough of
the oid is specified to disambiguate between all of the oids in the
repository encoded in any algorithm.

By default have the code continue to only accept oids specified in the
storage hash algorithm of the repository, but when something is
ambiguous display all of the possible oids from any accepted oid
encoding.

A new flag is added GET_OID_HASH_ANY that when supplied causes the
code to accept oids specified in any hash algorithm, and to return the
oids that were resolved.

This implements the functionality that allows both SHA-1 and SHA-256
object names, from the "Object names on the command line" section of
the hash function transition document.

Care is taken in get_short_oid so that when the result is ambiguous
the output remains the same if GIT_OID_HASH_ANY was not supplied.  If
GET_OID_HASH_ANY was supplied objects of any hash algorithm that match
the prefix are displayed.

This required updating repo_for_each_abbrev to give it a parameter so
that it knows to look at all hash algorithms.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 builtin/rev-parse.c |  2 +-
 hash-ll.h           |  1 +
 object-name.c       | 46 ++++++++++++++++++++++++++++++++++-----------
 object-name.h       |  3 ++-
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index fde8861ca4e0..43e96765400c 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -882,7 +882,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 				continue;
 			}
 			if (skip_prefix(arg, "--disambiguate=", &arg)) {
-				repo_for_each_abbrev(the_repository, arg,
+				repo_for_each_abbrev(the_repository, arg, the_hash_algo,
 						     show_abbrev, NULL);
 				continue;
 			}
diff --git a/hash-ll.h b/hash-ll.h
index 10d84cc20888..2cfde63ae1cf 100644
--- a/hash-ll.h
+++ b/hash-ll.h
@@ -145,6 +145,7 @@ struct object_id {
 #define GET_OID_RECORD_PATH     0200
 #define GET_OID_ONLY_TO_DIE    04000
 #define GET_OID_REQUIRE_PATH  010000
+#define GET_OID_HASH_ANY      020000
 
 #define GET_OID_DISAMBIGUATORS \
 	(GET_OID_COMMIT | GET_OID_COMMITTISH | \
diff --git a/object-name.c b/object-name.c
index 0bfa29dbbfe9..7dd6e5e47566 100644
--- a/object-name.c
+++ b/object-name.c
@@ -25,6 +25,7 @@
 #include "midx.h"
 #include "commit-reach.h"
 #include "date.h"
+#include "object-file-convert.h"
 
 static int get_oid_oneline(struct repository *r, const char *, struct object_id *, struct commit_list *);
 
@@ -49,6 +50,7 @@ struct disambiguate_state {
 
 static void update_candidates(struct disambiguate_state *ds, const struct object_id *current)
 {
+	/* The hash algorithm of current has already been filtered */
 	if (ds->always_call_fn) {
 		ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0;
 		return;
@@ -134,6 +136,8 @@ static void unique_in_midx(struct multi_pack_index *m,
 {
 	uint32_t num, i, first = 0;
 	const struct object_id *current = NULL;
+	int len = ds->len > ds->repo->hash_algo->hexsz ?
+		ds->repo->hash_algo->hexsz : ds->len;
 	num = m->num_objects;
 
 	if (!num)
@@ -149,7 +153,7 @@ static void unique_in_midx(struct multi_pack_index *m,
 	for (i = first; i < num && !ds->ambiguous; i++) {
 		struct object_id oid;
 		current = nth_midxed_object_oid(&oid, m, i);
-		if (!match_hash(ds->len, ds->bin_pfx.hash, current->hash))
+		if (!match_hash(len, ds->bin_pfx.hash, current->hash))
 			break;
 		update_candidates(ds, current);
 	}
@@ -159,6 +163,8 @@ static void unique_in_pack(struct packed_git *p,
 			   struct disambiguate_state *ds)
 {
 	uint32_t num, i, first = 0;
+	int len = ds->len > ds->repo->hash_algo->hexsz ?
+		ds->repo->hash_algo->hexsz : ds->len;
 
 	if (p->multi_pack_index)
 		return;
@@ -177,7 +183,7 @@ static void unique_in_pack(struct packed_git *p,
 	for (i = first; i < num && !ds->ambiguous; i++) {
 		struct object_id oid;
 		nth_packed_object_id(&oid, p, i);
-		if (!match_hash(ds->len, ds->bin_pfx.hash, oid.hash))
+		if (!match_hash(len, ds->bin_pfx.hash, oid.hash))
 			break;
 		update_candidates(ds, &oid);
 	}
@@ -188,6 +194,10 @@ static void find_short_packed_object(struct disambiguate_state *ds)
 	struct multi_pack_index *m;
 	struct packed_git *p;
 
+	/* Skip, unless oids from the storage hash algorithm are wanted */
+	if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo))
+		return;
+
 	for (m = get_multi_pack_index(ds->repo); m && !ds->ambiguous;
 	     m = m->next)
 		unique_in_midx(m, ds);
@@ -326,11 +336,12 @@ int set_disambiguate_hint_config(const char *var, const char *value)
 
 static int init_object_disambiguation(struct repository *r,
 				      const char *name, int len,
+				      const struct git_hash_algo *algo,
 				      struct disambiguate_state *ds)
 {
 	int i;
 
-	if (len < MINIMUM_ABBREV || len > the_hash_algo->hexsz)
+	if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ)
 		return -1;
 
 	memset(ds, 0, sizeof(*ds));
@@ -357,6 +368,7 @@ static int init_object_disambiguation(struct repository *r,
 	ds->len = len;
 	ds->hex_pfx[len] = '\0';
 	ds->repo = r;
+	ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN;
 	prepare_alt_odb(r);
 	return 0;
 }
@@ -491,9 +503,10 @@ static int repo_collect_ambiguous(struct repository *r UNUSED,
 	return collect_ambiguous(oid, data);
 }
 
-static int sort_ambiguous(const void *a, const void *b, void *ctx)
+static int sort_ambiguous(const void *va, const void *vb, void *ctx)
 {
 	struct repository *sort_ambiguous_repo = ctx;
+	const struct object_id *a = va, *b = vb;
 	int a_type = oid_object_info(sort_ambiguous_repo, a, NULL);
 	int b_type = oid_object_info(sort_ambiguous_repo, b, NULL);
 	int a_type_sort;
@@ -503,8 +516,12 @@ static int sort_ambiguous(const void *a, const void *b, void *ctx)
 	 * Sorts by hash within the same object type, just as
 	 * oid_array_for_each_unique() would do.
 	 */
-	if (a_type == b_type)
-		return oidcmp(a, b);
+	if (a_type == b_type) {
+		if (a->algo == b->algo)
+			return oidcmp(a, b);
+		else
+			return a->algo > b->algo ? 1 : -1;
+	}
 
 	/*
 	 * Between object types show tags, then commits, and finally
@@ -533,8 +550,12 @@ static enum get_oid_result get_short_oid(struct repository *r,
 	int status;
 	struct disambiguate_state ds;
 	int quietly = !!(flags & GET_OID_QUIETLY);
+	const struct git_hash_algo *algo = r->hash_algo;
+
+	if (flags & GET_OID_HASH_ANY)
+		algo = NULL;
 
-	if (init_object_disambiguation(r, name, len, &ds) < 0)
+	if (init_object_disambiguation(r, name, len, algo, &ds) < 0)
 		return -1;
 
 	if (HAS_MULTI_BITS(flags & GET_OID_DISAMBIGUATORS))
@@ -588,7 +609,7 @@ static enum get_oid_result get_short_oid(struct repository *r,
 		if (!ds.ambiguous)
 			ds.fn = NULL;
 
-		repo_for_each_abbrev(r, ds.hex_pfx, collect_ambiguous, &collect);
+		repo_for_each_abbrev(r, ds.hex_pfx, algo, collect_ambiguous, &collect);
 		sort_ambiguous_oid_array(r, &collect);
 
 		if (oid_array_for_each(&collect, show_ambiguous_object, &out))
@@ -610,13 +631,14 @@ static enum get_oid_result get_short_oid(struct repository *r,
 }
 
 int repo_for_each_abbrev(struct repository *r, const char *prefix,
+			 const struct git_hash_algo *algo,
 			 each_abbrev_fn fn, void *cb_data)
 {
 	struct oid_array collect = OID_ARRAY_INIT;
 	struct disambiguate_state ds;
 	int ret;
 
-	if (init_object_disambiguation(r, prefix, strlen(prefix), &ds) < 0)
+	if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0)
 		return -1;
 
 	ds.always_call_fn = 1;
@@ -787,10 +809,12 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid,
 int repo_find_unique_abbrev_r(struct repository *r, char *hex,
 			      const struct object_id *oid, int len)
 {
+	const struct git_hash_algo *algo =
+		oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
 	struct disambiguate_state ds;
 	struct min_abbrev_data mad;
 	struct object_id oid_ret;
-	const unsigned hexsz = r->hash_algo->hexsz;
+	const unsigned hexsz = algo->hexsz;
 
 	if (len < 0) {
 		unsigned long count = repo_approximate_object_count(r);
@@ -826,7 +850,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
 
 	find_abbrev_len_packed(&mad);
 
-	if (init_object_disambiguation(r, hex, mad.cur_len, &ds) < 0)
+	if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0)
 		return -1;
 
 	ds.fn = repo_extend_abbrev_len;
diff --git a/object-name.h b/object-name.h
index 9ae522307148..064ddc97d1fe 100644
--- a/object-name.h
+++ b/object-name.h
@@ -67,7 +67,8 @@ enum get_oid_result get_oid_with_context(struct repository *repo, const char *st
 
 
 typedef int each_abbrev_fn(const struct object_id *oid, void *);
-int repo_for_each_abbrev(struct repository *r, const char *prefix, each_abbrev_fn, void *);
+int repo_for_each_abbrev(struct repository *r, const char *prefix,
+			 const struct git_hash_algo *algo, each_abbrev_fn, void *);
 
 int set_disambiguate_hint_config(const char *var, const char *value);
 
-- 
2.41.0


  parent reply	other threads:[~2023-10-02  2:40 UTC|newest]

Thread overview: 104+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-27 19:49 [PATCH 00/30] Initial support for multiple hash functions Eric W. Biederman
2023-09-27 19:55 ` [PATCH 01/30] object-file-convert: Stubs for converting from one object format to another Eric W. Biederman
2023-09-27 20:42   ` Eric Sunshine
2023-10-02  1:22     ` Eric W. Biederman
2023-10-02  2:27       ` Eric Sunshine
2023-09-27 19:55 ` [PATCH 02/30] oid-array: Teach oid-array to handle multiple kinds of oids Eric W. Biederman
2023-09-27 23:20   ` Eric Sunshine
2023-09-27 19:55 ` [PATCH 03/30] object-names: Support input of oids in any supported hash Eric W. Biederman
2023-09-27 23:29   ` Eric Sunshine
2023-10-02  1:54     ` Eric W. Biederman
2023-09-27 19:55 ` [PATCH 04/30] repository: add a compatibility hash algorithm Eric W. Biederman
2023-09-27 19:55 ` [PATCH 05/30] loose: add a mapping between SHA-1 and SHA-256 for loose objects Eric W. Biederman
2023-09-28  7:14   ` Eric Sunshine
2023-10-02  2:11     ` Eric W. Biederman
2023-10-02  2:36       ` Eric Sunshine
2023-09-27 19:55 ` [PATCH 06/30] loose: Compatibilty short name support Eric W. Biederman
2023-09-27 19:55 ` [PATCH 07/30] object-file: Update the loose object map when writing loose objects Eric W. Biederman
2023-09-27 19:55 ` [PATCH 08/30] object-file: Add a compat_oid_in parameter to write_object_file_flags Eric W. Biederman
2023-09-27 19:55 ` [PATCH 09/30] commit: write commits for both hashes Eric W. Biederman
2023-09-27 19:55 ` [PATCH 10/30] commit: Convert mergetag before computing the signature of a commit Eric W. Biederman
2023-09-27 19:55 ` [PATCH 11/30] commit: Export add_header_signature to support handling signatures on tags Eric W. Biederman
2023-09-27 19:55 ` [PATCH 12/30] tag: sign both hashes Eric W. Biederman
2023-09-27 19:55 ` [PATCH 13/30] cache: add a function to read an OID of a specific algorithm Eric W. Biederman
2023-09-27 19:55 ` [PATCH 14/30] object: Factor out parse_mode out of fast-import and tree-walk into in object.h Eric W. Biederman
2023-09-27 19:55 ` [PATCH 15/30] object-file-convert: add a function to convert trees between algorithms Eric W. Biederman
2023-09-27 19:55 ` [PATCH 16/30] object-file-convert: convert tag objects when writing Eric W. Biederman
2023-09-27 19:55 ` [PATCH 17/30] object-file-convert: Don't leak when converting tag objects Eric W. Biederman
2023-09-27 19:55 ` [PATCH 18/30] object-file-convert: convert commit objects when writing Eric W. Biederman
2023-09-27 19:55 ` [PATCH 19/30] object-file-convert: Convert commits that embed signed tags Eric W. Biederman
2023-09-27 19:55 ` [PATCH 20/30] object-file: Update object_info_extended to reencode objects Eric W. Biederman
2023-09-27 19:55 ` [PATCH 21/30] repository: Implement extensions.compatObjectFormat Eric W. Biederman
2023-09-27 21:39   ` Junio C Hamano
2023-09-28 20:18     ` Junio C Hamano
2023-09-29  0:50       ` Eric Biederman
2023-09-29 16:59       ` Eric W. Biederman
2023-09-29 18:48         ` Junio C Hamano
2023-10-02  0:48           ` Eric W. Biederman
2023-10-02  1:31     ` Eric W. Biederman
2023-09-27 19:55 ` [PATCH 22/30] rev-parse: Add an --output-object-format parameter Eric W. Biederman
2023-09-27 19:55 ` [PATCH 23/30] builtin/cat-file: Let the oid determine the output algorithm Eric W. Biederman
2023-09-27 19:55 ` [PATCH 24/30] tree-walk: init_tree_desc take an oid to get the hash algorithm Eric W. Biederman
2023-09-27 19:55 ` [PATCH 25/30] object-file: Handle compat objects in check_object_signature Eric W. Biederman
2023-09-27 19:55 ` [PATCH 26/30] builtin/ls-tree: Let the oid determine the output algorithm Eric W. Biederman
2023-09-27 19:55 ` [PATCH 27/30] test-lib: Compute the compatibility hash so tests may use it Eric W. Biederman
2023-09-27 19:55 ` [PATCH 28/30] t1006: Rename sha1 to oid Eric W. Biederman
2023-09-27 19:55 ` [PATCH 29/30] t1006: Test oid compatibility with cat-file Eric W. Biederman
2023-09-27 19:55 ` [PATCH 30/30] t1016-compatObjectFormat: Add tests to verify the conversion between objects Eric W. Biederman
2023-09-27 21:31 ` [PATCH 00/30] Initial support for multiple hash functions Junio C Hamano
2023-10-02  2:39 ` [PATCH v2 00/30] initial " Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 01/30] object-file-convert: stubs for converting from one object format to another Eric W. Biederman
2024-02-08  8:23     ` Linus Arver
2024-02-15 11:21     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 02/30] oid-array: teach oid-array to handle multiple kinds of oids Eric W. Biederman
2024-02-13  8:16     ` Linus Arver
2024-02-15  6:22       ` Eric W. Biederman
2024-02-16  0:16         ` Linus Arver
2024-02-16  4:48           ` Eric W. Biederman
2024-02-17  1:59             ` Linus Arver
2024-02-13  8:31     ` Kristoffer Haugsbakk
2024-02-15  6:24       ` Eric W. Biederman
2024-02-15 11:21     ` Patrick Steinhardt
2023-10-02  2:40   ` Eric W. Biederman [this message]
2024-02-13  9:33     ` [PATCH v2 03/30] object-names: support input of oids in any supported hash Linus Arver
2024-02-15 11:21     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 04/30] repository: add a compatibility hash algorithm Eric W. Biederman
2024-02-13 10:02     ` Linus Arver
2024-02-15 11:22     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 05/30] loose: add a mapping between SHA-1 and SHA-256 for loose objects Eric W. Biederman
2024-02-14  7:20     ` Linus Arver
2024-02-15  5:33       ` Eric W. Biederman
2024-02-15 11:22     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 06/30] loose: compatibilty short name support Eric W. Biederman
2024-02-15 11:22     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 07/30] object-file: update the loose object map when writing loose objects Eric W. Biederman
2024-02-15 11:22     ` Patrick Steinhardt
2023-10-02  2:40   ` [PATCH v2 08/30] object-file: add a compat_oid_in parameter to write_object_file_flags Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 09/30] commit: write commits for both hashes Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 10/30] commit: convert mergetag before computing the signature of a commit Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 11/30] commit: export add_header_signature to support handling signatures on tags Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 12/30] tag: sign both hashes Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 13/30] cache: add a function to read an OID of a specific algorithm Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 14/30] object: factor out parse_mode out of fast-import and tree-walk into in object.h Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 15/30] object-file-convert: add a function to convert trees between algorithms Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 16/30] object-file-convert: convert tag objects when writing Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 17/30] object-file-convert: don't leak when converting tag objects Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 18/30] object-file-convert: convert commit objects when writing Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 19/30] object-file-convert: convert commits that embed signed tags Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 20/30] object-file: update object_info_extended to reencode objects Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 21/30] repository: implement extensions.compatObjectFormat Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 22/30] rev-parse: add an --output-object-format parameter Eric W. Biederman
2024-02-08 16:25     ` Jean-Noël Avila
2023-10-02  2:40   ` [PATCH v2 23/30] builtin/cat-file: let the oid determine the output algorithm Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 24/30] tree-walk: init_tree_desc take an oid to get the hash algorithm Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 25/30] object-file: handle compat objects in check_object_signature Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 26/30] builtin/ls-tree: let the oid determine the output algorithm Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 27/30] test-lib: compute the compatibility hash so tests may use it Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 28/30] t1006: rename sha1 to oid Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 29/30] t1006: test oid compatibility with cat-file Eric W. Biederman
2023-10-02  2:40   ` [PATCH v2 30/30] t1016-compatObjectFormat: add tests to verify the conversion between objects Eric W. Biederman
2024-02-07 22:18   ` [PATCH v2 00/30] initial support for multiple hash functions Junio C Hamano
2024-02-08  0:24     ` Linus Arver
2024-02-08  6:11       ` Patrick Steinhardt
2024-02-14  7:36       ` Linus Arver
2024-02-15 11:27   ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231002024034.2611-3-ebiederm@gmail.com \
    --to=ebiederm@gmail.com \
    --cc=ebiederm@xmission.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sandals@crustytoothpaste.net \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).