git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: "René Scharfe" <l.s.r@web.de>, "Duy Nguyen" <pclouds@gmail.com>
Subject: [PATCH 29/31] read-cache: read data in a hash-independent way
Date: Tue, 12 Feb 2019 01:22:54 +0000	[thread overview]
Message-ID: <20190212012256.1005924-30-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20190212012256.1005924-1-sandals@crustytoothpaste.net>

Index entries are structured with a variety of fields up front, followed
by a hash and one or two flags fields.  Because the hash field is stored
in the middle of the structure, it's difficult to use one fixed-size
structure that easily allows access to the hash and flags fields.
Adjust the structure to hold the maximum amount of data that may be
needed using a member called "data" and read and write this field
independently in the various places that need to read and write the
structure.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 read-cache.c | 74 ++++++++++++++++++++--------------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 0e0c93edc9..d9f12c568f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1634,39 +1634,24 @@ struct ondisk_cache_entry {
 	uint32_t uid;
 	uint32_t gid;
 	uint32_t size;
-	unsigned char sha1[20];
-	uint16_t flags;
-	char name[FLEX_ARRAY]; /* more */
-};
-
-/*
- * This struct is used when CE_EXTENDED bit is 1
- * The struct must match ondisk_cache_entry exactly from
- * ctime till flags
- */
-struct ondisk_cache_entry_extended {
-	struct cache_time ctime;
-	struct cache_time mtime;
-	uint32_t dev;
-	uint32_t ino;
-	uint32_t mode;
-	uint32_t uid;
-	uint32_t gid;
-	uint32_t size;
-	unsigned char sha1[20];
-	uint16_t flags;
-	uint16_t flags2;
-	char name[FLEX_ARRAY]; /* more */
+	/*
+	 * unsigned char hash[hashsz];
+	 * uint16_t flags;
+	 * if (flags & CE_EXTENDED)
+	 *	uint16_t flags2;
+	 */
+	unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)];
+	char name[FLEX_ARRAY];
 };
 
 /* These are only used for v3 or lower */
 #define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
-#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7)
 #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
-#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
-#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
-			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
-			    ondisk_cache_entry_size(ce_namelen(ce)))
+#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \
+				     ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len)
+#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len))
+#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce))))
 
 /* Allow fsck to force verification of the index checksum. */
 int verify_index_checksum;
@@ -1740,6 +1725,8 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	struct cache_entry *ce;
 	size_t len;
 	const char *name;
+	const unsigned hashsz = the_hash_algo->rawsz;
+	const uint16_t *flagsp = (const uint16_t *)(ondisk->data + hashsz);
 	unsigned int flags;
 	size_t copy_len = 0;
 	/*
@@ -1752,22 +1739,20 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	int expand_name_field = version == 4;
 
 	/* On-disk flags are just 16 bits */
-	flags = get_be16(&ondisk->flags);
+	flags = get_be16(flagsp);
 	len = flags & CE_NAMEMASK;
 
 	if (flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
 		int extended_flags;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		extended_flags = get_be16(&ondisk2->flags2) << 16;
+		extended_flags = get_be16(flagsp + 1) << 16;
 		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
 		if (extended_flags & ~CE_EXTENDED_FLAGS)
 			die(_("unknown index entry format 0x%08x"), extended_flags);
 		flags |= extended_flags;
-		name = ondisk2->name;
+		name = (const char *)(flagsp + 2);
 	}
 	else
-		name = ondisk->name;
+		name = (const char *)(flagsp + 1);
 
 	if (expand_name_field) {
 		const unsigned char *cp = (const unsigned char *)name;
@@ -1806,7 +1791,9 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	ce->ce_flags = flags & ~CE_NAMEMASK;
 	ce->ce_namelen = len;
 	ce->index = 0;
-	hashcpy(ce->oid.hash, ondisk->sha1);
+	hashcpy(ce->oid.hash, ondisk->data);
+	memcpy(ce->name, name, len);
+	ce->name[len] = '\0';
 
 	if (expand_name_field) {
 		if (copy_len)
@@ -2528,6 +2515,8 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 				       struct cache_entry *ce)
 {
 	short flags;
+	const unsigned hashsz = the_hash_algo->rawsz;
+	uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz);
 
 	ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec);
 	ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
@@ -2539,15 +2528,13 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 	ondisk->uid  = htonl(ce->ce_stat_data.sd_uid);
 	ondisk->gid  = htonl(ce->ce_stat_data.sd_gid);
 	ondisk->size = htonl(ce->ce_stat_data.sd_size);
-	hashcpy(ondisk->sha1, ce->oid.hash);
+	hashcpy(ondisk->data, ce->oid.hash);
 
 	flags = ce->ce_flags & ~CE_NAMEMASK;
 	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
-	ondisk->flags = htons(flags);
+	flagsp[0] = htons(flags);
 	if (ce->ce_flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
+		flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
 	}
 }
 
@@ -2566,10 +2553,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
 		stripped_name = 1;
 	}
 
-	if (ce->ce_flags & CE_EXTENDED)
-		size = offsetof(struct ondisk_cache_entry_extended, name);
-	else
-		size = offsetof(struct ondisk_cache_entry, name);
+	size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0);
 
 	if (!previous_name) {
 		int len = ce_namelen(ce);
@@ -2727,7 +2711,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	struct cache_entry **cache = istate->cache;
 	int entries = istate->cache_nr;
 	struct stat st;
-	struct ondisk_cache_entry_extended ondisk;
+	struct ondisk_cache_entry ondisk;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	int drop_cache_tree = istate->drop_cache_tree;
 	off_t offset;

  parent reply	other threads:[~2019-02-12  1:23 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-12  1:22 [PATCH 00/31] Hash function transition part 16 brian m. carlson
2019-02-12  1:22 ` [PATCH 01/31] t/lib-submodule-update: use appropriate length constant brian m. carlson
2019-02-12  1:22 ` [PATCH 02/31] pack-bitmap: make bitmap header handling hash agnostic brian m. carlson
2019-02-12  1:22 ` [PATCH 03/31] pack-bitmap: convert struct stored_bitmap to object_id brian m. carlson
2019-02-12  1:22 ` [PATCH 04/31] pack-bitmap: replace sha1_to_hex brian m. carlson
2019-02-12  6:37   ` Jeff King
2019-02-13  0:00     ` brian m. carlson
2019-02-14  4:41       ` Jeff King
2019-02-12  1:22 ` [PATCH 05/31] pack-bitmap: switch hard-coded constants to the_hash_algo brian m. carlson
2019-02-12 11:13   ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 06/31] submodule: avoid hard-coded constants brian m. carlson
2019-02-12  1:22 ` [PATCH 07/31] notes-merge: switch to use the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 08/31] notes: make hash size independent brian m. carlson
2019-02-12  1:37   ` Eric Sunshine
2019-02-12  1:42     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 09/31] notes: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 10/31] object-store: rename and expand packed_git's sha1 member brian m. carlson
2019-02-12  3:32   ` Eric Sunshine
2019-02-14  3:33     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 11/31] builtin/name-rev: make hash-size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 12/31] fast-import: " brian m. carlson
2019-02-12  3:44   ` Eric Sunshine
2019-02-12 23:36     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 13/31] fast-import: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 14/31] builtin/am: make hash size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 15/31] builtin/pull: make hash-size independent brian m. carlson
2019-02-12  3:47   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 16/31] http-push: convert to use the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 17/31] http-backend: allow 64-character hex names brian m. carlson
2019-02-12  1:22 ` [PATCH 18/31] http-push: remove remaining uses of sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 19/31] http-walker: replace sha1_to_hex brian m. carlson
2019-02-12  3:51   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 20/31] http: replace hard-coded constant with the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 21/31] http: compute hash of downloaded objects using the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 22/31] http: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 23/31] remote-curl: make hash size independent brian m. carlson
2019-02-12 11:11   ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 24/31] archive-tar: " brian m. carlson
2019-02-12  7:20   ` René Scharfe
2019-02-12 17:33     ` René Scharfe
2019-02-13  0:11       ` brian m. carlson
2019-02-12  1:22 ` [PATCH 25/31] archive: convert struct archiver_args to object_id brian m. carlson
2019-02-12  1:22 ` [PATCH 26/31] refspec: make hash size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 27/31] builtin/difftool: use parse_oid_hex brian m. carlson
2019-02-12  8:27   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 28/31] dir: make untracked cache extension hash size independent brian m. carlson
2019-02-12 11:08   ` Ævar Arnfjörð Bjarmason
2019-02-13  0:30     ` brian m. carlson
2019-02-12  1:22 ` brian m. carlson [this message]
2019-02-12  1:22 ` [PATCH 30/31] Git.pm: make " brian m. carlson
2019-02-12 10:59   ` Ævar Arnfjörð Bjarmason
2019-02-18 19:09     ` brian m. carlson
2019-02-18 21:00       ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 31/31] gitweb: " brian m. carlson
2019-02-12 10:57   ` Ævar Arnfjörð Bjarmason
2019-02-12 11:15 ` [PATCH 00/31] Hash function transition part 16 Ævar Arnfjörð Bjarmason

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190212012256.1005924-30-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=git@vger.kernel.org \
    --cc=l.s.r@web.de \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).