From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: "René Scharfe" <l.s.r@web.de>, "Duy Nguyen" <pclouds@gmail.com>
Subject: [PATCH 29/31] read-cache: read data in a hash-independent way
Date: Tue, 12 Feb 2019 01:22:54 +0000 [thread overview]
Message-ID: <20190212012256.1005924-30-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20190212012256.1005924-1-sandals@crustytoothpaste.net>
Index entries are structured with a variety of fields up front, followed
by a hash and one or two flags fields. Because the hash field is stored
in the middle of the structure, it's difficult to use one fixed-size
structure that easily allows access to the hash and flags fields.
Adjust the structure to hold the maximum amount of data that may be
needed in a single member called "data", and compute the offsets of the
hash and flags fields within that member at each place that reads or
writes the structure.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
read-cache.c | 74 ++++++++++++++++++++--------------------------------
1 file changed, 29 insertions(+), 45 deletions(-)
diff --git a/read-cache.c b/read-cache.c
index 0e0c93edc9..d9f12c568f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1634,39 +1634,24 @@ struct ondisk_cache_entry {
uint32_t uid;
uint32_t gid;
uint32_t size;
- unsigned char sha1[20];
- uint16_t flags;
- char name[FLEX_ARRAY]; /* more */
-};
-
-/*
- * This struct is used when CE_EXTENDED bit is 1
- * The struct must match ondisk_cache_entry exactly from
- * ctime till flags
- */
-struct ondisk_cache_entry_extended {
- struct cache_time ctime;
- struct cache_time mtime;
- uint32_t dev;
- uint32_t ino;
- uint32_t mode;
- uint32_t uid;
- uint32_t gid;
- uint32_t size;
- unsigned char sha1[20];
- uint16_t flags;
- uint16_t flags2;
- char name[FLEX_ARRAY]; /* more */
+ /*
+ * unsigned char hash[hashsz];
+ * uint16_t flags;
+ * if (flags & CE_EXTENDED)
+ * uint16_t flags2;
+ */
+ unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)];
+ char name[FLEX_ARRAY];
};
/* These are only used for v3 or lower */
#define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
-#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7)
#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
-#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
-#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
- ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
- ondisk_cache_entry_size(ce_namelen(ce)))
+#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \
+ ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len)
+#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len))
+#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce))))
/* Allow fsck to force verification of the index checksum. */
int verify_index_checksum;
@@ -1740,6 +1725,8 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
struct cache_entry *ce;
size_t len;
const char *name;
+ const unsigned hashsz = the_hash_algo->rawsz;
+ const uint16_t *flagsp = (const uint16_t *)(ondisk->data + hashsz);
unsigned int flags;
size_t copy_len = 0;
/*
@@ -1752,22 +1739,20 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
int expand_name_field = version == 4;
/* On-disk flags are just 16 bits */
- flags = get_be16(&ondisk->flags);
+ flags = get_be16(flagsp);
len = flags & CE_NAMEMASK;
if (flags & CE_EXTENDED) {
- struct ondisk_cache_entry_extended *ondisk2;
int extended_flags;
- ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
- extended_flags = get_be16(&ondisk2->flags2) << 16;
+ extended_flags = get_be16(flagsp + 1) << 16;
/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
if (extended_flags & ~CE_EXTENDED_FLAGS)
die(_("unknown index entry format 0x%08x"), extended_flags);
flags |= extended_flags;
- name = ondisk2->name;
+ name = (const char *)(flagsp + 2);
}
else
- name = ondisk->name;
+ name = (const char *)(flagsp + 1);
if (expand_name_field) {
const unsigned char *cp = (const unsigned char *)name;
@@ -1806,7 +1791,9 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
ce->ce_flags = flags & ~CE_NAMEMASK;
ce->ce_namelen = len;
ce->index = 0;
- hashcpy(ce->oid.hash, ondisk->sha1);
+ hashcpy(ce->oid.hash, ondisk->data);
+ memcpy(ce->name, name, len);
+ ce->name[len] = '\0';
if (expand_name_field) {
if (copy_len)
@@ -2528,6 +2515,8 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
struct cache_entry *ce)
{
short flags;
+ const unsigned hashsz = the_hash_algo->rawsz;
+ uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz);
ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec);
ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
@@ -2539,15 +2528,13 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
ondisk->uid = htonl(ce->ce_stat_data.sd_uid);
ondisk->gid = htonl(ce->ce_stat_data.sd_gid);
ondisk->size = htonl(ce->ce_stat_data.sd_size);
- hashcpy(ondisk->sha1, ce->oid.hash);
+ hashcpy(ondisk->data, ce->oid.hash);
flags = ce->ce_flags & ~CE_NAMEMASK;
flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
- ondisk->flags = htons(flags);
+ flagsp[0] = htons(flags);
if (ce->ce_flags & CE_EXTENDED) {
- struct ondisk_cache_entry_extended *ondisk2;
- ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
- ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
+ flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
}
}
@@ -2566,10 +2553,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
stripped_name = 1;
}
- if (ce->ce_flags & CE_EXTENDED)
- size = offsetof(struct ondisk_cache_entry_extended, name);
- else
- size = offsetof(struct ondisk_cache_entry, name);
+ size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0);
if (!previous_name) {
int len = ce_namelen(ce);
@@ -2727,7 +2711,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct cache_entry **cache = istate->cache;
int entries = istate->cache_nr;
struct stat st;
- struct ondisk_cache_entry_extended ondisk;
+ struct ondisk_cache_entry ondisk;
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
int drop_cache_tree = istate->drop_cache_tree;
off_t offset;
next prev parent reply other threads:[~2019-02-12 1:23 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-12 1:22 [PATCH 00/31] Hash function transition part 16 brian m. carlson
2019-02-12 1:22 ` [PATCH 01/31] t/lib-submodule-update: use appropriate length constant brian m. carlson
2019-02-12 1:22 ` [PATCH 02/31] pack-bitmap: make bitmap header handling hash agnostic brian m. carlson
2019-02-12 1:22 ` [PATCH 03/31] pack-bitmap: convert struct stored_bitmap to object_id brian m. carlson
2019-02-12 1:22 ` [PATCH 04/31] pack-bitmap: replace sha1_to_hex brian m. carlson
2019-02-12 6:37 ` Jeff King
2019-02-13 0:00 ` brian m. carlson
2019-02-14 4:41 ` Jeff King
2019-02-12 1:22 ` [PATCH 05/31] pack-bitmap: switch hard-coded constants to the_hash_algo brian m. carlson
2019-02-12 11:13 ` Ævar Arnfjörð Bjarmason
2019-02-12 1:22 ` [PATCH 06/31] submodule: avoid hard-coded constants brian m. carlson
2019-02-12 1:22 ` [PATCH 07/31] notes-merge: switch to use the_hash_algo brian m. carlson
2019-02-12 1:22 ` [PATCH 08/31] notes: make hash size independent brian m. carlson
2019-02-12 1:37 ` Eric Sunshine
2019-02-12 1:42 ` brian m. carlson
2019-02-12 1:22 ` [PATCH 09/31] notes: replace sha1_to_hex brian m. carlson
2019-02-12 1:22 ` [PATCH 10/31] object-store: rename and expand packed_git's sha1 member brian m. carlson
2019-02-12 3:32 ` Eric Sunshine
2019-02-14 3:33 ` brian m. carlson
2019-02-12 1:22 ` [PATCH 11/31] builtin/name-rev: make hash-size independent brian m. carlson
2019-02-12 1:22 ` [PATCH 12/31] fast-import: " brian m. carlson
2019-02-12 3:44 ` Eric Sunshine
2019-02-12 23:36 ` brian m. carlson
2019-02-12 1:22 ` [PATCH 13/31] fast-import: replace sha1_to_hex brian m. carlson
2019-02-12 1:22 ` [PATCH 14/31] builtin/am: make hash size independent brian m. carlson
2019-02-12 1:22 ` [PATCH 15/31] builtin/pull: make hash-size independent brian m. carlson
2019-02-12 3:47 ` Eric Sunshine
2019-02-12 1:22 ` [PATCH 16/31] http-push: convert to use the_hash_algo brian m. carlson
2019-02-12 1:22 ` [PATCH 17/31] http-backend: allow 64-character hex names brian m. carlson
2019-02-12 1:22 ` [PATCH 18/31] http-push: remove remaining uses of sha1_to_hex brian m. carlson
2019-02-12 1:22 ` [PATCH 19/31] http-walker: replace sha1_to_hex brian m. carlson
2019-02-12 3:51 ` Eric Sunshine
2019-02-12 1:22 ` [PATCH 20/31] http: replace hard-coded constant with the_hash_algo brian m. carlson
2019-02-12 1:22 ` [PATCH 21/31] http: compute hash of downloaded objects using the_hash_algo brian m. carlson
2019-02-12 1:22 ` [PATCH 22/31] http: replace sha1_to_hex brian m. carlson
2019-02-12 1:22 ` [PATCH 23/31] remote-curl: make hash size independent brian m. carlson
2019-02-12 11:11 ` Ævar Arnfjörð Bjarmason
2019-02-12 1:22 ` [PATCH 24/31] archive-tar: " brian m. carlson
2019-02-12 7:20 ` René Scharfe
2019-02-12 17:33 ` René Scharfe
2019-02-13 0:11 ` brian m. carlson
2019-02-12 1:22 ` [PATCH 25/31] archive: convert struct archiver_args to object_id brian m. carlson
2019-02-12 1:22 ` [PATCH 26/31] refspec: make hash size independent brian m. carlson
2019-02-12 1:22 ` [PATCH 27/31] builtin/difftool: use parse_oid_hex brian m. carlson
2019-02-12 8:27 ` Eric Sunshine
2019-02-12 1:22 ` [PATCH 28/31] dir: make untracked cache extension hash size independent brian m. carlson
2019-02-12 11:08 ` Ævar Arnfjörð Bjarmason
2019-02-13 0:30 ` brian m. carlson
2019-02-12 1:22 ` brian m. carlson [this message]
2019-02-12 1:22 ` [PATCH 30/31] Git.pm: make " brian m. carlson
2019-02-12 10:59 ` Ævar Arnfjörð Bjarmason
2019-02-18 19:09 ` brian m. carlson
2019-02-18 21:00 ` Ævar Arnfjörð Bjarmason
2019-02-12 1:22 ` [PATCH 31/31] gitweb: " brian m. carlson
2019-02-12 10:57 ` Ævar Arnfjörð Bjarmason
2019-02-12 11:15 ` [PATCH 00/31] Hash function transition part 16 Ævar Arnfjörð Bjarmason
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190212012256.1005924-30-sandals@crustytoothpaste.net \
--to=sandals@crustytoothpaste.net \
--cc=git@vger.kernel.org \
--cc=l.s.r@web.de \
--cc=pclouds@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).