All of lore.kernel.org
 help / color / mirror / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: "René Scharfe" <l.s.r@web.de>, "Duy Nguyen" <pclouds@gmail.com>
Subject: [PATCH 29/31] read-cache: read data in a hash-independent way
Date: Tue, 12 Feb 2019 01:22:54 +0000	[thread overview]
Message-ID: <20190212012256.1005924-30-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20190212012256.1005924-1-sandals@crustytoothpaste.net>

Index entries are structured with a variety of fields up front, followed
by a hash and one or two flags fields.  Because the hash field is stored
in the middle of the structure and its length depends on the hash
algorithm in use, it's difficult to use one fixed-size structure that
easily allows access to the hash and flags fields.  Adjust the structure
to hold the maximum amount of data that may be needed, using a member
called "data", and read and write this field independently in the
various places that need to read and write the structure.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 read-cache.c | 74 ++++++++++++++++++++--------------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 0e0c93edc9..d9f12c568f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1634,39 +1634,24 @@ struct ondisk_cache_entry {
 	uint32_t uid;
 	uint32_t gid;
 	uint32_t size;
-	unsigned char sha1[20];
-	uint16_t flags;
-	char name[FLEX_ARRAY]; /* more */
-};
-
-/*
- * This struct is used when CE_EXTENDED bit is 1
- * The struct must match ondisk_cache_entry exactly from
- * ctime till flags
- */
-struct ondisk_cache_entry_extended {
-	struct cache_time ctime;
-	struct cache_time mtime;
-	uint32_t dev;
-	uint32_t ino;
-	uint32_t mode;
-	uint32_t uid;
-	uint32_t gid;
-	uint32_t size;
-	unsigned char sha1[20];
-	uint16_t flags;
-	uint16_t flags2;
-	char name[FLEX_ARRAY]; /* more */
+	/*
+	 * unsigned char hash[hashsz];
+	 * uint16_t flags;
+	 * if (flags & CE_EXTENDED)
+	 *	uint16_t flags2;
+	 */
+	unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)];
+	char name[FLEX_ARRAY];
 };
 
 /* These are only used for v3 or lower */
 #define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
-#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7)
 #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
-#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
-#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
-			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
-			    ondisk_cache_entry_size(ce_namelen(ce)))
+#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \
+				     ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len)
+#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len))
+#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce))))
 
 /* Allow fsck to force verification of the index checksum. */
 int verify_index_checksum;
@@ -1740,6 +1725,8 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	struct cache_entry *ce;
 	size_t len;
 	const char *name;
+	const unsigned hashsz = the_hash_algo->rawsz;
+	const uint16_t *flagsp = (const uint16_t *)(ondisk->data + hashsz);
 	unsigned int flags;
 	size_t copy_len = 0;
 	/*
@@ -1752,22 +1739,20 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	int expand_name_field = version == 4;
 
 	/* On-disk flags are just 16 bits */
-	flags = get_be16(&ondisk->flags);
+	flags = get_be16(flagsp);
 	len = flags & CE_NAMEMASK;
 
 	if (flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
 		int extended_flags;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		extended_flags = get_be16(&ondisk2->flags2) << 16;
+		extended_flags = get_be16(flagsp + 1) << 16;
 		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
 		if (extended_flags & ~CE_EXTENDED_FLAGS)
 			die(_("unknown index entry format 0x%08x"), extended_flags);
 		flags |= extended_flags;
-		name = ondisk2->name;
+		name = (const char *)(flagsp + 2);
 	}
 	else
-		name = ondisk->name;
+		name = (const char *)(flagsp + 1);
 
 	if (expand_name_field) {
 		const unsigned char *cp = (const unsigned char *)name;
@@ -1806,7 +1791,9 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
 	ce->ce_flags = flags & ~CE_NAMEMASK;
 	ce->ce_namelen = len;
 	ce->index = 0;
-	hashcpy(ce->oid.hash, ondisk->sha1);
+	hashcpy(ce->oid.hash, ondisk->data);
+	memcpy(ce->name, name, len);
+	ce->name[len] = '\0';
 
 	if (expand_name_field) {
 		if (copy_len)
@@ -2528,6 +2515,8 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 				       struct cache_entry *ce)
 {
 	short flags;
+	const unsigned hashsz = the_hash_algo->rawsz;
+	uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz);
 
 	ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec);
 	ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
@@ -2539,15 +2528,13 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 	ondisk->uid  = htonl(ce->ce_stat_data.sd_uid);
 	ondisk->gid  = htonl(ce->ce_stat_data.sd_gid);
 	ondisk->size = htonl(ce->ce_stat_data.sd_size);
-	hashcpy(ondisk->sha1, ce->oid.hash);
+	hashcpy(ondisk->data, ce->oid.hash);
 
 	flags = ce->ce_flags & ~CE_NAMEMASK;
 	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
-	ondisk->flags = htons(flags);
+	flagsp[0] = htons(flags);
 	if (ce->ce_flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
+		flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
 	}
 }
 
@@ -2566,10 +2553,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
 		stripped_name = 1;
 	}
 
-	if (ce->ce_flags & CE_EXTENDED)
-		size = offsetof(struct ondisk_cache_entry_extended, name);
-	else
-		size = offsetof(struct ondisk_cache_entry, name);
+	size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0);
 
 	if (!previous_name) {
 		int len = ce_namelen(ce);
@@ -2727,7 +2711,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	struct cache_entry **cache = istate->cache;
 	int entries = istate->cache_nr;
 	struct stat st;
-	struct ondisk_cache_entry_extended ondisk;
+	struct ondisk_cache_entry ondisk;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	int drop_cache_tree = istate->drop_cache_tree;
 	off_t offset;

  parent reply	other threads:[~2019-02-12  1:23 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-12  1:22 [PATCH 00/31] Hash function transition part 16 brian m. carlson
2019-02-12  1:22 ` [PATCH 01/31] t/lib-submodule-update: use appropriate length constant brian m. carlson
2019-02-12  1:22 ` [PATCH 02/31] pack-bitmap: make bitmap header handling hash agnostic brian m. carlson
2019-02-12  1:22 ` [PATCH 03/31] pack-bitmap: convert struct stored_bitmap to object_id brian m. carlson
2019-02-12  1:22 ` [PATCH 04/31] pack-bitmap: replace sha1_to_hex brian m. carlson
2019-02-12  6:37   ` Jeff King
2019-02-13  0:00     ` brian m. carlson
2019-02-14  4:41       ` Jeff King
2019-02-12  1:22 ` [PATCH 05/31] pack-bitmap: switch hard-coded constants to the_hash_algo brian m. carlson
2019-02-12 11:13   ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 06/31] submodule: avoid hard-coded constants brian m. carlson
2019-02-12  1:22 ` [PATCH 07/31] notes-merge: switch to use the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 08/31] notes: make hash size independent brian m. carlson
2019-02-12  1:37   ` Eric Sunshine
2019-02-12  1:42     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 09/31] notes: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 10/31] object-store: rename and expand packed_git's sha1 member brian m. carlson
2019-02-12  3:32   ` Eric Sunshine
2019-02-14  3:33     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 11/31] builtin/name-rev: make hash-size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 12/31] fast-import: " brian m. carlson
2019-02-12  3:44   ` Eric Sunshine
2019-02-12 23:36     ` brian m. carlson
2019-02-12  1:22 ` [PATCH 13/31] fast-import: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 14/31] builtin/am: make hash size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 15/31] builtin/pull: make hash-size independent brian m. carlson
2019-02-12  3:47   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 16/31] http-push: convert to use the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 17/31] http-backend: allow 64-character hex names brian m. carlson
2019-02-12  1:22 ` [PATCH 18/31] http-push: remove remaining uses of sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 19/31] http-walker: replace sha1_to_hex brian m. carlson
2019-02-12  3:51   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 20/31] http: replace hard-coded constant with the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 21/31] http: compute hash of downloaded objects using the_hash_algo brian m. carlson
2019-02-12  1:22 ` [PATCH 22/31] http: replace sha1_to_hex brian m. carlson
2019-02-12  1:22 ` [PATCH 23/31] remote-curl: make hash size independent brian m. carlson
2019-02-12 11:11   ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 24/31] archive-tar: " brian m. carlson
2019-02-12  7:20   ` René Scharfe
2019-02-12 17:33     ` René Scharfe
2019-02-13  0:11       ` brian m. carlson
2019-02-12  1:22 ` [PATCH 25/31] archive: convert struct archiver_args to object_id brian m. carlson
2019-02-12  1:22 ` [PATCH 26/31] refspec: make hash size independent brian m. carlson
2019-02-12  1:22 ` [PATCH 27/31] builtin/difftool: use parse_oid_hex brian m. carlson
2019-02-12  8:27   ` Eric Sunshine
2019-02-12  1:22 ` [PATCH 28/31] dir: make untracked cache extension hash size independent brian m. carlson
2019-02-12 11:08   ` Ævar Arnfjörð Bjarmason
2019-02-13  0:30     ` brian m. carlson
2019-02-12  1:22 ` brian m. carlson [this message]
2019-02-12  1:22 ` [PATCH 30/31] Git.pm: make " brian m. carlson
2019-02-12 10:59   ` Ævar Arnfjörð Bjarmason
2019-02-18 19:09     ` brian m. carlson
2019-02-18 21:00       ` Ævar Arnfjörð Bjarmason
2019-02-12  1:22 ` [PATCH 31/31] gitweb: " brian m. carlson
2019-02-12 10:57   ` Ævar Arnfjörð Bjarmason
2019-02-12 11:15 ` [PATCH 00/31] Hash function transition part 16 Ævar Arnfjörð Bjarmason

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190212012256.1005924-30-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=git@vger.kernel.org \
    --cc=l.s.r@web.de \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.