git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: "Derrick Stolee" <dstolee@microsoft.com>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
Subject: [PATCH v2 07/13] hash: set, copy, and use algo field in struct object_id
Date: Mon, 26 Apr 2021 01:02:55 +0000	[thread overview]
Message-ID: <20210426010301.1093562-8-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20210426010301.1093562-1-sandals@crustytoothpaste.net>

Now that struct object_id has an algorithm field, we should populate it.
This will allow us to handle object IDs in any supported algorithm and
distinguish between them.  Ensure that the field is written whenever we
write an object ID by storing it explicitly every time we write an
object.  Set values for the empty blob and tree values as well.

In addition, use the algorithm field to compare object IDs.  Note that
because we zero-initialize struct object_id in many places throughout
the codebase, we default to the default algorithm in cases where the
algorithm field is zero rather than explicitly initialize all of those
locations.

This leads to a branch on every comparison, but the alternative is to
compare the entire buffer each time and padding the buffer for SHA-1.
That alternative ranges up to 3.9% worse than this approach on the perf
t0001, t1450, and t1451.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 hash.h        | 42 +++++++++++++++++++++++++++++++++++-------
 hex.c         |  9 ++++++---
 notes.c       |  3 +++
 object-file.c | 15 +++++++++++----
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/hash.h b/hash.h
index c8f03d8aee..0e85e448ed 100644
--- a/hash.h
+++ b/hash.h
@@ -192,36 +192,56 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
 
 extern const struct object_id null_oid;
 
-static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
+static inline int hashcmp_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
 {
 	/*
 	 * Teach the compiler that there are only two possibilities of hash size
 	 * here, so that it can optimize for this case as much as possible.
 	 */
-	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
+	if (algop->rawsz == GIT_MAX_RAWSZ)
 		return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
 	return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
 }
 
-static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
+static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
 {
-	return hashcmp(oid1->hash, oid2->hash);
+	return hashcmp_algop(sha1, sha2, the_hash_algo);
 }
 
-static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
+static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
+{
+	const struct git_hash_algo *algop;
+	if (!oid1->algo)
+		algop = the_hash_algo;
+	else
+		algop = &hash_algos[oid1->algo];
+	return hashcmp_algop(oid1->hash, oid2->hash, algop);
+}
+
+static inline int hasheq_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
 {
 	/*
 	 * We write this here instead of deferring to hashcmp so that the
 	 * compiler can properly inline it and avoid calling memcmp.
 	 */
-	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
+	if (algop->rawsz == GIT_MAX_RAWSZ)
 		return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
 	return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
 }
 
+static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
+{
+	return hasheq_algop(sha1, sha2, the_hash_algo);
+}
+
 static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
 {
-	return hasheq(oid1->hash, oid2->hash);
+	const struct git_hash_algo *algop;
+	if (!oid1->algo)
+		algop = the_hash_algo;
+	else
+		algop = &hash_algos[oid1->algo];
+	return hasheq_algop(oid1->hash, oid2->hash, algop);
 }
 
 static inline int is_null_oid(const struct object_id *oid)
@@ -237,6 +257,7 @@ static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
 static inline void oidcpy(struct object_id *dst, const struct object_id *src)
 {
 	memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
+	dst->algo = src->algo;
 }
 
 static inline struct object_id *oiddup(const struct object_id *src)
@@ -254,11 +275,13 @@ static inline void hashclr(unsigned char *hash)
 static inline void oidclr(struct object_id *oid)
 {
 	memset(oid->hash, 0, GIT_MAX_RAWSZ);
+	oid->algo = hash_algo_by_ptr(the_hash_algo);
 }
 
 static inline void oidread(struct object_id *oid, const unsigned char *hash)
 {
 	memcpy(oid->hash, hash, the_hash_algo->rawsz);
+	oid->algo = hash_algo_by_ptr(the_hash_algo);
 }
 
 static inline int is_empty_blob_sha1(const unsigned char *sha1)
@@ -281,6 +304,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid)
 	return oideq(oid, the_hash_algo->empty_tree);
 }
 
+static inline void oid_set_algo(struct object_id *oid, const struct git_hash_algo *algop)
+{
+	oid->algo = hash_algo_by_ptr(algop);
+}
+
 const char *empty_tree_oid_hex(void);
 const char *empty_blob_oid_hex(void);
 
diff --git a/hex.c b/hex.c
index da51e64929..e7af18fe55 100644
--- a/hex.c
+++ b/hex.c
@@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
 int get_oid_hex_algop(const char *hex, struct object_id *oid,
 		      const struct git_hash_algo *algop)
 {
-	return get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	if (!ret)
+		oid_set_algo(oid, algop);
+	return ret;
 }
 
 /*
@@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid)
 {
 	int i;
 	for (i = GIT_HASH_NALGOS - 1; i > 0; i--) {
-		if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i]))
+		if (!get_oid_hex_algop(hex, oid, &hash_algos[i]))
 			return i;
 	}
 	return GIT_HASH_UNKNOWN;
@@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid,
 			const char **end,
 			const struct git_hash_algo *algop)
 {
-	int ret = get_hash_hex_algop(hex, oid->hash, algop);
+	int ret = get_oid_hex_algop(hex, oid, algop);
 	if (!ret)
 		*end = hex + algop->hexsz;
 	return ret;
diff --git a/notes.c b/notes.c
index a44b25858f..135ea13ba1 100644
--- a/notes.c
+++ b/notes.c
@@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 		CALLOC_ARRAY(l, 1);
 		oidcpy(&l->key_oid, &object_oid);
 		oidcpy(&l->val_oid, &entry.oid);
+		oid_set_algo(&l->key_oid, the_hash_algo);
+		oid_set_algo(&l->val_oid, the_hash_algo);
 		if (note_tree_insert(t, node, n, l, type,
 				     combine_notes_concatenate))
 			die("Failed to load %s %s into notes tree "
@@ -484,6 +486,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
 				strbuf_addch(&non_note_path, '/');
 			}
 			strbuf_addstr(&non_note_path, entry.path);
+			oid_set_algo(&entry.oid, the_hash_algo);
 			add_non_note(t, strbuf_detach(&non_note_path, NULL),
 				     entry.mode, entry.oid.hash);
 		}
diff --git a/object-file.c b/object-file.c
index 1d8c82fa99..d4ba0c4a4f 100644
--- a/object-file.c
+++ b/object-file.c
@@ -57,16 +57,20 @@
 
 const struct object_id null_oid;
 static const struct object_id empty_tree_oid = {
-	EMPTY_TREE_SHA1_BIN_LITERAL
+	.hash = EMPTY_TREE_SHA1_BIN_LITERAL,
+	.algo = GIT_HASH_SHA1,
 };
 static const struct object_id empty_blob_oid = {
-	EMPTY_BLOB_SHA1_BIN_LITERAL
+	.hash = EMPTY_BLOB_SHA1_BIN_LITERAL,
+	.algo = GIT_HASH_SHA1,
 };
 static const struct object_id empty_tree_oid_sha256 = {
-	EMPTY_TREE_SHA256_BIN_LITERAL
+	.hash = EMPTY_TREE_SHA256_BIN_LITERAL,
+	.algo = GIT_HASH_SHA256,
 };
 static const struct object_id empty_blob_oid_sha256 = {
-	EMPTY_BLOB_SHA256_BIN_LITERAL
+	.hash = EMPTY_BLOB_SHA256_BIN_LITERAL,
+	.algo = GIT_HASH_SHA256,
 };
 
 static void git_hash_sha1_init(git_hash_ctx *ctx)
@@ -93,6 +97,7 @@ static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx)
 {
 	git_SHA1_Final(oid->hash, &ctx->sha1);
 	memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ);
+	oid->algo = GIT_HASH_SHA1;
 }
 
 
@@ -124,6 +129,7 @@ static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx)
 	 * but keep it in case we extend the hash size again.
 	 */
 	memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ);
+	oid->algo = GIT_HASH_SHA256;
 }
 
 static void git_hash_unknown_init(git_hash_ctx *ctx)
@@ -2340,6 +2346,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 		if (namelen == the_hash_algo->hexsz - 2 &&
 		    !hex_to_bytes(oid.hash + 1, de->d_name,
 				  the_hash_algo->rawsz - 1)) {
+			oid_set_algo(&oid, the_hash_algo);
 			if (obj_cb) {
 				r = obj_cb(&oid, path->buf, data);
 				if (r)

  parent reply	other threads:[~2021-04-26  1:03 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-26  1:02 [PATCH v2 00/13] SHA-256 / SHA-1 interop, part 1 brian m. carlson
2021-04-26  1:02 ` [PATCH v2 01/13] hash: add an algo member to struct object_id brian m. carlson
2021-05-07 13:58   ` Matheus Tavares Bernardino
2021-05-07 20:07     ` brian m. carlson
2021-04-26  1:02 ` [PATCH v2 02/13] Always use oidread to read into " brian m. carlson
2021-04-26  1:02 ` [PATCH v2 03/13] http-push: set algorithm when reading object ID brian m. carlson
2021-04-26  1:02 ` [PATCH v2 04/13] hash: add a function to finalize object IDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 05/13] Use the final_oid_fn to finalize hashing of " brian m. carlson
2021-04-26  1:02 ` [PATCH v2 06/13] builtin/pack-redundant: avoid casting buffers to struct object_id brian m. carlson
2021-04-26  1:02 ` brian m. carlson [this message]
2021-04-26  1:02 ` [PATCH v2 08/13] hash: provide per-algorithm null OIDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 09/13] builtin/show-index: set the algorithm for object IDs brian m. carlson
2021-04-26  1:02 ` [PATCH v2 10/13] commit-graph: don't store file hashes as struct object_id brian m. carlson
2021-04-26  1:02 ` [PATCH v2 11/13] builtin/pack-objects: avoid using struct object_id for pack hash brian m. carlson
2021-04-26  1:03 ` [PATCH v2 12/13] hex: default to the_hash_algo on zero algorithm value brian m. carlson
2021-04-26  1:03 ` [PATCH v2 13/13] hex: print objects using the hash algorithm member brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210426010301.1093562-8-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=avarab@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).