git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste.net>
To: <git@vger.kernel.org>
Cc: Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH 05/15] hash: add a function to finalize object IDs
Date: Sat, 10 Apr 2021 15:21:30 +0000	[thread overview]
Message-ID: <20210410152140.3525040-6-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20210410152140.3525040-1-sandals@crustytoothpaste.net>

To avoid the penalty of having to branch in hash comparison functions,
we'll want to always compare the full hash member in a struct object_id,
which will require that SHA-1 object IDs be zero-padded.  To do so, add
a function which finalizes a hash context and writes it into an object
ID that performs this padding.

Move the definition of struct object_id and the constant definitions
higher up so we they are available for us to use.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 hash.h        | 50 +++++++++++++++++++++++++++-----------------------
 object-file.c | 25 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/hash.h b/hash.h
index dafdcb3335..c8f03d8aee 100644
--- a/hash.h
+++ b/hash.h
@@ -95,6 +95,29 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s
 /* Number of algorithms supported (including unknown). */
 #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
 
+/* The length in bytes and in hex digits of an object name (SHA-1 value). */
+#define GIT_SHA1_RAWSZ 20
+#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
+/* The block size of SHA-1. */
+#define GIT_SHA1_BLKSZ 64
+
+/* The length in bytes and in hex digits of an object name (SHA-256 value). */
+#define GIT_SHA256_RAWSZ 32
+#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
+/* The block size of SHA-256. */
+#define GIT_SHA256_BLKSZ 64
+
+/* The length in byte and in hex digits of the largest possible hash value. */
+#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
+#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
+/* The largest possible block size for any supported hash. */
+#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
+
+struct object_id {
+	unsigned char hash[GIT_MAX_RAWSZ];
+	int algo;
+};
+
 /* A suitably aligned type for stack allocations of hash contexts. */
 union git_hash_ctx {
 	git_SHA_CTX sha1;
@@ -106,6 +129,7 @@ typedef void (*git_hash_init_fn)(git_hash_ctx *ctx);
 typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src);
 typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len);
 typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx);
+typedef void (*git_hash_final_oid_fn)(struct object_id *oid, git_hash_ctx *ctx);
 
 struct git_hash_algo {
 	/*
@@ -138,6 +162,9 @@ struct git_hash_algo {
 	/* The hash finalization function. */
 	git_hash_final_fn final_fn;
 
+	/* The hash finalization function for object IDs. */
+	git_hash_final_oid_fn final_oid_fn;
+
 	/* The OID of the empty tree. */
 	const struct object_id *empty_tree;
 
@@ -161,29 +188,6 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
 	return p - hash_algos;
 }
 
-/* The length in bytes and in hex digits of an object name (SHA-1 value). */
-#define GIT_SHA1_RAWSZ 20
-#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
-/* The block size of SHA-1. */
-#define GIT_SHA1_BLKSZ 64
-
-/* The length in bytes and in hex digits of an object name (SHA-256 value). */
-#define GIT_SHA256_RAWSZ 32
-#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
-/* The block size of SHA-256. */
-#define GIT_SHA256_BLKSZ 64
-
-/* The length in byte and in hex digits of the largest possible hash value. */
-#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
-#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
-/* The largest possible block size for any supported hash. */
-#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
-
-struct object_id {
-	unsigned char hash[GIT_MAX_RAWSZ];
-	int algo;
-};
-
 #define the_hash_algo the_repository->hash_algo
 
 extern const struct object_id null_oid;
diff --git a/object-file.c b/object-file.c
index f5847ee20f..58d31452d8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -89,6 +89,12 @@ static void git_hash_sha1_final(unsigned char *hash, git_hash_ctx *ctx)
 	git_SHA1_Final(hash, &ctx->sha1);
 }
 
+static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	git_SHA1_Final(oid->hash, &ctx->sha1);
+	memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ);
+}
+
 
 static void git_hash_sha256_init(git_hash_ctx *ctx)
 {
@@ -110,6 +116,16 @@ static void git_hash_sha256_final(unsigned char *hash, git_hash_ctx *ctx)
 	git_SHA256_Final(hash, &ctx->sha256);
 }
 
+static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	git_SHA256_Final(oid->hash, &ctx->sha256);
+	/*
+	 * This currently does nothing, so the compiler should optimize it out,
+	 * but keep it in case we extend the hash size again.
+	 */
+	memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ);
+}
+
 static void git_hash_unknown_init(git_hash_ctx *ctx)
 {
 	BUG("trying to init unknown hash");
@@ -130,6 +146,12 @@ static void git_hash_unknown_final(unsigned char *hash, git_hash_ctx *ctx)
 	BUG("trying to finalize unknown hash");
 }
 
+static void git_hash_unknown_final_oid(struct object_id *oid, git_hash_ctx *ctx)
+{
+	BUG("trying to finalize unknown hash");
+}
+
+
 const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 	{
 		NULL,
@@ -141,6 +163,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_unknown_clone,
 		git_hash_unknown_update,
 		git_hash_unknown_final,
+		git_hash_unknown_final_oid,
 		NULL,
 		NULL,
 	},
@@ -155,6 +178,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_sha1_clone,
 		git_hash_sha1_update,
 		git_hash_sha1_final,
+		git_hash_sha1_final_oid,
 		&empty_tree_oid,
 		&empty_blob_oid,
 	},
@@ -169,6 +193,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		git_hash_sha256_clone,
 		git_hash_sha256_update,
 		git_hash_sha256_final,
+		git_hash_sha256_final_oid,
 		&empty_tree_oid_sha256,
 		&empty_blob_oid_sha256,
 	}

  parent reply	other threads:[~2021-04-10 15:22 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-10 15:21 [PATCH 00/15] SHA-256 / SHA-1 interop, part 1 brian m. carlson
2021-04-10 15:21 ` [PATCH 01/15] sha1-file: allow hashing objects literally with any algorithm brian m. carlson
2021-04-15  8:55   ` Denton Liu
2021-04-15 23:03     ` brian m. carlson
2021-04-16 15:04   ` Ævar Arnfjörð Bjarmason
2021-04-16 18:55     ` Junio C Hamano
2021-04-10 15:21 ` [PATCH 02/15] builtin/hash-object: allow literally hashing with a given algorithm brian m. carlson
2021-04-11  8:52   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:07     ` brian m. carlson
2021-04-16 15:21   ` Ævar Arnfjörð Bjarmason
2021-04-16 17:27   ` Ævar Arnfjörð Bjarmason
2021-04-10 15:21 ` [PATCH 03/15] cache: add an algo member to struct object_id brian m. carlson
2021-04-11 11:55   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:37     ` brian m. carlson
2021-04-13 12:12   ` Derrick Stolee
2021-04-14  1:08     ` brian m. carlson
2021-04-15  8:47       ` Ævar Arnfjörð Bjarmason
2021-04-15 23:51         ` brian m. carlson
2021-04-10 15:21 ` [PATCH 04/15] Always use oidread to read into " brian m. carlson
2021-04-10 15:21 ` brian m. carlson [this message]
2021-04-10 15:21 ` [PATCH 06/15] Use the final_oid_fn to finalize hashing of object IDs brian m. carlson
2021-04-10 15:21 ` [PATCH 07/15] builtin/pack-redundant: avoid casting buffers to struct object_id brian m. carlson
2021-04-10 15:21 ` [PATCH 08/15] cache: compare the entire buffer for " brian m. carlson
2021-04-11  8:17   ` Chris Torek
2021-04-11 11:36   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:05     ` brian m. carlson
2021-04-10 15:21 ` [PATCH 09/15] hash: set and copy algo field in " brian m. carlson
2021-04-11 11:57   ` Ævar Arnfjörð Bjarmason
2021-04-11 21:48     ` brian m. carlson
2021-04-11 22:12       ` Ævar Arnfjörð Bjarmason
2021-04-11 23:52         ` brian m. carlson
2021-04-12 11:02           ` [PATCH 0/2] C99: harder dependency on variadic macros Ævar Arnfjörð Bjarmason
2021-04-12 11:02             ` [PATCH 1/2] git-compat-util.h: clarify comment on GCC-specific code Ævar Arnfjörð Bjarmason
2021-04-13  7:57               ` Jeff King
2021-04-13 21:07                 ` Junio C Hamano
2021-04-14  5:21                   ` Jeff King
2021-04-14  6:12                     ` Ævar Arnfjörð Bjarmason
2021-04-14  7:31                       ` Jeff King
2021-05-21  2:06               ` Jonathan Nieder
2021-04-12 11:02             ` [PATCH 2/2] C99 support: remove non-HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2021-04-12 17:58               ` Junio C Hamano
2021-04-13  8:00                 ` Jeff King
2021-05-21  2:50               ` Jonathan Nieder
2021-04-12 12:14             ` [PATCH 0/2] C99: harder dependency on variadic macros Bagas Sanjaya
2021-04-12 12:41               ` Ævar Arnfjörð Bjarmason
2021-04-12 22:57                 ` brian m. carlson
2021-04-12 23:19                   ` Junio C Hamano
2022-01-28 11:11             ` [PATCH v2 0/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-01-28 11:11               ` [PATCH v2 1/2] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-01-28 11:11               ` [PATCH v2 2/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-01-28 22:40                 ` Junio C Hamano
2022-02-19 10:41               ` [PATCH v3 0/3] C99: remove dead " Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 1/3] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 2/3] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-02-19 10:41                 ` [PATCH v3 3/3] trace.h: remove never-used TRACE_CONTEXT Ævar Arnfjörð Bjarmason
2022-02-20 12:02                   ` Junio C Hamano
2022-02-20 12:38                     ` Ævar Arnfjörð Bjarmason
2022-02-20 20:12                       ` Junio C Hamano
2022-02-21 16:05                 ` [PATCH v4 0/2] C99: remove dead !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2022-02-21 16:05                   ` [PATCH v4 1/2] git-compat-util.h: clarify GCC v.s. C99-specific in comment Ævar Arnfjörð Bjarmason
2022-02-21 16:05                   ` [PATCH v4 2/2] C99: remove hardcoded-out !HAVE_VARIADIC_MACROS code Ævar Arnfjörð Bjarmason
2021-04-12 10:53         ` [PATCH 09/15] hash: set and copy algo field in struct object_id Junio C Hamano
2021-04-12 11:13           ` Ævar Arnfjörð Bjarmason
2021-04-10 15:21 ` [PATCH 10/15] hash: provide per-algorithm null OIDs brian m. carlson
2021-04-11 14:03   ` Junio C Hamano
2021-04-11 21:51     ` brian m. carlson
2021-04-10 15:21 ` [PATCH 11/15] builtin/show-index: set the algorithm for object IDs brian m. carlson
2021-04-10 15:21 ` [PATCH 12/15] commit-graph: don't store file hashes as struct object_id brian m. carlson
2021-04-10 15:21 ` [PATCH 13/15] builtin/pack-objects: avoid using struct object_id for pack hash brian m. carlson
2021-04-10 15:21 ` [PATCH 14/15] hex: default to the_hash_algo on zero algorithm value brian m. carlson
2021-04-10 15:21 ` [PATCH 15/15] hex: print objects using the hash algorithm member brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210410152140.3525040-6-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste.net \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).