From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: szeder.dev@gmail.com, me@ttaylorr.com,
Derrick Stolee <derrickstolee@github.com>,
Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH 08/15] midx: convert chunk write methods to return int
Date: Thu, 03 Dec 2020 16:16:47 +0000 [thread overview]
Message-ID: <dbb637a7ac6d2cc6bb78428ef7bda67687095f88.1607012215.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.804.git.1607012215.gitgitgadget@gmail.com>
From: Derrick Stolee <dstolee@microsoft.com>
Historically, the chunk-writing methods in midx.c have returned the
amount of data written so the writer method could compare this with the
table of contents. This presents some interesting issues:
1. If a chunk-writing method has a bug that miscalculates the written
bytes, then we can satisfy the table of contents without actually
writing the right amount of data to the hashfile. The commit-graph
writing code checks the hashfile struct directly for a more robust
verification.
2. There is no way for a chunk-writing method to gracefully fail.
Returning an int presents an opportunity to fail without a die().
3. The current pattern doesn't match the chunk_write_fn type exactly, so
we cannot share code with commit-graph.c.
For these reasons, convert the midx chunk writer methods to return an
'int'. Since none of them fail at the moment, they all return 0.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
midx.c | 63 +++++++++++++++++++++++++---------------------------------
1 file changed, 27 insertions(+), 36 deletions(-)
diff --git a/midx.c b/midx.c
index d7da358a3f..5eb1b01946 100644
--- a/midx.c
+++ b/midx.c
@@ -650,7 +650,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
return deduplicated_entries;
}
-static size_t write_midx_pack_names(struct hashfile *f, void *data)
+static int write_midx_pack_names(struct hashfile *f, void *data)
{
struct write_midx_context *ctx = (struct write_midx_context *)data;
uint32_t i;
@@ -678,14 +678,13 @@ static size_t write_midx_pack_names(struct hashfile *f, void *data)
if (i < MIDX_CHUNK_ALIGNMENT) {
memset(padding, 0, sizeof(padding));
hashwrite(f, padding, i);
- written += i;
}
- return written;
+ return 0;
}
-static size_t write_midx_oid_fanout(struct hashfile *f,
- void *data)
+static int write_midx_oid_fanout(struct hashfile *f,
+ void *data)
{
struct write_midx_context *ctx = (struct write_midx_context *)data;
struct pack_midx_entry *list = ctx->entries;
@@ -710,17 +709,16 @@ static size_t write_midx_oid_fanout(struct hashfile *f,
list = next;
}
- return MIDX_CHUNK_FANOUT_SIZE;
+ return 0;
}
-static size_t write_midx_oid_lookup(struct hashfile *f,
- void *data)
+static int write_midx_oid_lookup(struct hashfile *f,
+ void *data)
{
struct write_midx_context *ctx = (struct write_midx_context *)data;
unsigned char hash_len = the_hash_algo->rawsz;
struct pack_midx_entry *list = ctx->entries;
uint32_t i;
- size_t written = 0;
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *obj = list++;
@@ -734,19 +732,17 @@ static size_t write_midx_oid_lookup(struct hashfile *f,
}
hashwrite(f, obj->oid.hash, (int)hash_len);
- written += hash_len;
}
- return written;
+ return 0;
}
-static size_t write_midx_object_offsets(struct hashfile *f,
- void *data)
+static int write_midx_object_offsets(struct hashfile *f,
+ void *data)
{
struct write_midx_context *ctx = (struct write_midx_context *)data;
struct pack_midx_entry *list = ctx->entries;
uint32_t i, nr_large_offset = 0;
- size_t written = 0;
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *obj = list++;
@@ -766,20 +762,17 @@ static size_t write_midx_object_offsets(struct hashfile *f,
obj->offset);
else
hashwrite_be32(f, (uint32_t)obj->offset);
-
- written += MIDX_CHUNK_OFFSET_WIDTH;
}
- return written;
+ return 0;
}
-static size_t write_midx_large_offsets(struct hashfile *f,
- void *data)
+static int write_midx_large_offsets(struct hashfile *f,
+ void *data)
{
struct write_midx_context *ctx = (struct write_midx_context *)data;
struct pack_midx_entry *list = ctx->entries;
struct pack_midx_entry *end = ctx->entries + ctx->entries_nr;
- size_t written = 0;
uint32_t nr_large_offset = ctx->num_large_offsets;
while (nr_large_offset) {
@@ -795,12 +788,12 @@ static size_t write_midx_large_offsets(struct hashfile *f,
if (!(offset >> 31))
continue;
- written += hashwrite_be64(f, offset);
+ hashwrite_be64(f, offset);
nr_large_offset--;
}
- return written;
+ return 0;
}
static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
@@ -812,7 +805,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
struct hashfile *f = NULL;
struct lock_file lk;
struct write_midx_context ctx = { 0 };
- uint64_t written = 0;
+ uint64_t header_size = 0;
uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
struct progress *progress = NULL;
@@ -940,10 +933,10 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
goto cleanup;
}
- written = write_midx_header(f, num_chunks, ctx.nr - dropped_packs);
+ header_size = write_midx_header(f, num_chunks, ctx.nr - dropped_packs);
chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
- chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
+ chunk_offsets[cur_chunk] = header_size + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
cur_chunk++;
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
@@ -981,39 +974,37 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
hashwrite_be32(f, chunk_ids[i]);
hashwrite_be64(f, chunk_offsets[i]);
-
- written += MIDX_CHUNKLOOKUP_WIDTH;
}
if (flags & MIDX_PROGRESS)
progress = start_delayed_progress(_("Writing chunks to multi-pack-index"),
num_chunks);
for (i = 0; i < num_chunks; i++) {
- if (written != chunk_offsets[i])
+ if (f->total + f->offset != chunk_offsets[i])
BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
chunk_offsets[i],
- written,
+ f->total + f->offset,
chunk_ids[i]);
switch (chunk_ids[i]) {
case MIDX_CHUNKID_PACKNAMES:
- written += write_midx_pack_names(f, &ctx);
+ write_midx_pack_names(f, &ctx);
break;
case MIDX_CHUNKID_OIDFANOUT:
- written += write_midx_oid_fanout(f, &ctx);
+ write_midx_oid_fanout(f, &ctx);
break;
case MIDX_CHUNKID_OIDLOOKUP:
- written += write_midx_oid_lookup(f, &ctx);
+ write_midx_oid_lookup(f, &ctx);
break;
case MIDX_CHUNKID_OBJECTOFFSETS:
- written += write_midx_object_offsets(f, &ctx);
+ write_midx_object_offsets(f, &ctx);
break;
case MIDX_CHUNKID_LARGEOFFSETS:
- written += write_midx_large_offsets(f, &ctx);
+ write_midx_large_offsets(f, &ctx);
break;
default:
@@ -1025,9 +1016,9 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
}
stop_progress(&progress);
- if (written != chunk_offsets[num_chunks])
+ if (f->total + f->offset != chunk_offsets[num_chunks])
BUG("incorrect final offset %"PRIu64" != %"PRIu64,
- written,
+ f->total + f->offset,
chunk_offsets[num_chunks]);
finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
--
gitgitgadget
next prev parent reply other threads:[~2020-12-03 16:17 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-03 16:16 [PATCH 00/15] Refactor chunk-format into an API Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 01/15] commit-graph: anonymize data in chunk_write_fn Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 02/15] chunk-format: add API for writing table of contents Derrick Stolee via GitGitGadget
2020-12-08 17:56 ` Taylor Blau
2020-12-03 16:16 ` [PATCH 03/15] midx: rename pack_info to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 04/15] midx: use context in write_midx_pack_names() Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 05/15] midx: add entries to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 21:42 ` Junio C Hamano
2020-12-04 13:39 ` Derrick Stolee
2020-12-08 18:00 ` Taylor Blau
2020-12-03 16:16 ` [PATCH 06/15] midx: add pack_perm " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 07/15] midx: add num_large_offsets " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` Derrick Stolee via GitGitGadget [this message]
2020-12-03 21:50 ` [PATCH 08/15] midx: convert chunk write methods to return int Junio C Hamano
2020-12-04 13:40 ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 09/15] midx: drop chunk progress during write Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 10/15] midx: use chunk-format API in write_midx_internal() Derrick Stolee via GitGitGadget
2020-12-08 18:42 ` Taylor Blau
2020-12-10 14:36 ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 11/15] midx: use 64-bit multiplication for chunk sizes Derrick Stolee via GitGitGadget
2020-12-03 22:00 ` Junio C Hamano
2020-12-08 18:43 ` Taylor Blau
2020-12-03 16:16 ` [PATCH 12/15] chunk-format: create write_chunks() Derrick Stolee via GitGitGadget
2020-12-08 18:45 ` Taylor Blau
2020-12-03 16:16 ` [PATCH 13/15] chunk-format: create chunk reading API Derrick Stolee via GitGitGadget
2020-12-03 22:17 ` Junio C Hamano
2020-12-04 13:47 ` Derrick Stolee
2020-12-04 20:17 ` Junio C Hamano
2020-12-03 22:43 ` Junio C Hamano
2020-12-04 13:45 ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 14/15] commit-graph: restore duplicate chunk checks Derrick Stolee via GitGitGadget
2020-12-07 13:43 ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 15/15] chunk-format: add technical docs Derrick Stolee via GitGitGadget
2020-12-04 12:48 ` [PATCH 00/15] Refactor chunk-format into an API René Scharfe
2020-12-04 13:57 ` Derrick Stolee
2020-12-04 19:42 ` Junio C Hamano
2020-12-08 18:49 ` Taylor Blau
2020-12-09 17:13 ` René Scharfe
2020-12-10 0:50 ` Taylor Blau
2020-12-10 14:30 ` Derrick Stolee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=dbb637a7ac6d2cc6bb78428ef7bda67687095f88.1607012215.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=derrickstolee@github.com \
--cc=dstolee@microsoft.com \
--cc=git@vger.kernel.org \
--cc=me@ttaylorr.com \
--cc=szeder.dev@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).