All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: szeder.dev@gmail.com, me@ttaylorr.com,
	Derrick Stolee <derrickstolee@github.com>,
	Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH 10/15] midx: use chunk-format API in write_midx_internal()
Date: Thu, 03 Dec 2020 16:16:49 +0000	[thread overview]
Message-ID: <f2f78ee1054f64fe767335c4b0c77ba2fbec5fae.1607012215.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.804.git.1607012215.gitgitgadget@gmail.com>

From: Derrick Stolee <dstolee@microsoft.com>

The chunk-format API allows automatically writing the table of contents
for a chunk-based file format when using an array of "struct
chunk_info"s. Update write_midx_internal() to use this strategy, which
also simplifies the chunk writing loop. This loop will be replaced with
a chunk-format API call in an upcoming change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 midx.c | 96 +++++++++++++---------------------------------------------
 1 file changed, 21 insertions(+), 75 deletions(-)

diff --git a/midx.c b/midx.c
index ce6d4339bd..0548266bea 100644
--- a/midx.c
+++ b/midx.c
@@ -11,6 +11,7 @@
 #include "trace2.h"
 #include "run-command.h"
 #include "repository.h"
+#include "chunk-format.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -799,15 +800,14 @@ static int write_midx_large_offsets(struct hashfile *f,
 static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
 			       struct string_list *packs_to_drop, unsigned flags)
 {
-	unsigned char cur_chunk, num_chunks = 0;
+	unsigned char num_chunks = 0;
 	char *midx_name;
 	uint32_t i;
 	struct hashfile *f = NULL;
 	struct lock_file lk;
 	struct write_midx_context ctx = { 0 };
 	uint64_t header_size = 0;
-	uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
-	uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
+	struct chunk_info chunks[MIDX_MAX_CHUNKS];
 	int pack_name_concat_len = 0;
 	int dropped_packs = 0;
 	int result = 0;
@@ -923,7 +923,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	if (ctx.m)
 		close_midx(ctx.m);
 
-	cur_chunk = 0;
 	num_chunks = ctx.large_offsets_needed ? 5 : 4;
 
 	if (ctx.nr - dropped_packs == 0) {
@@ -934,85 +933,32 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 
 	header_size = write_midx_header(f, num_chunks, ctx.nr - dropped_packs);
 
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
-	chunk_offsets[cur_chunk] = header_size + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
+	chunks[0].id = MIDX_CHUNKID_PACKNAMES;
+	chunks[0].size = pack_name_concat_len;
+	chunks[0].write_fn = write_midx_pack_names;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
+	chunks[1].id = MIDX_CHUNKID_OIDFANOUT;
+	chunks[1].size = MIDX_CHUNK_FANOUT_SIZE;
+	chunks[1].write_fn = write_midx_oid_fanout;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;
+	chunks[2].id = MIDX_CHUNKID_OIDLOOKUP;
+	chunks[2].size = ctx.entries_nr * the_hash_algo->rawsz;
+	chunks[2].write_fn = write_midx_oid_lookup;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + ctx.entries_nr * the_hash_algo->rawsz;
+	chunks[3].id = MIDX_CHUNKID_OBJECTOFFSETS;
+	chunks[3].size = ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH;
+	chunks[3].write_fn = write_midx_object_offsets;
 
-	cur_chunk++;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH;
 	if (ctx.large_offsets_needed) {
-		chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS;
-
-		cur_chunk++;
-		chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] +
-					   ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH;
+		chunks[4].id = MIDX_CHUNKID_LARGEOFFSETS;
+		chunks[4].size = ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH;
+		chunks[4].write_fn = write_midx_large_offsets;
 	}
 
-	chunk_ids[cur_chunk] = 0;
-
-	for (i = 0; i <= num_chunks; i++) {
-		if (i && chunk_offsets[i] < chunk_offsets[i - 1])
-			BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
-			    chunk_offsets[i - 1],
-			    chunk_offsets[i]);
-
-		if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
-			BUG("chunk offset %"PRIu64" is not properly aligned",
-			    chunk_offsets[i]);
-
-		hashwrite_be32(f, chunk_ids[i]);
-		hashwrite_be64(f, chunk_offsets[i]);
-	}
-
-	for (i = 0; i < num_chunks; i++) {
-		if (f->total + f->offset != chunk_offsets[i])
-			BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
-			    chunk_offsets[i],
-			    f->total + f->offset,
-			    chunk_ids[i]);
-
-		switch (chunk_ids[i]) {
-			case MIDX_CHUNKID_PACKNAMES:
-				write_midx_pack_names(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OIDFANOUT:
-				write_midx_oid_fanout(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OIDLOOKUP:
-				write_midx_oid_lookup(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OBJECTOFFSETS:
-				write_midx_object_offsets(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_LARGEOFFSETS:
-				write_midx_large_offsets(f, &ctx);
-				break;
-
-			default:
-				BUG("trying to write unknown chunk id %"PRIx32,
-				    chunk_ids[i]);
-		}
-	}
+	write_table_of_contents(f, header_size, chunks, num_chunks);
 
-	if (f->total + f->offset != chunk_offsets[num_chunks])
-		BUG("incorrect final offset %"PRIu64" != %"PRIu64,
-		    f->total + f->offset,
-		    chunk_offsets[num_chunks]);
+	for (i = 0; i < num_chunks; i++)
+		chunks[i].write_fn(f, &ctx);
 
 	finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
 	commit_lock_file(&lk);
-- 
gitgitgadget


  parent reply	other threads:[~2020-12-03 16:18 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-03 16:16 [PATCH 00/15] Refactor chunk-format into an API Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 01/15] commit-graph: anonymize data in chunk_write_fn Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 02/15] chunk-format: add API for writing table of contents Derrick Stolee via GitGitGadget
2020-12-08 17:56   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 03/15] midx: rename pack_info to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 04/15] midx: use context in write_midx_pack_names() Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 05/15] midx: add entries to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 21:42   ` Junio C Hamano
2020-12-04 13:39     ` Derrick Stolee
2020-12-08 18:00   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 06/15] midx: add pack_perm " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 07/15] midx: add num_large_offsets " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 08/15] midx: convert chunk write methods to return int Derrick Stolee via GitGitGadget
2020-12-03 21:50   ` Junio C Hamano
2020-12-04 13:40     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 09/15] midx: drop chunk progress during write Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` Derrick Stolee via GitGitGadget [this message]
2020-12-08 18:42   ` [PATCH 10/15] midx: use chunk-format API in write_midx_internal() Taylor Blau
2020-12-10 14:36     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 11/15] midx: use 64-bit multiplication for chunk sizes Derrick Stolee via GitGitGadget
2020-12-03 22:00   ` Junio C Hamano
2020-12-08 18:43     ` Taylor Blau
2020-12-03 16:16 ` [PATCH 12/15] chunk-format: create write_chunks() Derrick Stolee via GitGitGadget
2020-12-08 18:45   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 13/15] chunk-format: create chunk reading API Derrick Stolee via GitGitGadget
2020-12-03 22:17   ` Junio C Hamano
2020-12-04 13:47     ` Derrick Stolee
2020-12-04 20:17       ` Junio C Hamano
2020-12-03 22:43   ` Junio C Hamano
2020-12-04 13:45     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 14/15] commit-graph: restore duplicate chunk checks Derrick Stolee via GitGitGadget
2020-12-07 13:43   ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 15/15] chunk-format: add technical docs Derrick Stolee via GitGitGadget
2020-12-04 12:48 ` [PATCH 00/15] Refactor chunk-format into an API René Scharfe
2020-12-04 13:57   ` Derrick Stolee
2020-12-04 19:42   ` Junio C Hamano
2020-12-08 18:49   ` Taylor Blau
2020-12-09 17:13     ` René Scharfe
2020-12-10  0:50       ` Taylor Blau
2020-12-10 14:30         ` Derrick Stolee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f2f78ee1054f64fe767335c4b0c77ba2fbec5fae.1607012215.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=szeder.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.