git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: szeder.dev@gmail.com, me@ttaylorr.com,
	Derrick Stolee <derrickstolee@github.com>,
	Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH 10/15] midx: use chunk-format API in write_midx_internal()
Date: Thu, 03 Dec 2020 16:16:49 +0000	[thread overview]
Message-ID: <f2f78ee1054f64fe767335c4b0c77ba2fbec5fae.1607012215.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.804.git.1607012215.gitgitgadget@gmail.com>

From: Derrick Stolee <dstolee@microsoft.com>

The chunk-format API can automatically write the table of contents
for a chunk-based file format from an array of "struct chunk_info"
entries. Update write_midx_internal() to use this strategy, which
also simplifies the chunk-writing loop. This loop will be replaced with
a chunk-format API call in an upcoming change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 midx.c | 96 +++++++++++++---------------------------------------------
 1 file changed, 21 insertions(+), 75 deletions(-)

diff --git a/midx.c b/midx.c
index ce6d4339bd..0548266bea 100644
--- a/midx.c
+++ b/midx.c
@@ -11,6 +11,7 @@
 #include "trace2.h"
 #include "run-command.h"
 #include "repository.h"
+#include "chunk-format.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -799,15 +800,14 @@ static int write_midx_large_offsets(struct hashfile *f,
 static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
 			       struct string_list *packs_to_drop, unsigned flags)
 {
-	unsigned char cur_chunk, num_chunks = 0;
+	unsigned char num_chunks = 0;
 	char *midx_name;
 	uint32_t i;
 	struct hashfile *f = NULL;
 	struct lock_file lk;
 	struct write_midx_context ctx = { 0 };
 	uint64_t header_size = 0;
-	uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
-	uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
+	struct chunk_info chunks[MIDX_MAX_CHUNKS];
 	int pack_name_concat_len = 0;
 	int dropped_packs = 0;
 	int result = 0;
@@ -923,7 +923,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	if (ctx.m)
 		close_midx(ctx.m);
 
-	cur_chunk = 0;
 	num_chunks = ctx.large_offsets_needed ? 5 : 4;
 
 	if (ctx.nr - dropped_packs == 0) {
@@ -934,85 +933,32 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 
 	header_size = write_midx_header(f, num_chunks, ctx.nr - dropped_packs);
 
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
-	chunk_offsets[cur_chunk] = header_size + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
+	chunks[0].id = MIDX_CHUNKID_PACKNAMES;
+	chunks[0].size = pack_name_concat_len;
+	chunks[0].write_fn = write_midx_pack_names;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
+	chunks[1].id = MIDX_CHUNKID_OIDFANOUT;
+	chunks[1].size = MIDX_CHUNK_FANOUT_SIZE;
+	chunks[1].write_fn = write_midx_oid_fanout;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;
+	chunks[2].id = MIDX_CHUNKID_OIDLOOKUP;
+	chunks[2].size = ctx.entries_nr * the_hash_algo->rawsz;
+	chunks[2].write_fn = write_midx_oid_lookup;
 
-	cur_chunk++;
-	chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + ctx.entries_nr * the_hash_algo->rawsz;
+	chunks[3].id = MIDX_CHUNKID_OBJECTOFFSETS;
+	chunks[3].size = ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH;
+	chunks[3].write_fn = write_midx_object_offsets;
 
-	cur_chunk++;
-	chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH;
 	if (ctx.large_offsets_needed) {
-		chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS;
-
-		cur_chunk++;
-		chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] +
-					   ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH;
+		chunks[4].id = MIDX_CHUNKID_LARGEOFFSETS;
+		chunks[4].size = ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH;
+		chunks[4].write_fn = write_midx_large_offsets;
 	}
 
-	chunk_ids[cur_chunk] = 0;
-
-	for (i = 0; i <= num_chunks; i++) {
-		if (i && chunk_offsets[i] < chunk_offsets[i - 1])
-			BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
-			    chunk_offsets[i - 1],
-			    chunk_offsets[i]);
-
-		if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
-			BUG("chunk offset %"PRIu64" is not properly aligned",
-			    chunk_offsets[i]);
-
-		hashwrite_be32(f, chunk_ids[i]);
-		hashwrite_be64(f, chunk_offsets[i]);
-	}
-
-	for (i = 0; i < num_chunks; i++) {
-		if (f->total + f->offset != chunk_offsets[i])
-			BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
-			    chunk_offsets[i],
-			    f->total + f->offset,
-			    chunk_ids[i]);
-
-		switch (chunk_ids[i]) {
-			case MIDX_CHUNKID_PACKNAMES:
-				write_midx_pack_names(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OIDFANOUT:
-				write_midx_oid_fanout(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OIDLOOKUP:
-				write_midx_oid_lookup(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_OBJECTOFFSETS:
-				write_midx_object_offsets(f, &ctx);
-				break;
-
-			case MIDX_CHUNKID_LARGEOFFSETS:
-				write_midx_large_offsets(f, &ctx);
-				break;
-
-			default:
-				BUG("trying to write unknown chunk id %"PRIx32,
-				    chunk_ids[i]);
-		}
-	}
+	write_table_of_contents(f, header_size, chunks, num_chunks);
 
-	if (f->total + f->offset != chunk_offsets[num_chunks])
-		BUG("incorrect final offset %"PRIu64" != %"PRIu64,
-		    f->total + f->offset,
-		    chunk_offsets[num_chunks]);
+	for (i = 0; i < num_chunks; i++)
+		chunks[i].write_fn(f, &ctx);
 
 	finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
 	commit_lock_file(&lk);
-- 
gitgitgadget


  parent reply	other threads:[~2020-12-03 16:18 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-03 16:16 [PATCH 00/15] Refactor chunk-format into an API Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 01/15] commit-graph: anonymize data in chunk_write_fn Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 02/15] chunk-format: add API for writing table of contents Derrick Stolee via GitGitGadget
2020-12-08 17:56   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 03/15] midx: rename pack_info to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 04/15] midx: use context in write_midx_pack_names() Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 05/15] midx: add entries to write_midx_context Derrick Stolee via GitGitGadget
2020-12-03 21:42   ` Junio C Hamano
2020-12-04 13:39     ` Derrick Stolee
2020-12-08 18:00   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 06/15] midx: add pack_perm " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 07/15] midx: add num_large_offsets " Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` [PATCH 08/15] midx: convert chunk write methods to return int Derrick Stolee via GitGitGadget
2020-12-03 21:50   ` Junio C Hamano
2020-12-04 13:40     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 09/15] midx: drop chunk progress during write Derrick Stolee via GitGitGadget
2020-12-03 16:16 ` Derrick Stolee via GitGitGadget [this message]
2020-12-08 18:42   ` [PATCH 10/15] midx: use chunk-format API in write_midx_internal() Taylor Blau
2020-12-10 14:36     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 11/15] midx: use 64-bit multiplication for chunk sizes Derrick Stolee via GitGitGadget
2020-12-03 22:00   ` Junio C Hamano
2020-12-08 18:43     ` Taylor Blau
2020-12-03 16:16 ` [PATCH 12/15] chunk-format: create write_chunks() Derrick Stolee via GitGitGadget
2020-12-08 18:45   ` Taylor Blau
2020-12-03 16:16 ` [PATCH 13/15] chunk-format: create chunk reading API Derrick Stolee via GitGitGadget
2020-12-03 22:17   ` Junio C Hamano
2020-12-04 13:47     ` Derrick Stolee
2020-12-04 20:17       ` Junio C Hamano
2020-12-03 22:43   ` Junio C Hamano
2020-12-04 13:45     ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 14/15] commit-graph: restore duplicate chunk checks Derrick Stolee via GitGitGadget
2020-12-07 13:43   ` Derrick Stolee
2020-12-03 16:16 ` [PATCH 15/15] chunk-format: add technical docs Derrick Stolee via GitGitGadget
2020-12-04 12:48 ` [PATCH 00/15] Refactor chunk-format into an API René Scharfe
2020-12-04 13:57   ` Derrick Stolee
2020-12-04 19:42   ` Junio C Hamano
2020-12-08 18:49   ` Taylor Blau
2020-12-09 17:13     ` René Scharfe
2020-12-10  0:50       ` Taylor Blau
2020-12-10 14:30         ` Derrick Stolee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f2f78ee1054f64fe767335c4b0c77ba2fbec5fae.1607012215.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=szeder.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).