All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: jrnieder@gmail.com, Derrick Stolee <derrickstolee@github.com>,
	Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 20/30] packed-refs: read optional prefix chunks
Date: Mon, 07 Nov 2022 18:35:54 +0000	[thread overview]
Message-ID: <9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1408.git.1667846164.gitgitgadget@gmail.com>

From: Derrick Stolee <derrickstolee@github.com>

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 refs/packed-backend.c   |   2 +
 refs/packed-backend.h   |   9 +++
 refs/packed-format-v2.c | 159 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+)

diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 549cce1f84a..ae904de9014 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -475,6 +475,8 @@ static struct ref_iterator *packed_ref_iterator_begin(
 	iter->version = snapshot->version;
 	iter->row = v2_row;
 
+	init_iterator_prefix_info(prefix, iter);
+
 	iter->pos = start;
 	iter->eof = snapshot->eof;
 	strbuf_init(&iter->refname_buf, 0);
diff --git a/refs/packed-backend.h b/refs/packed-backend.h
index 3a8649857f1..1936bb5c76c 100644
--- a/refs/packed-backend.h
+++ b/refs/packed-backend.h
@@ -103,9 +103,12 @@ struct snapshot {
 	 * packed-refs v2 values *
 	 *************************/
 	size_t nr;
+	size_t prefixes_nr;
 	size_t buflen;
 	const unsigned char *offset_chunk;
 	const char *refs_chunk;
+	const unsigned char *prefix_offsets_chunk;
+	const char *prefix_chunk;
 
 	/*
 	 * Count of references to this instance, including the pointer
@@ -212,6 +215,9 @@ struct packed_ref_iterator {
 	 ***********************************/
 	size_t nr;
 	size_t row;
+	size_t prefix_row_end;
+	size_t prefix_i;
+	const char *cur_prefix;
 };
 
 typedef int (*write_ref_fn)(const char *refname,
@@ -308,4 +314,7 @@ struct write_packed_refs_v2_context *create_v2_context(struct packed_ref_store *
 int write_packed_refs_v2(struct write_packed_refs_v2_context *ctx);
 void free_v2_context(struct write_packed_refs_v2_context *ctx);
 
+void init_iterator_prefix_info(const char *prefix,
+			       struct packed_ref_iterator *iter);
+
 #endif /* REFS_PACKED_BACKEND_H */
diff --git a/refs/packed-format-v2.c b/refs/packed-format-v2.c
index d75df9545ec..0ab277f7ad4 100644
--- a/refs/packed-format-v2.c
+++ b/refs/packed-format-v2.c
@@ -14,6 +14,79 @@
 #define PACKED_REFS_SIGNATURE          0x50524546 /* "PREF" */
 #define CHREFS_CHUNKID_OFFSETS         0x524F4646 /* "ROFF" */
 #define CHREFS_CHUNKID_REFS            0x52454653 /* "REFS" */
+#define CHREFS_CHUNKID_PREFIX_DATA     0x50465844 /* "PFXD" */
+#define CHREFS_CHUNKID_PREFIX_OFFSETS  0x5046584F /* "PFXO" */
+
+static const char *get_nth_prefix(struct snapshot *snapshot,
+				  size_t n, size_t *len)
+{
+	uint64_t offset, next_offset;
+
+	if (n >= snapshot->prefixes_nr)
+		BUG("asking for prefix %"PRIu64" outside of bounds (%"PRIu64")",
+		    (uint64_t)n, (uint64_t)snapshot->prefixes_nr);
+
+	if (n)
+		offset = get_be32(snapshot->prefix_offsets_chunk +
+				  2 * sizeof(uint32_t) * (n - 1));
+	else
+		offset = 0;
+
+	if (len) {
+		next_offset = get_be32(snapshot->prefix_offsets_chunk +
+				       2 * sizeof(uint32_t) * n);
+
+		/* Prefix includes null terminator. */
+		*len = next_offset - offset - 1;
+	}
+
+	return snapshot->prefix_chunk + offset;
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+static const char *find_prefix_location(struct snapshot *snapshot,
+					const char *refname, size_t *pos)
+{
+	size_t lo = 0, hi = snapshot->prefixes_nr;
+
+	while (lo != hi) {
+		const char *rec;
+		int cmp;
+		size_t len;
+		size_t mid = lo + (hi - lo) / 2;
+
+		rec = get_nth_prefix(snapshot, mid, &len);
+		cmp = strncmp(rec, refname, len);
+		if (cmp < 0) {
+			lo = mid + 1;
+		} else if (cmp > 0) {
+			hi = mid;
+		} else {
+			/* we have a prefix match! */
+			*pos = mid;
+			return rec;
+		}
+	}
+
+	*pos = lo;
+	if (lo < snapshot->prefixes_nr)
+		return get_nth_prefix(snapshot, lo, NULL);
+	else
+		return NULL;
+}
 
 int detect_packed_format_v2_header(struct packed_ref_store *refs,
 				   struct snapshot *snapshot)
@@ -63,6 +136,46 @@ const char *find_reference_location_v2(struct snapshot *snapshot,
 {
 	size_t lo = 0, hi = snapshot->nr;
 
+	if (snapshot->prefix_chunk) {
+		size_t prefix_row;
+		const char *prefix;
+		int found = 1;
+
+		prefix = find_prefix_location(snapshot, refname, &prefix_row);
+
+		if (!prefix || !starts_with(refname, prefix)) {
+			if (mustexist)
+				return NULL;
+			found = 0;
+		}
+
+		/* The second 4-byte column of the prefix offsets */
+		if (prefix_row) {
+			/* if prefix_row == 0, then lo = 0, which is already true. */
+			lo = get_be32(snapshot->prefix_offsets_chunk +
+				2 * sizeof(uint32_t) * (prefix_row - 1) + sizeof(uint32_t));
+		}
+
+		if (!found) {
+			const char *ret;
+			/* Terminate early with this lo position as the insertion point. */
+			if (pos)
+				*pos = lo;
+
+			if (lo >= snapshot->nr)
+				return NULL;
+
+			ret = get_nth_ref(snapshot, lo);
+			return ret;
+		}
+
+		hi = get_be32(snapshot->prefix_offsets_chunk +
+			      2 * sizeof(uint32_t) * prefix_row + sizeof(uint32_t));
+
+		if (prefix)
+			refname += strlen(prefix);
+	}
+
 	while (lo != hi) {
 		const char *rec;
 		int cmp;
@@ -132,6 +245,16 @@ static int packed_refs_read_offsets(const unsigned char *chunk_start,
 	return 0;
 }
 
+static int packed_refs_read_prefix_offsets(const unsigned char *chunk_start,
+					    size_t chunk_size, void *data)
+{
+	struct snapshot *snapshot = data;
+
+	snapshot->prefix_offsets_chunk = chunk_start;
+	snapshot->prefixes_nr = chunk_size / sizeof(uint64_t);
+	return 0;
+}
+
 void fill_snapshot_v2(struct snapshot *snapshot)
 {
 	uint32_t file_signature, file_version, hash_version;
@@ -163,6 +286,9 @@ void fill_snapshot_v2(struct snapshot *snapshot)
 	read_chunk(cf, CHREFS_CHUNKID_OFFSETS, packed_refs_read_offsets, snapshot);
 	pair_chunk(cf, CHREFS_CHUNKID_REFS, (const unsigned char**)&snapshot->refs_chunk);
 
+	read_chunk(cf, CHREFS_CHUNKID_PREFIX_OFFSETS, packed_refs_read_prefix_offsets, snapshot);
+	pair_chunk(cf, CHREFS_CHUNKID_PREFIX_DATA, (const unsigned char**)&snapshot->prefix_chunk);
+
 	/* TODO: add error checks for invalid chunk combinations. */
 
 cleanup:
@@ -187,6 +313,8 @@ int next_record_v2(struct packed_ref_iterator *iter)
 
 	iter->base.flags = REF_ISPACKED;
 
+	if (iter->cur_prefix)
+		strbuf_addstr(&iter->refname_buf, iter->cur_prefix);
 	strbuf_addstr(&iter->refname_buf, pos);
 	iter->base.refname = iter->refname_buf.buf;
 	pos += strlen(pos) + 1;
@@ -221,9 +349,40 @@ int next_record_v2(struct packed_ref_iterator *iter)
 
 	iter->row++;
 
+	if (iter->row == iter->prefix_row_end && iter->snapshot->prefix_chunk) {
+		size_t prefix_pos = get_be32(iter->snapshot->prefix_offsets_chunk +
+					     2 * sizeof(uint32_t) * iter->prefix_i);
+		iter->cur_prefix = iter->snapshot->prefix_chunk + prefix_pos;
+		iter->prefix_i++;
+		iter->prefix_row_end = get_be32(iter->snapshot->prefix_offsets_chunk +
+						2 * sizeof(uint32_t) * iter->prefix_i + sizeof(uint32_t));
+	}
+
 	return ITER_OK;
 }
 
+void init_iterator_prefix_info(const char *prefix,
+			       struct packed_ref_iterator *iter)
+{
+	struct snapshot *snapshot = iter->snapshot;
+
+	if (snapshot->version != 2 || !snapshot->prefix_chunk) {
+		iter->prefix_row_end = snapshot->nr;
+		return;
+	}
+
+	if (prefix)
+		iter->cur_prefix = find_prefix_location(snapshot, prefix, &iter->prefix_i);
+	else {
+		iter->cur_prefix = snapshot->prefix_chunk;
+		iter->prefix_i = 0;
+	}
+
+	iter->prefix_row_end = get_be32(snapshot->prefix_offsets_chunk +
+					2 * sizeof(uint32_t) * iter->prefix_i +
+					sizeof(uint32_t));
+}
+
 struct write_packed_refs_v2_context {
 	struct packed_ref_store *refs;
 	struct string_list *updates;
-- 
gitgitgadget


  parent reply	other threads:[~2022-11-07 18:37 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-07 18:35 [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 01/30] hashfile: allow skipping the hash function Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 02/30] read-cache: add index.computeHash config option Derrick Stolee via GitGitGadget
2022-11-11 23:31   ` Elijah Newren
2022-11-14 16:30     ` Derrick Stolee
2022-11-17 16:13   ` Ævar Arnfjörð Bjarmason
2022-11-07 18:35 ` [PATCH 03/30] extensions: add refFormat extension Derrick Stolee via GitGitGadget
2022-11-11 23:39   ` Elijah Newren
2022-11-16 14:37     ` Derrick Stolee
2022-11-07 18:35 ` [PATCH 04/30] config: fix multi-level bulleted list Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 05/30] repository: wire ref extensions to ref backends Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 06/30] refs: allow loose files without packed-refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 07/30] chunk-format: number of chunks is optional Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 08/30] chunk-format: document trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 09/30] chunk-format: store chunk offset during write Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 10/30] chunk-format: allow trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 11/30] chunk-format: parse " Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 12/30] refs: extract packfile format to new file Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 13/30] packed-backend: extract add_write_error() Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 14/30] packed-backend: extract iterator/updates merge Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 15/30] packed-backend: create abstraction for writing refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 16/30] config: add config values for packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 17/30] packed-backend: create shell of v2 writes Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 18/30] packed-refs: write file format version 2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 19/30] packed-refs: read file format v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` Derrick Stolee via GitGitGadget [this message]
2022-11-07 18:35 ` [PATCH 21/30] packed-refs: write prefix chunks Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 22/30] packed-backend: create GIT_TEST_PACKED_REFS_VERSION Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 23/30] t1409: test with packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 24/30] t5312: allow packed-refs v2 format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 25/30] t5502: add PACKED_REFS_V1 prerequisite Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 26/30] t3210: require packed-refs v1 for some tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 27/30] t*: skip packed-refs v2 over http tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 28/30] ci: run GIT_TEST_PACKED_REFS_VERSION=2 in some builds Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 29/30] p1401: create performance test for ref operations Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 30/30] refs: skip hashing when writing packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-09 15:15 ` [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee
2022-11-11 23:28 ` Elijah Newren
2022-11-14  0:07   ` Derrick Stolee
2022-11-15  2:47     ` Elijah Newren
2022-11-16 14:45       ` Derrick Stolee
2022-11-17  4:28         ` Elijah Newren
2022-11-18 23:31     ` Junio C Hamano
2022-11-19  0:41       ` Elijah Newren
2022-11-19  3:00         ` Taylor Blau
2022-11-30 15:31       ` Derrick Stolee
2022-11-28 18:56 ` Han-Wen Nienhuys
2022-11-30 15:16   ` Derrick Stolee
2022-11-30 15:38     ` Phillip Wood
2022-11-30 16:37     ` Taylor Blau
2022-11-30 18:30     ` Han-Wen Nienhuys
2022-11-30 18:37       ` Sean Allred
2022-12-01 20:18       ` Derrick Stolee
2022-12-02 16:46         ` Han-Wen Nienhuys
2022-12-02 18:24           ` Ævar Arnfjörð Bjarmason
2022-11-30 22:55     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.