git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, Johannes.Schindelin@gmx.de,
	chakrabortyabhradeep79@gmail.com, derrickstolee@github.com,
	jonathantanmy@google.com, kaartic.sivaraam@gmail.com
Subject: [PATCH v2 3/7] midx.c: extract `struct midx_fanout`
Date: Mon, 22 Aug 2022 15:50:38 -0400	[thread overview]
Message-ID: <ae2077acb795311c87ce3bbcef60bffb66a6aa79.1661197803.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1661197803.git.me@ttaylorr.com>

To build up a list of objects (along with their packs, and the offsets
within those packs that each object appears at), the MIDX code
implements `get_sorted_entries()` which builds up a list of candidates,
sorts them, and then removes duplicate entries.

To do this, it keeps an array of `pack_midx_entry` structures that it
builds up once for each fanout level (ie., for all possible values of
the first byte of each object's ID).

This array is a function-local variable of `get_sorted_entries()`. Since
it uses the ALLOC_GROW() macro, having the `alloc_fanout` variable also
be local to that function, and only modified within that function is
convenient.

However, subsequent changes will extract the two ways this array is
filled (from a pack at some fanout value, and from an existing MIDX at
some fanout value) into separate functions. Instead of passing around
pointers to the entries array, along with `nr_fanout` and
`alloc_fanout`, encapsulate these three into a structure instead. Then
pass around a pointer to this structure instead.

This patch does not yet extract the above two functions, but sets us up
to begin doing so in the following commit. For now, the implementation
of get_sorted_entries() is only modified to replace `entries_by_fanout`
with `fanout.entries`, `nr_fanout` with `fanout.nr`, and so on.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx.c | 54 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/midx.c b/midx.c
index 4e956cacb7..cdb6c481c7 100644
--- a/midx.c
+++ b/midx.c
@@ -577,6 +577,22 @@ static void fill_pack_entry(uint32_t pack_int_id,
 	entry->preferred = !!preferred;
 }
 
+struct midx_fanout {
+	struct pack_midx_entry *entries;
+	uint32_t nr;
+	uint32_t alloc;
+};
+
+static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr)
+{
+	ALLOC_GROW(fanout->entries, nr, fanout->alloc);
+}
+
+static void midx_fanout_sort(struct midx_fanout *fanout)
+{
+	QSORT(fanout->entries, fanout->nr, midx_oid_compare);
+}
+
 /*
  * It is possible to artificially get into a state where there are many
  * duplicate copies of objects. That can create high memory pressure if
@@ -595,8 +611,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 						  int preferred_pack)
 {
 	uint32_t cur_fanout, cur_pack, cur_object;
-	uint32_t alloc_fanout, alloc_objects, total_objects = 0;
-	struct pack_midx_entry *entries_by_fanout = NULL;
+	uint32_t alloc_objects, total_objects = 0;
+	struct midx_fanout fanout = { 0 };
 	struct pack_midx_entry *deduplicated_entries = NULL;
 	uint32_t start_pack = m ? m->num_packs : 0;
 
@@ -608,14 +624,14 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 	 * slices to be evenly distributed, with some noise. Hence,
 	 * allocate slightly more than one 256th.
 	 */
-	alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
+	alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
 
-	ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
+	ALLOC_ARRAY(fanout.entries, fanout.alloc);
 	ALLOC_ARRAY(deduplicated_entries, alloc_objects);
 	*nr_objects = 0;
 
 	for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
-		uint32_t nr_fanout = 0;
+		fanout.nr = 0;
 
 		if (m) {
 			uint32_t start = 0, end;
@@ -625,15 +641,15 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 			end = ntohl(m->chunk_oid_fanout[cur_fanout]);
 
 			for (cur_object = start; cur_object < end; cur_object++) {
-				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
+				midx_fanout_grow(&fanout, fanout.nr + 1);
 				nth_midxed_pack_midx_entry(m,
-							   &entries_by_fanout[nr_fanout],
+							   &fanout.entries[fanout.nr],
 							   cur_object);
 				if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
-					entries_by_fanout[nr_fanout].preferred = 1;
+					fanout.entries[fanout.nr].preferred = 1;
 				else
-					entries_by_fanout[nr_fanout].preferred = 0;
-				nr_fanout++;
+					fanout.entries[fanout.nr].preferred = 0;
+				fanout.nr++;
 			}
 		}
 
@@ -646,36 +662,36 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 			end = get_pack_fanout(info[cur_pack].p, cur_fanout);
 
 			for (cur_object = start; cur_object < end; cur_object++) {
-				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
+				midx_fanout_grow(&fanout, fanout.nr + 1);
 				fill_pack_entry(cur_pack,
 						info[cur_pack].p,
 						cur_object,
-						&entries_by_fanout[nr_fanout],
+						&fanout.entries[fanout.nr],
 						preferred);
-				nr_fanout++;
+				fanout.nr++;
 			}
 		}
 
-		QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
+		midx_fanout_sort(&fanout);
 
 		/*
 		 * The batch is now sorted by OID and then mtime (descending).
 		 * Take only the first duplicate.
 		 */
-		for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
-			if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid,
-						&entries_by_fanout[cur_object].oid))
+		for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
+			if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
+						&fanout.entries[cur_object].oid))
 				continue;
 
 			ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
 			memcpy(&deduplicated_entries[*nr_objects],
-			       &entries_by_fanout[cur_object],
+			       &fanout.entries[cur_object],
 			       sizeof(struct pack_midx_entry));
 			(*nr_objects)++;
 		}
 	}
 
-	free(entries_by_fanout);
+	free(fanout.entries);
 	return deduplicated_entries;
 }
 
-- 
2.37.0.1.g1379af2e9d


  parent reply	other threads:[~2022-08-22 19:50 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-19 21:30 [PATCH 0/6] midx: permit changing the preferred pack when reusing the MIDX Taylor Blau
2022-08-19 21:30 ` [PATCH 1/6] t5326: demonstrate potential bitmap corruption Taylor Blau
2022-08-22 16:09   ` Derrick Stolee
2022-08-22 17:57     ` Taylor Blau
2022-08-22 19:31       ` Junio C Hamano
2022-08-22 19:41         ` Taylor Blau
2022-08-19 21:30 ` [PATCH 2/6] t/lib-bitmap.sh: avoid silencing stderr Taylor Blau
2022-08-20 16:44   ` Abhradeep Chakraborty
2022-08-22 17:58     ` Taylor Blau
2022-08-19 21:30 ` [PATCH 3/6] midx.c: extract `struct midx_fanout` Taylor Blau
2022-08-19 21:30 ` [PATCH 4/6] midx.c: extract `midx_fanout_add_midx_fanout()` Taylor Blau
2022-08-19 21:30 ` [PATCH 5/6] midx.c: extract `midx_fanout_add_pack_fanout()` Taylor Blau
2022-08-19 21:30 ` [PATCH 6/6] midx.c: include preferred pack correctly with existing MIDX Taylor Blau
2022-08-20 18:40   ` Abhradeep Chakraborty
2022-08-22 18:08     ` Taylor Blau
2022-08-22 17:03   ` Derrick Stolee
2022-08-22 18:14     ` Taylor Blau
2022-08-22 17:04 ` [PATCH 0/6] midx: permit changing the preferred pack when reusing the MIDX Derrick Stolee
2022-08-22 19:44   ` Taylor Blau
2022-08-22 19:50 ` [PATCH v2 0/7] " Taylor Blau
2022-08-22 19:50   ` [PATCH v2 1/7] t5326: demonstrate potential bitmap corruption Taylor Blau
2022-08-22 19:50   ` [PATCH v2 2/7] t/lib-bitmap.sh: avoid silencing stderr Taylor Blau
2022-08-22 19:50   ` Taylor Blau [this message]
2022-08-22 19:50   ` [PATCH v2 4/7] midx.c: extract `midx_fanout_add_midx_fanout()` Taylor Blau
2022-08-22 19:50   ` [PATCH v2 5/7] midx.c: extract `midx_fanout_add_pack_fanout()` Taylor Blau
2022-08-22 19:50   ` [PATCH v2 6/7] midx.c: include preferred pack correctly with existing MIDX Taylor Blau
2022-08-22 19:50   ` [PATCH v2 7/7] midx.c: avoid adding preferred objects twice Taylor Blau
2022-08-23 16:22     ` Derrick Stolee
2022-08-23 16:23   ` [PATCH v2 0/7] midx: permit changing the preferred pack when reusing the MIDX Derrick Stolee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ae2077acb795311c87ce3bbcef60bffb66a6aa79.1661197803.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=chakrabortyabhradeep79@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=kaartic.sivaraam@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).