All of lore.kernel.org
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 03/11] midx: move `midx_repack` (and related functions) to midx-write.c
Date: Mon, 25 Mar 2024 13:24:25 -0400	[thread overview]
Message-ID: <487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1711387439.git.me@ttaylorr.com>

Move `midx_repack()`, the main function which implements the sub-command
'git multi-pack-index repack' into midx-write.c.

This patch does not introduce any behavioral changes and is best viewed
with `--color-moved`.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx-write.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++
 midx.c       | 196 -------------------------------------------------
 2 files changed, 202 insertions(+), 196 deletions(-)

diff --git a/midx-write.c b/midx-write.c
index 4aab273243..6dd58be7e0 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -1,5 +1,11 @@
 #include "git-compat-util.h"
+#include "config.h"
+#include "hex.h"
+#include "packfile.h"
 #include "midx.h"
+#include "run-command.h"
+#include "pack-bitmap.h"
+#include "revision.h"
 
 extern int write_midx_internal(const char *object_dir,
 			       struct string_list *packs_to_include,
@@ -10,3 +16,199 @@ extern int write_midx_internal(const char *object_dir,
 
 extern struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
 							const char *object_dir);
+
+struct repack_info {
+	timestamp_t mtime;
+	uint32_t referenced_objects;
+	uint32_t pack_int_id;
+};
+
+static int compare_by_mtime(const void *a_, const void *b_)
+{
+	const struct repack_info *a, *b;
+
+	a = (const struct repack_info *)a_;
+	b = (const struct repack_info *)b_;
+
+	if (a->mtime < b->mtime)
+		return -1;
+	if (a->mtime > b->mtime)
+		return 1;
+	return 0;
+}
+
+static int fill_included_packs_all(struct repository *r,
+				   struct multi_pack_index *m,
+				   unsigned char *include_pack)
+{
+	uint32_t i, count = 0;
+	int pack_kept_objects = 0;
+
+	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+	for (i = 0; i < m->num_packs; i++) {
+		if (prepare_midx_pack(r, m, i))
+			continue;
+		if (!pack_kept_objects && m->packs[i]->pack_keep)
+			continue;
+		if (m->packs[i]->is_cruft)
+			continue;
+
+		include_pack[i] = 1;
+		count++;
+	}
+
+	return count < 2;
+}
+
+static int fill_included_packs_batch(struct repository *r,
+				     struct multi_pack_index *m,
+				     unsigned char *include_pack,
+				     size_t batch_size)
+{
+	uint32_t i, packs_to_repack;
+	size_t total_size;
+	struct repack_info *pack_info;
+	int pack_kept_objects = 0;
+
+	CALLOC_ARRAY(pack_info, m->num_packs);
+
+	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+	for (i = 0; i < m->num_packs; i++) {
+		pack_info[i].pack_int_id = i;
+
+		if (prepare_midx_pack(r, m, i))
+			continue;
+
+		pack_info[i].mtime = m->packs[i]->mtime;
+	}
+
+	for (i = 0; i < m->num_objects; i++) {
+		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+		pack_info[pack_int_id].referenced_objects++;
+	}
+
+	QSORT(pack_info, m->num_packs, compare_by_mtime);
+
+	total_size = 0;
+	packs_to_repack = 0;
+	for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
+		int pack_int_id = pack_info[i].pack_int_id;
+		struct packed_git *p = m->packs[pack_int_id];
+		size_t expected_size;
+
+		if (!p)
+			continue;
+		if (!pack_kept_objects && p->pack_keep)
+			continue;
+		if (p->is_cruft)
+			continue;
+		if (open_pack_index(p) || !p->num_objects)
+			continue;
+
+		expected_size = st_mult(p->pack_size,
+					pack_info[i].referenced_objects);
+		expected_size /= p->num_objects;
+
+		if (expected_size >= batch_size)
+			continue;
+
+		packs_to_repack++;
+		total_size += expected_size;
+		include_pack[pack_int_id] = 1;
+	}
+
+	free(pack_info);
+
+	if (packs_to_repack < 2)
+		return 1;
+
+	return 0;
+}
+
+int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
+{
+	int result = 0;
+	uint32_t i;
+	unsigned char *include_pack;
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	FILE *cmd_in;
+	struct strbuf base_name = STRBUF_INIT;
+	struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
+
+	/*
+	 * When updating the default for these configuration
+	 * variables in builtin/repack.c, these must be adjusted
+	 * to match.
+	 */
+	int delta_base_offset = 1;
+	int use_delta_islands = 0;
+
+	if (!m)
+		return 0;
+
+	CALLOC_ARRAY(include_pack, m->num_packs);
+
+	if (batch_size) {
+		if (fill_included_packs_batch(r, m, include_pack, batch_size))
+			goto cleanup;
+	} else if (fill_included_packs_all(r, m, include_pack))
+		goto cleanup;
+
+	repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
+	repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
+
+	strvec_push(&cmd.args, "pack-objects");
+
+	strbuf_addstr(&base_name, object_dir);
+	strbuf_addstr(&base_name, "/pack/pack");
+	strvec_push(&cmd.args, base_name.buf);
+
+	if (delta_base_offset)
+		strvec_push(&cmd.args, "--delta-base-offset");
+	if (use_delta_islands)
+		strvec_push(&cmd.args, "--delta-islands");
+
+	if (flags & MIDX_PROGRESS)
+		strvec_push(&cmd.args, "--progress");
+	else
+		strvec_push(&cmd.args, "-q");
+
+	strbuf_release(&base_name);
+
+	cmd.git_cmd = 1;
+	cmd.in = cmd.out = -1;
+
+	if (start_command(&cmd)) {
+		error(_("could not start pack-objects"));
+		result = 1;
+		goto cleanup;
+	}
+
+	cmd_in = xfdopen(cmd.in, "w");
+
+	for (i = 0; i < m->num_objects; i++) {
+		struct object_id oid;
+		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+
+		if (!include_pack[pack_int_id])
+			continue;
+
+		nth_midxed_object_oid(&oid, m, i);
+		fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
+	}
+	fclose(cmd_in);
+
+	if (finish_command(&cmd)) {
+		error(_("could not finish pack-objects"));
+		result = 1;
+		goto cleanup;
+	}
+
+	result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
+
+cleanup:
+	free(include_pack);
+	return result;
+}
diff --git a/midx.c b/midx.c
index 5f22f01716..3bd8c58642 100644
--- a/midx.c
+++ b/midx.c
@@ -2055,199 +2055,3 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
 
 	return result;
 }
-
-struct repack_info {
-	timestamp_t mtime;
-	uint32_t referenced_objects;
-	uint32_t pack_int_id;
-};
-
-static int compare_by_mtime(const void *a_, const void *b_)
-{
-	const struct repack_info *a, *b;
-
-	a = (const struct repack_info *)a_;
-	b = (const struct repack_info *)b_;
-
-	if (a->mtime < b->mtime)
-		return -1;
-	if (a->mtime > b->mtime)
-		return 1;
-	return 0;
-}
-
-static int fill_included_packs_all(struct repository *r,
-				   struct multi_pack_index *m,
-				   unsigned char *include_pack)
-{
-	uint32_t i, count = 0;
-	int pack_kept_objects = 0;
-
-	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
-	for (i = 0; i < m->num_packs; i++) {
-		if (prepare_midx_pack(r, m, i))
-			continue;
-		if (!pack_kept_objects && m->packs[i]->pack_keep)
-			continue;
-		if (m->packs[i]->is_cruft)
-			continue;
-
-		include_pack[i] = 1;
-		count++;
-	}
-
-	return count < 2;
-}
-
-static int fill_included_packs_batch(struct repository *r,
-				     struct multi_pack_index *m,
-				     unsigned char *include_pack,
-				     size_t batch_size)
-{
-	uint32_t i, packs_to_repack;
-	size_t total_size;
-	struct repack_info *pack_info;
-	int pack_kept_objects = 0;
-
-	CALLOC_ARRAY(pack_info, m->num_packs);
-
-	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
-	for (i = 0; i < m->num_packs; i++) {
-		pack_info[i].pack_int_id = i;
-
-		if (prepare_midx_pack(r, m, i))
-			continue;
-
-		pack_info[i].mtime = m->packs[i]->mtime;
-	}
-
-	for (i = 0; i < m->num_objects; i++) {
-		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
-		pack_info[pack_int_id].referenced_objects++;
-	}
-
-	QSORT(pack_info, m->num_packs, compare_by_mtime);
-
-	total_size = 0;
-	packs_to_repack = 0;
-	for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
-		int pack_int_id = pack_info[i].pack_int_id;
-		struct packed_git *p = m->packs[pack_int_id];
-		size_t expected_size;
-
-		if (!p)
-			continue;
-		if (!pack_kept_objects && p->pack_keep)
-			continue;
-		if (p->is_cruft)
-			continue;
-		if (open_pack_index(p) || !p->num_objects)
-			continue;
-
-		expected_size = st_mult(p->pack_size,
-					pack_info[i].referenced_objects);
-		expected_size /= p->num_objects;
-
-		if (expected_size >= batch_size)
-			continue;
-
-		packs_to_repack++;
-		total_size += expected_size;
-		include_pack[pack_int_id] = 1;
-	}
-
-	free(pack_info);
-
-	if (packs_to_repack < 2)
-		return 1;
-
-	return 0;
-}
-
-int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
-{
-	int result = 0;
-	uint32_t i;
-	unsigned char *include_pack;
-	struct child_process cmd = CHILD_PROCESS_INIT;
-	FILE *cmd_in;
-	struct strbuf base_name = STRBUF_INIT;
-	struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
-
-	/*
-	 * When updating the default for these configuration
-	 * variables in builtin/repack.c, these must be adjusted
-	 * to match.
-	 */
-	int delta_base_offset = 1;
-	int use_delta_islands = 0;
-
-	if (!m)
-		return 0;
-
-	CALLOC_ARRAY(include_pack, m->num_packs);
-
-	if (batch_size) {
-		if (fill_included_packs_batch(r, m, include_pack, batch_size))
-			goto cleanup;
-	} else if (fill_included_packs_all(r, m, include_pack))
-		goto cleanup;
-
-	repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
-	repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
-
-	strvec_push(&cmd.args, "pack-objects");
-
-	strbuf_addstr(&base_name, object_dir);
-	strbuf_addstr(&base_name, "/pack/pack");
-	strvec_push(&cmd.args, base_name.buf);
-
-	if (delta_base_offset)
-		strvec_push(&cmd.args, "--delta-base-offset");
-	if (use_delta_islands)
-		strvec_push(&cmd.args, "--delta-islands");
-
-	if (flags & MIDX_PROGRESS)
-		strvec_push(&cmd.args, "--progress");
-	else
-		strvec_push(&cmd.args, "-q");
-
-	strbuf_release(&base_name);
-
-	cmd.git_cmd = 1;
-	cmd.in = cmd.out = -1;
-
-	if (start_command(&cmd)) {
-		error(_("could not start pack-objects"));
-		result = 1;
-		goto cleanup;
-	}
-
-	cmd_in = xfdopen(cmd.in, "w");
-
-	for (i = 0; i < m->num_objects; i++) {
-		struct object_id oid;
-		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
-
-		if (!include_pack[pack_int_id])
-			continue;
-
-		nth_midxed_object_oid(&oid, m, i);
-		fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
-	}
-	fclose(cmd_in);
-
-	if (finish_command(&cmd)) {
-		error(_("could not finish pack-objects"));
-		result = 1;
-		goto cleanup;
-	}
-
-	result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
-
-cleanup:
-	free(include_pack);
-	return result;
-}
-- 
2.44.0.290.g736be63234b


  parent reply	other threads:[~2024-03-25 17:24 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-25 17:24 [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Taylor Blau
2024-03-25 17:24 ` [PATCH 01/11] midx-write: initial commit Taylor Blau
2024-03-25 20:30   ` Junio C Hamano
2024-03-25 22:09     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 02/11] midx: extern a pair of shared functions Taylor Blau
2024-03-25 17:24 ` Taylor Blau [this message]
2024-03-25 17:24 ` [PATCH 04/11] midx: move `expire_midx_packs` to midx-write.c Taylor Blau
2024-03-25 17:24 ` [PATCH 05/11] midx: move `write_midx_file_only` " Taylor Blau
2024-03-25 17:24 ` [PATCH 06/11] midx: move `write_midx_file` " Taylor Blau
2024-03-25 17:24 ` [PATCH 07/11] midx: move `write_midx_internal` (and related functions) " Taylor Blau
2024-03-25 17:24 ` [PATCH 08/11] midx-write.c: avoid directly managed temporary strbuf Taylor Blau
2024-03-25 20:33   ` Junio C Hamano
2024-03-25 22:11     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 09/11] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-03-25 20:36   ` Junio C Hamano
2024-03-27  8:29   ` Jeff King
2024-03-25 17:24 ` [PATCH 10/11] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-03-25 20:41   ` Junio C Hamano
2024-03-25 22:11     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 11/11] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-03-27  8:37   ` Jeff King
2024-03-27  8:39 ` [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Jeff King
2024-04-01 21:16 ` [PATCH v2 0/4] " Taylor Blau
2024-04-01 21:16   ` [PATCH v2 1/4] midx-write: move writing-related functions from midx.c Taylor Blau
2024-04-01 21:16   ` [PATCH v2 2/4] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-04-02 11:47     ` Patrick Steinhardt
2024-04-01 21:16   ` [PATCH v2 3/4] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-04-01 21:16   ` [PATCH v2 4/4] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-04-01 21:45     ` Junio C Hamano
2024-04-02 11:47     ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.