git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: peff@peff.net, gitster@pobox.com, avarab@gmail.com
Subject: [PATCH v2 7/8] builtin/repack.c: make largest pack preferred
Date: Wed, 15 Sep 2021 14:24:50 -0400	[thread overview]
Message-ID: <68bc49d8aed438303e422530c74f2e970c7d66a4.1631730270.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1631730270.git.me@ttaylorr.com>

When repacking into a geometric series and writing a multi-pack bitmap,
it is beneficial to have the largest resulting pack be the preferred
object source in the bitmap's MIDX, since selecting the large packs can
lead to fewer broken delta chains and better compression.

Teach 'git repack' to identify this pack and pass it to the MIDX write
machinery in order to mark it as preferred.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-repack.txt |  4 ++++
 builtin/repack.c             | 27 ++++++++++++++++++++++++++-
 pack-bitmap.c                |  2 +-
 pack-bitmap.h                |  1 +
 t/helper/test-read-midx.c    | 25 ++++++++++++++++++++++++-
 t/t7703-repack-geometric.sh  | 22 ++++++++++++++++++++++
 6 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 0f2d235ca5..7183fb498f 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -190,6 +190,10 @@ this "roll-up", without respect to their reachability. This is subject
 to change in the future. This option (implying a drastically different
 repack mode) is not guaranteed to work with all other combinations of
 option to `git repack`.
++
+When writing a multi-pack bitmap, `git repack` selects the largest resulting
+pack as the preferred pack for object selection by the MIDX (see
+linkgit:git-multi-pack-index[1]).
 
 -m::
 --write-midx::
diff --git a/builtin/repack.c b/builtin/repack.c
index ca18d3dd2e..8c7bc4551e 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -422,6 +422,25 @@ static void split_pack_geometry(struct pack_geometry *geometry, int factor)
 	geometry->split = split;
 }
 
+static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry)
+{
+	if (!geometry) {
+		/*
+		 * No geometry means either an all-into-one repack (in which
+		 * case there is only one pack left and it is the largest) or an
+		 * incremental one.
+		 *
+		 * If repacking incrementally, then we could check the size of
+		 * all packs to determine which should be preferred, but leave
+		 * this for later.
+		 */
+		return NULL;
+	}
+	if (geometry->split == geometry->pack_nr)
+		return NULL;
+	return geometry->pack[geometry->pack_nr - 1];
+}
+
 static void clear_pack_geometry(struct pack_geometry *geometry)
 {
 	if (!geometry)
@@ -467,10 +486,12 @@ static void midx_included_packs(struct string_list *include,
 }
 
 static int write_midx_included_packs(struct string_list *include,
+				     struct pack_geometry *geometry,
 				     int show_progress, int write_bitmaps)
 {
 	struct child_process cmd = CHILD_PROCESS_INIT;
 	struct string_list_item *item;
+	struct packed_git *largest = get_largest_active_pack(geometry);
 	FILE *in;
 	int ret;
 
@@ -488,6 +509,10 @@ static int write_midx_included_packs(struct string_list *include,
 	if (write_bitmaps)
 		strvec_push(&cmd.args, "--bitmap");
 
+	if (largest)
+		strvec_pushf(&cmd.args, "--preferred-pack=%s",
+			     pack_basename(largest));
+
 	ret = start_command(&cmd);
 	if (ret)
 		return ret;
@@ -773,7 +798,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		midx_included_packs(&include, &existing_packs,
 				    &existing_kept_packs, &names, geometry);
 
-		ret = write_midx_included_packs(&include,
+		ret = write_midx_included_packs(&include, geometry,
 						show_progress, write_bitmaps > 0);
 
 		string_list_clear(&include, 0);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 8504110a4d..67be9be9a6 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1418,7 +1418,7 @@ static int try_partial_reuse(struct packed_git *pack,
 	return 0;
 }
 
-static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
+uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
 {
 	struct multi_pack_index *m = bitmap_git->midx;
 	if (!m)
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 469090bad2..7d407c5a4c 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -55,6 +55,7 @@ int test_bitmap_commits(struct repository *r);
 struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
 					 struct list_objects_filter_options *filter,
 					 int filter_provided_objects);
+uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);
 int reuse_partial_packfile_from_bitmap(struct bitmap_index *,
 				       struct packed_git **packfile,
 				       uint32_t *entries,
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index cb0d27049a..0038559129 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -3,6 +3,7 @@
 #include "midx.h"
 #include "repository.h"
 #include "object-store.h"
+#include "pack-bitmap.h"
 
 static int read_midx_file(const char *object_dir, int show_objects)
 {
@@ -72,14 +73,36 @@ static int read_midx_checksum(const char *object_dir)
 	return 0;
 }
 
+static int read_midx_preferred_pack(const char *object_dir)
+{
+	struct multi_pack_index *midx = NULL;
+	struct bitmap_index *bitmap = NULL;
+
+	setup_git_directory();
+
+	midx = load_multi_pack_index(object_dir, 1);
+	if (!midx)
+		return 1;
+
+	bitmap = prepare_bitmap_git(the_repository);
+	if (!(bitmap && bitmap_is_midx(bitmap)))
+		return 1;
+
+
+	printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]);
+	return 0;
+}
+
 int cmd__read_midx(int argc, const char **argv)
 {
 	if (!(argc == 2 || argc == 3))
-		usage("read-midx [--show-objects|--checksum] <object-dir>");
+		usage("read-midx [--show-objects|--checksum|--preferred-pack] <object-dir>");
 
 	if (!strcmp(argv[1], "--show-objects"))
 		return read_midx_file(argv[2], 1);
 	else if (!strcmp(argv[1], "--checksum"))
 		return read_midx_checksum(argv[2]);
+	else if (!strcmp(argv[1], "--preferred-pack"))
+		return read_midx_preferred_pack(argv[2]);
 	return read_midx_file(argv[1], 0);
 }
diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh
index 5ccaa440e0..54360c9878 100755
--- a/t/t7703-repack-geometric.sh
+++ b/t/t7703-repack-geometric.sh
@@ -180,4 +180,26 @@ test_expect_success '--geometric ignores kept packs' '
 	)
 '
 
+test_expect_success '--geometric chooses largest MIDX preferred pack' '
+	git init geometric &&
+	test_when_finished "rm -fr geometric" &&
+	(
+		cd geometric &&
+
+		# These packs already form a geometric progression.
+		test_commit_bulk --start=1 1 && # 3 objects
+		test_commit_bulk --start=2 2 && # 6 objects
+		ls $objdir/pack/pack-*.idx >before &&
+		test_commit_bulk --start=4 4 && # 12 objects
+		ls $objdir/pack/pack-*.idx >after &&
+
+		git repack --geometric 2 -dbm &&
+
+		comm -3 before after | xargs -n 1 basename >expect &&
+		test-tool read-midx --preferred-pack $objdir >actual &&
+
+		test_cmp expect actual
+	)
+'
+
 test_done
-- 
2.33.0.96.g73915697e6


  parent reply	other threads:[~2021-09-15 18:24 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-11  3:32 [PATCH 0/8] repack: introduce `--write-midx` Taylor Blau
2021-09-11  3:32 ` [PATCH 1/8] midx: expose 'write_midx_file_only()' publicly Taylor Blau
2021-09-11  5:00   ` Junio C Hamano
2021-09-11 16:17     ` Taylor Blau
2021-09-11 10:07   ` Ævar Arnfjörð Bjarmason
2021-09-11 16:21     ` Taylor Blau
2021-09-11  3:32 ` [PATCH 2/8] builtin/multi-pack-index.c: support --stdin-packs mode Taylor Blau
2021-09-11 10:05   ` Ævar Arnfjörð Bjarmason
2021-09-11 16:25     ` Taylor Blau
2021-09-11 16:28       ` Taylor Blau
2021-09-12  2:08       ` Eric Sunshine
2021-09-12  2:21         ` Taylor Blau
2021-09-12 15:15           ` Ævar Arnfjörð Bjarmason
2021-09-12 22:30             ` Junio C Hamano
2021-09-12 22:32               ` Ævar Arnfjörð Bjarmason
2021-09-14 19:02       ` Jeff King
2021-09-14 23:48         ` Taylor Blau
2021-09-15  1:55           ` Eric Sunshine
2021-09-11  3:32 ` [PATCH 3/8] midx: preliminary support for `--refs-snapshot` Taylor Blau
2021-09-11  3:32 ` [PATCH 4/8] builtin/repack.c: keep track of existing packs unconditionally Taylor Blau
2021-09-11  3:32 ` [PATCH 5/8] builtin/repack.c: extract showing progress to a variable Taylor Blau
2021-09-11  3:32 ` [PATCH 6/8] builtin/repack.c: support writing a MIDX while repacking Taylor Blau
2021-09-11  3:32 ` [PATCH 7/8] builtin/repack.c: make largest pack preferred Taylor Blau
2021-09-11 10:17   ` Ævar Arnfjörð Bjarmason
2021-09-11 16:35     ` Taylor Blau
2021-09-11  3:32 ` [PATCH 8/8] builtin/repack.c: pass `--refs-snapshot` when writing bitmaps Taylor Blau
2021-09-11 10:27   ` Ævar Arnfjörð Bjarmason
2021-09-11 11:19     ` Ævar Arnfjörð Bjarmason
2021-09-11 16:51       ` Taylor Blau
2021-09-14 18:55         ` Jeff King
2021-09-14 23:34           ` Taylor Blau
2021-09-14 23:56             ` Ævar Arnfjörð Bjarmason
2021-09-15  4:31               ` Taylor Blau
2021-09-11 16:49     ` Taylor Blau
2021-09-15 18:24 ` [PATCH v2 0/8] repack: introduce `--write-midx` Taylor Blau
2021-09-15 18:24   ` [PATCH v2 1/8] midx: expose `write_midx_file_only()` publicly Taylor Blau
2021-09-22 23:14     ` Jonathan Tan
2021-09-23  3:09       ` Taylor Blau
2021-09-15 18:24   ` [PATCH v2 2/8] builtin/multi-pack-index.c: support `--stdin-packs` mode Taylor Blau
2021-09-22 22:29     ` Josh Steadmon
2021-09-23  2:03       ` Taylor Blau
2021-09-22 23:11     ` Jonathan Tan
2021-09-23  2:06       ` Taylor Blau
2021-09-15 18:24   ` [PATCH v2 3/8] midx: preliminary support for `--refs-snapshot` Taylor Blau
2021-09-22 22:34     ` Josh Steadmon
2021-09-23  2:08       ` Taylor Blau
2021-09-22 23:00     ` Jonathan Tan
2021-09-23  2:18       ` Taylor Blau
2021-09-15 18:24   ` [PATCH v2 4/8] builtin/repack.c: keep track of existing packs unconditionally Taylor Blau
2021-09-22 22:56     ` Jonathan Tan
2021-09-23  2:59       ` Taylor Blau
2021-09-15 18:24   ` [PATCH v2 5/8] builtin/repack.c: extract showing progress to a variable Taylor Blau
2021-09-15 18:24   ` [PATCH v2 6/8] builtin/repack.c: support writing a MIDX while repacking Taylor Blau
2021-09-22 22:39     ` Jonathan Tan
2021-09-23  2:40       ` Taylor Blau
2021-09-15 18:24   ` Taylor Blau [this message]
2021-09-15 18:24   ` [PATCH v2 8/8] builtin/repack.c: pass `--refs-snapshot` when writing bitmaps Taylor Blau
2021-09-24 18:22     ` Jonathan Tan
2021-10-01 22:38       ` Taylor Blau
2021-09-15 19:22   ` [PATCH v2 0/8] repack: introduce `--write-midx` Junio C Hamano
2021-09-15 19:29     ` Junio C Hamano
2021-09-15 21:19       ` Taylor Blau
2021-09-16 22:16         ` Junio C Hamano
2021-09-29  1:54   ` [PATCH v3 0/9] " Taylor Blau
2021-09-29  1:55     ` [PATCH v3 1/9] midx: expose `write_midx_file_only()` publicly Taylor Blau
2021-09-29  1:55     ` [PATCH v3 2/9] builtin/multi-pack-index.c: support `--stdin-packs` mode Taylor Blau
2021-09-29  1:55     ` [PATCH v3 3/9] midx: preliminary support for `--refs-snapshot` Taylor Blau
2021-09-29  1:55     ` [PATCH v3 4/9] builtin/repack.c: keep track of existing packs unconditionally Taylor Blau
2021-09-29  1:55     ` [PATCH v3 5/9] builtin/repack.c: rename variables that deal with non-kept packs Taylor Blau
2021-09-29  1:55     ` [PATCH v3 6/9] builtin/repack.c: extract showing progress to a variable Taylor Blau
2021-09-29  1:55     ` [PATCH v3 7/9] builtin/repack.c: support writing a MIDX while repacking Taylor Blau
2021-09-29  1:55     ` [PATCH v3 8/9] builtin/repack.c: make largest pack preferred Taylor Blau
2021-09-29  1:55     ` [PATCH v3 9/9] builtin/repack.c: pass `--refs-snapshot` when writing bitmaps Taylor Blau
2021-09-29  4:24     ` [PATCH v3 0/9] repack: introduce `--write-midx` Junio C Hamano
2021-10-01 20:01     ` Jonathan Tan
2021-10-01 22:40       ` Taylor Blau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=68bc49d8aed438303e422530c74f2e970c7d66a4.1631730270.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).