git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
	peff@peff.net, gitster@pobox.com
Subject: [PATCH v2 2/2] repack: repack promisor objects if -a or -A is set
Date: Wed,  8 Aug 2018 15:34:06 -0700	[thread overview]
Message-ID: <3dab08e467c23f2c0785e34c3a8703d809b6479a.1533767314.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1533767314.git.jonathantanmy@google.com>

Currently, repack does not touch promisor packfiles at all, potentially
causing the performance of repositories that have many such packfiles to
drop. Therefore, repack all promisor objects if invoked with -a or -A.

This is done by an additional invocation of pack-objects on all promisor
objects individually given, which takes care of deduplication and allows
the resulting packfiles to respect flags such as --max-pack-size.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/git-repack.txt |  5 +++
 builtin/repack.c             | 83 +++++++++++++++++++++++++++++++++--
 t/t0410-partial-clone.sh     | 85 +++++++++++++++++++++++++++++++-----
 3 files changed, 158 insertions(+), 15 deletions(-)

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index d90e7907f..d05625096 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -40,6 +40,11 @@ OPTIONS
 Note that users fetching over dumb protocols will have to fetch the
 whole new pack in order to get any contained object, no matter how many
 other objects in that pack they already have locally.
++
+Promisor packfiles are repacked separately: if there are packfiles that
+have an associated ".promisor" file, these packfiles will be repacked
+into another separate pack, and an empty ".promisor" file corresponding
+to the new separate pack will be written.
 
 -A::
 	Same as `-a`, unless `-d` is used.  Then any unreachable
diff --git a/builtin/repack.c b/builtin/repack.c
index f8cae7d66..5c97dec3d 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -8,6 +8,7 @@
 #include "strbuf.h"
 #include "string-list.h"
 #include "argv-array.h"
+#include "packfile.h"
 
 static int delta_base_offset = 1;
 static int pack_kept_objects = -1;
@@ -83,7 +84,7 @@ static void remove_pack_on_signal(int signo)
 
 /*
  * Adds all packs hex strings to the fname list, which do not
- * have a corresponding .keep or .promisor file. These packs are not to
+ * have a corresponding .keep file. These packs are not to
  * be kept if we are going to pack everything into one file.
  */
 static void get_non_kept_pack_filenames(struct string_list *fname_list,
@@ -111,8 +112,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
 
 		fname = xmemdupz(e->d_name, len);
 
-		if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
-		    !file_exists(mkpath("%s/%s.promisor", packdir, fname)))
+		if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
 			string_list_append_nodup(fname_list, fname);
 		else
 			free(fname);
@@ -122,7 +122,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
 
 static void remove_redundant_pack(const char *dir_name, const char *base_name)
 {
-	const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"};
+	const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
 	int i;
 	struct strbuf buf = STRBUF_INIT;
 	size_t plen;
@@ -179,6 +179,76 @@ static void prepare_pack_objects(struct child_process *cmd,
 	cmd->out = -1;
 }
 
+/*
+ * Write oid to the given struct child_process's stdin, starting it first if
+ * necessary.
+ */
+static int write_oid(const struct object_id *oid, struct packed_git *pack,
+		     uint32_t pos, void *data)
+{
+	struct child_process *cmd = data;
+
+	if (cmd->in == -1) {
+		if (start_command(cmd))
+			die("Could not start pack-objects to repack promisor objects");
+	}
+
+	xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
+	xwrite(cmd->in, "\n", 1);
+	return 0;
+}
+
+static void repack_promisor_objects(const struct pack_objects_args *args,
+				    struct string_list *names)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	FILE *out;
+	struct strbuf line = STRBUF_INIT;
+
+	prepare_pack_objects(&cmd, args);
+	cmd.in = -1;
+
+	/*
+	 * NEEDSWORK: Giving pack-objects only the OIDs without any ordering
+	 * hints may result in suboptimal deltas in the resulting pack. See if
+	 * the OIDs can be sent with fake paths such that pack-objects can use a
+	 * {type -> existing pack order} ordering when computing deltas instead
+	 * of a {type -> size} ordering, which may produce better deltas.
+	 */
+	for_each_packed_object(write_oid, &cmd,
+			       FOR_EACH_OBJECT_PROMISOR_ONLY);
+
+	if (cmd.in == -1)
+		/* No packed objects; cmd was never started */
+		return;
+
+	close(cmd.in);
+
+	out = xfdopen(cmd.out, "r");
+	while (strbuf_getline_lf(&line, out) != EOF) {
+		char *promisor_name;
+		int fd;
+		if (line.len != 40)
+			die("repack: Expecting 40 character sha1 lines only from pack-objects.");
+		string_list_append(names, line.buf);
+
+		/*
+		 * pack-objects creates the .pack and .idx files, but not the
+		 * .promisor file. Create the .promisor file, which is empty.
+		 */
+		promisor_name = mkpathdup("%s-%s.promisor", packtmp,
+					  line.buf);
+		fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+		if (fd < 0)
+			die_errno("unable to create '%s'", promisor_name);
+		close(fd);
+		free(promisor_name);
+	}
+	fclose(out);
+	if (finish_command(&cmd))
+		die("Could not finish pack-objects to repack promisor objects");
+}
+
 #define ALL_INTO_ONE 1
 #define LOOSEN_UNREACHABLE 2
 
@@ -191,6 +261,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		{".pack"},
 		{".idx"},
 		{".bitmap", 1},
+		{".promisor", 1},
 	};
 	struct child_process cmd = CHILD_PROCESS_INIT;
 	struct string_list_item *item;
@@ -293,6 +364,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 	if (pack_everything & ALL_INTO_ONE) {
 		get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
 
+		repack_promisor_objects(&po_args, &names);
+
 		if (existing_packs.nr && delete_redundant) {
 			if (unpack_unreachable) {
 				argv_array_pushf(&cmd.args,
@@ -440,6 +513,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 
 	/* End of pack replacement. */
 
+	reprepare_packed_git(the_repository);
+
 	if (delete_redundant) {
 		int opts = 0;
 		string_list_sort(&names);
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 4984ca583..128130066 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -271,28 +271,91 @@ test_expect_success 'rev-list accepts missing and promised objects on command li
 	git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB"
 '
 
-test_expect_success 'gc does not repack promisor objects' '
+test_expect_success 'gc repacks promisor objects separately from non-promisor objects' '
 	rm -rf repo &&
 	test_create_repo repo &&
-	test_commit -C repo my_commit &&
+	test_commit -C repo one &&
+	test_commit -C repo two &&
 
-	TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) &&
-	HASH=$(printf "$TREE_HASH\n" | pack_as_from_promisor) &&
+	TREE_ONE=$(git -C repo rev-parse one^{tree}) &&
+	printf "$TREE_ONE\n" | pack_as_from_promisor &&
+	TREE_TWO=$(git -C repo rev-parse two^{tree}) &&
+	printf "$TREE_TWO\n" | pack_as_from_promisor &&
 
 	git -C repo config core.repositoryformatversion 1 &&
 	git -C repo config extensions.partialclone "arbitrary string" &&
 	git -C repo gc &&
 
-	# Ensure that the promisor packfile still exists, and remove it
-	test -e repo/.git/objects/pack/pack-$HASH.pack &&
-	rm repo/.git/objects/pack/pack-$HASH.* &&
-
-	# Ensure that the single other pack contains the commit, but not the tree
+	# Ensure that exactly one promisor packfile exists, and that it
+	# contains the trees but not the commits
+	ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
+	test_line_count = 1 promisorlist &&
+	PROMISOR_PACKFILE=$(sed "s/.promisor/.pack/" <promisorlist) &&
+	git verify-pack $PROMISOR_PACKFILE -v >out &&
+	grep "$TREE_ONE" out &&
+	grep "$TREE_TWO" out &&
+	! grep "$(git -C repo rev-parse one)" out &&
+	! grep "$(git -C repo rev-parse two)" out &&
+
+	# Remove the promisor packfile and associated files
+	rm $(sed "s/.promisor//" <promisorlist).* &&
+
+	# Ensure that the single other pack contains the commits, but not the
+	# trees
 	ls repo/.git/objects/pack/pack-*.pack >packlist &&
 	test_line_count = 1 packlist &&
 	git verify-pack repo/.git/objects/pack/pack-*.pack -v >out &&
-	grep "$(git -C repo rev-parse HEAD)" out &&
-	! grep "$TREE_HASH" out
+	grep "$(git -C repo rev-parse one)" out &&
+	grep "$(git -C repo rev-parse two)" out &&
+	! grep "$TREE_ONE" out &&
+	! grep "$TREE_TWO" out
+'
+
+test_expect_success 'gc does not repack promisor objects if there are none' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	test_commit -C repo one &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config extensions.partialclone "arbitrary string" &&
+	git -C repo gc &&
+
+	# Ensure that only one pack exists
+	ls repo/.git/objects/pack/pack-*.pack >packlist &&
+	test_line_count = 1 packlist
+'
+
+repack_and_check () {
+	rm -rf repo2 &&
+	cp -r repo repo2 &&
+	git -C repo2 repack $1 -d &&
+	git -C repo2 fsck &&
+
+	git -C repo2 cat-file -e $2 &&
+	git -C repo2 cat-file -e $3
+}
+
+test_expect_success 'repack -d does not irreversibly delete promisor objects' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config extensions.partialclone "arbitrary string" &&
+
+	git -C repo commit --allow-empty -m one &&
+	git -C repo commit --allow-empty -m two &&
+	git -C repo commit --allow-empty -m three &&
+	git -C repo commit --allow-empty -m four &&
+	ONE=$(git -C repo rev-parse HEAD^^^) &&
+	TWO=$(git -C repo rev-parse HEAD^^) &&
+	THREE=$(git -C repo rev-parse HEAD^) &&
+
+	printf "$TWO\n" | pack_as_from_promisor &&
+	printf "$THREE\n" | pack_as_from_promisor &&
+	delete_object repo "$ONE" &&
+
+	repack_and_check -a "$TWO" "$THREE" &&
+	repack_and_check -A "$TWO" "$THREE" &&
+	repack_and_check -l "$TWO" "$THREE"
 '
 
 test_expect_success 'gc stops traversal when a missing but promised object is reached' '
-- 
2.18.0.597.ga71716f1ad-goog


  parent reply	other threads:[~2018-08-08 22:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-07 20:12 [PATCH 0/2] Repacking of promisor packfiles Jonathan Tan
2018-08-07 20:12 ` [PATCH 1/2] repack: refactor setup of pack-objects cmd Jonathan Tan
2018-08-07 20:12 ` [PATCH 2/2] repack: repack promisor objects if -a or -A is set Jonathan Tan
2018-08-07 20:57   ` Junio C Hamano
2018-08-07 23:23     ` Jonathan Tan
2018-08-08  0:25       ` Junio C Hamano
2018-08-08 18:45         ` Jonathan Tan
2018-08-08 15:50       ` Jeff King
2018-08-08 16:17         ` Junio C Hamano
2018-08-07 22:37   ` Junio C Hamano
2018-08-07 23:25     ` Jonathan Tan
2018-08-08 16:34       ` Junio C Hamano
2018-08-08 22:34 ` [PATCH v2 0/2] Repacking of promisor packfiles Jonathan Tan
2018-08-08 22:34   ` [PATCH v2 1/2] repack: refactor setup of pack-objects cmd Jonathan Tan
2018-08-08 22:34   ` Jonathan Tan [this message]
2018-08-09 17:05     ` [PATCH v2 2/2] repack: repack promisor objects if -a or -A is set Junio C Hamano
2018-08-09 18:12       ` Jonathan Tan
2018-08-18 16:05     ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3dab08e467c23f2c0785e34c3a8703d809b6479a.1533767314.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).