All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Oliver <roliver@roku.com>
To: git@vger.kernel.org
Cc: jonathantanmy@google.com
Subject: [PATCH] mktree: learn about promised objects
Date: Tue, 14 Jun 2022 14:36:28 +0100	[thread overview]
Message-ID: <77035a0f-c42e-5cb3-f422-03fe81093adb@roku.com> (raw)

Do not use oid_object_info() to determine object type in mktree_line()
as this can cause promised objects to be dynamically faulted-in one at a
time which has poor performance. Instead, use a combination of
oid_object_info_extended() (with OBJECT_INFO_SKIP_FETCH_OBJECT option),
and the newly introduced promisor_object_type() to determine object type
before defaulting to fetch from remote.

Signed-off-by: Richard Oliver <roliver@roku.com>
---
 builtin/mktree.c         | 19 ++++++++++++++--
 packfile.c               | 49 ++++++++++++++++++++++++++++------------
 packfile.h               |  6 +++++
 t/t0410-partial-clone.sh | 28 +++++++++++++++++++++++
 4 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/builtin/mktree.c b/builtin/mktree.c
index 902edba6d2..a783767b28 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -8,6 +8,7 @@
 #include "tree.h"
 #include "parse-options.h"
 #include "object-store.h"
+#include "packfile.h"
 
 static struct treeent {
 	unsigned mode;
@@ -116,8 +117,22 @@ static void mktree_line(char *buf, int nul_term_line, int allow_missing)
 			path, ptr, type_name(mode_type));
 	}
 
-	/* Check the type of object identified by sha1 */
-	obj_type = oid_object_info(the_repository, &oid, NULL);
+	/* Check the type of object identified by oid without fetching objects */
+	struct object_info oi = OBJECT_INFO_INIT;
+	oi.typep = &obj_type;
+	if (oid_object_info_extended(the_repository, &oid, &oi,
+				     OBJECT_INFO_LOOKUP_REPLACE |
+				     OBJECT_INFO_SKIP_FETCH_OBJECT) < 0)
+		obj_type = -1;
+
+	/* Has the object been promised to us if we can't find it */
+	if (obj_type < 0)
+		obj_type = promisor_object_type(&oid);
+
+	/* Try to fetch the object from the remote */
+	if (obj_type < 0)
+		obj_type = oid_object_info(the_repository, &oid, NULL);
+
 	if (obj_type < 0) {
 		if (allow_missing) {
 			; /* no problem - missing objects are presumed to be of the right type */
diff --git a/packfile.c b/packfile.c
index 8e812a84a3..d27cb106c1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2223,17 +2223,26 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
 	return r ? r : pack_errors;
 }
 
+static void map_put_type(kh_oid_pos_t *map,
+	const struct object_id *oid,
+	enum object_type type)
+{
+	int hash_ret;
+	khiter_t pos = kh_put_oid_pos(map, *oid, &hash_ret);
+	kh_value(map, pos) = type;
+}
+
 static int add_promisor_object(const struct object_id *oid,
 			       struct packed_git *pack,
 			       uint32_t pos,
-			       void *set_)
+			       void *map_)
 {
-	struct oidset *set = set_;
+	kh_oid_pos_t *map = map_;
 	struct object *obj = parse_object(the_repository, oid);
 	if (!obj)
 		return 1;
 
-	oidset_insert(set, oid);
+	map_put_type(map, oid, obj->type);
 
 	/*
 	 * If this is a tree, commit, or tag, the objects it refers
@@ -2250,34 +2259,46 @@ static int add_promisor_object(const struct object_id *oid,
 			 */
 			return 0;
 		while (tree_entry_gently(&desc, &entry))
-			oidset_insert(set, &entry.oid);
+			map_put_type(map,
+				     &entry.oid,
+				     object_type(entry.mode));
 		free_tree_buffer(tree);
 	} else if (obj->type == OBJ_COMMIT) {
 		struct commit *commit = (struct commit *) obj;
 		struct commit_list *parents = commit->parents;
 
-		oidset_insert(set, get_commit_tree_oid(commit));
+		map_put_type(map, get_commit_tree_oid(commit), OBJ_TREE);
 		for (; parents; parents = parents->next)
-			oidset_insert(set, &parents->item->object.oid);
+			map_put_type(map,
+				     &parents->item->object.oid,
+				     OBJ_COMMIT);
 	} else if (obj->type == OBJ_TAG) {
 		struct tag *tag = (struct tag *) obj;
-		oidset_insert(set, get_tagged_oid(tag));
+		map_put_type(map, get_tagged_oid(tag), OBJ_COMMIT);
 	}
 	return 0;
 }
 
-int is_promisor_object(const struct object_id *oid)
+enum object_type promisor_object_type(const struct object_id *oid)
 {
-	static struct oidset promisor_objects;
-	static int promisor_objects_prepared;
+	static kh_oid_pos_t *promisor_objects;
 
-	if (!promisor_objects_prepared) {
+	if (!promisor_objects) {
+		promisor_objects = kh_init_oid_pos();
 		if (has_promisor_remote()) {
 			for_each_packed_object(add_promisor_object,
-					       &promisor_objects,
+					       promisor_objects,
 					       FOR_EACH_OBJECT_PROMISOR_ONLY);
 		}
-		promisor_objects_prepared = 1;
 	}
-	return oidset_contains(&promisor_objects, oid);
+	khiter_t pos = kh_get_oid_pos(promisor_objects, *oid);
+	if (pos < kh_end(promisor_objects))
+		return kh_value(promisor_objects, pos);
+	else
+		return OBJ_BAD;
+}
+
+int is_promisor_object(const struct object_id *oid)
+{
+	return (promisor_object_type(oid) > OBJ_BAD);
 }
diff --git a/packfile.h b/packfile.h
index a3f6723857..35d0b600d2 100644
--- a/packfile.h
+++ b/packfile.h
@@ -182,6 +182,12 @@ int has_pack_index(const unsigned char *sha1);
  */
 int is_promisor_object(const struct object_id *oid);
 
+/*
+ * Return the object type if the given object is in, or referred to
+ * by, a promisor packfile; OBJ_BAD otherwise.
+ */
+enum object_type promisor_object_type(const struct object_id *oid);
+
 /*
  * Expose a function for fuzz testing.
  *
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 1e864cf317..12bfb02187 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -660,6 +660,34 @@ test_expect_success 'lazy-fetch when accessing object not in the_repository' '
 	! grep "[?]$FILE_HASH" out
 '
 
+test_expect_success 'missing blob object promised by tree, passes mktree' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	echo foo >repo/foo &&
+	A=$(git -C repo hash-object foo) &&
+	git -C repo update-index --add --info-only foo &&
+	B=$(git -C repo write-tree --missing-ok) &&
+	printf "$B\n" | pack_as_from_promisor &&
+	delete_object repo "$B" &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config extensions.partialclone "arbitrary string" &&
+
+	printf "100644 blob $A\tbar" | git -C repo mktree
+'
+
+test_expect_success 'missing blob object, not promised, faulted-in by mktree' '
+	test_create_repo server &&
+	A=$(echo foo | git -C server hash-object --stdin -w) &&
+	test_commit -C server 1 &&
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+
+	rm -rf repo &&
+	git clone --filter=blob:none "file://$(pwd)/server" repo &&
+	printf "100644 blob $A\tbar" | git -C repo mktree
+'
+
 . "$TEST_DIRECTORY"/lib-httpd.sh
 start_httpd
 

base-commit: 1e59178e3f65880188caedb965e70db5ceeb2d64
-- 
2.36.1.384.gcc60f6fd7d


             reply	other threads:[~2022-06-14 13:36 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-14 13:36 Richard Oliver [this message]
2022-06-14 14:14 ` [PATCH] mktree: learn about promised objects Derrick Stolee
2022-06-14 16:33   ` Richard Oliver
2022-06-14 17:27     ` Derrick Stolee
2022-06-15  0:35       ` Taylor Blau
2022-06-15  4:00         ` Jeff King
2022-06-15 17:40           ` Richard Oliver
2022-06-15 18:17             ` Derrick Stolee
2022-06-16  6:07               ` Jeff King
2022-06-16  6:54                 ` [PATCH] is_promisor_object(): walk promisor packs in pack-order Jeff King
2022-06-16 14:00                   ` Derrick Stolee
2022-06-17 19:50                   ` Jonathan Tan
2022-06-16 13:59                 ` [PATCH] mktree: learn about promised objects Derrick Stolee
2022-06-15 21:01             ` Junio C Hamano
2022-06-16  5:02               ` Jeff King
2022-06-16 15:46               ` [PATCH] mktree: Make '--missing' behave as documented Richard Oliver
2022-06-16 17:44                 ` Junio C Hamano
2022-06-21 13:59                   ` [PATCH] mktree: do not check type of remote objects Richard Oliver
2022-06-21 16:51                     ` Junio C Hamano
2022-06-21 17:48                     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=77035a0f-c42e-5cb3-f422-03fe81093adb@roku.com \
    --to=roliver@roku.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.