All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josh Steadmon <steadmon@google.com>
To: git@vger.kernel.org, peff@peff.net, jonathantanmy@google.com,
	jrnieder@gmail.com
Subject: [PATCH v3] rev-list: exclude promisor objects at walk time
Date: Mon,  8 Apr 2019 14:06:04 -0700	[thread overview]
Message-ID: <9856e7fc74f51b60ae162cbed3f5c0cf8c603222.1554757275.git.steadmon@google.com> (raw)
In-Reply-To: <6de682d5e48186970644569586fc6613763d5caa.1554312374.git.steadmon@google.com>

For large repositories, enumerating the list of all promisor objects (in
order to exclude them from a rev-list walk) can take a significant
amount of time.

When --exclude-promisor-objects is passed to rev-list, don't enumerate
the promisor objects. Instead, filter them (and any child objects)
during the actual graph walk.

Remove the mark_uninteresting() function as it's not used anywhere else.

When testing against a large repo [1], this patch reduces the
connectivity check runtime from 3 minutes to ~7 seconds.

[1]: https://android.googlesource.com/platform/frameworks/base/

Helped-By: Jonathan Tan <jonathantanmy@google.com>
Helped-By: Jeff King <peff@peff.net>
Helped-By: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Josh Steadmon <steadmon@google.com>
---
Changes since V2:
* Pulled the "OK to skip?" logic into a separate function.

Changes since V1:
* Switched to an alternate approach: we now do the regular rev-list walk
  but skip promisor objects at walk time, rather than pre-excluding
  them.

Range-diff against v2:
1:  9f327d6d8d ! 1:  9856e7fc74 rev-list: exclude promisor objects at walk time
    @@ -10,9 +10,15 @@
         the promisor objects. Instead, filter them (and any children objects)
         during the actual graph walk.
     
    +    When testing against a large repo [1], this reduces the connectivity
    +    check runtime from 3 minutes to ~7 seconds.
    +
    +    [1]: https://android.googlesource.com/platform/frameworks/base/
    +
         Helped-By: Jonathan Tan <jonathantanmy@google.com>
         Helped-By: Jeff King <peff@peff.net>
         Helped-By: Jonathan Nieder <jrnieder@gmail.com>
    +    Signed-off-by: Josh Steadmon <steadmon@google.com>
     
     
    @@ -20,78 +26,55 @@
      --- a/list-objects.c
      +++ b/list-objects.c
     @@
    - 	struct object *obj = &blob->object;
    - 	size_t pathlen;
    - 	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
    -+	struct object_info oi = OBJECT_INFO_INIT;
    + 	void *filter_data;
    + };
      
    - 	if (!ctx->revs->blob_objects)
    - 		return;
    ++static int should_skip_promisor_object(const struct rev_info *revs,
    ++				       const struct object_id *oid)
    ++{
    ++	struct object_info oi = OBJECT_INFO_INIT;
    ++	return (revs->exclude_promisor_objects &&
    ++		!oid_object_info_extended(the_repository, oid, &oi, 0) &&
    ++		oi.whence == OI_PACKED &&
    ++		oi.u.packed.pack->pack_promisor);
    ++}
    ++
    + static void process_blob(struct traversal_context *ctx,
    + 			 struct blob *blob,
    + 			 struct strbuf *path,
     @@
      		die("bad blob object");
      	if (obj->flags & (UNINTERESTING | SEEN))
      		return;
    -+	if (ctx->revs->exclude_promisor_objects &&
    -+	    !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) &&
    -+	    oi.whence == OI_PACKED &&
    -+	    oi.u.packed.pack->pack_promisor)
    ++	if (should_skip_promisor_object(ctx->revs, &obj->oid))
     +		return;
      
      	/*
      	 * Pre-filter known-missing objects when explicitly requested.
    -@@
    - 	int baselen = base->len;
    - 	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
    - 	int failed_parse;
    -+	struct object_info oi = OBJECT_INFO_INIT;
    - 
    - 	if (!revs->tree_objects)
    - 		return;
     @@
      		die("bad tree object");
      	if (obj->flags & (UNINTERESTING | SEEN))
      		return;
    -+	if (ctx->revs->exclude_promisor_objects &&
    -+	    !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) &&
    -+	    oi.whence == OI_PACKED &&
    -+	    oi.u.packed.pack->pack_promisor)
    ++	if (should_skip_promisor_object(ctx->revs, &obj->oid))
     +		return;
      
      	failed_parse = parse_tree_gently(tree, 1);
      	if (failed_parse) {
    -@@
    - 				     struct strbuf *base)
    - {
    - 	int i;
    -+	struct object_info oi = OBJECT_INFO_INIT;
    - 
    - 	assert(base->len == 0);
    - 
     @@
      		struct object *obj = pending->item;
      		const char *name = pending->name;
      		const char *path = pending->path;
    -+		if (ctx->revs->exclude_promisor_objects &&
    -+		    !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) &&
    -+		    oi.whence == OI_PACKED &&
    -+		    oi.u.packed.pack->pack_promisor)
    ++		if (should_skip_promisor_object(ctx->revs, &obj->oid))
     +			continue;
     +
      		if (obj->flags & (UNINTERESTING | SEEN))
      			continue;
      		if (obj->type == OBJ_TAG) {
     @@
    - {
    - 	struct commit *commit;
    - 	struct strbuf csp; /* callee's scratch pad */
    -+	struct object_info oi = OBJECT_INFO_INIT;
      	strbuf_init(&csp, PATH_MAX);
      
      	while ((commit = get_revision(ctx->revs)) != NULL) {
    -+		if (ctx->revs->exclude_promisor_objects &&
    -+		    !oid_object_info_extended(the_repository, &commit->object.oid, &oi, 0) &&
    -+		    oi.whence == OI_PACKED &&
    -+		    oi.u.packed.pack->pack_promisor)
    ++		if (should_skip_promisor_object(ctx->revs, &commit->object.oid))
     +			continue;
     +
      		/*

 list-objects.c | 20 ++++++++++++++++++++
 revision.c     | 16 ----------------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/list-objects.c b/list-objects.c
index dc77361e11..c153ee5dfb 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -22,6 +22,16 @@ struct traversal_context {
 	void *filter_data;
 };
 
+static int should_skip_promisor_object(const struct rev_info *revs,
+				       const struct object_id *oid)
+{
+	struct object_info oi = OBJECT_INFO_INIT;
+	return (revs->exclude_promisor_objects &&
+		!oid_object_info_extended(the_repository, oid, &oi, 0) &&
+		oi.whence == OI_PACKED &&
+		oi.u.packed.pack->pack_promisor);
+}
+
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
@@ -37,6 +47,8 @@ static void process_blob(struct traversal_context *ctx,
 		die("bad blob object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
+	if (should_skip_promisor_object(ctx->revs, &obj->oid))
+		return;
 
 	/*
 	 * Pre-filter known-missing objects when explicitly requested.
@@ -156,6 +168,8 @@ static void process_tree(struct traversal_context *ctx,
 		die("bad tree object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
+	if (should_skip_promisor_object(ctx->revs, &obj->oid))
+		return;
 
 	failed_parse = parse_tree_gently(tree, 1);
 	if (failed_parse) {
@@ -326,6 +340,9 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 		struct object *obj = pending->item;
 		const char *name = pending->name;
 		const char *path = pending->path;
+		if (should_skip_promisor_object(ctx->revs, &obj->oid))
+			continue;
+
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
@@ -356,6 +373,9 @@ static void do_traverse(struct traversal_context *ctx)
 	strbuf_init(&csp, PATH_MAX);
 
 	while ((commit = get_revision(ctx->revs)) != NULL) {
+		if (should_skip_promisor_object(ctx->revs, &commit->object.oid))
+			continue;
+
 		/*
 		 * an uninteresting boundary commit may not have its tree
 		 * parsed yet, but we are not going to show them anyway
diff --git a/revision.c b/revision.c
index eb8e51bc63..85974e941d 100644
--- a/revision.c
+++ b/revision.c
@@ -3067,17 +3067,6 @@ void reset_revision_walk(void)
 	clear_object_flags(SEEN | ADDED | SHOWN);
 }
 
-static int mark_uninteresting(const struct object_id *oid,
-			      struct packed_git *pack,
-			      uint32_t pos,
-			      void *cb)
-{
-	struct rev_info *revs = cb;
-	struct object *o = parse_object(revs->repo, oid);
-	o->flags |= UNINTERESTING | SEEN;
-	return 0;
-}
-
 define_commit_slab(indegree_slab, int);
 define_commit_slab(author_date_slab, timestamp_t);
 
@@ -3316,11 +3305,6 @@ int prepare_revision_walk(struct rev_info *revs)
 	    (revs->limited && limiting_can_increase_treesame(revs)))
 		revs->treesame.name = "treesame";
 
-	if (revs->exclude_promisor_objects) {
-		for_each_packed_object(mark_uninteresting, revs,
-				       FOR_EACH_OBJECT_PROMISOR_ONLY);
-	}
-
 	if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
 		commit_list_sort_by_date(&revs->commits);
 	if (revs->no_walk)
-- 
2.21.0.392.gf8f6787159e-goog


  parent reply	other threads:[~2019-04-08 21:06 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-03 17:27 [PATCH] clone: do faster object check for partial clones Josh Steadmon
2019-04-03 18:58 ` Jonathan Tan
2019-04-03 19:41 ` Jeff King
2019-04-03 20:57   ` Jonathan Tan
2019-04-04  0:21     ` Josh Steadmon
2019-04-04  1:33     ` Jeff King
2019-04-04 22:53 ` [PATCH v2] rev-list: exclude promisor objects at walk time Josh Steadmon
2019-04-04 23:08   ` Jeff King
2019-04-04 23:47     ` Josh Steadmon
2019-04-05  0:00       ` Jeff King
2019-04-05  0:09         ` Josh Steadmon
2019-04-08 20:59           ` Josh Steadmon
2019-04-08 21:06 ` Josh Steadmon [this message]
2019-04-08 22:23   ` [PATCH v3] " Christian Couder
2019-04-08 23:12     ` Josh Steadmon
2019-04-09 15:14   ` Junio C Hamano
2019-04-09 15:15     ` Jeff King
2019-04-09 15:43       ` Junio C Hamano
2019-04-09 16:35         ` Josh Steadmon
2019-04-09 18:04   ` SZEDER Gábor
2019-04-09 23:42     ` Josh Steadmon
2019-04-11  4:06       ` Jeff King
2019-04-12 22:38         ` Josh Steadmon
2019-04-13  5:34           ` Jeff King
2019-04-19 20:26             ` Josh Steadmon
2019-04-19 21:00 ` [PATCH v4] clone: do faster object check for partial clones Josh Steadmon
2019-04-22 21:31   ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9856e7fc74f51b60ae162cbed3f5c0cf8c603222.1554757275.git.steadmon@google.com \
    --to=steadmon@google.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=jrnieder@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.