git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Packfile-uris support excluding commit objects
@ 2021-05-07  2:11 Teng Long
  2021-05-10 11:14 ` Ævar Arnfjörð Bjarmason
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
  0 siblings, 2 replies; 72+ messages in thread
From: Teng Long @ 2021-05-07  2:11 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, Teng Long

On the server, more sophisticated means of excluding objects should be
supported, such as commit object. This commit introduces a new
configuration `uploadpack.commitpackfileuri` for this.

This patch only pack the commit object, not including the that commit
and all objects that it references. This work will be done in a further
patch recently.

Similarly, there are related documents that will be included in
subsequent patches.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c |  8 ++---
 fetch-pack.c           |  8 +++++
 t/t5702-protocol-v2.sh | 71 +++++++++++++++++++++++++++++++++---------
 upload-pack.c          |  7 +++--
 4 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..2f1817fe28 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2985,7 +2985,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			pack_idx_opts.flags &= ~WRITE_REV;
 		return 0;
 	}
-	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+    if (!strcmp(k, "uploadpack.blobpackfileuri") || !strcmp(k, "uploadpack.commitpackfileuri")) {
 		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
 		const char *oid_end, *pack_end;
 		/*
@@ -2998,11 +2998,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		    *oid_end != ' ' ||
 		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
-			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
+            die(_("value of uploadpack.blobpackfileuri or upload.commitpackfileuri must be "
+                  "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
 			die(_("object already configured in another "
-			      "uploadpack.blobpackfileuri (got '%s')"), v);
+			      "uploadpack.blobpackfileuri or uploadpack.commitpackfileuri (got '%s')"), v);
 		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
 		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
diff --git a/fetch-pack.c b/fetch-pack.c
index 2318ebe680..24a947835b 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -23,6 +23,7 @@
 #include "fetch-negotiator.h"
 #include "fsck.h"
 #include "shallow.h"
+#include "strmap.h"
 
 static int transfer_unpack_limit = -1;
 static int fetch_unpack_limit = -1;
@@ -1677,6 +1678,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		}
 	}
 
+	struct strset uris;
+	strset_init(&uris);
 	for (i = 0; i < packfile_uris.nr; i++) {
 		int j;
 		struct child_process cmd = CHILD_PROCESS_INIT;
@@ -1684,6 +1687,11 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		const char *uri = packfile_uris.items[i].string +
 			the_hash_algo->hexsz + 1;
 
+        if (strset_contains(&uris, uri)) {
+            continue;
+        }
+
+        strset_add(&uris, uri);
 		strvec_push(&cmd.args, "http-fetch");
 		strvec_pushf(&cmd.args, "--packfile=%.*s",
 			     (int) the_hash_algo->hexsz,
diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..d444177fb5 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -824,12 +824,22 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+    if test "$1" = "blob"
+        then
+            git -C "$2" hash-object "$3" >objh &&
+            git -C "$2" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+            git -C "$2" config --add \
+            		"uploadpack.blobpackfileuri" \
+            		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+            cat objh
+        else
+            echo "$3" > objh &&
+            git -C "$2" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+            git -C "$2" config --add \
+            		"uploadpack.commitpackfileuri" \
+            		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+            cat objh
+    fi
 }
 
 test_expect_success 'part of packfile response provided as URI' '
@@ -845,8 +855,8 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add other-blob &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob >h &&
+	configure_exclusion blob "$P" other-blob >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -881,7 +891,7 @@ test_expect_success 'part of packfile response provided as URI' '
 	test_line_count = 6 filelist
 '
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
+test_expect_success 'blobs packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	rm -rf "$P" http_child log &&
 
@@ -892,7 +902,7 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 	git -C "$P" add my-blob &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	git init http_child &&
 
@@ -902,6 +912,37 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 		fetch "$HTTPD_URL/smart/http_parent"
 '
 
+test_expect_success 'commits packfile URIs with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	rm -rf "$P" http_child log &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	git -C "$P" commit -m x &&
+
+
+	mycommit=$(git -C "$P" rev-parse HEAD) &&
+    echo other-blob >"$P/other-blob" &&
+    git -C "$P" add other-blob &&
+    git -C "$P" commit -m x &&
+	othercommit=$(git -C "$P" rev-parse HEAD) &&
+	configure_exclusion commit "$P" "$mycommit" >h &&
+	configure_exclusion commit "$P" "$othercommit" >h2 &&
+
+	git init http_child &&
+
+	GIT_TRACE=1 GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch "$HTTPD_URL/smart/http_parent" &&
+	ls http_child/.git/objects/pack/*.pack \
+    	    http_child/.git/objects/pack/*.idx >filelist &&
+    	test_line_count = 6 filelist
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	rm -rf "$P" http_child log &&
@@ -915,7 +956,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add other-blob &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -944,7 +985,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	git -C "$P" add my-blob &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -978,7 +1019,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 	git -C "$P" add my-blob &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1000,7 +1041,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1026,7 +1067,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..34f8bb81a8 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1744,9 +1744,12 @@ int upload_pack_advertise(struct repository *r,
 		     allow_sideband_all_value))
 			strbuf_addstr(value, " sideband-all");
 
-		if (!repo_config_get_string(the_repository,
+		if ((!repo_config_get_string(the_repository,
 					    "uploadpack.blobpackfileuri",
-					    &str) &&
+					    &str) ||
+            !repo_config_get_string(the_repository,
+                        "uploadpack.commitpackfileuri",
+                        &str)) &&
 		    str) {
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
-- 
2.31.1.442.g7e39198978.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH] Packfile-uris support excluding commit objects
  2021-05-07  2:11 [PATCH] Packfile-uris support excluding commit objects Teng Long
@ 2021-05-10 11:14 ` Ævar Arnfjörð Bjarmason
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
  1 sibling, 0 replies; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-05-10 11:14 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy


On Fri, May 07 2021, Teng Long wrote:

It seems like this and your
http://lore.kernel.org/git/20210506073354.27833-1-dyroneteng@gmail.com
should be part of one series, not split up.

> On the server, more sophisticated means of excluding objects should be
> supported, such as commit object. This commit introduces a new
> configuration `uploadpack.commitpackfileuri` for this.

Per my understanding in
https://lore.kernel.org/git/87o8hk820f.fsf@evledraar.gmail.com/ this +
Jonathan's earlier bfc2a36ff2a (Doc: clarify contents of packfile sent
as URI, 2021-01-20) still makes this whole thing more confusing that it
needs to be.

I think we should just have a new uploadpack.excludeObject, and document
that uploadpack.blobpackfileuri is an (unfortunately named) synonym for
it. I.e. the actual implementation doesn't care about the objec type it
just excludes any object listed via an oidmap. No?

As for some comments on the implementation:

> This patch only pack the commit object, not including the that commit
> and all objects that it references. This work will be done in a further
> patch recently.

I realize you're probably not a native English speaker (neither am I),
but I honestly can't understand that "This work will be done in a
further patch recently.". Do you mean something like:

    This change does not add support for recursively excluding things
    referenced by container objects such as "commit", "tag", and
    "tree". We'll still just dumbly exclude that specific object (this
    was originally meant for specific "blobs"). Smartly excluding things
    recursively might be implemented by a future change.

> Similarly, there are related documents that will be included in
> subsequent patches.

Please send the earlier doc cleanup + the spec change for this + any doc
updates as one series.

Narrow comments on the patch:

> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
>  builtin/pack-objects.c |  8 ++---
>  fetch-pack.c           |  8 +++++
>  t/t5702-protocol-v2.sh | 71 +++++++++++++++++++++++++++++++++---------
>  upload-pack.c          |  7 +++--
>  4 files changed, 73 insertions(+), 21 deletions(-)
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 6d13cd3e1a..2f1817fe28 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -2985,7 +2985,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  			pack_idx_opts.flags &= ~WRITE_REV;
>  		return 0;
>  	}
> -	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
> +    if (!strcmp(k, "uploadpack.blobpackfileuri") || !strcmp(k, "uploadpack.commitpackfileuri")) {


Nit: Split this across two lines.

>  		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
>  		const char *oid_end, *pack_end;
>  		/*
> @@ -2998,11 +2998,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  		    *oid_end != ' ' ||
>  		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
>  		    *pack_end != ' ')
> -			die(_("value of uploadpack.blobpackfileuri must be "
> -			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
> +            die(_("value of uploadpack.blobpackfileuri or upload.commitpackfileuri must be "
> +                  "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);

Indending with spaces.

>  		if (oidmap_get(&configured_exclusions, &ex->e.oid))
>  			die(_("object already configured in another "
> -			      "uploadpack.blobpackfileuri (got '%s')"), v);
> +			      "uploadpack.blobpackfileuri or uploadpack.commitpackfileuri (got '%s')"), v);

I think by having a uploadpack.excludeObject documented as the primary
interface to this we could just say "object already listed by an earlier
exclusion" or something like that.

>  		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
>  		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
>  		ex->uri = xstrdup(pack_end + 1);
> diff --git a/fetch-pack.c b/fetch-pack.c
> index 2318ebe680..24a947835b 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -23,6 +23,7 @@
>  #include "fetch-negotiator.h"
>  #include "fsck.h"
>  #include "shallow.h"
> +#include "strmap.h"
>  
>  static int transfer_unpack_limit = -1;
>  static int fetch_unpack_limit = -1;
> @@ -1677,6 +1678,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		}
>  	}
>  
> +	struct strset uris;
> +	strset_init(&uris);
>  	for (i = 0; i < packfile_uris.nr; i++) {
>  		int j;
>  		struct child_process cmd = CHILD_PROCESS_INIT;
> @@ -1684,6 +1687,11 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		const char *uri = packfile_uris.items[i].string +
>  			the_hash_algo->hexsz + 1;
>  
> +        if (strset_contains(&uris, uri)) {
> +            continue;
> +        }


More indenting with spaces, also don't need the {} here.

> +
> +        strset_add(&uris, uri);
>  		strvec_push(&cmd.args, "http-fetch");
>  		strvec_pushf(&cmd.args, "--packfile=%.*s",
>  			     (int) the_hash_algo->hexsz,
> diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
> index 2e1243ca40..d444177fb5 100755
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -824,12 +824,22 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
>  '
>  
>  configure_exclusion () {
> -	git -C "$1" hash-object "$2" >objh &&
> -	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
> -	git -C "$1" config --add \
> -		"uploadpack.blobpackfileuri" \
> -		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> -	cat objh
> +    if test "$1" = "blob"
> +        then

Don't indent the "then", also spaces...

> +            git -C "$2" hash-object "$3" >objh &&
> +            git -C "$2" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
> +            git -C "$2" config --add \
> +            		"uploadpack.blobpackfileuri" \
> +            		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> +            cat objh
> +        else
> +            echo "$3" > objh &&

Use ">objh" not "> objh".

> +            git -C "$2" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
> +            git -C "$2" config --add \
> +            		"uploadpack.commitpackfileuri" \
> +            		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> +            cat objh


This whole if/else seems like it could be better split up by discovering
the variable first, using that as a variable, and then avoiding the
duplication. But if we just used uploadpack.excludeObject...

> +    fi
>  }
>  
>  test_expect_success 'part of packfile response provided as URI' '
> @@ -845,8 +855,8 @@ test_expect_success 'part of packfile response provided as URI' '
>  	git -C "$P" add other-blob &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> -	configure_exclusion "$P" other-blob >h2 &&
> +	configure_exclusion blob "$P" my-blob >h &&
> +	configure_exclusion blob "$P" other-blob >h2 &&
>  
>  	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
>  	git -c protocol.version=2 \
> @@ -881,7 +891,7 @@ test_expect_success 'part of packfile response provided as URI' '
>  	test_line_count = 6 filelist
>  '
>  
> -test_expect_success 'packfile URIs with fetch instead of clone' '
> +test_expect_success 'blobs packfile URIs with fetch instead of clone' '
>  	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
>  	rm -rf "$P" http_child log &&
>  
> @@ -892,7 +902,7 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
>  	git -C "$P" add my-blob &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> +	configure_exclusion blob "$P" my-blob >h &&
>  
>  	git init http_child &&
>  
> @@ -902,6 +912,37 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
>  		fetch "$HTTPD_URL/smart/http_parent"
>  '
>  
> +test_expect_success 'commits packfile URIs with fetch instead of clone' '
> +	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +	rm -rf "$P" http_child log &&

Put stuff like this in "test_when_finished"

> +
> +	git init "$P" &&
> +	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
> +
> +	echo my-blob >"$P/my-blob" &&
> +	git -C "$P" add my-blob &&
> +	git -C "$P" commit -m x &&

You can just use test_commit here, no?

> +
> +
> +	mycommit=$(git -C "$P" rev-parse HEAD) &&
> +    echo other-blob >"$P/other-blob" &&
> +    git -C "$P" add other-blob &&
> +    git -C "$P" commit -m x &&

ditto test_commit.

> +	othercommit=$(git -C "$P" rev-parse HEAD) &&
> +	configure_exclusion commit "$P" "$mycommit" >h &&
> +	configure_exclusion commit "$P" "$othercommit" >h2 &&

Personally I'd just skip this whole "rev-parse HEAD" etc. and just pass
the tag name(s) created by earlier test_commit, then have
configure_exclusion ust always do a rev-parse...

> +
> +	git init http_child &&
> +
> +	GIT_TRACE=1 GIT_TEST_SIDEBAND_ALL=1 \
> +	git -C http_child -c protocol.version=2 \
> +		-c fetch.uriprotocols=http,https \
> +		fetch "$HTTPD_URL/smart/http_parent" &&
> +	ls http_child/.git/objects/pack/*.pack \
> +    	    http_child/.git/objects/pack/*.idx >filelist &&
> +    	test_line_count = 6 filelist
> +'
> +
>  test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
>  	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
>  	rm -rf "$P" http_child log &&
> @@ -915,7 +956,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
>  	git -C "$P" add other-blob &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> +	configure_exclusion blob "$P" my-blob >h &&
>  	# Configure a URL for other-blob. Just reuse the hash of the object as
>  	# the hash of the packfile, since the hash does not matter for this
>  	# test as long as it is not the hash of the pack, and it is of the
> @@ -944,7 +985,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
>  	git -C "$P" add my-blob &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> +	configure_exclusion blob "$P" my-blob >h &&
>  
>  	sane_unset GIT_TEST_SIDEBAND_ALL &&
>  	git -c protocol.version=2 -c transfer.fsckobjects=1 \
> @@ -978,7 +1019,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
>  	git -C "$P" add my-blob &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> +	configure_exclusion blob "$P" my-blob >h &&
>  
>  	sane_unset GIT_TEST_SIDEBAND_ALL &&
>  	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
> @@ -1000,7 +1041,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
>  	git -C "$P" add .gitmodules &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" .gitmodules >h &&
> +	configure_exclusion blob "$P" .gitmodules >h &&
>  
>  	sane_unset GIT_TEST_SIDEBAND_ALL &&
>  	git -c protocol.version=2 -c transfer.fsckobjects=1 \
> @@ -1026,7 +1067,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
>  	git -C "$P" add .gitmodules &&
>  	git -C "$P" commit -m x &&
>  
> -	configure_exclusion "$P" .gitmodules >h &&
> +	configure_exclusion blob "$P" .gitmodules >h &&
>  
>  	sane_unset GIT_TEST_SIDEBAND_ALL &&
>  	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
> diff --git a/upload-pack.c b/upload-pack.c
> index 5c1cd19612..34f8bb81a8 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -1744,9 +1744,12 @@ int upload_pack_advertise(struct repository *r,
>  		     allow_sideband_all_value))
>  			strbuf_addstr(value, " sideband-all");
>  
> -		if (!repo_config_get_string(the_repository,
> +		if ((!repo_config_get_string(the_repository,
>  					    "uploadpack.blobpackfileuri",
> -					    &str) &&
> +					    &str) ||
> +            !repo_config_get_string(the_repository,
> +                        "uploadpack.commitpackfileuri",
> +                        &str)) &&
>  		    str) {
>  			strbuf_addstr(value, " packfile-uris");
>  			free(str);

Not a new issue, but I wonder if we shouldn't just export
configset_find_element(). This is at least 2 stackframes down the chain
of the "does this key exist?" we actually care about here.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v2 0/3] packfile-uris: commit objects exclusion
  2021-05-07  2:11 [PATCH] Packfile-uris support excluding commit objects Teng Long
  2021-05-10 11:14 ` Ævar Arnfjörð Bjarmason
@ 2021-05-18  8:49 ` Teng Long
  2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
                     ` (3 more replies)
  1 sibling, 4 replies; 72+ messages in thread
From: Teng Long @ 2021-05-18  8:49 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Changes since v2:

* Rename the "uploadpack.blobpackfileuri" configuration to
  "uploadpack.excludeobject".
* In addition to blobs, packfile-uris now supports the exclusion of
  commit objects (recursive and non-recursive).
* Added a patch to modify the packfile-uris.txt file.
* Added a patch for related tests in t5702.

About renaming, I do not know whether it will bring some compatibility
impact, packfile-uris now is an experimental feature, how to deal
with this situation, hoping to get some advice.

Also, I did not consider implementing packfile-uri support for tree
objects, because in the design scenario of packfile-uris, it seems to
be of little use.

Teng Long (3):
  packfile-uris: support for excluding commit object
  packfile-uris.txt: excluding commit object
  t5702: excluding commits with packfile-uris

 Documentation/technical/packfile-uri.txt |  20 ++--
 builtin/pack-objects.c                   |  53 ++++++---
 fetch-pack.c                             |   5 +
 t/t5702-protocol-v2.sh                   | 145 +++++++++++++++++------
 upload-pack.c                            |   5 +-
 5 files changed, 166 insertions(+), 62 deletions(-)

Range-diff against v1:
1:  1f2fb5c85f < -:  ---------- Packfile-uris support excluding commit objects
-:  ---------- > 1:  73e64147b1 packfile-uris: support for excluding commit object
-:  ---------- > 2:  4abab98a76 packfile-uris.txt: excluding commit object
-:  ---------- > 3:  e824cc26a7 t5702: excluding commits with packfile-uris
-- 
2.31.1.442.g7e39198978.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v2 1/3] packfile-uris: support for excluding commit object
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
@ 2021-05-18  8:49   ` Teng Long
  2021-05-19  4:28     ` Junio C Hamano
  2021-05-20  4:46     ` Junio C Hamano
  2021-05-18  8:49   ` [PATCH v2 2/3] packfile-uris.txt: " Teng Long
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 72+ messages in thread
From: Teng Long @ 2021-05-18  8:49 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

On the server, more sophisticated means of excluding objects should be
supported, such as commit object. This commit introduces a new
configuration `uploadpack.excludeobject` for this.

The old configuration `uploadpack.blobpackfileuri` is only support to
exclude blobs and the name has no abstract meaning, so the configruation
name changes, to support more object types. Compatibility issues will
not be considered because packfile-uris now is an experimental feature.

In addition to the configuration name, the format of the configuration
value has also been expanded. When excluding the commits (or trees in
the future) objects, the old format `<object-hash> <pack-hash> <uri>`
can not express the meaning of recursion. So, the format is expanded,
the new format `<object-hash> <recursively> <pack-hash> <uri>` should
deal with this scenario (When processing commit objects, whether they
are absolutely recursively excluded).

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 53 ++++++++++++++++++++++++++++++------------
 fetch-pack.c           |  5 ++++
 upload-pack.c          |  5 ++--
 3 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..e687061420 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -99,6 +99,8 @@ static struct bitmap *reuse_packfile_bitmap;
 static int use_bitmap_index_default = 1;
 static int use_bitmap_index = -1;
 static int allow_pack_reuse = 1;
+static int in_commit_order;
+static int exclude_until_next_commit;
 static enum {
 	WRITE_BITMAP_FALSE = 0,
 	WRITE_BITMAP_QUIET,
@@ -132,6 +134,7 @@ struct configured_exclusion {
 	struct oidmap_entry e;
 	char *pack_hash_hex;
 	char *uri;
+	int recursively:1;
 };
 static struct oidmap configured_exclusions;
 
@@ -1291,10 +1294,16 @@ static int want_object_in_pack_one(struct packed_git *p,
  * and its offset in these variables.
  */
 static int want_object_in_pack(const struct object_id *oid,
+			       enum object_type type,
 			       int exclude,
 			       struct packed_git **found_pack,
 			       off_t *found_offset)
 {
+	if (exclude_until_next_commit && type != OBJ_COMMIT)
+		return 0;
+	if (type == OBJ_COMMIT)
+		exclude_until_next_commit = 0 ;
+
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
@@ -1345,6 +1354,8 @@ static int want_object_in_pack(const struct object_id *oid,
 						&p) &&
 				    *p == ':') {
 					oidset_insert(&excluded_by_config, oid);
+					if(ex->recursively && type == OBJ_COMMIT)
+						exclude_until_next_commit = 1;
 					return 0;
 				}
 			}
@@ -1394,7 +1405,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
+	if (!want_object_in_pack(oid, type, exclude, &found_pack, &found_offset)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -1420,7 +1431,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
 	if (have_duplicate_entry(oid, 0))
 		return 0;
 
-	if (!want_object_in_pack(oid, 0, &pack, &offset))
+	if (!want_object_in_pack(oid, type, 0, &pack, &offset))
 		return 0;
 
 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
@@ -2985,27 +2996,33 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			pack_idx_opts.flags &= ~WRITE_REV;
 		return 0;
 	}
-	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+	if (!strcmp(k, "uploadpack.excludeobject")) {
 		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
-		const char *oid_end, *pack_end;
+		const char *oid_end, *pack_end, *recursively_end;
 		/*
 		 * Stores the pack hash. This is not a true object ID, but is
 		 * of the same form.
 		 */
 		struct object_id pack_hash;
-
+		char recursively[2];
 		if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
 		    *oid_end != ' ' ||
-		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
+		    !strlcpy(recursively, oid_end + 1, sizeof(recursively)) ||
+		    parse_oid_hex(oid_end + 3, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
-			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
+                        die(_("value of uploadpack.excludeobject must be "
+                              "of the form '<object-hash> <recursively> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
-			die(_("object already configured in another "
-			      "uploadpack.blobpackfileuri (got '%s')"), v);
-		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
-		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
+                        die(_("object already configured by an earlier "
+                              "uploadpack.excludeobject (got '%s')"), v);
+		recursively_end = oid_end + 2;
+		ex->pack_hash_hex = xcalloc(1, pack_end - recursively_end);
+		memcpy(ex->pack_hash_hex, recursively_end + 1, pack_end - recursively_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
+		if (atoi(recursively)) {
+			ex->recursively = 1;
+			in_commit_order = 1;
+                }
 		oidmap_put(&configured_exclusions, ex);
 	}
 	return git_default_config(k, v, cb);
@@ -3023,7 +3040,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 	struct rev_info *revs = _data;
 	struct object_info oi = OBJECT_INFO_INIT;
 	off_t ofs;
-	enum object_type type;
+	static enum object_type type;
 
 	display_progress(progress_state, ++nr_seen);
 
@@ -3031,7 +3048,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	ofs = nth_packed_object_offset(p, pos);
-	if (!want_object_in_pack(oid, 0, &p, &ofs))
+	if (!want_object_in_pack(oid, type, 0, &p, &ofs))
 		return 0;
 
 	oi.typep = &type;
@@ -3831,7 +3848,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("respect islands during delta compression")),
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
-				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+				N_("exclude any configured uploadpack.excludeobject with this protocol")),
 		OPT_END(),
 	};
 
@@ -3903,6 +3920,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		fetch_if_missing = 0;
 		strvec_push(&rp, "--exclude-promisor-objects");
 	}
+
+	if (in_commit_order){
+		use_internal_rev_list = 1;
+		strvec_push(&rp, "--in-commit-order");
+	}
+
 	if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
 		use_internal_rev_list = 1;
 
diff --git a/fetch-pack.c b/fetch-pack.c
index 2318ebe680..cdf8777907 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -23,6 +23,7 @@
 #include "fetch-negotiator.h"
 #include "fsck.h"
 #include "shallow.h"
+#include "strmap.h"
 
 static int transfer_unpack_limit = -1;
 static int fetch_unpack_limit = -1;
@@ -1677,6 +1678,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		}
 	}
 
+	struct strset uris;
+	strset_init(&uris);
 	for (i = 0; i < packfile_uris.nr; i++) {
 		int j;
 		struct child_process cmd = CHILD_PROCESS_INIT;
@@ -1684,6 +1687,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		const char *uri = packfile_uris.items[i].string +
 			the_hash_algo->hexsz + 1;
 
+		if (!strset_add(&uris, uri))
+			continue;
 		strvec_push(&cmd.args, "http-fetch");
 		strvec_pushf(&cmd.args, "--packfile=%.*s",
 			     (int) the_hash_algo->hexsz,
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..4d994658d2 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1745,9 +1745,8 @@ int upload_pack_advertise(struct repository *r,
 			strbuf_addstr(value, " sideband-all");
 
 		if (!repo_config_get_string(the_repository,
-					    "uploadpack.blobpackfileuri",
-					    &str) &&
-		    str) {
+					 "uploadpack.excludeobject",
+					 &str) && str) {
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
 		}
-- 
2.31.1.442.g7e39198978.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v2 2/3] packfile-uris.txt: excluding commit object
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
  2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
@ 2021-05-18  8:49   ` Teng Long
  2021-05-18  8:49   ` [PATCH v2 3/3] t5702: excluding commits with packfile-uris Teng Long
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
  3 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-05-18  8:49 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Modified the content related to the configuration of packfile-uris, that
is, the modification of the configuration format and the support for
excluding commit objects, and cut some descriptions about future work.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/technical/packfile-uri.txt | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..6ed850930f 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,16 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
 This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more entries with the format:
+
+    uploadpack.excludeobject=<object-hash> <recursively> <pack-hash> <uri>
+
+Value <object-hash> is the key of entry, and the object type can be a blob
+or commit. Value <recursively> works for commit object, if <recursively>
+is configured as '1', then the commit and all the referenced objects by
+the commit will be recursively excluded. Otherwise, only the commit itself
+will be excluded. Whenever the list of objects to be sent is assembled, all
+such objects are excluded, replaced with URIs.
 
 Client design
 -------------
@@ -65,9 +68,6 @@ The protocol design allows some evolution of the server and client without any
 need for protocol changes, so only a small-scoped design is included here to
 form the MVP. For example, the following can be done:
 
- * On the server, more sophisticated means of excluding objects (e.g. by
-   specifying a commit to represent that commit and all objects that it
-   references).
  * On the client, resumption of clone. If a clone is interrupted, information
    could be recorded in the repository's config and a "clone-resume" command
    can resume the clone in progress. (Resumption of subsequent fetches is more
-- 
2.31.1.442.g7e39198978.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v2 3/3] t5702: excluding commits with packfile-uris
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
  2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
  2021-05-18  8:49   ` [PATCH v2 2/3] packfile-uris.txt: " Teng Long
@ 2021-05-18  8:49   ` Teng Long
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
  3 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-05-18  8:49 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Modify the logic of configure_exclusion to support the processing of
commits objects and add test cases for excluding commit objects
(recursive and non-recursive).

Replace "rm..." in the original test with "test_when_finished...".
Replace "git commit..." in the original test with "test_commit...".

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 145 +++++++++++++++++++++++++++++++----------
 1 file changed, 111 insertions(+), 34 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..2148bfcda1 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -753,7 +753,7 @@ test_expect_success 'ls-remote with v2 http sends only one POST' '
 '
 
 test_expect_success 'push with http:// and a config of v2 does not request v2' '
-	test_when_finished "rm -f log" &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 	# Till v2 for push is designed, make sure that if a client has
 	# protocol.version configured to use v2, that the client instead falls
 	# back and uses v0.
@@ -776,7 +776,7 @@ test_expect_success 'push with http:// and a config of v2 does not request v2' '
 '
 
 test_expect_success 'when server sends "ready", expect DELIM' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -796,7 +796,7 @@ test_expect_success 'when server sends "ready", expect DELIM' '
 '
 
 test_expect_success 'when server does not send "ready", expect FLUSH' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -824,17 +824,39 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+    objt="$1"
+    P="$2"
+    recursive="$4"
+
+    if test "$objt" = "blob"
+    then
+        git -C "$P" hash-object "$3" >objh &&
+        git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+        git -C "$P" config --add \
+                "uploadpack.excludeobject" \
+                "$(cat objh) $recursive $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+        cat objh
+    elif test "$objt" = "commit"
+    then
+        echo "$3" >objh
+        if test "$recursive" = 0
+            then
+                git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+        else
+            git -C "$2" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+        fi
+        git -C "$P" config --add \
+                "uploadpack.excludeobject" \
+                "$(cat objh) $recursive $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+        cat objh
+    else
+        echo "unsupported object type in configure_exclusion (got $objt)"
+    fi
 }
 
 test_expect_success 'part of packfile response provided as URI' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -843,10 +865,10 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
+	configure_exclusion blob "$P" other-blob 0 >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -881,18 +903,18 @@ test_expect_success 'part of packfile response provided as URI' '
 	test_line_count = 6 filelist
 '
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
+test_expect_success 'blobs packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
 
 	git init http_child &&
 
@@ -902,9 +924,65 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 		fetch "$HTTPD_URL/smart/http_parent"
 '
 
+test_expect_success 'commits(not recursively) packfile URIs with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+
+	mycommit=$(git -C "$P" rev-parse A) &&
+	echo other-blob >"$P/other-blob" &&
+	git -C "$P" add other-blob &&
+	test_commit -C "$P" B &&
+	othercommit=$(git -C "$P" rev-parse B) &&
+
+	configure_exclusion commit "$P" "$mycommit" 0 >h &&
+	configure_exclusion commit "$P" "$othercommit" 0 >h2 &&
+
+	git init http_child &&
+
+	GIT_TRACE=1 GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch "$HTTPD_URL/smart/http_parent"
+'
+
+test_expect_success 'commits(recursively) packfile URIs with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+
+	mycommit=$(git -C "$P" rev-parse A) &&
+	echo other-blob >"$P/other-blob" &&
+    git -C "$P" add other-blob &&
+	test_commit -C "$P" B &&
+	othercommit=$(git -C "$P" rev-parse B) &&
+
+	configure_exclusion commit "$P" "$mycommit" 1 >h2 &&
+	configure_exclusion commit "$P" "$othercommit" 1 >h2 &&
+
+	git init http_child &&
+
+	GIT_TRACE=1 GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch "$HTTPD_URL/smart/http_parent"
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -913,9 +991,9 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -923,8 +1001,8 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" hash-object other-blob >objh &&
 	git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
 	git -C "$P" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		"uploadpack.excludeobject" \
+		"$(cat objh) 0 $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
 
 	test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \
 		git -c protocol.version=2 \
@@ -935,16 +1013,15 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -959,7 +1036,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -976,9 +1053,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -989,7 +1066,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child &&
+	test_when_finished "rm -rf \"$P\" http_child" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1000,7 +1077,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules 0 >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1015,7 +1092,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child err &&
+	test_when_finished "rm -rf \"$P\" http_child err" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1024,9 +1101,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	echo "path = include/foo" >>"$P/.gitmodules" &&
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules 0 >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
-- 
2.31.1.442.g7e39198978.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v2 1/3] packfile-uris: support for excluding commit object
  2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
@ 2021-05-19  4:28     ` Junio C Hamano
  2021-05-20  4:46     ` Junio C Hamano
  1 sibling, 0 replies; 72+ messages in thread
From: Junio C Hamano @ 2021-05-19  4:28 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy, avarab

Teng Long <dyroneteng@gmail.com> writes:

> On the server, more sophisticated means of excluding objects should be
> supported, such as commit object. This commit introduces a new
> configuration `uploadpack.excludeobject` for this.

This "should" is not justfied at all, it seems?  What is lacking in
what we already have?  What new things does it all us to do by
adding a new configuration variable?

> The old configuration `uploadpack.blobpackfileuri` is only support to
> exclude blobs and the name has no abstract meaning, so the configruation
> name changes, to support more object types. Compatibility issues will
> not be considered because packfile-uris now is an experimental feature.

I'll let Jonathan speak up, but even for an experimental feature,
whatever new and incompatible way to do things should have a clear
advantage compared to the old way.  Sell the backward incomptibility
along that line---"it is an experimental so I'll trash it" is not,
but "by doing this it gets this much better, and migrating existing
users won't be too taxing (it is just this simple thing)" is an
acceptable way to justify such a change.

Note that I am not opposed to the proposed change (and I am not
supporting it, either).  I do have a problem with the way the change
is sold, though.

>  builtin/pack-objects.c | 53 ++++++++++++++++++++++++++++++------------
>  fetch-pack.c           |  5 ++++
>  upload-pack.c          |  5 ++--
>  3 files changed, 45 insertions(+), 18 deletions(-)

Even though the name of the configuration variable changed, and the
semantics of the value of it changed, there is no documentation
change, because...?

Because the original didn't even document the variable properly?  It
may be another reason why changing it may not impact the existing
users too much.

> @@ -132,6 +134,7 @@ struct configured_exclusion {
>  	struct oidmap_entry e;
>  	char *pack_hash_hex;
>  	char *uri;
> +	int recursively:1;
>  };
>  static struct oidmap configured_exclusions;
>  
> @@ -1291,10 +1294,16 @@ static int want_object_in_pack_one(struct packed_git *p,
>   * and its offset in these variables.
>   */
>  static int want_object_in_pack(const struct object_id *oid,
> +			       enum object_type type,
>  			       int exclude,
>  			       struct packed_git **found_pack,
>  			       off_t *found_offset)
>  {
> +	if (exclude_until_next_commit && type != OBJ_COMMIT)
> +		return 0;
> +	if (type == OBJ_COMMIT)
> +		exclude_until_next_commit = 0 ;

Lose SP before the semicolon.

Our codebase does not allow statements before declarations.  Move
all of the above down to be below the block of decls at the
beginning of the function.

>  	int want;
>  	struct list_head *pos;
>  	struct multi_pack_index *m;

> @@ -1345,6 +1354,8 @@ static int want_object_in_pack(const struct object_id *oid,
>  						&p) &&
>  				    *p == ':') {
>  					oidset_insert(&excluded_by_config, oid);
> +					if(ex->recursively && type == OBJ_COMMIT)
> +						exclude_until_next_commit = 1;

This depends on a new file-scope global variable, which means two
things.

 * if two or more threads are deciding which object to pack and not
   to pack, this code will horribly break, as they are traversing
   totally different parts of the object DAG to find out which
   objects to pack, but one thread hitting a commit to be excluded
   and setting this flag will cause other thread skip unrelated
   blobs and trees that it discovers, doesn't it?

 * even if we assume there is no concurrency and reentrancy issues
   (e.g. by forcing single-threaded operation when this feature is
   in use), the code _assumes_ a concrete order in which this helper
   function gets called, namely, non-commit objects fed to this
   helper after the helper gets a single commit object *all* belong
   to that commit.  With the current code that feeds objects as they
   are discovered during depth first traversal of the top-level tree
   starting at each commit, that assumption might hold, but it feels
   that the assumption is too much to be healty.  For example, would
   it be possible for the bitmap code to cause this helper to be
   called in different order (i.e. it might find it more convenent
   to feed a tree, a blob or a tag that is unrelated to the commit
   that was last fed to the helper)?  If so, the logic in this code
   will constrain the caller too much.

I'll stop reading for now at this place; review of the remainder may
come at a later time, but not now.

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v2 1/3] packfile-uris: support for excluding commit object
  2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
  2021-05-19  4:28     ` Junio C Hamano
@ 2021-05-20  4:46     ` Junio C Hamano
  1 sibling, 0 replies; 72+ messages in thread
From: Junio C Hamano @ 2021-05-20  4:46 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy, avarab

(continuing from yesterday's review)

> @@ -3023,7 +3040,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
>  	struct rev_info *revs = _data;
>  	struct object_info oi = OBJECT_INFO_INIT;
>  	off_t ofs;
> -	enum object_type type;
> +	static enum object_type type;
>  
>  	display_progress(progress_state, ++nr_seen);
>  
> @@ -3031,7 +3048,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
>  		return 0;
>  
>  	ofs = nth_packed_object_offset(p, pos);
> -	if (!want_object_in_pack(oid, 0, &p, &ofs))
> +	if (!want_object_in_pack(oid, type, 0, &p, &ofs))
>  		return 0;
>  
>  	oi.typep = &type;

This change is puzzling.

The first call to this helper will use BSS initialized type to call
want_object_in_pack(), and then after that call says "yes, we want
that object", packed_object_info() gets called to learn what type it
is.  And the second call would use the type of the object we
previously handled, because type is a function scope static.  Or if
we are not that lucky and multiple threads are involved, the type we
pass is from a random and unrelated object some other thread has
called this helper with.


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v3 0/3] packfile-uris: commit objects exclusio
  2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
                     ` (2 preceding siblings ...)
  2021-05-18  8:49   ` [PATCH v2 3/3] t5702: excluding commits with packfile-uris Teng Long
@ 2021-07-26  9:46   ` Teng Long
  2021-07-26  9:46     ` [PATCH v3 1/3] packfile-uris: support for excluding commit objects Teng Long
                       ` (4 more replies)
  3 siblings, 5 replies; 72+ messages in thread
From: Teng Long @ 2021-07-26  9:46 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Changes since v3:

* Ensure the forward compatibility of the old
  configuration (uploadpack.blobpackfileuri).
* Reimplementation of the commit object exclusion method (without rely
  on the "--in-commit-order" arg).
* Extend `show_object` function.
* Remove `exclude_until_next_commit` var in pack-objects.c (Concurrency
  issues).
* Restore the definition of want_object_in_pack method (problems caused
  by the new "type" parameter)

Teng Long (3):
  packfile-uris: support for excluding commit objects
  t5702: support for excluding commit objects
  packfile-uri.txt: support for excluding commit objects

 Documentation/technical/packfile-uri.txt |  20 +--
 builtin/describe.c                       |   4 +-
 builtin/pack-objects.c                   |  97 +++++++------
 builtin/rev-list.c                       |   2 +-
 fetch-pack.c                             |   6 +
 list-objects.c                           |  37 ++---
 list-objects.h                           |   2 +-
 object.c                                 |  15 +-
 object.h                                 |   4 +
 pack-bitmap.c                            |   8 +-
 reachable.c                              |   8 +-
 revision.c                               |  36 +++--
 revision.h                               |   4 +
 t/t5702-protocol-v2.sh                   | 166 ++++++++++++++++++-----
 upload-pack.c                            |   7 +
 15 files changed, 291 insertions(+), 125 deletions(-)

Range-diff against v2:
-:  ---------- > 1:  91dce385f6 packfile-uris: support for excluding commit objects
-:  ---------- > 2:  92def8c72b t5702: support for excluding commit objects
-:  ---------- > 3:  01ab2cbb34 packfile-uri.txt: support for excluding commit objects
-- 
2.31.1.443.g55c63af4c9.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v3 1/3] packfile-uris: support for excluding commit objects
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
@ 2021-07-26  9:46     ` Teng Long
  2021-07-26 18:15       ` Junio C Hamano
  2021-07-26  9:46     ` [PATCH v3 2/3] t5702: " Teng Long
                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-07-26  9:46 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

On the server, more sophisticated means of excluding objects should be
supported, such as commit object. This commit introduces a new
configuration `uploadpack.excludeobject` for this.

The reason for bringing a new configuration is for two considerations.
First, the old configuration supports a single object type (blob), which
limits the use of this feature. Secondly, the name of the old
configuration is not abstract enough, this make extension difficult. If
different object types use different configuration names, the
configuration items will be bloated and difficult to maintain, so the
new configuration is more abstract in name and easy to extend.

Although a new configuration has been introduced, the old one is
still available and compatible with the new configuration. The old
configuration `uploadpack.blobpackfileuri` only supports excluding
blobs. The new configuration `uploadpack.excludeobject` not only
supports excluding blob objects, but also supports excluding commit
objects, as well as recursively excluding tree objects and blob objects
they contain.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/describe.c     |  4 +-
 builtin/pack-objects.c | 97 ++++++++++++++++++++++++------------------
 builtin/rev-list.c     |  2 +-
 fetch-pack.c           |  6 +++
 list-objects.c         | 37 +++++++++-------
 list-objects.h         |  2 +-
 object.c               | 15 +++++--
 object.h               |  4 ++
 pack-bitmap.c          |  8 ++--
 reachable.c            |  8 ++--
 revision.c             | 36 +++++++++++-----
 revision.h             |  4 ++
 upload-pack.c          |  7 +++
 13 files changed, 148 insertions(+), 82 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 40482d8e9f..045da79b5c 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -485,9 +485,9 @@ static void process_commit(struct commit *commit, void *data)
 	pcd->current_commit = commit->object.oid;
 }
 
-static void process_object(struct object *obj, const char *path, void *data)
+static void process_object(struct object *obj, const char *path, void *show_data, void *carry_data)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = show_data;
 
 	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
 		reset_revision_walk();
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..154c98bcb6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1188,6 +1188,24 @@ static int have_duplicate_entry(const struct object_id *oid,
 	return 1;
 }
 
+static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
+{
+	int i;
+	const char *p;
+
+	if (ex) {
+		for (i = 0; i < uri_protocols.nr; i++) {
+			if (skip_prefix(ex->uri,
+					uri_protocols.items[i].string,
+					&p) &&
+			    *p == ':')
+				return 1;
+
+		}
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1293,7 +1311,8 @@ static int want_object_in_pack_one(struct packed_git *p,
 static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
-			       off_t *found_offset)
+			       off_t *found_offset,
+			       struct object *referred_commit)
 {
 	int want;
 	struct list_head *pos;
@@ -1333,21 +1352,13 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		struct configured_exclusion *ex =
-			oidmap_get(&configured_exclusions, oid);
-		int i;
-		const char *p;
-
-		if (ex) {
-			for (i = 0; i < uri_protocols.nr; i++) {
-				if (skip_prefix(ex->uri,
-						uri_protocols.items[i].string,
-						&p) &&
-				    *p == ':') {
-					oidset_insert(&excluded_by_config, oid);
-					return 0;
-				}
-			}
+		if (referred_commit) {
+			if (oidmap_get(&configured_exclusions, &referred_commit->oid) && match_packfile_uri_exclusions(referred_ex))
+				return 0;
+		}
+		if (oidmap_get(&configured_exclusions, oid) && match_packfile_uri_exclusions(ex)) {
+			oidset_insert(&excluded_by_config, oid);
+			return 0;
 		}
 	}
 
@@ -1384,7 +1395,8 @@ static const char no_closure_warning[] = N_(
 );
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
-			    const char *name, int exclude)
+			    const char *name, int exclude,
+			    struct object *referred_commit)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1394,7 +1406,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -1420,7 +1432,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
 	if (have_duplicate_entry(oid, 0))
 		return 0;
 
-	if (!want_object_in_pack(oid, 0, &pack, &offset))
+	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
 		return 0;
 
 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
@@ -1560,7 +1572,7 @@ static void add_pbase_object(struct tree_desc *tree,
 		if (name[cmplen] != '/') {
 			add_object_entry(&entry.oid,
 					 object_type(entry.mode),
-					 fullname, 1);
+					 fullname, 1, NULL);
 			return;
 		}
 		if (S_ISDIR(entry.mode)) {
@@ -1628,7 +1640,7 @@ static void add_preferred_base_object(const char *name)
 	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
+			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
 		}
 		else {
 			struct tree_desc tree;
@@ -2830,7 +2842,7 @@ static void add_tag_chain(const struct object_id *oid)
 			die(_("unable to pack objects reachable from tag %s"),
 			    oid_to_hex(oid));
 
-		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
+		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
 
 		if (tag->tagged->type != OBJ_TAG)
 			return;
@@ -2985,7 +2997,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			pack_idx_opts.flags &= ~WRITE_REV;
 		return 0;
 	}
-	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+	if (!strcmp(k, "uploadpack.excludeobject") || !strcmp(k, "uploadpack.blobpackfileuri")) {
 		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
 		const char *oid_end, *pack_end;
 		/*
@@ -2998,11 +3010,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		    *oid_end != ' ' ||
 		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
-			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
+                        die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
+                              "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
-			die(_("object already configured in another "
-			      "uploadpack.blobpackfileuri (got '%s')"), v);
+                        die(_("object already configured by an earlier "
+                              "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
 		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
 		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
@@ -3031,7 +3043,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	ofs = nth_packed_object_offset(p, pos);
-	if (!want_object_in_pack(oid, 0, &p, &ofs))
+	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
 		return 0;
 
 	oi.typep = &type;
@@ -3059,7 +3071,7 @@ static void show_commit_pack_hint(struct commit *commit, void *_data)
 }
 
 static void show_object_pack_hint(struct object *object, const char *name,
-				  void *_data)
+				  void *show_data, void *carry_data)
 {
 	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
 	if (!oe)
@@ -3224,7 +3236,7 @@ static void read_object_list_from_stdin(void)
 			die(_("expected object ID, got garbage:\n %s"), line);
 
 		add_preferred_base_object(p + 1);
-		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
+		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
 	}
 }
 
@@ -3233,7 +3245,7 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, void *data)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
@@ -3243,10 +3255,11 @@ static void show_commit(struct commit *commit, void *data)
 		propagate_island_marks(commit);
 }
 
-static void show_object(struct object *obj, const char *name, void *data)
+static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
+	struct object *referred_commit = carry_data;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
@@ -3265,7 +3278,7 @@ static void show_object(struct object *obj, const char *name, void *data)
 	}
 }
 
-static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_any(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_ANY);
 
@@ -3276,10 +3289,10 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
 	if (!has_object(the_repository, &obj->oid, 0))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
-static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
 
@@ -3290,7 +3303,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
 	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
 static int option_parse_missing_action(const struct option *opt,
@@ -3397,7 +3410,7 @@ static void add_objects_in_unpacked_packs(void)
 		QSORT(in_pack.array, in_pack.nr, ofscmp);
 		for (i = 0; i < in_pack.nr; i++) {
 			struct object *o = in_pack.array[i].object;
-			add_object_entry(&o->oid, o->type, "", 0);
+			add_object_entry(&o->oid, o->type, "", 0, NULL);
 		}
 	}
 	free(in_pack.array);
@@ -3413,7 +3426,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
 		return 0;
 	}
 
-	add_object_entry(oid, type, "", 0);
+	add_object_entry(oid, type, "", 0, NULL);
 	return 0;
 }
 
@@ -3538,7 +3551,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 
 static void record_recent_object(struct object *obj,
 				 const char *name,
-				 void *data)
+				 void *show_data,
+				 void *carry_data)
 {
 	oid_array_append(&recent_objects, &obj->oid);
 }
@@ -3831,7 +3845,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("respect islands during delta compression")),
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
-				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+				N_("exclude any configured uploadpack.excludeobject or "
+				   	"uploadpack.blobpackfileuri with this protocol")),
 		OPT_END(),
 	};
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index b4d8ea0a35..1cad33d9e8 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -266,7 +266,7 @@ static int finish_object(struct object *obj, const char *name, void *cb_data)
 	return 0;
 }
 
-static void show_object(struct object *obj, const char *name, void *cb_data)
+static void show_object(struct object *obj, const char *name, void *cb_data, void *carry_data)
 {
 	struct rev_list_info *info = cb_data;
 	struct rev_info *revs = info->revs;
diff --git a/fetch-pack.c b/fetch-pack.c
index 2318ebe680..39bb449586 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -23,6 +23,7 @@
 #include "fetch-negotiator.h"
 #include "fsck.h"
 #include "shallow.h"
+#include "strmap.h"
 
 static int transfer_unpack_limit = -1;
 static int fetch_unpack_limit = -1;
@@ -1576,6 +1577,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	struct string_list packfile_uris = STRING_LIST_INIT_DUP;
 	int i;
 	struct strvec index_pack_args = STRVEC_INIT;
+	struct strset uris;
 
 	negotiator = &negotiator_alloc;
 	fetch_negotiator_init(r, negotiator);
@@ -1677,6 +1679,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		}
 	}
 
+	strset_init(&uris);
 	for (i = 0; i < packfile_uris.nr; i++) {
 		int j;
 		struct child_process cmd = CHILD_PROCESS_INIT;
@@ -1684,6 +1687,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		const char *uri = packfile_uris.items[i].string +
 			the_hash_algo->hexsz + 1;
 
+		if (!strset_add(&uris, uri))
+			continue;
 		strvec_push(&cmd.args, "http-fetch");
 		strvec_pushf(&cmd.args, "--packfile=%.*s",
 			     (int) the_hash_algo->hexsz,
@@ -1727,6 +1732,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 						 get_object_directory(),
 						 packname));
 	}
+	strset_clear(&uris);
 	string_list_clear(&packfile_uris, 0);
 	strvec_clear(&index_pack_args);
 
diff --git a/list-objects.c b/list-objects.c
index e19589baa0..fa3156dc89 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -24,7 +24,8 @@ struct traversal_context {
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -60,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data);
+		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -97,11 +98,13 @@ static void process_gitlink(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name);
+			 const char *name,
+			 struct object *referred_commit);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
-				  struct strbuf *base)
+				  struct strbuf *base,
+				  struct object *referred_commit)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -129,7 +132,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path);
+			process_tree(ctx, t, base, entry.path, referred_commit);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -142,7 +145,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path);
+			process_blob(ctx, b, base, entry.path, referred_commit);
 		}
 	}
 }
@@ -150,7 +153,8 @@ static void process_tree_contents(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -191,14 +195,14 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base);
+		process_tree_contents(ctx, tree, base, referred_commit);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -207,7 +211,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -314,9 +318,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
 	}
 }
 
-static void add_pending_tree(struct rev_info *revs, struct tree *tree)
+static void add_pending_tree(struct rev_info *revs,  struct tree *tree, struct object *referred_commit)
 {
-	add_pending_object(revs, &tree->object, "");
+	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
 }
 
 static void traverse_trees_and_blobs(struct traversal_context *ctx,
@@ -329,23 +333,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
+		struct object *referred_commit = pending->referred_commit;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data);
+			ctx->show_object(obj, name, ctx->show_data, referred_commit);
 			continue;
 		}
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
@@ -370,7 +375,7 @@ static void do_traverse(struct traversal_context *ctx)
 		else if (get_commit_tree(commit)) {
 			struct tree *tree = get_commit_tree(commit);
 			tree->object.flags |= NOT_USER_GIVEN;
-			add_pending_tree(ctx->revs, tree);
+			add_pending_tree(ctx->revs, tree, &commit->object);
 		} else if (commit->object.parsed) {
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
diff --git a/list-objects.h b/list-objects.h
index a952680e46..ab946d34db 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -6,7 +6,7 @@ struct object;
 struct rev_info;
 
 typedef void (*show_commit_fn)(struct commit *, void *);
-typedef void (*show_object_fn)(struct object *, const char *, void *);
+typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
 void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
 
 typedef void (*show_edge_fn)(struct commit *);
diff --git a/object.c b/object.c
index 14188453c5..6b1ce2fcde 100644
--- a/object.c
+++ b/object.c
@@ -322,9 +322,10 @@ void object_list_free(struct object_list **list)
  */
 static char object_array_slopbuf[1];
 
-void add_object_array_with_path(struct object *obj, const char *name,
-				struct object_array *array,
-				unsigned mode, const char *path)
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
+						    struct object_array *array,
+						    unsigned mode, const char *path,
+						    struct object *referred_commit)
 {
 	unsigned nr = array->nr;
 	unsigned alloc = array->alloc;
@@ -339,6 +340,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	}
 	entry = &objects[nr];
 	entry->item = obj;
+	entry->referred_commit = referred_commit;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -354,6 +356,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	array->nr = ++nr;
 }
 
+void add_object_array_with_path(struct object *obj, const char *name,
+				struct object_array *array,
+				unsigned mode, const char *path)
+{
+	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
+}
+
 void add_object_array(struct object *obj, const char *name, struct object_array *array)
 {
 	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
diff --git a/object.h b/object.h
index 87a6da47c8..de9f15b97d 100644
--- a/object.h
+++ b/object.h
@@ -43,6 +43,7 @@ struct object_array {
 	unsigned int alloc;
 	struct object_array_entry {
 		struct object *item;
+		struct object *referred_commit;
 		/*
 		 * name or NULL.  If non-NULL, the memory pointed to
 		 * is owned by this object *except* if it points at
@@ -157,6 +158,9 @@ void object_list_free(struct object_list **list);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
+						    unsigned mode, const char *path,
+						    struct object *referred_commit);
 
 /*
  * Returns NULL if the array is empty. Otherwise, returns the last object
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 3ed15431cd..516eb235da 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -459,9 +459,9 @@ struct bitmap_show_data {
 	struct bitmap *base;
 };
 
-static void show_object(struct object *object, const char *name, void *data_)
+static void show_object(struct object *object, const char *name, void *show_data, void *carry_data)
 {
-	struct bitmap_show_data *data = data_;
+	struct bitmap_show_data *data = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
@@ -1268,9 +1268,9 @@ struct bitmap_test_data {
 };
 
 static void test_show_object(struct object *object, const char *name,
-			     void *data)
+			     void *show_data, void *carry_data)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
diff --git a/reachable.c b/reachable.c
index 77a60c70a5..ebd817c446 100644
--- a/reachable.c
+++ b/reachable.c
@@ -47,14 +47,14 @@ static int add_one_ref(const char *path, const struct object_id *oid,
  * The traversal will have already marked us as SEEN, so we
  * only need to handle any progress reporting here.
  */
-static void mark_object(struct object *obj, const char *name, void *data)
+static void mark_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
-	update_progress(data);
+	update_progress(show_data);
 }
 
-static void mark_commit(struct commit *c, void *data)
+static void mark_commit(struct commit *c, void *show_data)
 {
-	mark_object(&c->object, NULL, data);
+	mark_object(&c->object, NULL, show_data,  NULL);
 }
 
 struct recent_data {
diff --git a/revision.c b/revision.c
index 4853c85d0b..da0ce0e3f2 100644
--- a/revision.c
+++ b/revision.c
@@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
 	commit_stack_clear(&pending);
 }
 
-static void add_pending_object_with_path(struct rev_info *revs,
-					 struct object *obj,
-					 const char *name, unsigned mode,
-					 const char *path)
+static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
+							     struct object *obj,
+							     const char *name, unsigned mode,
+							     const char *path,
+							     struct object *referred_commit)
 {
 	struct interpret_branch_name_options options = { 0 };
 	if (!obj)
@@ -326,20 +327,36 @@ static void add_pending_object_with_path(struct rev_info *revs,
 		strbuf_release(&buf);
 		return; /* do not add the commit itself */
 	}
-	add_object_array_with_path(obj, name, &revs->pending, mode, path);
+	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
+}
+
+static void add_pending_object_with_path(struct rev_info *revs,
+					 struct object *obj,
+					 const char *name, unsigned mode,
+					 const char *path) {
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
 }
 
 static void add_pending_object_with_mode(struct rev_info *revs,
 					 struct object *obj,
-					 const char *name, unsigned mode)
+					 const char *name, unsigned mode,
+					 struct object *referred_commit)
 {
-	add_pending_object_with_path(revs, obj, name, mode, NULL);
+
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
+}
+
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit)
+{
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
 }
 
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name)
 {
-	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
 }
 
 void add_head_to_pending(struct rev_info *revs)
@@ -2764,7 +2781,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			continue;
 		}
 
-
 		if (handle_revision_arg(arg, revs, flags, revarg_opt)) {
 			int j;
 			if (seen_dashdash || *arg == '^')
@@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
 			diagnose_missing_default(revs->def);
 		object = get_reference(revs, revs->def, &oid, 0);
-		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
+		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
 	}
 
 	/* Did the user ask for any diff output? Run the diff! */
diff --git a/revision.h b/revision.h
index a24f72dcd1..8a632e3587 100644
--- a/revision.h
+++ b/revision.h
@@ -424,6 +424,10 @@ void show_object_with_name(FILE *, struct object *, const char *);
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name);
 
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit);
+
 void add_pending_oid(struct rev_info *revs,
 		     const char *name, const struct object_id *oid,
 		     unsigned int flags);
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..d26fb351a3 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
 		}
+
+		if (!repo_config_get_string(the_repository,
+					    "uploadpack.excludeobject",
+					    &str) && str) {
+			strbuf_addstr(value, " packfile-uris");
+			free(str);
+		}
 	}
 
 	return 1;
-- 
2.31.1.443.g55c63af4c9.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v3 2/3] t5702: support for excluding commit objects
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
  2021-07-26  9:46     ` [PATCH v3 1/3] packfile-uris: support for excluding commit objects Teng Long
@ 2021-07-26  9:46     ` Teng Long
  2021-07-26 15:03       ` Ævar Arnfjörð Bjarmason
  2021-07-26  9:46     ` [PATCH v3 3/3] packfile-uri.txt: " Teng Long
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-07-26  9:46 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 166 +++++++++++++++++++++++++++++++++--------
 1 file changed, 133 insertions(+), 33 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..bcf21e1445 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -753,7 +753,7 @@ test_expect_success 'ls-remote with v2 http sends only one POST' '
 '
 
 test_expect_success 'push with http:// and a config of v2 does not request v2' '
-	test_when_finished "rm -f log" &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 	# Till v2 for push is designed, make sure that if a client has
 	# protocol.version configured to use v2, that the client instead falls
 	# back and uses v0.
@@ -776,7 +776,7 @@ test_expect_success 'push with http:// and a config of v2 does not request v2' '
 '
 
 test_expect_success 'when server sends "ready", expect DELIM' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -796,7 +796,7 @@ test_expect_success 'when server sends "ready", expect DELIM' '
 '
 
 test_expect_success 'when server does not send "ready", expect FLUSH' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -824,17 +824,44 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+    objt="$1"
+    P="$2"
+	version="$3"
+
+    oldc="uploadpack.blobpackfileuri"
+    newc="uploadpack.excludeobject"
+	configkey=""
+	if test "$version" = "0"
+    then
+    	configkey="$oldc"
+    else
+    	configkey="$newc"
+	fi
+
+    if test "$objt" = "blob"
+    then
+        git -C "$P" hash-object "$3" >objh &&
+        git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+        git -C "$P" config --add \
+                "$configkey" \
+                "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+        cat objh
+    elif test "$objt" = "commit" || test "$objt" = "tag"
+    then
+        echo "$3" >objh
+		git -C "$2" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+        git -C "$P" config --add \
+                "$configkey" \
+                "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+        cat objh
+    else
+        echo "unsupported object type in configure_exclusion (got $objt)"
+    fi
 }
 
 test_expect_success 'part of packfile response provided as URI' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -843,10 +870,10 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob 0 >h &&
+	configure_exclusion blob "$P" other-blob 0 >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -881,18 +908,40 @@ test_expect_success 'part of packfile response provided as URI' '
 	test_line_count = 6 filelist
 '
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
+test_expect_success 'blobs packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
+
+	configure_exclusion blob "$P" my-blob >h &&
+
+	git init http_child &&
+
+	GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch "$HTTPD_URL/smart/http_parent"
+'
+
+test_expect_success 'blobs packfile URIs(Compatible with the old) with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	# with the old "uploadpack.blobpackfileuri" configure
+	configure_exclusion blob "$P" my-blob 0 >h &&
 
 	git init http_child &&
 
@@ -902,9 +951,60 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 		fetch "$HTTPD_URL/smart/http_parent"
 '
 
+test_expect_success 'commits packfile URIs with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+
+	mycommit=$(git -C "$P" rev-parse A) &&
+	echo other-blob >"$P/other-blob" &&
+    git -C "$P" add other-blob &&
+	test_commit -C "$P" B &&
+	othercommit=$(git -C "$P" rev-parse B) &&
+
+	configure_exclusion commit "$P" "$mycommit" >h2 &&
+	configure_exclusion commit "$P" "$othercommit" >h2 &&
+
+	git init http_child &&
+
+	GIT_TRACE=1 GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch "$HTTPD_URL/smart/http_parent"
+'
+
+test_expect_success 'tags packfile URIs with fetch instead of clone' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+	git -C "$P" tag -a -m "annotated_tag" tagA &&
+	tagObj=$(git -C "$P" rev-parse tagA) &&
+
+	configure_exclusion tag "$P" "$tagObj" >h2 &&
+
+	git init http_child &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET=1 GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch --tags "$HTTPD_URL/smart/http_parent"
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -913,9 +1013,9 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -923,7 +1023,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" hash-object other-blob >objh &&
 	git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
 	git -C "$P" config --add \
-		"uploadpack.blobpackfileuri" \
+		"uploadpack.excludeobject" \
 		"$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
 
 	test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \
@@ -933,18 +1033,18 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	test_i18ngrep "pack downloaded from.*does not match expected hash" err
 '
 
+
 test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -959,7 +1059,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -976,9 +1076,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -989,7 +1089,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child &&
+	test_when_finished "rm -rf \"$P\" http_child" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1000,7 +1100,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1015,7 +1115,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child err &&
+	test_when_finished "rm -rf \"$P\" http_child err" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1024,9 +1124,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	echo "path = include/foo" >>"$P/.gitmodules" &&
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
-- 
2.31.1.443.g55c63af4c9.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v3 3/3] packfile-uri.txt: support for excluding commit objects
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
  2021-07-26  9:46     ` [PATCH v3 1/3] packfile-uris: support for excluding commit objects Teng Long
  2021-07-26  9:46     ` [PATCH v3 2/3] t5702: " Teng Long
@ 2021-07-26  9:46     ` Teng Long
  2021-07-26 20:52       ` Junio C Hamano
  2021-07-26 12:34     ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Ævar Arnfjörð Bjarmason
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
  4 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-07-26  9:46 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, avarab, Teng Long

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/technical/packfile-uri.txt | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..2532db0e99 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,16 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
 This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more entries with the format:
+
+    uploadpack.excludeobject=<object-hash> <recursively> <pack-hash> <uri>
+
+Value <object-hash> is the key of entry, and the object type can be a blob
+or commit. Whenever the list of objects to be sent is assembled, all such
+objects are excluded, replaced with URIs. At the same time, for the old
+configuration `uploadpack.blobPackfileUri=<sha1> <pack-hash> <uri>` is
+still compatible for now, but this configuration only supports the
+exclusion of blob objects.
 
 Client design
 -------------
@@ -65,9 +68,6 @@ The protocol design allows some evolution of the server and client without any
 need for protocol changes, so only a small-scoped design is included here to
 form the MVP. For example, the following can be done:
 
- * On the server, more sophisticated means of excluding objects (e.g. by
-   specifying a commit to represent that commit and all objects that it
-   references).
  * On the client, resumption of clone. If a clone is interrupted, information
    could be recorded in the repository's config and a "clone-resume" command
    can resume the clone in progress. (Resumption of subsequent fetches is more
-- 
2.31.1.443.g55c63af4c9.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 0/3] packfile-uris: commit objects exclusio
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
                       ` (2 preceding siblings ...)
  2021-07-26  9:46     ` [PATCH v3 3/3] packfile-uri.txt: " Teng Long
@ 2021-07-26 12:34     ` Ævar Arnfjörð Bjarmason
  2021-08-11  1:48       ` Teng Long
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
  4 siblings, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-26 12:34 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy


On Mon, Jul 26 2021, Teng Long wrote:

> Range-diff against v2:
> -:  ---------- > 1:  91dce385f6 packfile-uris: support for excluding commit objects
> -:  ---------- > 2:  92def8c72b t5702: support for excluding commit objects
> -:  ---------- > 3:  01ab2cbb34 packfile-uri.txt: support for excluding commit objects

It looks like you provided the wrong base for the --range-diff (likely
master?), so it's not a diff against v2, just whatever you used as a
base.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 2/3] t5702: support for excluding commit objects
  2021-07-26  9:46     ` [PATCH v3 2/3] t5702: " Teng Long
@ 2021-07-26 15:03       ` Ævar Arnfjörð Bjarmason
  2021-08-11  1:46         ` [PATCH v3 1/3] packfile-uris: " Teng Long
  0 siblings, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-07-26 15:03 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy


On Mon, Jul 26 2021, Teng Long wrote:

> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
>  t/t5702-protocol-v2.sh | 166 +++++++++++++++++++++++++++++++++--------
>  1 file changed, 133 insertions(+), 33 deletions(-)
>
> diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
> index 2e1243ca40..bcf21e1445 100755
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -753,7 +753,7 @@ test_expect_success 'ls-remote with v2 http sends only one POST' '
>  '
>  
>  test_expect_success 'push with http:// and a config of v2 does not request v2' '
> -	test_when_finished "rm -f log" &&
> +	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
>  	# Till v2 for push is designed, make sure that if a client has
>  	# protocol.version configured to use v2, that the client instead falls
>  	# back and uses v0.
> @@ -776,7 +776,7 @@ test_expect_success 'push with http:// and a config of v2 does not request v2' '
>  '
>  
>  test_expect_success 'when server sends "ready", expect DELIM' '
> -	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child &&
> +	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child" &&
>  
>  	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
>  	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
> @@ -796,7 +796,7 @@ test_expect_success 'when server sends "ready", expect DELIM' '
>  '
>  
>  test_expect_success 'when server does not send "ready", expect FLUSH' '
> -	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
> +	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
>  
>  	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
>  	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
> @@ -824,17 +824,44 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
>  '

This looks like a good cleanup, but should be split into another cleanup
commit. It looks unrelated.

>  configure_exclusion () {
> -	git -C "$1" hash-object "$2" >objh &&
> -	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
> -	git -C "$1" config --add \
> -		"uploadpack.blobpackfileuri" \
> -		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> -	cat objh
> +    objt="$1"
> +    P="$2"
> +	version="$3"
> +
> +    oldc="uploadpack.blobpackfileuri"
> +    newc="uploadpack.excludeobject"
> +	configkey=""
> +	if test "$version" = "0"
> +    then
> +    	configkey="$oldc"
> +    else
> +    	configkey="$newc"
> +	fi

You've got all sorts of mixed space/tab indent here.

> +    if test "$objt" = "blob"
> +    then
> +        git -C "$P" hash-object "$3" >objh &&
> +        git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
> +        git -C "$P" config --add \
> +                "$configkey" \
> +                "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> +        cat objh
> +    elif test "$objt" = "commit" || test "$objt" = "tag"
> +    then
> +        echo "$3" >objh
> +		git -C "$2" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
> +        git -C "$P" config --add \
> +                "$configkey" \
> +                "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
> +        cat objh
> +    else
> +        echo "unsupported object type in configure_exclusion (got $objt)"
> +    fi
>  }
>  
>  test_expect_success 'part of packfile response provided as URI' '
>  	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> -	rm -rf "$P" http_child log &&
> +	test_when_finished "rm -rf \"$P\" http_child log" &&
>  
>  	git init "$P" &&
>  	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
> @@ -843,10 +870,10 @@ test_expect_success 'part of packfile response provided as URI' '
>  	git -C "$P" add my-blob &&
>  	echo other-blob >"$P/other-blob" &&
>  	git -C "$P" add other-blob &&
> -	git -C "$P" commit -m x &&
> +	test_commit -C "$P" A &&
>  
> -	configure_exclusion "$P" my-blob >h &&
> -	configure_exclusion "$P" other-blob >h2 &&
> +	configure_exclusion blob "$P" my-blob 0 >h &&
> +	configure_exclusion blob "$P" other-blob 0 >h2 &&
>  
>  	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
>  	git -c protocol.version=2 \
> @@ -881,18 +908,40 @@ test_expect_success 'part of packfile response provided as URI' '
>  	test_line_count = 6 filelist
>  '
>  
> -test_expect_success 'packfile URIs with fetch instead of clone' '
> +test_expect_success 'blobs packfile URIs with fetch instead of clone' '
>  	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> -	rm -rf "$P" http_child log &&
> +	test_when_finished "rm -rf \"$P\" http_child log" &&
>  
>  	git init "$P" &&
>  	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
>  
>  	echo my-blob >"$P/my-blob" &&
>  	git -C "$P" add my-blob &&
> -	git -C "$P" commit -m x &&
> +	test_commit -C "$P" A &&
> +
> +	configure_exclusion blob "$P" my-blob >h &&
> +
> +	git init http_child &&
> +
> +	GIT_TEST_SIDEBAND_ALL=1 \
> +	git -C http_child -c protocol.version=2 \
> +		-c fetch.uriprotocols=http,https \

Isn't accepting http and https the default? Is this guarding against
config leak from a previous test? Ditto some later changes.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 1/3] packfile-uris: support for excluding commit objects
  2021-07-26  9:46     ` [PATCH v3 1/3] packfile-uris: support for excluding commit objects Teng Long
@ 2021-07-26 18:15       ` Junio C Hamano
  2021-07-26 19:45         ` Felipe Contreras
  2021-08-11  1:44         ` Teng Long
  0 siblings, 2 replies; 72+ messages in thread
From: Junio C Hamano @ 2021-07-26 18:15 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy, avarab

Teng Long <dyroneteng@gmail.com> writes:

> On the server, more sophisticated means of excluding objects should be
> supported, such as commit object. This commit introduces a new
> configuration `uploadpack.excludeobject` for this.

Please avoid adjectives that express subjective values, like
"sophisticated".  Readers will expect a lot more sophistication than
your code actually offers and will be disappointed ("wow, that would
be wonderful if we can say 'exclude commits made by bots, and those
older than 3 months'---eh, you cannot do that?  where is your
sophistication then?").

Please avoid "should" without first describing the background for
"why it should".  It would help if you briefly describe what we
currently have and its limitation before this first paragraph
(i.e. your "we can already exclude only blob objects" would become
major part of the explanation, but you'd need to present in what
situations it would help to be able to exclude other types).

This commit is probalby doing too many things at once.  For example,
refactoring like creation of match_packfile_uri_exclusions() helper
function out of existing code (there probably are others) can and
should be done as separate preparatory steps before the API gets
modified (e.g. process-object callbacks gain an extra parameter) in
tree-wide way.

And by slimming the primary step that introduces the new feature,
there will be a space to also add documentation and test in the same
step, which would help reviewers.  With the current structure of the
series, with a code dump in the first step with only a vague promiss
of "sophistication" without documentation updates, reviewers cannot
even tell how the "commit object" is used easily.

Thanks.

>  builtin/describe.c     |  4 +-
>  builtin/pack-objects.c | 97 ++++++++++++++++++++++++------------------
>  builtin/rev-list.c     |  2 +-
>  fetch-pack.c           |  6 +++
>  list-objects.c         | 37 +++++++++-------
>  list-objects.h         |  2 +-
>  object.c               | 15 +++++--
>  object.h               |  4 ++
>  pack-bitmap.c          |  8 ++--
>  reachable.c            |  8 ++--
>  revision.c             | 36 +++++++++++-----
>  revision.h             |  4 ++
>  upload-pack.c          |  7 +++
>  13 files changed, 148 insertions(+), 82 deletions(-)
>
> diff --git a/builtin/describe.c b/builtin/describe.c
> index 40482d8e9f..045da79b5c 100644
> --- a/builtin/describe.c
> +++ b/builtin/describe.c
> @@ -485,9 +485,9 @@ static void process_commit(struct commit *commit, void *data)
>  	pcd->current_commit = commit->object.oid;
>  }
>  
> -static void process_object(struct object *obj, const char *path, void *data)
> +static void process_object(struct object *obj, const char *path, void *show_data, void *carry_data)
>  {
> -	struct process_commit_data *pcd = data;
> +	struct process_commit_data *pcd = show_data;
>  
>  	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
>  		reset_revision_walk();
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 6d13cd3e1a..154c98bcb6 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1188,6 +1188,24 @@ static int have_duplicate_entry(const struct object_id *oid,
>  	return 1;
>  }
>  
> +static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
> +{
> +	int i;
> +	const char *p;
> +
> +	if (ex) {
> +		for (i = 0; i < uri_protocols.nr; i++) {
> +			if (skip_prefix(ex->uri,
> +					uri_protocols.items[i].string,
> +					&p) &&
> +			    *p == ':')
> +				return 1;
> +
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int want_found_object(const struct object_id *oid, int exclude,
>  			     struct packed_git *p)
>  {
> @@ -1293,7 +1311,8 @@ static int want_object_in_pack_one(struct packed_git *p,
>  static int want_object_in_pack(const struct object_id *oid,
>  			       int exclude,
>  			       struct packed_git **found_pack,
> -			       off_t *found_offset)
> +			       off_t *found_offset,
> +			       struct object *referred_commit)
>  {
>  	int want;
>  	struct list_head *pos;
> @@ -1333,21 +1352,13 @@ static int want_object_in_pack(const struct object_id *oid,
>  	}
>  
>  	if (uri_protocols.nr) {
> -		struct configured_exclusion *ex =
> -			oidmap_get(&configured_exclusions, oid);
> -		int i;
> -		const char *p;
> -
> -		if (ex) {
> -			for (i = 0; i < uri_protocols.nr; i++) {
> -				if (skip_prefix(ex->uri,
> -						uri_protocols.items[i].string,
> -						&p) &&
> -				    *p == ':') {
> -					oidset_insert(&excluded_by_config, oid);
> -					return 0;
> -				}
> -			}
> +		if (referred_commit) {
> +			if (oidmap_get(&configured_exclusions, &referred_commit->oid) && match_packfile_uri_exclusions(referred_ex))
> +				return 0;
> +		}
> +		if (oidmap_get(&configured_exclusions, oid) && match_packfile_uri_exclusions(ex)) {
> +			oidset_insert(&excluded_by_config, oid);
> +			return 0;
>  		}
>  	}
>  
> @@ -1384,7 +1395,8 @@ static const char no_closure_warning[] = N_(
>  );
>  
>  static int add_object_entry(const struct object_id *oid, enum object_type type,
> -			    const char *name, int exclude)
> +			    const char *name, int exclude,
> +			    struct object *referred_commit)
>  {
>  	struct packed_git *found_pack = NULL;
>  	off_t found_offset = 0;
> @@ -1394,7 +1406,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
>  	if (have_duplicate_entry(oid, exclude))
>  		return 0;
>  
> -	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
> +	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
>  		/* The pack is missing an object, so it will not have closure */
>  		if (write_bitmap_index) {
>  			if (write_bitmap_index != WRITE_BITMAP_QUIET)
> @@ -1420,7 +1432,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
>  	if (have_duplicate_entry(oid, 0))
>  		return 0;
>  
> -	if (!want_object_in_pack(oid, 0, &pack, &offset))
> +	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
>  		return 0;
>  
>  	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
> @@ -1560,7 +1572,7 @@ static void add_pbase_object(struct tree_desc *tree,
>  		if (name[cmplen] != '/') {
>  			add_object_entry(&entry.oid,
>  					 object_type(entry.mode),
> -					 fullname, 1);
> +					 fullname, 1, NULL);
>  			return;
>  		}
>  		if (S_ISDIR(entry.mode)) {
> @@ -1628,7 +1640,7 @@ static void add_preferred_base_object(const char *name)
>  	cmplen = name_cmp_len(name);
>  	for (it = pbase_tree; it; it = it->next) {
>  		if (cmplen == 0) {
> -			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
> +			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
>  		}
>  		else {
>  			struct tree_desc tree;
> @@ -2830,7 +2842,7 @@ static void add_tag_chain(const struct object_id *oid)
>  			die(_("unable to pack objects reachable from tag %s"),
>  			    oid_to_hex(oid));
>  
> -		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
> +		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
>  
>  		if (tag->tagged->type != OBJ_TAG)
>  			return;
> @@ -2985,7 +2997,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  			pack_idx_opts.flags &= ~WRITE_REV;
>  		return 0;
>  	}
> -	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
> +	if (!strcmp(k, "uploadpack.excludeobject") || !strcmp(k, "uploadpack.blobpackfileuri")) {
>  		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
>  		const char *oid_end, *pack_end;
>  		/*
> @@ -2998,11 +3010,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  		    *oid_end != ' ' ||
>  		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
>  		    *pack_end != ' ')
> -			die(_("value of uploadpack.blobpackfileuri must be "
> -			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
> +                        die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
> +                              "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
>  		if (oidmap_get(&configured_exclusions, &ex->e.oid))
> -			die(_("object already configured in another "
> -			      "uploadpack.blobpackfileuri (got '%s')"), v);
> +                        die(_("object already configured by an earlier "
> +                              "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
>  		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
>  		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
>  		ex->uri = xstrdup(pack_end + 1);
> @@ -3031,7 +3043,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
>  		return 0;
>  
>  	ofs = nth_packed_object_offset(p, pos);
> -	if (!want_object_in_pack(oid, 0, &p, &ofs))
> +	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
>  		return 0;
>  
>  	oi.typep = &type;
> @@ -3059,7 +3071,7 @@ static void show_commit_pack_hint(struct commit *commit, void *_data)
>  }
>  
>  static void show_object_pack_hint(struct object *object, const char *name,
> -				  void *_data)
> +				  void *show_data, void *carry_data)
>  {
>  	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
>  	if (!oe)
> @@ -3224,7 +3236,7 @@ static void read_object_list_from_stdin(void)
>  			die(_("expected object ID, got garbage:\n %s"), line);
>  
>  		add_preferred_base_object(p + 1);
> -		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
> +		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
>  	}
>  }
>  
> @@ -3233,7 +3245,7 @@ static void read_object_list_from_stdin(void)
>  
>  static void show_commit(struct commit *commit, void *data)
>  {
> -	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
> +	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
>  	commit->object.flags |= OBJECT_ADDED;
>  
>  	if (write_bitmap_index)
> @@ -3243,10 +3255,11 @@ static void show_commit(struct commit *commit, void *data)
>  		propagate_island_marks(commit);
>  }
>  
> -static void show_object(struct object *obj, const char *name, void *data)
> +static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
>  {
> +	struct object *referred_commit = carry_data;
>  	add_preferred_base_object(name);
> -	add_object_entry(&obj->oid, obj->type, name, 0);
> +	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
>  	obj->flags |= OBJECT_ADDED;
>  
>  	if (use_delta_islands) {
> @@ -3265,7 +3278,7 @@ static void show_object(struct object *obj, const char *name, void *data)
>  	}
>  }
>  
> -static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
> +static void show_object__ma_allow_any(struct object *obj, const char *name, void *show_data, void *carry_data)
>  {
>  	assert(arg_missing_action == MA_ALLOW_ANY);
>  
> @@ -3276,10 +3289,10 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
>  	if (!has_object(the_repository, &obj->oid, 0))
>  		return;
>  
> -	show_object(obj, name, data);
> +	show_object(obj, name, show_data, carry_data);
>  }
>  
> -static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
> +static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *show_data, void *carry_data)
>  {
>  	assert(arg_missing_action == MA_ALLOW_PROMISOR);
>  
> @@ -3290,7 +3303,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
>  	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
>  		return;
>  
> -	show_object(obj, name, data);
> +	show_object(obj, name, show_data, carry_data);
>  }
>  
>  static int option_parse_missing_action(const struct option *opt,
> @@ -3397,7 +3410,7 @@ static void add_objects_in_unpacked_packs(void)
>  		QSORT(in_pack.array, in_pack.nr, ofscmp);
>  		for (i = 0; i < in_pack.nr; i++) {
>  			struct object *o = in_pack.array[i].object;
> -			add_object_entry(&o->oid, o->type, "", 0);
> +			add_object_entry(&o->oid, o->type, "", 0, NULL);
>  		}
>  	}
>  	free(in_pack.array);
> @@ -3413,7 +3426,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
>  		return 0;
>  	}
>  
> -	add_object_entry(oid, type, "", 0);
> +	add_object_entry(oid, type, "", 0, NULL);
>  	return 0;
>  }
>  
> @@ -3538,7 +3551,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
>  
>  static void record_recent_object(struct object *obj,
>  				 const char *name,
> -				 void *data)
> +				 void *show_data,
> +				 void *carry_data)
>  {
>  	oid_array_append(&recent_objects, &obj->oid);
>  }
> @@ -3831,7 +3845,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>  			 N_("respect islands during delta compression")),
>  		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
>  				N_("protocol"),
> -				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
> +				N_("exclude any configured uploadpack.excludeobject or "
> +				   	"uploadpack.blobpackfileuri with this protocol")),
>  		OPT_END(),
>  	};
>  
> diff --git a/builtin/rev-list.c b/builtin/rev-list.c
> index b4d8ea0a35..1cad33d9e8 100644
> --- a/builtin/rev-list.c
> +++ b/builtin/rev-list.c
> @@ -266,7 +266,7 @@ static int finish_object(struct object *obj, const char *name, void *cb_data)
>  	return 0;
>  }
>  
> -static void show_object(struct object *obj, const char *name, void *cb_data)
> +static void show_object(struct object *obj, const char *name, void *cb_data, void *carry_data)
>  {
>  	struct rev_list_info *info = cb_data;
>  	struct rev_info *revs = info->revs;
> diff --git a/fetch-pack.c b/fetch-pack.c
> index 2318ebe680..39bb449586 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -23,6 +23,7 @@
>  #include "fetch-negotiator.h"
>  #include "fsck.h"
>  #include "shallow.h"
> +#include "strmap.h"
>  
>  static int transfer_unpack_limit = -1;
>  static int fetch_unpack_limit = -1;
> @@ -1576,6 +1577,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	struct string_list packfile_uris = STRING_LIST_INIT_DUP;
>  	int i;
>  	struct strvec index_pack_args = STRVEC_INIT;
> +	struct strset uris;
>  
>  	negotiator = &negotiator_alloc;
>  	fetch_negotiator_init(r, negotiator);
> @@ -1677,6 +1679,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		}
>  	}
>  
> +	strset_init(&uris);
>  	for (i = 0; i < packfile_uris.nr; i++) {
>  		int j;
>  		struct child_process cmd = CHILD_PROCESS_INIT;
> @@ -1684,6 +1687,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		const char *uri = packfile_uris.items[i].string +
>  			the_hash_algo->hexsz + 1;
>  
> +		if (!strset_add(&uris, uri))
> +			continue;
>  		strvec_push(&cmd.args, "http-fetch");
>  		strvec_pushf(&cmd.args, "--packfile=%.*s",
>  			     (int) the_hash_algo->hexsz,
> @@ -1727,6 +1732,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  						 get_object_directory(),
>  						 packname));
>  	}
> +	strset_clear(&uris);
>  	string_list_clear(&packfile_uris, 0);
>  	strvec_clear(&index_pack_args);
>  
> diff --git a/list-objects.c b/list-objects.c
> index e19589baa0..fa3156dc89 100644
> --- a/list-objects.c
> +++ b/list-objects.c
> @@ -24,7 +24,8 @@ struct traversal_context {
>  static void process_blob(struct traversal_context *ctx,
>  			 struct blob *blob,
>  			 struct strbuf *path,
> -			 const char *name)
> +			 const char *name,
> +			 struct object *referred_commit)
>  {
>  	struct object *obj = &blob->object;
>  	size_t pathlen;
> @@ -60,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, path->buf, ctx->show_data);
> +		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
>  	strbuf_setlen(path, pathlen);
>  }
>  
> @@ -97,11 +98,13 @@ static void process_gitlink(struct traversal_context *ctx,
>  static void process_tree(struct traversal_context *ctx,
>  			 struct tree *tree,
>  			 struct strbuf *base,
> -			 const char *name);
> +			 const char *name,
> +			 struct object *referred_commit);
>  
>  static void process_tree_contents(struct traversal_context *ctx,
>  				  struct tree *tree,
> -				  struct strbuf *base)
> +				  struct strbuf *base,
> +				  struct object *referred_commit)
>  {
>  	struct tree_desc desc;
>  	struct name_entry entry;
> @@ -129,7 +132,7 @@ static void process_tree_contents(struct traversal_context *ctx,
>  				    entry.path, oid_to_hex(&tree->object.oid));
>  			}
>  			t->object.flags |= NOT_USER_GIVEN;
> -			process_tree(ctx, t, base, entry.path);
> +			process_tree(ctx, t, base, entry.path, referred_commit);
>  		}
>  		else if (S_ISGITLINK(entry.mode))
>  			process_gitlink(ctx, entry.oid.hash,
> @@ -142,7 +145,7 @@ static void process_tree_contents(struct traversal_context *ctx,
>  				    entry.path, oid_to_hex(&tree->object.oid));
>  			}
>  			b->object.flags |= NOT_USER_GIVEN;
> -			process_blob(ctx, b, base, entry.path);
> +			process_blob(ctx, b, base, entry.path, referred_commit);
>  		}
>  	}
>  }
> @@ -150,7 +153,8 @@ static void process_tree_contents(struct traversal_context *ctx,
>  static void process_tree(struct traversal_context *ctx,
>  			 struct tree *tree,
>  			 struct strbuf *base,
> -			 const char *name)
> +			 const char *name,
> +			 struct object *referred_commit)
>  {
>  	struct object *obj = &tree->object;
>  	struct rev_info *revs = ctx->revs;
> @@ -191,14 +195,14 @@ static void process_tree(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, base->buf, ctx->show_data);
> +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
>  	if (base->len)
>  		strbuf_addch(base, '/');
>  
>  	if (r & LOFR_SKIP_TREE)
>  		trace_printf("Skipping contents of tree %s...\n", base->buf);
>  	else if (!failed_parse)
> -		process_tree_contents(ctx, tree, base);
> +		process_tree_contents(ctx, tree, base, referred_commit);
>  
>  	r = list_objects_filter__filter_object(ctx->revs->repo,
>  					       LOFS_END_TREE, obj,
> @@ -207,7 +211,7 @@ static void process_tree(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, base->buf, ctx->show_data);
> +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
>  
>  	strbuf_setlen(base, baselen);
>  	free_tree_buffer(tree);
> @@ -314,9 +318,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
>  	}
>  }
>  
> -static void add_pending_tree(struct rev_info *revs, struct tree *tree)
> +static void add_pending_tree(struct rev_info *revs,  struct tree *tree, struct object *referred_commit)
>  {
> -	add_pending_object(revs, &tree->object, "");
> +	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
>  }
>  
>  static void traverse_trees_and_blobs(struct traversal_context *ctx,
> @@ -329,23 +333,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
>  	for (i = 0; i < ctx->revs->pending.nr; i++) {
>  		struct object_array_entry *pending = ctx->revs->pending.objects + i;
>  		struct object *obj = pending->item;
> +		struct object *referred_commit = pending->referred_commit;
>  		const char *name = pending->name;
>  		const char *path = pending->path;
>  		if (obj->flags & (UNINTERESTING | SEEN))
>  			continue;
>  		if (obj->type == OBJ_TAG) {
>  			obj->flags |= SEEN;
> -			ctx->show_object(obj, name, ctx->show_data);
> +			ctx->show_object(obj, name, ctx->show_data, referred_commit);
>  			continue;
>  		}
>  		if (!path)
>  			path = "";
>  		if (obj->type == OBJ_TREE) {
> -			process_tree(ctx, (struct tree *)obj, base, path);
> +			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
>  			continue;
>  		}
>  		if (obj->type == OBJ_BLOB) {
> -			process_blob(ctx, (struct blob *)obj, base, path);
> +			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
>  			continue;
>  		}
>  		die("unknown pending object %s (%s)",
> @@ -370,7 +375,7 @@ static void do_traverse(struct traversal_context *ctx)
>  		else if (get_commit_tree(commit)) {
>  			struct tree *tree = get_commit_tree(commit);
>  			tree->object.flags |= NOT_USER_GIVEN;
> -			add_pending_tree(ctx->revs, tree);
> +			add_pending_tree(ctx->revs, tree, &commit->object);
>  		} else if (commit->object.parsed) {
>  			die(_("unable to load root tree for commit %s"),
>  			      oid_to_hex(&commit->object.oid));
> diff --git a/list-objects.h b/list-objects.h
> index a952680e46..ab946d34db 100644
> --- a/list-objects.h
> +++ b/list-objects.h
> @@ -6,7 +6,7 @@ struct object;
>  struct rev_info;
>  
>  typedef void (*show_commit_fn)(struct commit *, void *);
> -typedef void (*show_object_fn)(struct object *, const char *, void *);
> +typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
>  void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
>  
>  typedef void (*show_edge_fn)(struct commit *);
> diff --git a/object.c b/object.c
> index 14188453c5..6b1ce2fcde 100644
> --- a/object.c
> +++ b/object.c
> @@ -322,9 +322,10 @@ void object_list_free(struct object_list **list)
>   */
>  static char object_array_slopbuf[1];
>  
> -void add_object_array_with_path(struct object *obj, const char *name,
> -				struct object_array *array,
> -				unsigned mode, const char *path)
> +void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
> +						    struct object_array *array,
> +						    unsigned mode, const char *path,
> +						    struct object *referred_commit)
>  {
>  	unsigned nr = array->nr;
>  	unsigned alloc = array->alloc;
> @@ -339,6 +340,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
>  	}
>  	entry = &objects[nr];
>  	entry->item = obj;
> +	entry->referred_commit = referred_commit;
>  	if (!name)
>  		entry->name = NULL;
>  	else if (!*name)
> @@ -354,6 +356,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
>  	array->nr = ++nr;
>  }
>  
> +void add_object_array_with_path(struct object *obj, const char *name,
> +				struct object_array *array,
> +				unsigned mode, const char *path)
> +{
> +	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
> +}
> +
>  void add_object_array(struct object *obj, const char *name, struct object_array *array)
>  {
>  	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
> diff --git a/object.h b/object.h
> index 87a6da47c8..de9f15b97d 100644
> --- a/object.h
> +++ b/object.h
> @@ -43,6 +43,7 @@ struct object_array {
>  	unsigned int alloc;
>  	struct object_array_entry {
>  		struct object *item;
> +		struct object *referred_commit;
>  		/*
>  		 * name or NULL.  If non-NULL, the memory pointed to
>  		 * is owned by this object *except* if it points at
> @@ -157,6 +158,9 @@ void object_list_free(struct object_list **list);
>  /* Object array handling .. */
>  void add_object_array(struct object *obj, const char *name, struct object_array *array);
>  void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
> +void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
> +						    unsigned mode, const char *path,
> +						    struct object *referred_commit);
>  
>  /*
>   * Returns NULL if the array is empty. Otherwise, returns the last object
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index 3ed15431cd..516eb235da 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
> @@ -459,9 +459,9 @@ struct bitmap_show_data {
>  	struct bitmap *base;
>  };
>  
> -static void show_object(struct object *object, const char *name, void *data_)
> +static void show_object(struct object *object, const char *name, void *show_data, void *carry_data)
>  {
> -	struct bitmap_show_data *data = data_;
> +	struct bitmap_show_data *data = show_data;
>  	int bitmap_pos;
>  
>  	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
> @@ -1268,9 +1268,9 @@ struct bitmap_test_data {
>  };
>  
>  static void test_show_object(struct object *object, const char *name,
> -			     void *data)
> +			     void *show_data, void *carry_data)
>  {
> -	struct bitmap_test_data *tdata = data;
> +	struct bitmap_test_data *tdata = show_data;
>  	int bitmap_pos;
>  
>  	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
> diff --git a/reachable.c b/reachable.c
> index 77a60c70a5..ebd817c446 100644
> --- a/reachable.c
> +++ b/reachable.c
> @@ -47,14 +47,14 @@ static int add_one_ref(const char *path, const struct object_id *oid,
>   * The traversal will have already marked us as SEEN, so we
>   * only need to handle any progress reporting here.
>   */
> -static void mark_object(struct object *obj, const char *name, void *data)
> +static void mark_object(struct object *obj, const char *name, void *show_data, void *carry_data)
>  {
> -	update_progress(data);
> +	update_progress(show_data);
>  }
>  
> -static void mark_commit(struct commit *c, void *data)
> +static void mark_commit(struct commit *c, void *show_data)
>  {
> -	mark_object(&c->object, NULL, data);
> +	mark_object(&c->object, NULL, show_data,  NULL);
>  }
>  
>  struct recent_data {
> diff --git a/revision.c b/revision.c
> index 4853c85d0b..da0ce0e3f2 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
>  	commit_stack_clear(&pending);
>  }
>  
> -static void add_pending_object_with_path(struct rev_info *revs,
> -					 struct object *obj,
> -					 const char *name, unsigned mode,
> -					 const char *path)
> +static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
> +							     struct object *obj,
> +							     const char *name, unsigned mode,
> +							     const char *path,
> +							     struct object *referred_commit)
>  {
>  	struct interpret_branch_name_options options = { 0 };
>  	if (!obj)
> @@ -326,20 +327,36 @@ static void add_pending_object_with_path(struct rev_info *revs,
>  		strbuf_release(&buf);
>  		return; /* do not add the commit itself */
>  	}
> -	add_object_array_with_path(obj, name, &revs->pending, mode, path);
> +	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
> +}
> +
> +static void add_pending_object_with_path(struct rev_info *revs,
> +					 struct object *obj,
> +					 const char *name, unsigned mode,
> +					 const char *path) {
> +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
>  }
>  
>  static void add_pending_object_with_mode(struct rev_info *revs,
>  					 struct object *obj,
> -					 const char *name, unsigned mode)
> +					 const char *name, unsigned mode,
> +					 struct object *referred_commit)
>  {
> -	add_pending_object_with_path(revs, obj, name, mode, NULL);
> +
> +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
> +}
> +
> +void add_pending_object_with_referred_commit(struct rev_info *revs,
> +					     struct object *obj, const char *name,
> +					     struct object *referred_commit)
> +{
> +	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
>  }
>  
>  void add_pending_object(struct rev_info *revs,
>  			struct object *obj, const char *name)
>  {
> -	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
> +	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
>  }
>  
>  void add_head_to_pending(struct rev_info *revs)
> @@ -2764,7 +2781,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>  			continue;
>  		}
>  
> -
>  		if (handle_revision_arg(arg, revs, flags, revarg_opt)) {
>  			int j;
>  			if (seen_dashdash || *arg == '^')
> @@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>  		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
>  			diagnose_missing_default(revs->def);
>  		object = get_reference(revs, revs->def, &oid, 0);
> -		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
> +		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
>  	}
>  
>  	/* Did the user ask for any diff output? Run the diff! */
> diff --git a/revision.h b/revision.h
> index a24f72dcd1..8a632e3587 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -424,6 +424,10 @@ void show_object_with_name(FILE *, struct object *, const char *);
>  void add_pending_object(struct rev_info *revs,
>  			struct object *obj, const char *name);
>  
> +void add_pending_object_with_referred_commit(struct rev_info *revs,
> +					     struct object *obj, const char *name,
> +					     struct object *referred_commit);
> +
>  void add_pending_oid(struct rev_info *revs,
>  		     const char *name, const struct object_id *oid,
>  		     unsigned int flags);
> diff --git a/upload-pack.c b/upload-pack.c
> index 5c1cd19612..d26fb351a3 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
>  			strbuf_addstr(value, " packfile-uris");
>  			free(str);
>  		}
> +
> +		if (!repo_config_get_string(the_repository,
> +					    "uploadpack.excludeobject",
> +					    &str) && str) {
> +			strbuf_addstr(value, " packfile-uris");
> +			free(str);
> +		}
>  	}
>  
>  	return 1;

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 1/3] packfile-uris: support for excluding commit objects
  2021-07-26 18:15       ` Junio C Hamano
@ 2021-07-26 19:45         ` Felipe Contreras
  2021-08-11  1:44         ` Teng Long
  1 sibling, 0 replies; 72+ messages in thread
From: Felipe Contreras @ 2021-07-26 19:45 UTC (permalink / raw)
  To: Junio C Hamano, Teng Long; +Cc: git, jonathantanmy, avarab

Junio C Hamano wrote:
> Teng Long <dyroneteng@gmail.com> writes:
> 
> > On the server, more sophisticated means of excluding objects should be
> > supported, such as commit object. This commit introduces a new
> > configuration `uploadpack.excludeobject` for this.
> 
> Please avoid adjectives that express subjective values, like
> "sophisticated".

The word "sophisticated" is not necessarily subjective, it can easily
mean "complex".

https://www.merriam-webster.com/dictionary/sophisticated

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 3/3] packfile-uri.txt: support for excluding commit objects
  2021-07-26  9:46     ` [PATCH v3 3/3] packfile-uri.txt: " Teng Long
@ 2021-07-26 20:52       ` Junio C Hamano
  2021-08-11  1:47         ` Teng Long
  0 siblings, 1 reply; 72+ messages in thread
From: Junio C Hamano @ 2021-07-26 20:52 UTC (permalink / raw)
  To: Teng Long; +Cc: git, jonathantanmy, avarab

Teng Long <dyroneteng@gmail.com> writes:

> +++ b/Documentation/technical/packfile-uri.txt
> @@ -35,13 +35,16 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
>  at least so that we can test the client.
>  
>  This is the implementation: a feature, marked experimental, that allows the
> +server to be configured by one or more entries with the format:
> +
> +    uploadpack.excludeobject=<object-hash> <recursively> <pack-hash> <uri>
> +
> +Value <object-hash> is the key of entry, and the object type can be a blob
> +or commit. Whenever the list of objects to be sent is assembled, all such
> +objects are excluded, replaced with URIs. At the same time, for the old
> +configuration `uploadpack.blobPackfileUri=<sha1> <pack-hash> <uri>` is
> +still compatible for now, but this configuration only supports the
> +exclusion of blob objects.

Do not hint deprecation and future removal with "still" and "for
now", before seeing a concensus that it should be deprecated and
removed.

The new thing, <recursively>, deserves some explanation.  What are
the acceptable values (yes/no? spatial/time/both? infinitely/limited?)
and what do these values mean?

Why is this limited to only <blob> and <commit>?

There isn't a fundamental reason why I shouldn't be able to say
"v2.32.0" instead of ebf3c04b262aa27fbb97f8a0156c2347fecafafb (or
"v2.32.0~0") to say "I want anything reachable from v2.32.0 (in
other words, that version and everything before it)", is there?

For that matter, "everything reachable from this tree object" may
also be a reasonable way to specify which set of objects are
offloaded to an out-of-band URI.

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 1/3] packfile-uris: support for excluding commit objects
  2021-07-26 18:15       ` Junio C Hamano
  2021-07-26 19:45         ` Felipe Contreras
@ 2021-08-11  1:44         ` Teng Long
  1 sibling, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  1:44 UTC (permalink / raw)
  To: gitster; +Cc: avarab, dyroneteng, git, jonathantanmy

>> Please avoid adjectives that express subjective values, like
>> "sophisticated".  Readers will expect a lot more sophistication than
>> your code actually offers and will be disappointed ("wow, that would
>> be wonderful if we can say 'exclude commits made by bots, and those
>> older than 3 months'---eh, you cannot do that?  where is your
>> sophistication then?").

I use it("sophisticated") from "packfile-uri.txt", but i agree with you.
Documentation will also remove/replace the subjective word in the next
patch.

>> Please avoid "should" without first describing the background for
>> "why it should".  It would help if you briefly describe what we
>> currently have and its limitation before this first paragraph
>> (i.e. your "we can already exclude only blob objects" would become
>> major part of the explanation, but you'd need to present in what
>> situations it would help to be able to exclude other types).

Agree.
Commit message will be appended with backgroud description.

>> This commit is probalby doing too many things at once.  For example,
>> refactoring like creation of match_packfile_uri_exclusions() helper
>> function out of existing code (there probably are others) can and
>> should be done as separate preparatory steps before the API gets
>> modified (e.g. process-object callbacks gain an e xtra parameter) in
>> tree-wide way.
>>
>> And by slimming the primary step that introduces the new feature,
>> there will be a space to also add documentation and test in the same
>> step, which would help reviewers.  With the current structure of the
>> series, with a code dump in the first step with only a vague promiss
>> of "sophistication" without documentation updates, reviewers cannot
>> even tell how the "commit object" is used easily.

Agree.
The current commit will be splitted for more clear reponsibilities, 
documentation and tests ditto.

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 1/3] packfile-uris: support for excluding commit objects
  2021-07-26 15:03       ` Ævar Arnfjörð Bjarmason
@ 2021-08-11  1:46         ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  1:46 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, jonathantanmy



>>This looks like a good cleanup, but should be split into another cleanup
>>commit. It looks unrelated.

Agree.
Will split up in next patchset.

>>You've got all sorts of mixed space/tab indent here.

Yes.
Will fix wrong indents in next patchset.

>>Isn't accepting http and https the default?

I think it's NOT after I took a view at function `fetch_pack_config`.
This may be optimized in another patch I think and also will listen
to some other suggestions.

Thank you.
	     

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 3/3] packfile-uri.txt: support for excluding commit objects
  2021-07-26 20:52       ` Junio C Hamano
@ 2021-08-11  1:47         ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  1:47 UTC (permalink / raw)
  To: gitster; +Cc: avarab, dyroneteng, git, jonathantanmy

>>Do not hint deprecation and future removal with "still" and "for
>>now", before seeing a concensus that it should be deprecated and
>>removed.

Agree.
Will remove.

>>The new thing, <recursively>, deserves some explanation.  What are
>>the acceptable values (yes/no? spatial/time/both? infinitely/limited?)
>>and what do these values mean?

More expanation.
I think over about <recursively> and consider to remove it in next patch
because offer a pack only contains commit or tree object individually
may not make much sense, so <recursively> will remove(as default on tree
and commit object) in next patch.

>>Why is this limited to only <blob> and <commit>?

Will support tree but not tag(maybe furture work) in next patch.

>>There isn't a fundamental reason why I shouldn't be able to say
>>"v2.32.0" instead of ebf3c04b262aa27fbb97f8a0156c2347fecafafb (or
>>"v2.32.0~0") to say "I want anything reachable from v2.32.0 (in
>>other words, that version and everything before it)", is there?

>>For that matter, "everything reachable from this tree object" may
>>also be a reasonable way to specify which set of objects are
>>offloaded to an out-of-band URI.

Agree.
Will provide more detailed instuctions in next patch.

Thank you.



^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v3 0/3] packfile-uris: commit objects exclusio
  2021-07-26 12:34     ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Ævar Arnfjörð Bjarmason
@ 2021-08-11  1:48       ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  1:48 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, jonathantanmy

>>It looks like you provided the wrong base for the --range-diff (likely
>>master?), so it's not a diff against v2, just whatever you used as a
>>base.

Agree.
Sorry about that, the first use made a mistake. Maybe corrected next time :)

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 0/7] packfile-uris: commits and trees exclusion
  2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
                       ` (3 preceding siblings ...)
  2021-07-26 12:34     ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Ævar Arnfjörð Bjarmason
@ 2021-08-11  7:45     ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 1/7] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
                         ` (7 more replies)
  4 siblings, 8 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Changes since v4:

* Remove subjective words in commit message.
* Add background descriptions in commit message.
* Big commit split.
* Fix `t/t5702-protocol-v2.sh` indent problems.
* Test commit split(cleanup and new test case).
* Remove hint deprecation words in documentation.
* Support tree object exclusion.
* Add details about recursive exclusion of commits and trees in documentation.

Teng Long (7):
  pack-objects.c: introduce new method `match_packfile_uri_exclusions`
  Add new parameter "carry_data" for "show_object" function
  packfile-uri: support for excluding commit objects
  packfile-uri: support for excluding tree objects
  packfile-uri.txt: support for excluding commits and trees
  t5702: replace with "test_when_finished" for cleanup
  t5702: support for excluding commit objects

 Documentation/technical/packfile-uri.txt |  32 ++-
 builtin/describe.c                       |   4 +-
 builtin/pack-objects.c                   | 104 +++++---
 builtin/rev-list.c                       |   2 +-
 list-objects.c                           |  41 +--
 list-objects.h                           |   2 +-
 object.c                                 |  19 +-
 object.h                                 |  15 +-
 pack-bitmap.c                            |   8 +-
 reachable.c                              |   8 +-
 revision.c                               |  34 ++-
 revision.h                               |   3 +
 t/t5702-protocol-v2.sh                   | 312 ++++++++++++++++++++---
 upload-pack.c                            |   7 +
 14 files changed, 466 insertions(+), 125 deletions(-)

Range-diff against v3:
-:  ---------- > 1:  73a5b4ccc1 pack-objects.c: introduce new method `match_packfile_uri_exclusions`
-:  ---------- > 2:  bc8fea97e3 Add new parameter "carry_data" for "show_object" function
1:  f324359ec8 ! 3:  f71b310842 packfile-uris: support for excluding commit objects
    @@ Metadata
     Author: Teng Long <dyroneteng@gmail.com>
     
      ## Commit message ##
    -    packfile-uris: support for excluding commit objects
    +    packfile-uri: support for excluding commit objects
     
    -    On the server, more sophisticated means of excluding objects should be
    -    supported, such as commit object. This commit introduces a new
    -    configuration `uploadpack.excludeobject` for this.
    +    Currently packfile-uri supports the exclusion of blob objects, but in
    +    some scenarios, users may wish to exclude more types of objects, such as
    +    commit and tree objects, not only because packfile itself supports
    +    storing these object types, but also on the other hand, to make
    +    configuration items maintainable and simpler.
    +
    +    This commit is used to support the recursive exclusion of a commit
    +    object, which means that if the exclusion of a commit is configured as
    +    packfile-uri, the commit itself and all the objects it contains will
    +    also be recursively excluded. In addition, to support this feature, a
    +    new configuration  `uploadpack.excludeobject` is introduced.
     
         The reason for bringing a new configuration is for two considerations.
         First, the old configuration supports a single object type (blob), which
    @@ Commit message
     
         Signed-off-by: Teng Long <dyroneteng@gmail.com>
     
    - ## builtin/describe.c ##
    -@@ builtin/describe.c: static void process_commit(struct commit *commit, void *data)
    - 	pcd->current_commit = commit->object.oid;
    - }
    - 
    --static void process_object(struct object *obj, const char *path, void *data)
    -+static void process_object(struct object *obj, const char *path, void *show_data, void *carry_data)
    - {
    --	struct process_commit_data *pcd = data;
    -+	struct process_commit_data *pcd = show_data;
    - 
    - 	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
    - 		reset_revision_walk();
    -
      ## builtin/pack-objects.c ##
    -@@ builtin/pack-objects.c: static int have_duplicate_entry(const struct object_id *oid,
    - 	return 1;
    - }
    - 
    -+static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
    -+{
    -+	int i;
    -+	const char *p;
    -+
    -+	if (ex) {
    -+		for (i = 0; i < uri_protocols.nr; i++) {
    -+			if (skip_prefix(ex->uri,
    -+					uri_protocols.items[i].string,
    -+					&p) &&
    -+			    *p == ':')
    -+				return 1;
    -+
    -+		}
    -+	}
    -+	return 0;
    -+}
    -+
    - static int want_found_object(const struct object_id *oid, int exclude,
    - 			     struct packed_git *p)
    - {
     @@ builtin/pack-objects.c: static int want_object_in_pack_one(struct packed_git *p,
      static int want_object_in_pack(const struct object_id *oid,
      			       int exclude,
    @@ builtin/pack-objects.c: static int want_object_in_pack_one(struct packed_git *p,
      	struct multi_pack_index *m;
     +	struct configured_exclusion *ex;
     +	struct configured_exclusion *referred_ex;
    -+
      
      	if (!exclude && local && has_loose_object_nonlocal(oid))
      		return 0;
    @@ builtin/pack-objects.c: static int want_object_in_pack(const struct object_id *o
      	if (uri_protocols.nr) {
     -		struct configured_exclusion *ex =
     -			oidmap_get(&configured_exclusions, oid);
    --		int i;
    --		const char *p;
    --
    --		if (ex) {
    --			for (i = 0; i < uri_protocols.nr; i++) {
    --				if (skip_prefix(ex->uri,
    --						uri_protocols.items[i].string,
    --						&p) &&
    --				    *p == ':') {
    --					oidset_insert(&excluded_by_config, oid);
    --					return 0;
    --				}
    --			}
     +		if (referred_commit) {
     +			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
     +			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
     +				return 0;
     +		}
    -+		ex = oidmap_get(&configured_exclusions, oid);
    -+		if (ex && match_packfile_uri_exclusions(ex)) {
    -+			oidset_insert(&excluded_by_config, oid);
    -+			return 0;
    - 		}
    - 	}
      
    ++		ex = oidmap_get(&configured_exclusions, oid);
    + 		if (ex && match_packfile_uri_exclusions(ex)) {
    + 			oidset_insert(&excluded_by_config, oid);
    + 			return 0;
     @@ builtin/pack-objects.c: static const char no_closure_warning[] = N_(
      );
      
    @@ builtin/pack-objects.c: static int git_pack_config(const char *k, const char *v,
      		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
      		    *pack_end != ' ')
     -			die(_("value of uploadpack.blobpackfileuri must be "
    --			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
    -+                        die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
    -+                              "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
    ++			die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
    + 			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
      		if (oidmap_get(&configured_exclusions, &ex->e.oid))
     -			die(_("object already configured in another "
     -			      "uploadpack.blobpackfileuri (got '%s')"), v);
    -+                        die(_("object already configured by an earlier "
    -+                              "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
    ++			die(_("object already configured by an earlier "
    ++			      "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
      		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
      		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
      		ex->uri = xstrdup(pack_end + 1);
    @@ builtin/pack-objects.c: static int add_object_entry_from_pack(const struct objec
      		return 0;
      
      	oi.typep = &type;
    -@@ builtin/pack-objects.c: static void show_commit_pack_hint(struct commit *commit, void *_data)
    - }
    - 
    - static void show_object_pack_hint(struct object *object, const char *name,
    --				  void *_data)
    -+				  void *show_data, void *carry_data)
    - {
    - 	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
    - 	if (!oe)
     @@ builtin/pack-objects.c: static void read_object_list_from_stdin(void)
      			die(_("expected object ID, got garbage:\n %s"), line);
      
    @@ builtin/pack-objects.c: static void read_object_list_from_stdin(void)
      
      	if (write_bitmap_index)
     @@ builtin/pack-objects.c: static void show_commit(struct commit *commit, void *data)
    - 		propagate_island_marks(commit);
    - }
      
    --static void show_object(struct object *obj, const char *name, void *data)
    -+static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
    + static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
      {
     +	struct object *referred_commit = carry_data;
      	add_preferred_base_object(name);
    @@ builtin/pack-objects.c: static void show_commit(struct commit *commit, void *dat
      	obj->flags |= OBJECT_ADDED;
      
      	if (use_delta_islands) {
    -@@ builtin/pack-objects.c: static void show_object(struct object *obj, const char *name, void *data)
    - 	}
    - }
    - 
    --static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
    -+static void show_object__ma_allow_any(struct object *obj, const char *name, void *show_data, void *carry_data)
    - {
    - 	assert(arg_missing_action == MA_ALLOW_ANY);
    - 
    -@@ builtin/pack-objects.c: static void show_object__ma_allow_any(struct object *obj, const char *name, void
    - 	if (!has_object(the_repository, &obj->oid, 0))
    - 		return;
    - 
    --	show_object(obj, name, data);
    -+	show_object(obj, name, show_data, carry_data);
    - }
    - 
    --static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
    -+static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *show_data, void *carry_data)
    - {
    - 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
    - 
    -@@ builtin/pack-objects.c: static void show_object__ma_allow_promisor(struct object *obj, const char *name,
    - 	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
    - 		return;
    - 
    --	show_object(obj, name, data);
    -+	show_object(obj, name, show_data, carry_data);
    - }
    - 
    - static int option_parse_missing_action(const struct option *opt,
     @@ builtin/pack-objects.c: static void add_objects_in_unpacked_packs(void)
      		QSORT(in_pack.array, in_pack.nr, ofscmp);
      		for (i = 0; i < in_pack.nr; i++) {
    @@ builtin/pack-objects.c: static int add_loose_object(const struct object_id *oid,
      	return 0;
      }
      
    -@@ builtin/pack-objects.c: static int get_object_list_from_bitmap(struct rev_info *revs)
    - 
    - static void record_recent_object(struct object *obj,
    - 				 const char *name,
    --				 void *data)
    -+				 void *show_data,
    -+				 void *carry_data)
    - {
    - 	oid_array_append(&recent_objects, &obj->oid);
    - }
     @@ builtin/pack-objects.c: int cmd_pack_objects(int argc, const char **argv, const char *prefix)
      			 N_("respect islands during delta compression")),
      		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
      				N_("protocol"),
     -				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
     +				N_("exclude any configured uploadpack.excludeobject or "
    -+				   	"uploadpack.blobpackfileuri with this protocol")),
    ++				   "uploadpack.blobpackfileuri with this protocol")),
      		OPT_END(),
      	};
      
     
    - ## builtin/rev-list.c ##
    -@@ builtin/rev-list.c: static int finish_object(struct object *obj, const char *name, void *cb_data)
    - 	return 0;
    - }
    - 
    --static void show_object(struct object *obj, const char *name, void *cb_data)
    -+static void show_object(struct object *obj, const char *name, void *cb_data, void *carry_data)
    - {
    - 	struct rev_list_info *info = cb_data;
    - 	struct rev_info *revs = info->revs;
    -
    - ## fetch-pack.c ##
    -@@
    - #include "fetch-negotiator.h"
    - #include "fsck.h"
    - #include "shallow.h"
    -+#include "strmap.h"
    - 
    - static int transfer_unpack_limit = -1;
    - static int fetch_unpack_limit = -1;
    -@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
    - 	struct string_list packfile_uris = STRING_LIST_INIT_DUP;
    - 	int i;
    - 	struct strvec index_pack_args = STRVEC_INIT;
    -+	struct strset uris;
    - 
    - 	negotiator = &negotiator_alloc;
    - 	fetch_negotiator_init(r, negotiator);
    -@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
    - 		}
    - 	}
    - 
    -+	strset_init(&uris);
    - 	for (i = 0; i < packfile_uris.nr; i++) {
    - 		int j;
    - 		struct child_process cmd = CHILD_PROCESS_INIT;
    -@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
    - 		const char *uri = packfile_uris.items[i].string +
    - 			the_hash_algo->hexsz + 1;
    - 
    -+		if (!strset_add(&uris, uri))
    -+			continue;
    - 		strvec_push(&cmd.args, "http-fetch");
    - 		strvec_pushf(&cmd.args, "--packfile=%.*s",
    - 			     (int) the_hash_algo->hexsz,
    -@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
    - 						 get_object_directory(),
    - 						 packname));
    - 	}
    -+	strset_clear(&uris);
    - 	string_list_clear(&packfile_uris, 0);
    - 	strvec_clear(&index_pack_args);
    - 
    -
      ## list-objects.c ##
     @@ list-objects.c: struct traversal_context {
      static void process_blob(struct traversal_context *ctx,
    @@ list-objects.c: static void process_blob(struct traversal_context *ctx,
      	if (r & LOFR_MARK_SEEN)
      		obj->flags |= SEEN;
      	if (r & LOFR_DO_SHOW)
    --		ctx->show_object(obj, path->buf, ctx->show_data);
    +-		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
     +		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
      	strbuf_setlen(path, pathlen);
      }
    @@ list-objects.c: static void process_tree(struct traversal_context *ctx,
      	if (r & LOFR_MARK_SEEN)
      		obj->flags |= SEEN;
      	if (r & LOFR_DO_SHOW)
    --		ctx->show_object(obj, base->buf, ctx->show_data);
    +-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
     +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
      	if (base->len)
      		strbuf_addch(base, '/');
    @@ list-objects.c: static void process_tree(struct traversal_context *ctx,
      	if (r & LOFR_MARK_SEEN)
      		obj->flags |= SEEN;
      	if (r & LOFR_DO_SHOW)
    --		ctx->show_object(obj, base->buf, ctx->show_data);
    +-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
     +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
      
      	strbuf_setlen(base, baselen);
    @@ list-objects.c: void mark_edges_uninteresting(struct rev_info *revs,
      }
      
     -static void add_pending_tree(struct rev_info *revs, struct tree *tree)
    -+static void add_pending_tree(struct rev_info *revs,  struct tree *tree, struct object *referred_commit)
    ++static void add_pending_tree(struct rev_info *revs, struct tree *tree, struct object *referred_commit)
      {
     -	add_pending_object(revs, &tree->object, "");
     +	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
    @@ list-objects.c: static void traverse_trees_and_blobs(struct traversal_context *c
      			continue;
      		if (obj->type == OBJ_TAG) {
      			obj->flags |= SEEN;
    --			ctx->show_object(obj, name, ctx->show_data);
    +-			ctx->show_object(obj, name, ctx->show_data, NULL);
     +			ctx->show_object(obj, name, ctx->show_data, referred_commit);
      			continue;
      		}
    @@ list-objects.c: static void do_traverse(struct traversal_context *ctx)
      			die(_("unable to load root tree for commit %s"),
      			      oid_to_hex(&commit->object.oid));
     
    - ## list-objects.h ##
    -@@ list-objects.h: struct object;
    - struct rev_info;
    - 
    - typedef void (*show_commit_fn)(struct commit *, void *);
    --typedef void (*show_object_fn)(struct object *, const char *, void *);
    -+typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
    - void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
    - 
    - typedef void (*show_edge_fn)(struct commit *);
    -
      ## object.c ##
     @@ object.c: void object_list_free(struct object_list **list)
       */
    @@ object.c: void add_object_array_with_path(struct object *obj, const char *name,
     
      ## object.h ##
     @@ object.h: struct object_array {
    - 	unsigned int alloc;
    - 	struct object_array_entry {
    - 		struct object *item;
    + 		char *name;
    + 		char *path;
    + 		unsigned mode;
     +		struct object *referred_commit;
    - 		/*
    - 		 * name or NULL.  If non-NULL, the memory pointed to
    - 		 * is owned by this object *except* if it points at
    + 	} *objects;
    + };
    + 
     @@ object.h: void object_list_free(struct object_list **list);
      /* Object array handling .. */
      void add_object_array(struct object *obj, const char *name, struct object_array *array);
      void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
    +-
     +void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
    -+						    unsigned mode, const char *path,
    -+						    struct object *referred_commit);
    - 
    ++						    unsigned mode, const char *path, struct object *referred_commit);
      /*
       * Returns NULL if the array is empty. Otherwise, returns the last object
    -
    - ## pack-bitmap.c ##
    -@@ pack-bitmap.c: struct bitmap_show_data {
    - 	struct bitmap *base;
    - };
    - 
    --static void show_object(struct object *object, const char *name, void *data_)
    -+static void show_object(struct object *object, const char *name, void *show_data, void *carry_data)
    - {
    --	struct bitmap_show_data *data = data_;
    -+	struct bitmap_show_data *data = show_data;
    - 	int bitmap_pos;
    - 
    - 	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
    -@@ pack-bitmap.c: struct bitmap_test_data {
    - };
    - 
    - static void test_show_object(struct object *object, const char *name,
    --			     void *data)
    -+			     void *show_data, void *carry_data)
    - {
    --	struct bitmap_test_data *tdata = data;
    -+	struct bitmap_test_data *tdata = show_data;
    - 	int bitmap_pos;
    - 
    - 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
    -
    - ## reachable.c ##
    -@@ reachable.c: static int add_one_ref(const char *path, const struct object_id *oid,
    -  * The traversal will have already marked us as SEEN, so we
    -  * only need to handle any progress reporting here.
    -  */
    --static void mark_object(struct object *obj, const char *name, void *data)
    -+static void mark_object(struct object *obj, const char *name, void *show_data, void *carry_data)
    - {
    --	update_progress(data);
    -+	update_progress(show_data);
    - }
    - 
    --static void mark_commit(struct commit *c, void *data)
    -+static void mark_commit(struct commit *c, void *show_data)
    - {
    --	mark_object(&c->object, NULL, data);
    -+	mark_object(&c->object, NULL, show_data,  NULL);
    - }
    - 
    - struct recent_data {
    +  * after removing its entry from the array. Other resources associated
     
      ## revision.c ##
     @@ revision.c: void mark_parents_uninteresting(struct commit *commit)
    @@ revision.c: static void add_pending_object_with_path(struct rev_info *revs,
      	}
     -	add_object_array_with_path(obj, name, &revs->pending, mode, path);
     +	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
    -+}
    -+
    + }
    + 
     +static void add_pending_object_with_path(struct rev_info *revs,
     +					 struct object *obj,
     +					 const char *name, unsigned mode,
    -+					 const char *path) {
    ++					 const char *path)
    ++{
     +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
    - }
    - 
    ++}
      static void add_pending_object_with_mode(struct rev_info *revs,
      					 struct object *obj,
     -					 const char *name, unsigned mode)
     +					 const char *name, unsigned mode,
     +					 struct object *referred_commit)
    - {
    --	add_pending_object_with_path(revs, obj, name, mode, NULL);
    -+
    ++{
     +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
     +}
     +
     +void add_pending_object_with_referred_commit(struct rev_info *revs,
     +					     struct object *obj, const char *name,
     +					     struct object *referred_commit)
    -+{
    + {
    +-	add_pending_object_with_path(revs, obj, name, mode, NULL);
     +	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
      }
      
    @@ revision.c: static void add_pending_object_with_path(struct rev_info *revs,
      }
      
      void add_head_to_pending(struct rev_info *revs)
    -@@ revision.c: int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
    - 			continue;
    - 		}
    - 
    --
    - 		if (handle_revision_arg(arg, revs, flags, revarg_opt)) {
    - 			int j;
    - 			if (seen_dashdash || *arg == '^')
     @@ revision.c: int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
      		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
      			diagnose_missing_default(revs->def);
    @@ revision.c: int setup_revisions(int argc, const char **argv, struct rev_info *re
     
      ## revision.h ##
     @@ revision.h: void show_object_with_name(FILE *, struct object *, const char *);
    +  */
      void add_pending_object(struct rev_info *revs,
      			struct object *obj, const char *name);
    - 
     +void add_pending_object_with_referred_commit(struct rev_info *revs,
     +					     struct object *obj, const char *name,
     +					     struct object *referred_commit);
    -+
    + 
      void add_pending_oid(struct rev_info *revs,
      		     const char *name, const struct object_id *oid,
    - 		     unsigned int flags);
     
      ## upload-pack.c ##
     @@ upload-pack.c: int upload_pack_advertise(struct repository *r,
2:  7ce99a0f31 < -:  ---------- t5702: support for excluding commit objects
-:  ---------- > 4:  bbb0413cc4 packfile-uri: support for excluding tree objects
3:  36b9a92132 ! 5:  8e5bf4010c packfile-uri.txt: support for excluding commit objects
    @@ Metadata
     Author: Teng Long <dyroneteng@gmail.com>
     
      ## Commit message ##
    -    packfile-uri.txt: support for excluding commit objects
    +    packfile-uri.txt: support for excluding commits and trees
     
         Signed-off-by: Teng Long <dyroneteng@gmail.com>
     
    @@ Documentation/technical/packfile-uri.txt: include some sort of non-trivial imple
     -downloaded in this way only contain single blobs.
     +server to be configured by one or more entries with the format:
     +
    -+    uploadpack.excludeobject=<object-hash> <recursively> <pack-hash> <uri>
    ++    uploadpack.excludeobject=<object-hash> <pack-hash> <uri>
     +
    -+Value <object-hash> is the key of entry, and the object type can be a blob
    -+or commit. Whenever the list of objects to be sent is assembled, all such
    -+objects are excluded, replaced with URIs. At the same time, for the old
    -+configuration `uploadpack.blobPackfileUri=<sha1> <pack-hash> <uri>` is
    -+still compatible for now, but this configuration only supports the
    -+exclusion of blob objects.
    ++Value <object-hash> is the key of entry, and the object type can be a blob,
    ++tree, or commit. The exclusion of tree and commit is recursive by default,
    ++which means that when a tree or commit object is excluded, the object itself
    ++and all reachable objects of the object will be excluded recursively. Whenever
    ++the list of objects to be sent is assembled, all such objects are excluded,
    ++replaced with URIs.
    ++
    ++Configuration compatibility
    ++-------------
    ++
    ++The old configuration of packfile-uri:
    ++
    ++	`uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>`
    ++
    ++For the old configuration is compatible with the new one, but it only
    ++supports the exclusion of blob objects.
      
      Client design
      -------------
    @@ Documentation/technical/packfile-uri.txt: The protocol design allows some evolut
       * On the client, resumption of clone. If a clone is interrupted, information
         could be recorded in the repository's config and a "clone-resume" command
         can resume the clone in progress. (Resumption of subsequent fetches is more
    +@@ Documentation/technical/packfile-uri.txt: There are some possible features that will require a change in protocol:
    + 
    +  * Additional HTTP headers (e.g. authentication)
    +  * Byte range support
    +- * Different file formats referenced by URIs (e.g. raw object)
    ++ * Different file formats referenced by URIs (e.g. raw object)
    + \ No newline at end of file
-:  ---------- > 6:  f3b1cba7e1 t5702: replace with "test_when_finished" for cleanup
-:  ---------- > 7:  3b5f9732b8 t5702: support for excluding commit objects
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 1/7] pack-objects.c: introduce new method `match_packfile_uri_exclusions`
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 2/7] Add new parameter "carry_data" for "show_object" function Teng Long
                         ` (6 subsequent siblings)
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..31556e7396 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1188,6 +1188,24 @@ static int have_duplicate_entry(const struct object_id *oid,
 	return 1;
 }
 
+static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
+{
+	int i;
+	const char *p;
+
+	if (ex) {
+		for (i = 0; i < uri_protocols.nr; i++) {
+			if (skip_prefix(ex->uri,
+					uri_protocols.items[i].string,
+					&p) &&
+			    *p == ':')
+				return 1;
+
+		}
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1335,19 +1353,10 @@ static int want_object_in_pack(const struct object_id *oid,
 	if (uri_protocols.nr) {
 		struct configured_exclusion *ex =
 			oidmap_get(&configured_exclusions, oid);
-		int i;
-		const char *p;
 
-		if (ex) {
-			for (i = 0; i < uri_protocols.nr; i++) {
-				if (skip_prefix(ex->uri,
-						uri_protocols.items[i].string,
-						&p) &&
-				    *p == ':') {
-					oidset_insert(&excluded_by_config, oid);
-					return 0;
-				}
-			}
+		if (ex && match_packfile_uri_exclusions(ex)) {
+			oidset_insert(&excluded_by_config, oid);
+			return 0;
 		}
 	}
 
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 2/7] Add new parameter "carry_data" for "show_object" function
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
  2021-08-11  7:45       ` [PATCH v4 1/7] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 3/7] packfile-uri: support for excluding commit objects Teng Long
                         ` (5 subsequent siblings)
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

During the pack-objects process, "show_object" function will be called
to find the object and show the process("show_object_fn" in
"list-object.h"), the function definition contains three parameters:

	1. struct object *obj(contains object type, flags, and oid).
	2. const char *name(the object name).
	3. void *show_data(function to show progress info).

This commit adds a new parameter: "void *carry_data", the reason is
mainly based on scalability and performance considerations when showing
an object, space for time, avoid costly temporary calculations in the
"show" phase. For example, carry the ownership relationship between
blob or tree object and the referred commit to avoid redundant and
expensive calculations.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/describe.c     |  4 ++--
 builtin/pack-objects.c | 15 ++++++++-------
 builtin/rev-list.c     |  2 +-
 list-objects.c         |  8 ++++----
 list-objects.h         |  2 +-
 pack-bitmap.c          |  8 ++++----
 reachable.c            |  8 ++++----
 7 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 40482d8e9f..045da79b5c 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -485,9 +485,9 @@ static void process_commit(struct commit *commit, void *data)
 	pcd->current_commit = commit->object.oid;
 }
 
-static void process_object(struct object *obj, const char *path, void *data)
+static void process_object(struct object *obj, const char *path, void *show_data, void *carry_data)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = show_data;
 
 	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
 		reset_revision_walk();
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 31556e7396..5f9ec3566f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3068,7 +3068,7 @@ static void show_commit_pack_hint(struct commit *commit, void *_data)
 }
 
 static void show_object_pack_hint(struct object *object, const char *name,
-				  void *_data)
+				  void *show_data, void *carry_dataa)
 {
 	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
 	if (!oe)
@@ -3252,7 +3252,7 @@ static void show_commit(struct commit *commit, void *data)
 		propagate_island_marks(commit);
 }
 
-static void show_object(struct object *obj, const char *name, void *data)
+static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	add_preferred_base_object(name);
 	add_object_entry(&obj->oid, obj->type, name, 0);
@@ -3274,7 +3274,7 @@ static void show_object(struct object *obj, const char *name, void *data)
 	}
 }
 
-static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_any(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_ANY);
 
@@ -3285,10 +3285,10 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
 	if (!has_object(the_repository, &obj->oid, 0))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
-static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
 
@@ -3299,7 +3299,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
 	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
 static int option_parse_missing_action(const struct option *opt,
@@ -3547,7 +3547,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 
 static void record_recent_object(struct object *obj,
 				 const char *name,
-				 void *data)
+				 void *show_data,
+				 void *carry_data)
 {
 	oid_array_append(&recent_objects, &obj->oid);
 }
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index b4d8ea0a35..1cad33d9e8 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -266,7 +266,7 @@ static int finish_object(struct object *obj, const char *name, void *cb_data)
 	return 0;
 }
 
-static void show_object(struct object *obj, const char *name, void *cb_data)
+static void show_object(struct object *obj, const char *name, void *cb_data, void *carry_data)
 {
 	struct rev_list_info *info = cb_data;
 	struct rev_info *revs = info->revs;
diff --git a/list-objects.c b/list-objects.c
index e19589baa0..427228a3ba 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -60,7 +60,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data);
+		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -191,7 +191,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
 	if (base->len)
 		strbuf_addch(base, '/');
 
@@ -207,7 +207,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -335,7 +335,7 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data);
+			ctx->show_object(obj, name, ctx->show_data, NULL);
 			continue;
 		}
 		if (!path)
diff --git a/list-objects.h b/list-objects.h
index a952680e46..ab946d34db 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -6,7 +6,7 @@ struct object;
 struct rev_info;
 
 typedef void (*show_commit_fn)(struct commit *, void *);
-typedef void (*show_object_fn)(struct object *, const char *, void *);
+typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
 void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
 
 typedef void (*show_edge_fn)(struct commit *);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 3ed15431cd..516eb235da 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -459,9 +459,9 @@ struct bitmap_show_data {
 	struct bitmap *base;
 };
 
-static void show_object(struct object *object, const char *name, void *data_)
+static void show_object(struct object *object, const char *name, void *show_data, void *carry_data)
 {
-	struct bitmap_show_data *data = data_;
+	struct bitmap_show_data *data = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
@@ -1268,9 +1268,9 @@ struct bitmap_test_data {
 };
 
 static void test_show_object(struct object *object, const char *name,
-			     void *data)
+			     void *show_data, void *carry_data)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
diff --git a/reachable.c b/reachable.c
index 77a60c70a5..521b39edef 100644
--- a/reachable.c
+++ b/reachable.c
@@ -47,14 +47,14 @@ static int add_one_ref(const char *path, const struct object_id *oid,
  * The traversal will have already marked us as SEEN, so we
  * only need to handle any progress reporting here.
  */
-static void mark_object(struct object *obj, const char *name, void *data)
+static void mark_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
-	update_progress(data);
+	update_progress(show_data);
 }
 
-static void mark_commit(struct commit *c, void *data)
+static void mark_commit(struct commit *c, void *show_data)
 {
-	mark_object(&c->object, NULL, data);
+	mark_object(&c->object, NULL, show_data, NULL);
 }
 
 struct recent_data {
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 3/7] packfile-uri: support for excluding commit objects
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
  2021-08-11  7:45       ` [PATCH v4 1/7] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
  2021-08-11  7:45       ` [PATCH v4 2/7] Add new parameter "carry_data" for "show_object" function Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 4/7] packfile-uri: support for excluding tree objects Teng Long
                         ` (4 subsequent siblings)
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Currently packfile-uri supports the exclusion of blob objects, but in
some scenarios, users may wish to exclude more types of objects, such as
commit and tree objects, not only because packfile itself supports
storing these object types, but also on the other hand, to make
configuration items maintainable and simpler.

This commit is used to support the recursive exclusion of a commit
object, which means that if the exclusion of a commit is configured as
packfile-uri, the commit itself and all the objects it contains will
also be recursively excluded. In addition, to support this feature, a
new configuration  `uploadpack.excludeobject` is introduced.

The reason for bringing a new configuration is for two considerations.
First, the old configuration supports a single object type (blob), which
limits the use of this feature. Secondly, the name of the old
configuration is not abstract enough, this make extension difficult. If
different object types use different configuration names, the
configuration items will be bloated and difficult to maintain, so the
new configuration is more abstract in name and easy to extend.

Although a new configuration has been introduced, the old one is
still available and compatible with the new configuration. The old
configuration `uploadpack.blobpackfileuri` only supports excluding
blobs. The new configuration `uploadpack.excludeobject` not only
supports excluding blob objects, but also supports excluding commit
objects, as well as recursively excluding tree objects and blob objects
they contain.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 50 +++++++++++++++++++++++++-----------------
 list-objects.c         | 37 +++++++++++++++++--------------
 object.c               | 15 ++++++++++---
 object.h               |  4 +++-
 revision.c             | 34 ++++++++++++++++++++--------
 revision.h             |  3 +++
 upload-pack.c          |  7 ++++++
 7 files changed, 101 insertions(+), 49 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 5f9ec3566f..63f3aed70a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1311,11 +1311,14 @@ static int want_object_in_pack_one(struct packed_git *p,
 static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
-			       off_t *found_offset)
+			       off_t *found_offset,
+			       struct object *referred_commit)
 {
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *ex;
+	struct configured_exclusion *referred_ex;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1351,9 +1354,13 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		struct configured_exclusion *ex =
-			oidmap_get(&configured_exclusions, oid);
+		if (referred_commit) {
+			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
+			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
+				return 0;
+		}
 
+		ex = oidmap_get(&configured_exclusions, oid);
 		if (ex && match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
@@ -1393,7 +1400,8 @@ static const char no_closure_warning[] = N_(
 );
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
-			    const char *name, int exclude)
+			    const char *name, int exclude,
+			    struct object *referred_commit)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1403,7 +1411,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -1429,7 +1437,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
 	if (have_duplicate_entry(oid, 0))
 		return 0;
 
-	if (!want_object_in_pack(oid, 0, &pack, &offset))
+	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
 		return 0;
 
 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
@@ -1569,7 +1577,7 @@ static void add_pbase_object(struct tree_desc *tree,
 		if (name[cmplen] != '/') {
 			add_object_entry(&entry.oid,
 					 object_type(entry.mode),
-					 fullname, 1);
+					 fullname, 1, NULL);
 			return;
 		}
 		if (S_ISDIR(entry.mode)) {
@@ -1637,7 +1645,7 @@ static void add_preferred_base_object(const char *name)
 	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
+			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
 		}
 		else {
 			struct tree_desc tree;
@@ -2839,7 +2847,7 @@ static void add_tag_chain(const struct object_id *oid)
 			die(_("unable to pack objects reachable from tag %s"),
 			    oid_to_hex(oid));
 
-		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
+		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
 
 		if (tag->tagged->type != OBJ_TAG)
 			return;
@@ -2994,7 +3002,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			pack_idx_opts.flags &= ~WRITE_REV;
 		return 0;
 	}
-	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+	if (!strcmp(k, "uploadpack.excludeobject") || !strcmp(k, "uploadpack.blobpackfileuri")) {
 		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
 		const char *oid_end, *pack_end;
 		/*
@@ -3007,11 +3015,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		    *oid_end != ' ' ||
 		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
+			die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
 			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
-			die(_("object already configured in another "
-			      "uploadpack.blobpackfileuri (got '%s')"), v);
+			die(_("object already configured by an earlier "
+			      "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
 		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
 		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
@@ -3040,7 +3048,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	ofs = nth_packed_object_offset(p, pos);
-	if (!want_object_in_pack(oid, 0, &p, &ofs))
+	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
 		return 0;
 
 	oi.typep = &type;
@@ -3233,7 +3241,7 @@ static void read_object_list_from_stdin(void)
 			die(_("expected object ID, got garbage:\n %s"), line);
 
 		add_preferred_base_object(p + 1);
-		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
+		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
 	}
 }
 
@@ -3242,7 +3250,7 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, void *data)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
@@ -3254,8 +3262,9 @@ static void show_commit(struct commit *commit, void *data)
 
 static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
+	struct object *referred_commit = carry_data;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
@@ -3406,7 +3415,7 @@ static void add_objects_in_unpacked_packs(void)
 		QSORT(in_pack.array, in_pack.nr, ofscmp);
 		for (i = 0; i < in_pack.nr; i++) {
 			struct object *o = in_pack.array[i].object;
-			add_object_entry(&o->oid, o->type, "", 0);
+			add_object_entry(&o->oid, o->type, "", 0, NULL);
 		}
 	}
 	free(in_pack.array);
@@ -3422,7 +3431,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
 		return 0;
 	}
 
-	add_object_entry(oid, type, "", 0);
+	add_object_entry(oid, type, "", 0, NULL);
 	return 0;
 }
 
@@ -3841,7 +3850,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("respect islands during delta compression")),
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
-				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+				N_("exclude any configured uploadpack.excludeobject or "
+				   "uploadpack.blobpackfileuri with this protocol")),
 		OPT_END(),
 	};
 
diff --git a/list-objects.c b/list-objects.c
index 427228a3ba..968d842ceb 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -24,7 +24,8 @@ struct traversal_context {
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -60,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -97,11 +98,13 @@ static void process_gitlink(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name);
+			 const char *name,
+			 struct object *referred_commit);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
-				  struct strbuf *base)
+				  struct strbuf *base,
+				  struct object *referred_commit)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -129,7 +132,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path);
+			process_tree(ctx, t, base, entry.path, referred_commit);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -142,7 +145,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path);
+			process_blob(ctx, b, base, entry.path, referred_commit);
 		}
 	}
 }
@@ -150,7 +153,8 @@ static void process_tree_contents(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -191,14 +195,14 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base);
+		process_tree_contents(ctx, tree, base, referred_commit);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -207,7 +211,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -314,9 +318,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
 	}
 }
 
-static void add_pending_tree(struct rev_info *revs, struct tree *tree)
+static void add_pending_tree(struct rev_info *revs, struct tree *tree, struct object *referred_commit)
 {
-	add_pending_object(revs, &tree->object, "");
+	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
 }
 
 static void traverse_trees_and_blobs(struct traversal_context *ctx,
@@ -329,23 +333,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
+		struct object *referred_commit = pending->referred_commit;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data, NULL);
+			ctx->show_object(obj, name, ctx->show_data, referred_commit);
 			continue;
 		}
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
@@ -370,7 +375,7 @@ static void do_traverse(struct traversal_context *ctx)
 		else if (get_commit_tree(commit)) {
 			struct tree *tree = get_commit_tree(commit);
 			tree->object.flags |= NOT_USER_GIVEN;
-			add_pending_tree(ctx->revs, tree);
+			add_pending_tree(ctx->revs, tree, &commit->object);
 		} else if (commit->object.parsed) {
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
diff --git a/object.c b/object.c
index 14188453c5..6b1ce2fcde 100644
--- a/object.c
+++ b/object.c
@@ -322,9 +322,10 @@ void object_list_free(struct object_list **list)
  */
 static char object_array_slopbuf[1];
 
-void add_object_array_with_path(struct object *obj, const char *name,
-				struct object_array *array,
-				unsigned mode, const char *path)
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
+						    struct object_array *array,
+						    unsigned mode, const char *path,
+						    struct object *referred_commit)
 {
 	unsigned nr = array->nr;
 	unsigned alloc = array->alloc;
@@ -339,6 +340,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	}
 	entry = &objects[nr];
 	entry->item = obj;
+	entry->referred_commit = referred_commit;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -354,6 +356,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	array->nr = ++nr;
 }
 
+void add_object_array_with_path(struct object *obj, const char *name,
+				struct object_array *array,
+				unsigned mode, const char *path)
+{
+	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
+}
+
 void add_object_array(struct object *obj, const char *name, struct object_array *array)
 {
 	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
diff --git a/object.h b/object.h
index 87a6da47c8..d63819ab91 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ struct object_array {
 		char *name;
 		char *path;
 		unsigned mode;
+		struct object *referred_commit;
 	} *objects;
 };
 
@@ -157,7 +158,8 @@ void object_list_free(struct object_list **list);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
-
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
+						    unsigned mode, const char *path, struct object *referred_commit);
 /*
  * Returns NULL if the array is empty. Otherwise, returns the last object
  * after removing its entry from the array. Other resources associated
diff --git a/revision.c b/revision.c
index 4853c85d0b..65e0926d25 100644
--- a/revision.c
+++ b/revision.c
@@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
 	commit_stack_clear(&pending);
 }
 
-static void add_pending_object_with_path(struct rev_info *revs,
-					 struct object *obj,
-					 const char *name, unsigned mode,
-					 const char *path)
+static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
+							     struct object *obj,
+							     const char *name, unsigned mode,
+							     const char *path,
+							     struct object *referred_commit)
 {
 	struct interpret_branch_name_options options = { 0 };
 	if (!obj)
@@ -326,20 +327,35 @@ static void add_pending_object_with_path(struct rev_info *revs,
 		strbuf_release(&buf);
 		return; /* do not add the commit itself */
 	}
-	add_object_array_with_path(obj, name, &revs->pending, mode, path);
+	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
 }
 
+static void add_pending_object_with_path(struct rev_info *revs,
+					 struct object *obj,
+					 const char *name, unsigned mode,
+					 const char *path)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
+}
 static void add_pending_object_with_mode(struct rev_info *revs,
 					 struct object *obj,
-					 const char *name, unsigned mode)
+					 const char *name, unsigned mode,
+					 struct object *referred_commit)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
+}
+
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit)
 {
-	add_pending_object_with_path(revs, obj, name, mode, NULL);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
 }
 
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name)
 {
-	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
 }
 
 void add_head_to_pending(struct rev_info *revs)
@@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
 			diagnose_missing_default(revs->def);
 		object = get_reference(revs, revs->def, &oid, 0);
-		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
+		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
 	}
 
 	/* Did the user ask for any diff output? Run the diff! */
diff --git a/revision.h b/revision.h
index a24f72dcd1..f9c9628ed8 100644
--- a/revision.h
+++ b/revision.h
@@ -423,6 +423,9 @@ void show_object_with_name(FILE *, struct object *, const char *);
  */
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name);
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit);
 
 void add_pending_oid(struct rev_info *revs,
 		     const char *name, const struct object_id *oid,
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..d26fb351a3 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
 		}
+
+		if (!repo_config_get_string(the_repository,
+					    "uploadpack.excludeobject",
+					    &str) && str) {
+			strbuf_addstr(value, " packfile-uris");
+			free(str);
+		}
 	}
 
 	return 1;
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 4/7] packfile-uri: support for excluding tree objects
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
                         ` (2 preceding siblings ...)
  2021-08-11  7:45       ` [PATCH v4 3/7] packfile-uri: support for excluding commit objects Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees Teng Long
                         ` (3 subsequent siblings)
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

This commit supports the use of `uploadpack.excludeobject` to exclude
tree objects, which means that when a type object is configured as
packfile-uri, the tree object itself and all objects contains will be
recursively excluded.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 30 ++++++++++++++++++++----------
 list-objects.c         | 32 ++++++++++++++++++--------------
 object.c               |  6 +++++-
 object.h               | 13 ++++++++++++-
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 63f3aed70a..4ff12ec525 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1312,13 +1312,14 @@ static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
 			       off_t *found_offset,
-			       struct object *referred_commit)
+			       struct referred_objects *referred_objs)
 {
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *commit_ex;
+	struct configured_exclusion *tree_ex;
 	struct configured_exclusion *ex;
-	struct configured_exclusion *referred_ex;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1354,14 +1355,23 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		if (referred_commit) {
-			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
-			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
+		if (referred_objs && referred_objs->commit) {
+			commit_ex = oidmap_get(&configured_exclusions, &referred_objs->commit->oid);
+			if (match_packfile_uri_exclusions(commit_ex))
 				return 0;
 		}
 
+		if (referred_objs && referred_objs->trees) {
+			struct object_list *p;
+			for (p = referred_objs->trees; p; p = p->next) {
+				tree_ex = oidmap_get(&configured_exclusions, &p->item->oid);
+				if (match_packfile_uri_exclusions(tree_ex))
+					return 0;
+			}
+		}
+
 		ex = oidmap_get(&configured_exclusions, oid);
-		if (ex && match_packfile_uri_exclusions(ex)) {
+		if (match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
 		}
@@ -1401,7 +1411,7 @@ static const char no_closure_warning[] = N_(
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
 			    const char *name, int exclude,
-			    struct object *referred_commit)
+			    struct referred_objects *referred_objs)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1411,7 +1421,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_objs)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -3262,9 +3272,9 @@ static void show_commit(struct commit *commit, void *data)
 
 static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
-	struct object *referred_commit = carry_data;
+	struct referred_objects *referred_objs = carry_data;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_objs);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
diff --git a/list-objects.c b/list-objects.c
index 968d842ceb..49f177cb56 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -25,7 +25,7 @@ static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
 			 const char *name,
-			 struct object *referred_commit)
+			 struct referred_objects *referred_objs)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -61,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, path->buf, ctx->show_data, referred_objs);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -99,19 +99,22 @@ static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name,
-			 struct object *referred_commit);
+			 struct referred_objects *referred_objs);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
 				  struct strbuf *base,
-				  struct object *referred_commit)
+				  struct referred_objects *referred_objs)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
 	enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ?
 		all_entries_interesting : entry_not_interesting;
+	struct referred_objects *referred_buf;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
+	referred_buf = xmemdupz(referred_objs, sizeof(struct referred_objects));
+	object_list_insert(&tree->object, &referred_buf->trees);
 
 	while (tree_entry(&desc, &entry)) {
 		if (match != all_entries_interesting) {
@@ -132,7 +135,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path, referred_commit);
+			process_tree(ctx, t, base, entry.path, referred_buf);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -145,16 +148,17 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path, referred_commit);
+			process_blob(ctx, b, base, entry.path, referred_buf);
 		}
 	}
+	free(referred_buf);
 }
 
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name,
-			 struct object *referred_commit)
+			 struct referred_objects *referred_objs)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -195,14 +199,14 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_objs);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base, referred_commit);
+		process_tree_contents(ctx, tree, base, referred_objs);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -211,7 +215,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_objs);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -333,24 +337,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
-		struct object *referred_commit = pending->referred_commit;
+		struct referred_objects *referred_objs = pending->referred_objects;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data, referred_commit);
+			ctx->show_object(obj, name, ctx->show_data, referred_objs);
 			continue;
 		}
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_objs);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_objs);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
diff --git a/object.c b/object.c
index 6b1ce2fcde..69ba0baf95 100644
--- a/object.c
+++ b/object.c
@@ -331,6 +331,7 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 	unsigned alloc = array->alloc;
 	struct object_array_entry *objects = array->objects;
 	struct object_array_entry *entry;
+	struct referred_objects *referred_objs = xmalloc(sizeof(struct referred_objects));
 
 	if (nr >= alloc) {
 		alloc = (alloc + 32) * 2;
@@ -338,9 +339,11 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 		array->alloc = alloc;
 		array->objects = objects;
 	}
+	referred_objs->commit = referred_commit;
+	referred_objs->trees = NULL;
 	entry = &objects[nr];
 	entry->item = obj;
-	entry->referred_commit = referred_commit;
+	entry->referred_objects = referred_objs;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -377,6 +380,7 @@ static void object_array_release_entry(struct object_array_entry *ent)
 	if (ent->name != object_array_slopbuf)
 		free(ent->name);
 	free(ent->path);
+	free(ent->referred_objects);
 }
 
 struct object *object_array_pop(struct object_array *array)
diff --git a/object.h b/object.h
index d63819ab91..3785546adf 100644
--- a/object.h
+++ b/object.h
@@ -52,12 +52,23 @@ struct object_array {
 		char *name;
 		char *path;
 		unsigned mode;
-		struct object *referred_commit;
+		 /*
+		 * referred_objects or NULL.  If non-NULL, it will
+		 * temporary storage the referred commit and trees when
+		 * traversing the specified object. Space for time,
+		 * reduce related computing costs (such as packfile-uri
+		 * exclusion), clean up when the traversal is over.
+		 */
+		struct referred_objects *referred_objects;
 	} *objects;
 };
 
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
+struct referred_objects{
+    struct object *commit;
+    struct object_list *trees;
+};
 /*
  * object flag allocation:
  * revision.h:               0---------10         15             23------26
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
                         ` (3 preceding siblings ...)
  2021-08-11  7:45       ` [PATCH v4 4/7] packfile-uri: support for excluding tree objects Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  9:59         ` Bagas Sanjaya
  2021-08-11  7:45       ` [PATCH v4 6/7] t5702: replace with "test_when_finished" for cleanup Teng Long
                         ` (2 subsequent siblings)
  7 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/technical/packfile-uri.txt | 32 ++++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..c3e4873956 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,26 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
 This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more entries with the format:
+
+    uploadpack.excludeobject=<object-hash> <pack-hash> <uri>
+
+Value <object-hash> is the key of entry, and the object type can be a blob,
+tree, or commit. The exclusion of tree and commit is recursive by default,
+which means that when a tree or commit object is excluded, the object itself
+and all reachable objects of the object will be excluded recursively. Whenever
+the list of objects to be sent is assembled, all such objects are excluded,
+replaced with URIs.
+
+Configuration compatibility
+-------------
+
+The old configuration of packfile-uri:
+
+	`uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>`
+
+For the old configuration is compatible with the new one, but it only
+supports the exclusion of blob objects.
 
 Client design
 -------------
@@ -65,9 +78,6 @@ The protocol design allows some evolution of the server and client without any
 need for protocol changes, so only a small-scoped design is included here to
 form the MVP. For example, the following can be done:
 
- * On the server, more sophisticated means of excluding objects (e.g. by
-   specifying a commit to represent that commit and all objects that it
-   references).
  * On the client, resumption of clone. If a clone is interrupted, information
    could be recorded in the repository's config and a "clone-resume" command
    can resume the clone in progress. (Resumption of subsequent fetches is more
@@ -78,4 +88,4 @@ There are some possible features that will require a change in protocol:
 
  * Additional HTTP headers (e.g. authentication)
  * Byte range support
- * Different file formats referenced by URIs (e.g. raw object)
+ * Different file formats referenced by URIs (e.g. raw object)
\ No newline at end of file
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 6/7] t5702: replace with "test_when_finished" for cleanup
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
                         ` (4 preceding siblings ...)
  2021-08-11  7:45       ` [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-11  7:45       ` [PATCH v4 7/7] t5702: support for excluding commit objects Teng Long
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..e6314b53b0 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -753,7 +753,7 @@ test_expect_success 'ls-remote with v2 http sends only one POST' '
 '
 
 test_expect_success 'push with http:// and a config of v2 does not request v2' '
-	test_when_finished "rm -f log" &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 	# Till v2 for push is designed, make sure that if a client has
 	# protocol.version configured to use v2, that the client instead falls
 	# back and uses v0.
@@ -776,7 +776,7 @@ test_expect_success 'push with http:// and a config of v2 does not request v2' '
 '
 
 test_expect_success 'when server sends "ready", expect DELIM' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -796,7 +796,7 @@ test_expect_success 'when server sends "ready", expect DELIM' '
 '
 
 test_expect_success 'when server does not send "ready", expect FLUSH' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -834,7 +834,7 @@ configure_exclusion () {
 
 test_expect_success 'part of packfile response provided as URI' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -883,7 +883,7 @@ test_expect_success 'part of packfile response provided as URI' '
 
 test_expect_success 'packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -904,7 +904,7 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -935,7 +935,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -959,7 +959,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -989,7 +989,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child &&
+	test_when_finished "rm -rf \"$P\" http_child" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1015,7 +1015,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child err &&
+	test_when_finished "rm -rf \"$P\" http_child err" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1038,4 +1038,4 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
 
-test_done
+test_done
\ No newline at end of file
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v4 7/7] t5702: support for excluding commit objects
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
                         ` (5 preceding siblings ...)
  2021-08-11  7:45       ` [PATCH v4 6/7] t5702: replace with "test_when_finished" for cleanup Teng Long
@ 2021-08-11  7:45       ` Teng Long
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
  7 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-11  7:45 UTC (permalink / raw)
  To: dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 292 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 266 insertions(+), 26 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index e6314b53b0..5ad52e0cee 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -824,17 +824,47 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+	objt="$1"
+	P="$2"
+	oid="$3"
+	version="$4"
+
+	oldc="uploadpack.blobpackfileuri"
+	newc="uploadpack.excludeobject"
+	configkey=""
+
+	if test "$version" = "old"
+	then
+		configkey="$oldc"
+	else
+		configkey="$newc"
+	fi
+
+	if test "$objt" = "blob"
+	then
+		git -C "$P" hash-object "$oid" >objh &&
+		git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+		git -C "$P" config --add \
+			"$configkey" \
+			"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		cat objh
+	elif test "$objt" = "commit" || test "$objt" = "tree" || test "$objt" = "tag"
+	then
+		echo "$oid" >objh
+		git -C "$P" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+		git -C "$P" config --add \
+        			"$configkey" \
+        			"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		cat objh
+	else
+		echo "unsupported object type in configure_exclusion (got $objt)"
+	fi
 }
 
-test_expect_success 'part of packfile response provided as URI' '
+part_of_packfile_response_verify() {
+	config="$1"
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	test_when_finished "rm -rf \"$P\" http_child log" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -843,10 +873,10 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob config >h &&
+	configure_exclusion blob "$P" other-blob config >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -879,9 +909,11 @@ test_expect_success 'part of packfile response provided as URI' '
 	ls http_child/.git/objects/pack/*.pack \
 	    http_child/.git/objects/pack/*.idx >filelist &&
 	test_line_count = 6 filelist
-'
+}
+
+blobpackfileuri_fetch () {
+	config="$1"
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_when_finished "rm -rf \"$P\" http_child log" &&
 
@@ -890,9 +922,9 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob $config >h &&
 
 	git init http_child &&
 
@@ -900,6 +932,215 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 	git -C http_child -c protocol.version=2 \
 		-c fetch.uriprotocols=http,https \
 		fetch "$HTTPD_URL/smart/http_parent"
+}
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): part of packfile response provided as URI' '
+	part_of_packfile_response_verify old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): part of packfile response provided as URI' '
+	part_of_packfile_response_verify new
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch new
+'
+
+test_expect_success 'tree-exclusion: part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true"  &&
+
+	# Dir struct
+	# 	.
+	#     |-- A.t
+	#     |-- my-tree
+	#     |   `-- my-blob
+	#     `-- other-tree
+	#         |-- other-blob
+	#         `-- sub-tree
+	#             `-- sub-blob
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/other-tree &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	mkdir "$P"/other-tree/sub-tree &&
+	echo sub-blob >"$P"/other-tree/sub-tree/sub-blob &&
+	git -C "$P" add other-tree &&
+ 	test_commit -C "$P" A &&
+
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	othertreeh=$(git -C "$P" ls-tree HEAD other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD other-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+	subblobh=$(git -C "$P" hash-object other-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion tree "$P" "$mytreeh" config >h &&
+	configure_exclusion tree "$P" "$othertreeh" config >h2 &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 3 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi
+		elif test_line_count = 2 out.objectlist
+		then
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		elif test_line_count = 4 out.objectlist
+		then
+			if grep $othertreeh out
+			then
+				>othertreehfound
+			fi &&
+			if grep $otherblobh out
+			then
+				>otherblobhfound
+			fi
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f othertreehfound &&
+	test -f otherblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 6 filelist
+'
+
+test_expect_success 'commit-exclusion: part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/my-tree/sub-tree &&
+	echo sub-blob >"$P"/my-tree/sub-tree/sub-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD my-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	subblobh=$(git -C "$P" hash-object my-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion commit "$P" "$commith" >h &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 7 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi &&
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+
+	# Ensure that there are exactly 2 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
 '
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
@@ -913,9 +1154,9 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -923,7 +1164,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" hash-object other-blob >objh &&
 	git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
 	git -C "$P" config --add \
-		"uploadpack.blobpackfileuri" \
+		"uploadpack.excludeobject" \
 		"$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
 
 	test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \
@@ -942,9 +1183,8 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -976,9 +1216,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1000,7 +1240,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1024,9 +1264,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	echo "path = include/foo" >>"$P/.gitmodules" &&
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
-- 
2.31.1.449.gb2aa5456a8.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees
  2021-08-11  7:45       ` [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees Teng Long
@ 2021-08-11  9:59         ` Bagas Sanjaya
  0 siblings, 0 replies; 72+ messages in thread
From: Bagas Sanjaya @ 2021-08-11  9:59 UTC (permalink / raw)
  To: Teng Long; +Cc: avarab, git, jonathantanmy

On 11/08/21 14.45, Teng Long wrote:
> +The old configuration of packfile-uri:
> +
> +	`uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>`
> +
> +For the old configuration is compatible with the new one, but it only
> +supports the exclusion of blob objects.

I think better say "The old configuration of packfile-uri ... is 
compatible with the new one, but it only supports exclusion of blobs".

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion
  2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
                         ` (6 preceding siblings ...)
  2021-08-11  7:45       ` [PATCH v4 7/7] t5702: support for excluding commit objects Teng Long
@ 2021-08-25  2:21       ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 01/14] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
                           ` (14 more replies)
  7 siblings, 15 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Changes since v5:

* Support tag objects exclusion (configurable but will repeat download object
data in previous patch)
* Tests and docs modification for tag exclusion feature
* Abstraction of some reusable methods

Teng Long (14):
  pack-objects.c: introduce new method `match_packfile_uri_exclusions`
  Add new parameter "carry_data" for "show_object" function
  packfile-uri: support for excluding commit objects
  packfile-uri: support for excluding tree objects
  packfile-uri.txt: support for excluding commits and trees
  t5702: replace with "test_when_finished" for cleanup
  t5702: support for excluding commit objects
  Add new parameter "carry_data" for "show_commit function
  commit.h: add wrapped tags in commit struct
  object.h: add referred tags in `referred_objects` struct
  packfile-uri: support for excluding tag objects
  packfile-uri.txt: support for excluding tag objects
  t5702: add tag exclusion test case
  pack-objects.c: introduce `want_exclude_object` function

 Documentation/technical/packfile-uri.txt |  38 ++-
 builtin/describe.c                       |   8 +-
 builtin/pack-objects.c                   | 128 ++++++---
 builtin/rev-list.c                       |   6 +-
 bundle.c                                 |   4 +-
 commit.h                                 |   5 +
 list-objects.c                           |  50 ++--
 list-objects.h                           |   4 +-
 object.c                                 |  20 +-
 object.h                                 |  16 +-
 pack-bitmap.c                            |  14 +-
 reachable.c                              |   8 +-
 revision.c                               |  42 ++-
 revision.h                               |   3 +
 shallow.c                                |   4 +-
 t/t5702-protocol-v2.sh                   | 330 ++++++++++++++++++++---
 upload-pack.c                            |   7 +
 17 files changed, 542 insertions(+), 145 deletions(-)

Range-diff against v4:
 -:  ---------- >  1:  73a5b4ccc1 pack-objects.c: introduce new method `match_packfile_uri_exclusions`
 -:  ---------- >  2:  bc8fea97e3 Add new parameter "carry_data" for "show_object" function
 -:  ---------- >  3:  f71b310842 packfile-uri: support for excluding commit objects
 -:  ---------- >  4:  bbb0413cc4 packfile-uri: support for excluding tree objects
 -:  ---------- >  5:  8e5bf4010c packfile-uri.txt: support for excluding commits and trees
 -:  ---------- >  6:  f3b1cba7e1 t5702: replace with "test_when_finished" for cleanup
 -:  ---------- >  7:  3b5f9732b8 t5702: support for excluding commit objects
 1:  19f7670384 =  8:  19f7670384 Add new parameter "carry_data" for "show_commit function
 2:  011e5eaea3 =  9:  011e5eaea3 commit.h: add wrapped tags in commit struct
 3:  824844499f = 10:  824844499f object.h: add referred tags in `referred_objects` struct
 4:  43aa811b65 = 11:  43aa811b65 packfile-uri: support for excluding tag objects
 5:  c83db0055c = 12:  c83db0055c packfile-uri.txt: support for excluding tag objects
 6:  29a52b7a0d = 13:  29a52b7a0d t5702: add tag exclusion test case
 7:  1a7c4c5894 = 14:  1a7c4c5894 pack-objects.c: introduce `want_exclude_object` function
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 01/14] pack-objects.c: introduce new method `match_packfile_uri_exclusions`
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function Teng Long
                           ` (13 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..31556e7396 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1188,6 +1188,24 @@ static int have_duplicate_entry(const struct object_id *oid,
 	return 1;
 }
 
+static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
+{
+	int i;
+	const char *p;
+
+	if (ex) {
+		for (i = 0; i < uri_protocols.nr; i++) {
+			if (skip_prefix(ex->uri,
+					uri_protocols.items[i].string,
+					&p) &&
+			    *p == ':')
+				return 1;
+
+		}
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1335,19 +1353,10 @@ static int want_object_in_pack(const struct object_id *oid,
 	if (uri_protocols.nr) {
 		struct configured_exclusion *ex =
 			oidmap_get(&configured_exclusions, oid);
-		int i;
-		const char *p;
 
-		if (ex) {
-			for (i = 0; i < uri_protocols.nr; i++) {
-				if (skip_prefix(ex->uri,
-						uri_protocols.items[i].string,
-						&p) &&
-				    *p == ':') {
-					oidset_insert(&excluded_by_config, oid);
-					return 0;
-				}
-			}
+		if (ex && match_packfile_uri_exclusions(ex)) {
+			oidset_insert(&excluded_by_config, oid);
+			return 0;
 		}
 	}
 
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
  2021-08-25  2:21         ` [PATCH v5 01/14] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-26 20:45           ` Junio C Hamano
  2021-08-25  2:21         ` [PATCH v5 03/14] packfile-uri: support for excluding commit objects Teng Long
                           ` (12 subsequent siblings)
  14 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

During the pack-objects process, "show_object" function will be called
to find the object and show the process("show_object_fn" in
"list-object.h"), the function definition contains three parameters:

	1. struct object *obj(contains object type, flags, and oid).
	2. const char *name(the object name).
	3. void *show_data(function to show progress info).

This commit adds a new parameter: "void *carry_data", the reason is
mainly based on scalability and performance considerations when showing
an object, space for time, avoid costly temporary calculations in the
"show" phase. For example, carry the ownership relationship between
blob or tree object and the referred commit to avoid redundant and
expensive calculations.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/describe.c     |  4 ++--
 builtin/pack-objects.c | 15 ++++++++-------
 builtin/rev-list.c     |  2 +-
 list-objects.c         |  8 ++++----
 list-objects.h         |  2 +-
 pack-bitmap.c          |  8 ++++----
 reachable.c            |  8 ++++----
 7 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 40482d8e9f..045da79b5c 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -485,9 +485,9 @@ static void process_commit(struct commit *commit, void *data)
 	pcd->current_commit = commit->object.oid;
 }
 
-static void process_object(struct object *obj, const char *path, void *data)
+static void process_object(struct object *obj, const char *path, void *show_data, void *carry_data)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = show_data;
 
 	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
 		reset_revision_walk();
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 31556e7396..5f9ec3566f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3068,7 +3068,7 @@ static void show_commit_pack_hint(struct commit *commit, void *_data)
 }
 
 static void show_object_pack_hint(struct object *object, const char *name,
-				  void *_data)
+				  void *show_data, void *carry_dataa)
 {
 	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
 	if (!oe)
@@ -3252,7 +3252,7 @@ static void show_commit(struct commit *commit, void *data)
 		propagate_island_marks(commit);
 }
 
-static void show_object(struct object *obj, const char *name, void *data)
+static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	add_preferred_base_object(name);
 	add_object_entry(&obj->oid, obj->type, name, 0);
@@ -3274,7 +3274,7 @@ static void show_object(struct object *obj, const char *name, void *data)
 	}
 }
 
-static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_any(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_ANY);
 
@@ -3285,10 +3285,10 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
 	if (!has_object(the_repository, &obj->oid, 0))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
-static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
 
@@ -3299,7 +3299,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
 	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, show_data, carry_data);
 }
 
 static int option_parse_missing_action(const struct option *opt,
@@ -3547,7 +3547,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 
 static void record_recent_object(struct object *obj,
 				 const char *name,
-				 void *data)
+				 void *show_data,
+				 void *carry_data)
 {
 	oid_array_append(&recent_objects, &obj->oid);
 }
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index b4d8ea0a35..1cad33d9e8 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -266,7 +266,7 @@ static int finish_object(struct object *obj, const char *name, void *cb_data)
 	return 0;
 }
 
-static void show_object(struct object *obj, const char *name, void *cb_data)
+static void show_object(struct object *obj, const char *name, void *cb_data, void *carry_data)
 {
 	struct rev_list_info *info = cb_data;
 	struct rev_info *revs = info->revs;
diff --git a/list-objects.c b/list-objects.c
index e19589baa0..427228a3ba 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -60,7 +60,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data);
+		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -191,7 +191,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
 	if (base->len)
 		strbuf_addch(base, '/');
 
@@ -207,7 +207,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -335,7 +335,7 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data);
+			ctx->show_object(obj, name, ctx->show_data, NULL);
 			continue;
 		}
 		if (!path)
diff --git a/list-objects.h b/list-objects.h
index a952680e46..ab946d34db 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -6,7 +6,7 @@ struct object;
 struct rev_info;
 
 typedef void (*show_commit_fn)(struct commit *, void *);
-typedef void (*show_object_fn)(struct object *, const char *, void *);
+typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
 void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
 
 typedef void (*show_edge_fn)(struct commit *);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 3ed15431cd..516eb235da 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -459,9 +459,9 @@ struct bitmap_show_data {
 	struct bitmap *base;
 };
 
-static void show_object(struct object *object, const char *name, void *data_)
+static void show_object(struct object *object, const char *name, void *show_data, void *carry_data)
 {
-	struct bitmap_show_data *data = data_;
+	struct bitmap_show_data *data = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
@@ -1268,9 +1268,9 @@ struct bitmap_test_data {
 };
 
 static void test_show_object(struct object *object, const char *name,
-			     void *data)
+			     void *show_data, void *carry_data)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
diff --git a/reachable.c b/reachable.c
index 77a60c70a5..521b39edef 100644
--- a/reachable.c
+++ b/reachable.c
@@ -47,14 +47,14 @@ static int add_one_ref(const char *path, const struct object_id *oid,
  * The traversal will have already marked us as SEEN, so we
  * only need to handle any progress reporting here.
  */
-static void mark_object(struct object *obj, const char *name, void *data)
+static void mark_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
-	update_progress(data);
+	update_progress(show_data);
 }
 
-static void mark_commit(struct commit *c, void *data)
+static void mark_commit(struct commit *c, void *show_data)
 {
-	mark_object(&c->object, NULL, data);
+	mark_object(&c->object, NULL, show_data, NULL);
 }
 
 struct recent_data {
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 03/14] packfile-uri: support for excluding commit objects
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
  2021-08-25  2:21         ` [PATCH v5 01/14] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
  2021-08-25  2:21         ` [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25 23:49           ` Ævar Arnfjörð Bjarmason
  2021-08-26 20:56           ` Junio C Hamano
  2021-08-25  2:21         ` [PATCH v5 04/14] packfile-uri: support for excluding tree objects Teng Long
                           ` (11 subsequent siblings)
  14 siblings, 2 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Currently packfile-uri supports the exclusion of blob objects, but in
some scenarios, users may wish to exclude more types of objects, such as
commit and tree objects, not only because packfile itself supports
storing these object types, but also on the other hand, to make
configuration items maintainable and simpler.

This commit is used to support the recursive exclusion of a commit
object, which means that if the exclusion of a commit is configured as
packfile-uri, the commit itself and all the objects it contains will
also be recursively excluded. In addition, to support this feature, a
new configuration  `uploadpack.excludeobject` is introduced.

The reason for bringing a new configuration is for two considerations.
First, the old configuration supports a single object type (blob), which
limits the use of this feature. Secondly, the name of the old
configuration is not abstract enough, this make extension difficult. If
different object types use different configuration names, the
configuration items will be bloated and difficult to maintain, so the
new configuration is more abstract in name and easy to extend.

Although a new configuration has been introduced, the old one is
still available and compatible with the new configuration. The old
configuration `uploadpack.blobpackfileuri` only supports excluding
blobs. The new configuration `uploadpack.excludeobject` not only
supports excluding blob objects, but also supports excluding commit
objects, as well as recursively excluding tree objects and blob objects
they contain.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 50 +++++++++++++++++++++++++-----------------
 list-objects.c         | 37 +++++++++++++++++--------------
 object.c               | 15 ++++++++++---
 object.h               |  4 +++-
 revision.c             | 34 ++++++++++++++++++++--------
 revision.h             |  3 +++
 upload-pack.c          |  7 ++++++
 7 files changed, 101 insertions(+), 49 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 5f9ec3566f..63f3aed70a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1311,11 +1311,14 @@ static int want_object_in_pack_one(struct packed_git *p,
 static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
-			       off_t *found_offset)
+			       off_t *found_offset,
+			       struct object *referred_commit)
 {
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *ex;
+	struct configured_exclusion *referred_ex;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1351,9 +1354,13 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		struct configured_exclusion *ex =
-			oidmap_get(&configured_exclusions, oid);
+		if (referred_commit) {
+			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
+			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
+				return 0;
+		}
 
+		ex = oidmap_get(&configured_exclusions, oid);
 		if (ex && match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
@@ -1393,7 +1400,8 @@ static const char no_closure_warning[] = N_(
 );
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
-			    const char *name, int exclude)
+			    const char *name, int exclude,
+			    struct object *referred_commit)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1403,7 +1411,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -1429,7 +1437,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
 	if (have_duplicate_entry(oid, 0))
 		return 0;
 
-	if (!want_object_in_pack(oid, 0, &pack, &offset))
+	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
 		return 0;
 
 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
@@ -1569,7 +1577,7 @@ static void add_pbase_object(struct tree_desc *tree,
 		if (name[cmplen] != '/') {
 			add_object_entry(&entry.oid,
 					 object_type(entry.mode),
-					 fullname, 1);
+					 fullname, 1, NULL);
 			return;
 		}
 		if (S_ISDIR(entry.mode)) {
@@ -1637,7 +1645,7 @@ static void add_preferred_base_object(const char *name)
 	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
+			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
 		}
 		else {
 			struct tree_desc tree;
@@ -2839,7 +2847,7 @@ static void add_tag_chain(const struct object_id *oid)
 			die(_("unable to pack objects reachable from tag %s"),
 			    oid_to_hex(oid));
 
-		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
+		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
 
 		if (tag->tagged->type != OBJ_TAG)
 			return;
@@ -2994,7 +3002,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			pack_idx_opts.flags &= ~WRITE_REV;
 		return 0;
 	}
-	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+	if (!strcmp(k, "uploadpack.excludeobject") || !strcmp(k, "uploadpack.blobpackfileuri")) {
 		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
 		const char *oid_end, *pack_end;
 		/*
@@ -3007,11 +3015,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		    *oid_end != ' ' ||
 		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
+			die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
 			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
-			die(_("object already configured in another "
-			      "uploadpack.blobpackfileuri (got '%s')"), v);
+			die(_("object already configured by an earlier "
+			      "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
 		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
 		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
@@ -3040,7 +3048,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	ofs = nth_packed_object_offset(p, pos);
-	if (!want_object_in_pack(oid, 0, &p, &ofs))
+	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
 		return 0;
 
 	oi.typep = &type;
@@ -3233,7 +3241,7 @@ static void read_object_list_from_stdin(void)
 			die(_("expected object ID, got garbage:\n %s"), line);
 
 		add_preferred_base_object(p + 1);
-		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
+		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
 	}
 }
 
@@ -3242,7 +3250,7 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, void *data)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
@@ -3254,8 +3262,9 @@ static void show_commit(struct commit *commit, void *data)
 
 static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
+	struct object *referred_commit = carry_data;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
@@ -3406,7 +3415,7 @@ static void add_objects_in_unpacked_packs(void)
 		QSORT(in_pack.array, in_pack.nr, ofscmp);
 		for (i = 0; i < in_pack.nr; i++) {
 			struct object *o = in_pack.array[i].object;
-			add_object_entry(&o->oid, o->type, "", 0);
+			add_object_entry(&o->oid, o->type, "", 0, NULL);
 		}
 	}
 	free(in_pack.array);
@@ -3422,7 +3431,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
 		return 0;
 	}
 
-	add_object_entry(oid, type, "", 0);
+	add_object_entry(oid, type, "", 0, NULL);
 	return 0;
 }
 
@@ -3841,7 +3850,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("respect islands during delta compression")),
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
-				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+				N_("exclude any configured uploadpack.excludeobject or "
+				   "uploadpack.blobpackfileuri with this protocol")),
 		OPT_END(),
 	};
 
diff --git a/list-objects.c b/list-objects.c
index 427228a3ba..968d842ceb 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -24,7 +24,8 @@ struct traversal_context {
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -60,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -97,11 +98,13 @@ static void process_gitlink(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name);
+			 const char *name,
+			 struct object *referred_commit);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
-				  struct strbuf *base)
+				  struct strbuf *base,
+				  struct object *referred_commit)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -129,7 +132,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path);
+			process_tree(ctx, t, base, entry.path, referred_commit);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -142,7 +145,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path);
+			process_blob(ctx, b, base, entry.path, referred_commit);
 		}
 	}
 }
@@ -150,7 +153,8 @@ static void process_tree_contents(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name)
+			 const char *name,
+			 struct object *referred_commit)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -191,14 +195,14 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base);
+		process_tree_contents(ctx, tree, base, referred_commit);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -207,7 +211,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -314,9 +318,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
 	}
 }
 
-static void add_pending_tree(struct rev_info *revs, struct tree *tree)
+static void add_pending_tree(struct rev_info *revs, struct tree *tree, struct object *referred_commit)
 {
-	add_pending_object(revs, &tree->object, "");
+	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
 }
 
 static void traverse_trees_and_blobs(struct traversal_context *ctx,
@@ -329,23 +333,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
+		struct object *referred_commit = pending->referred_commit;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data, NULL);
+			ctx->show_object(obj, name, ctx->show_data, referred_commit);
 			continue;
 		}
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
@@ -370,7 +375,7 @@ static void do_traverse(struct traversal_context *ctx)
 		else if (get_commit_tree(commit)) {
 			struct tree *tree = get_commit_tree(commit);
 			tree->object.flags |= NOT_USER_GIVEN;
-			add_pending_tree(ctx->revs, tree);
+			add_pending_tree(ctx->revs, tree, &commit->object);
 		} else if (commit->object.parsed) {
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
diff --git a/object.c b/object.c
index 14188453c5..6b1ce2fcde 100644
--- a/object.c
+++ b/object.c
@@ -322,9 +322,10 @@ void object_list_free(struct object_list **list)
  */
 static char object_array_slopbuf[1];
 
-void add_object_array_with_path(struct object *obj, const char *name,
-				struct object_array *array,
-				unsigned mode, const char *path)
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
+						    struct object_array *array,
+						    unsigned mode, const char *path,
+						    struct object *referred_commit)
 {
 	unsigned nr = array->nr;
 	unsigned alloc = array->alloc;
@@ -339,6 +340,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	}
 	entry = &objects[nr];
 	entry->item = obj;
+	entry->referred_commit = referred_commit;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -354,6 +356,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	array->nr = ++nr;
 }
 
+void add_object_array_with_path(struct object *obj, const char *name,
+				struct object_array *array,
+				unsigned mode, const char *path)
+{
+	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
+}
+
 void add_object_array(struct object *obj, const char *name, struct object_array *array)
 {
 	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
diff --git a/object.h b/object.h
index 87a6da47c8..d63819ab91 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ struct object_array {
 		char *name;
 		char *path;
 		unsigned mode;
+		struct object *referred_commit;
 	} *objects;
 };
 
@@ -157,7 +158,8 @@ void object_list_free(struct object_list **list);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
-
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
+						    unsigned mode, const char *path, struct object *referred_commit);
 /*
  * Returns NULL if the array is empty. Otherwise, returns the last object
  * after removing its entry from the array. Other resources associated
diff --git a/revision.c b/revision.c
index 4853c85d0b..65e0926d25 100644
--- a/revision.c
+++ b/revision.c
@@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
 	commit_stack_clear(&pending);
 }
 
-static void add_pending_object_with_path(struct rev_info *revs,
-					 struct object *obj,
-					 const char *name, unsigned mode,
-					 const char *path)
+static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
+							     struct object *obj,
+							     const char *name, unsigned mode,
+							     const char *path,
+							     struct object *referred_commit)
 {
 	struct interpret_branch_name_options options = { 0 };
 	if (!obj)
@@ -326,20 +327,35 @@ static void add_pending_object_with_path(struct rev_info *revs,
 		strbuf_release(&buf);
 		return; /* do not add the commit itself */
 	}
-	add_object_array_with_path(obj, name, &revs->pending, mode, path);
+	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
 }
 
+static void add_pending_object_with_path(struct rev_info *revs,
+					 struct object *obj,
+					 const char *name, unsigned mode,
+					 const char *path)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
+}
 static void add_pending_object_with_mode(struct rev_info *revs,
 					 struct object *obj,
-					 const char *name, unsigned mode)
+					 const char *name, unsigned mode,
+					 struct object *referred_commit)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
+}
+
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit)
 {
-	add_pending_object_with_path(revs, obj, name, mode, NULL);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
 }
 
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name)
 {
-	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
 }
 
 void add_head_to_pending(struct rev_info *revs)
@@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
 			diagnose_missing_default(revs->def);
 		object = get_reference(revs, revs->def, &oid, 0);
-		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
+		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
 	}
 
 	/* Did the user ask for any diff output? Run the diff! */
diff --git a/revision.h b/revision.h
index a24f72dcd1..f9c9628ed8 100644
--- a/revision.h
+++ b/revision.h
@@ -423,6 +423,9 @@ void show_object_with_name(FILE *, struct object *, const char *);
  */
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name);
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct object *referred_commit);
 
 void add_pending_oid(struct rev_info *revs,
 		     const char *name, const struct object_id *oid,
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..d26fb351a3 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
 		}
+
+		if (!repo_config_get_string(the_repository,
+					    "uploadpack.excludeobject",
+					    &str) && str) {
+			strbuf_addstr(value, " packfile-uris");
+			free(str);
+		}
 	}
 
 	return 1;
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 04/14] packfile-uri: support for excluding tree objects
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (2 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 03/14] packfile-uri: support for excluding commit objects Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees Teng Long
                           ` (10 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

This commit supports the use of `uploadpack.excludeobject` to exclude
tree objects, which means that when a type object is configured as
packfile-uri, the tree object itself and all objects contains will be
recursively excluded.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 30 ++++++++++++++++++++----------
 list-objects.c         | 32 ++++++++++++++++++--------------
 object.c               |  6 +++++-
 object.h               | 13 ++++++++++++-
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 63f3aed70a..4ff12ec525 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1312,13 +1312,14 @@ static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
 			       off_t *found_offset,
-			       struct object *referred_commit)
+			       struct referred_objects *referred_objs)
 {
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *commit_ex;
+	struct configured_exclusion *tree_ex;
 	struct configured_exclusion *ex;
-	struct configured_exclusion *referred_ex;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1354,14 +1355,23 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		if (referred_commit) {
-			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
-			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
+		if (referred_objs && referred_objs->commit) {
+			commit_ex = oidmap_get(&configured_exclusions, &referred_objs->commit->oid);
+			if (match_packfile_uri_exclusions(commit_ex))
 				return 0;
 		}
 
+		if (referred_objs && referred_objs->trees) {
+			struct object_list *p;
+			for (p = referred_objs->trees; p; p = p->next) {
+				tree_ex = oidmap_get(&configured_exclusions, &p->item->oid);
+				if (match_packfile_uri_exclusions(tree_ex))
+					return 0;
+			}
+		}
+
 		ex = oidmap_get(&configured_exclusions, oid);
-		if (ex && match_packfile_uri_exclusions(ex)) {
+		if (match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
 		}
@@ -1401,7 +1411,7 @@ static const char no_closure_warning[] = N_(
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
 			    const char *name, int exclude,
-			    struct object *referred_commit)
+			    struct referred_objects *referred_objs)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1411,7 +1421,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_objs)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -3262,9 +3272,9 @@ static void show_commit(struct commit *commit, void *data)
 
 static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
 {
-	struct object *referred_commit = carry_data;
+	struct referred_objects *referred_objs = carry_data;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_objs);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
diff --git a/list-objects.c b/list-objects.c
index 968d842ceb..49f177cb56 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -25,7 +25,7 @@ static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
 			 const char *name,
-			 struct object *referred_commit)
+			 struct referred_objects *referred_objs)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -61,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, path->buf, ctx->show_data, referred_objs);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -99,19 +99,22 @@ static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name,
-			 struct object *referred_commit);
+			 struct referred_objects *referred_objs);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
 				  struct strbuf *base,
-				  struct object *referred_commit)
+				  struct referred_objects *referred_objs)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
 	enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ?
 		all_entries_interesting : entry_not_interesting;
+	struct referred_objects *referred_buf;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
+	referred_buf = xmemdupz(referred_objs, sizeof(struct referred_objects));
+	object_list_insert(&tree->object, &referred_buf->trees);
 
 	while (tree_entry(&desc, &entry)) {
 		if (match != all_entries_interesting) {
@@ -132,7 +135,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path, referred_commit);
+			process_tree(ctx, t, base, entry.path, referred_buf);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -145,16 +148,17 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path, referred_commit);
+			process_blob(ctx, b, base, entry.path, referred_buf);
 		}
 	}
+	free(referred_buf);
 }
 
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name,
-			 struct object *referred_commit)
+			 struct referred_objects *referred_objs)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -195,14 +199,14 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_objs);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base, referred_commit);
+		process_tree_contents(ctx, tree, base, referred_objs);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -211,7 +215,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
+		ctx->show_object(obj, base->buf, ctx->show_data, referred_objs);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
@@ -333,24 +337,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
-		struct object *referred_commit = pending->referred_commit;
+		struct referred_objects *referred_objs = pending->referred_objects;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data, referred_commit);
+			ctx->show_object(obj, name, ctx->show_data, referred_objs);
 			continue;
 		}
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_objs);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_objs);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
diff --git a/object.c b/object.c
index 6b1ce2fcde..69ba0baf95 100644
--- a/object.c
+++ b/object.c
@@ -331,6 +331,7 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 	unsigned alloc = array->alloc;
 	struct object_array_entry *objects = array->objects;
 	struct object_array_entry *entry;
+	struct referred_objects *referred_objs = xmalloc(sizeof(struct referred_objects));
 
 	if (nr >= alloc) {
 		alloc = (alloc + 32) * 2;
@@ -338,9 +339,11 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 		array->alloc = alloc;
 		array->objects = objects;
 	}
+	referred_objs->commit = referred_commit;
+	referred_objs->trees = NULL;
 	entry = &objects[nr];
 	entry->item = obj;
-	entry->referred_commit = referred_commit;
+	entry->referred_objects = referred_objs;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -377,6 +380,7 @@ static void object_array_release_entry(struct object_array_entry *ent)
 	if (ent->name != object_array_slopbuf)
 		free(ent->name);
 	free(ent->path);
+	free(ent->referred_objects);
 }
 
 struct object *object_array_pop(struct object_array *array)
diff --git a/object.h b/object.h
index d63819ab91..3785546adf 100644
--- a/object.h
+++ b/object.h
@@ -52,12 +52,23 @@ struct object_array {
 		char *name;
 		char *path;
 		unsigned mode;
-		struct object *referred_commit;
+		 /*
+		 * referred_objects or NULL.  If non-NULL, it will
+		 * temporary storage the referred commit and trees when
+		 * traversing the specified object. Space for time,
+		 * reduce related computing costs (such as packfile-uri
+		 * exclusion), clean up when the traversal is over.
+		 */
+		struct referred_objects *referred_objects;
 	} *objects;
 };
 
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
+struct referred_objects{
+    struct object *commit;
+    struct object_list *trees;
+};
 /*
  * object flag allocation:
  * revision.h:               0---------10         15             23------26
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (3 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 04/14] packfile-uri: support for excluding tree objects Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25 23:52           ` Ævar Arnfjörð Bjarmason
  2021-08-25  2:21         ` [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup Teng Long
                           ` (9 subsequent siblings)
  14 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/technical/packfile-uri.txt | 32 ++++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..c3e4873956 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,26 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
 This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more entries with the format:
+
+    uploadpack.excludeobject=<object-hash> <pack-hash> <uri>
+
+Value <object-hash> is the key of entry, and the object type can be a blob,
+tree, or commit. The exclusion of tree and commit is recursive by default,
+which means that when a tree or commit object is excluded, the object itself
+and all reachable objects of the object will be excluded recursively. Whenever
+the list of objects to be sent is assembled, all such objects are excluded,
+replaced with URIs.
+
+Configuration compatibility
+-------------
+
+The old configuration of packfile-uri:
+
+	`uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>`
+
+For the old configuration is compatible with the new one, but it only
+supports the exclusion of blob objects.
 
 Client design
 -------------
@@ -65,9 +78,6 @@ The protocol design allows some evolution of the server and client without any
 need for protocol changes, so only a small-scoped design is included here to
 form the MVP. For example, the following can be done:
 
- * On the server, more sophisticated means of excluding objects (e.g. by
-   specifying a commit to represent that commit and all objects that it
-   references).
  * On the client, resumption of clone. If a clone is interrupted, information
    could be recorded in the repository's config and a "clone-resume" command
    can resume the clone in progress. (Resumption of subsequent fetches is more
@@ -78,4 +88,4 @@ There are some possible features that will require a change in protocol:
 
  * Additional HTTP headers (e.g. authentication)
  * Byte range support
- * Different file formats referenced by URIs (e.g. raw object)
+ * Different file formats referenced by URIs (e.g. raw object)
\ No newline at end of file
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (4 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25 23:55           ` Ævar Arnfjörð Bjarmason
  2021-08-25  2:21         ` [PATCH v5 07/14] t5702: support for excluding commit objects Teng Long
                           ` (8 subsequent siblings)
  14 siblings, 1 reply; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..e6314b53b0 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -753,7 +753,7 @@ test_expect_success 'ls-remote with v2 http sends only one POST' '
 '
 
 test_expect_success 'push with http:// and a config of v2 does not request v2' '
-	test_when_finished "rm -f log" &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 	# Till v2 for push is designed, make sure that if a client has
 	# protocol.version configured to use v2, that the client instead falls
 	# back and uses v0.
@@ -776,7 +776,7 @@ test_expect_success 'push with http:// and a config of v2 does not request v2' '
 '
 
 test_expect_success 'when server sends "ready", expect DELIM' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -796,7 +796,7 @@ test_expect_success 'when server sends "ready", expect DELIM' '
 '
 
 test_expect_success 'when server does not send "ready", expect FLUSH' '
-	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
+	test_when_finished "rm -rf \"$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" http_child log" &&
 
 	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" one &&
@@ -834,7 +834,7 @@ configure_exclusion () {
 
 test_expect_success 'part of packfile response provided as URI' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -883,7 +883,7 @@ test_expect_success 'part of packfile response provided as URI' '
 
 test_expect_success 'packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -904,7 +904,7 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -935,7 +935,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -959,7 +959,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -989,7 +989,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child &&
+	test_when_finished "rm -rf \"$P\" http_child" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1015,7 +1015,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child err &&
+	test_when_finished "rm -rf \"$P\" http_child err" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -1038,4 +1038,4 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
 
-test_done
+test_done
\ No newline at end of file
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 07/14] t5702: support for excluding commit objects
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (5 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 08/14] Add new parameter "carry_data" for "show_commit function Teng Long
                           ` (7 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 292 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 266 insertions(+), 26 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index e6314b53b0..5ad52e0cee 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -824,17 +824,47 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+	objt="$1"
+	P="$2"
+	oid="$3"
+	version="$4"
+
+	oldc="uploadpack.blobpackfileuri"
+	newc="uploadpack.excludeobject"
+	configkey=""
+
+	if test "$version" = "old"
+	then
+		configkey="$oldc"
+	else
+		configkey="$newc"
+	fi
+
+	if test "$objt" = "blob"
+	then
+		git -C "$P" hash-object "$oid" >objh &&
+		git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+		git -C "$P" config --add \
+			"$configkey" \
+			"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		cat objh
+	elif test "$objt" = "commit" || test "$objt" = "tree" || test "$objt" = "tag"
+	then
+		echo "$oid" >objh
+		git -C "$P" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+		git -C "$P" config --add \
+        			"$configkey" \
+        			"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		cat objh
+	else
+		echo "unsupported object type in configure_exclusion (got $objt)"
+	fi
 }
 
-test_expect_success 'part of packfile response provided as URI' '
+part_of_packfile_response_verify() {
+	config="$1"
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	test_when_finished "rm -rf \"$P\" http_child log" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -843,10 +873,10 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob config >h &&
+	configure_exclusion blob "$P" other-blob config >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -879,9 +909,11 @@ test_expect_success 'part of packfile response provided as URI' '
 	ls http_child/.git/objects/pack/*.pack \
 	    http_child/.git/objects/pack/*.idx >filelist &&
 	test_line_count = 6 filelist
-'
+}
+
+blobpackfileuri_fetch () {
+	config="$1"
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_when_finished "rm -rf \"$P\" http_child log" &&
 
@@ -890,9 +922,9 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob $config >h &&
 
 	git init http_child &&
 
@@ -900,6 +932,215 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 	git -C http_child -c protocol.version=2 \
 		-c fetch.uriprotocols=http,https \
 		fetch "$HTTPD_URL/smart/http_parent"
+}
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): part of packfile response provided as URI' '
+	part_of_packfile_response_verify old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): part of packfile response provided as URI' '
+	part_of_packfile_response_verify new
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch new
+'
+
+test_expect_success 'tree-exclusion: part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true"  &&
+
+	# Dir struct
+	# 	.
+	#     |-- A.t
+	#     |-- my-tree
+	#     |   `-- my-blob
+	#     `-- other-tree
+	#         |-- other-blob
+	#         `-- sub-tree
+	#             `-- sub-blob
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/other-tree &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	mkdir "$P"/other-tree/sub-tree &&
+	echo sub-blob >"$P"/other-tree/sub-tree/sub-blob &&
+	git -C "$P" add other-tree &&
+ 	test_commit -C "$P" A &&
+
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	othertreeh=$(git -C "$P" ls-tree HEAD other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD other-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+	subblobh=$(git -C "$P" hash-object other-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion tree "$P" "$mytreeh" config >h &&
+	configure_exclusion tree "$P" "$othertreeh" config >h2 &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 3 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi
+		elif test_line_count = 2 out.objectlist
+		then
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		elif test_line_count = 4 out.objectlist
+		then
+			if grep $othertreeh out
+			then
+				>othertreehfound
+			fi &&
+			if grep $otherblobh out
+			then
+				>otherblobhfound
+			fi
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f othertreehfound &&
+	test -f otherblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 6 filelist
+'
+
+test_expect_success 'commit-exclusion: part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/my-tree/sub-tree &&
+	echo sub-blob >"$P"/my-tree/sub-tree/sub-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD my-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	subblobh=$(git -C "$P" hash-object my-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion commit "$P" "$commith" >h &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 7 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi &&
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+
+	# Ensure that there are exactly 2 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
 '
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
@@ -913,9 +1154,9 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -923,7 +1164,7 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" hash-object other-blob >objh &&
 	git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
 	git -C "$P" config --add \
-		"uploadpack.blobpackfileuri" \
+		"uploadpack.excludeobject" \
 		"$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
 
 	test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \
@@ -942,9 +1183,8 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -976,9 +1216,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1000,7 +1240,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
@@ -1024,9 +1264,9 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	echo "path = include/foo" >>"$P/.gitmodules" &&
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" .gitmodules >h &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 08/14] Add new parameter "carry_data" for "show_commit function
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (6 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 07/14] t5702: support for excluding commit objects Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 09/14] commit.h: add wrapped tags in commit struct Teng Long
                           ` (6 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/describe.c     | 4 ++--
 builtin/pack-objects.c | 6 +++---
 builtin/rev-list.c     | 4 ++--
 bundle.c               | 4 ++--
 list-objects.c         | 2 +-
 list-objects.h         | 2 +-
 pack-bitmap.c          | 6 +++---
 reachable.c            | 2 +-
 shallow.c              | 4 ++--
 9 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 045da79b5c..8fb99bbda5 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -479,9 +479,9 @@ struct process_commit_data {
 	struct rev_info *revs;
 };
 
-static void process_commit(struct commit *commit, void *data)
+static void process_commit(struct commit *commit, void *show_data, void *carry_data)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = show_data;
 	pcd->current_commit = commit->object.oid;
 }
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4ff12ec525..d38b24e375 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3080,7 +3080,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 	return 0;
 }
 
-static void show_commit_pack_hint(struct commit *commit, void *_data)
+static void show_commit_pack_hint(struct commit *commit, void *show_data, void *carry_data)
 {
 	/* nothing to do; commits don't have a namehash */
 }
@@ -3258,7 +3258,7 @@ static void read_object_list_from_stdin(void)
 /* Remember to update object flag allocation in object.h */
 #define OBJECT_ADDED (1u<<20)
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
 	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
 	commit->object.flags |= OBJECT_ADDED;
@@ -3572,7 +3572,7 @@ static void record_recent_object(struct object *obj,
 	oid_array_append(&recent_objects, &obj->oid);
 }
 
-static void record_recent_commit(struct commit *commit, void *data)
+static void record_recent_commit(struct commit *commit, void *show_data, void *carry_data)
 {
 	oid_array_append(&recent_objects, &commit->object.oid);
 }
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 1cad33d9e8..b5e7ba6e83 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -94,9 +94,9 @@ static off_t get_object_disk_usage(struct object *obj)
 }
 
 static void finish_commit(struct commit *commit);
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
-	struct rev_list_info *info = data;
+	struct rev_list_info *info = show_data;
 	struct rev_info *revs = info->revs;
 
 	display_progress(progress, ++progress_counter);
diff --git a/bundle.c b/bundle.c
index 693d619551..143e45ce0c 100644
--- a/bundle.c
+++ b/bundle.c
@@ -437,9 +437,9 @@ struct bundle_prerequisites_info {
 	int fd;
 };
 
-static void write_bundle_prerequisites(struct commit *commit, void *data)
+static void write_bundle_prerequisites(struct commit *commit, void *show_data, void *carry_data)
 {
-	struct bundle_prerequisites_info *bpi = data;
+	struct bundle_prerequisites_info *bpi = show_data;
 	struct object *object;
 	struct pretty_print_context ctx = { 0 };
 	struct strbuf buf = STRBUF_INIT;
diff --git a/list-objects.c b/list-objects.c
index 49f177cb56..2e53a01458 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -384,7 +384,7 @@ static void do_traverse(struct traversal_context *ctx)
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
 		}
-		ctx->show_commit(commit, ctx->show_data);
+		ctx->show_commit(commit, ctx->show_data, NULL);
 
 		if (ctx->revs->tree_blobs_in_commit_order)
 			/*
diff --git a/list-objects.h b/list-objects.h
index ab946d34db..838b8c78c9 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -5,7 +5,7 @@ struct commit;
 struct object;
 struct rev_info;
 
-typedef void (*show_commit_fn)(struct commit *, void *);
+typedef void (*show_commit_fn)(struct commit *, void *, void *);
 typedef void (*show_object_fn)(struct object *, const char *, void *, void *);
 void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
 
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 516eb235da..81cf14ef8e 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -473,7 +473,7 @@ static void show_object(struct object *object, const char *name, void *show_data
 	bitmap_set(data->base, bitmap_pos);
 }
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
 }
 
@@ -1281,9 +1281,9 @@ static void test_show_object(struct object *object, const char *name,
 	display_progress(tdata->prg, ++tdata->seen);
 }
 
-static void test_show_commit(struct commit *commit, void *data)
+static void test_show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git,
diff --git a/reachable.c b/reachable.c
index 521b39edef..d38c31ae45 100644
--- a/reachable.c
+++ b/reachable.c
@@ -52,7 +52,7 @@ static void mark_object(struct object *obj, const char *name, void *show_data, v
 	update_progress(show_data);
 }
 
-static void mark_commit(struct commit *c, void *show_data)
+static void mark_commit(struct commit *c, void *show_data, void *carry_data)
 {
 	mark_object(&c->object, NULL, show_data, NULL);
 }
diff --git a/shallow.c b/shallow.c
index 9ed18eb884..33f878565c 100644
--- a/shallow.c
+++ b/shallow.c
@@ -185,9 +185,9 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
 	return result;
 }
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
-	commit_list_insert(commit, data);
+	commit_list_insert(commit, show_data);
 }
 
 /*
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 09/14] commit.h: add wrapped tags in commit struct
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (7 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 08/14] Add new parameter "carry_data" for "show_commit function Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25 23:58           ` Ævar Arnfjörð Bjarmason
  2021-09-02 12:39           ` ZheNing Hu
  2021-08-25  2:21         ` [PATCH v5 10/14] object.h: add referred tags in `referred_objects` struct Teng Long
                           ` (5 subsequent siblings)
  14 siblings, 2 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 commit.h   | 5 +++++
 revision.c | 8 ++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/commit.h b/commit.h
index df42eb434f..1e99e9ae8a 100644
--- a/commit.h
+++ b/commit.h
@@ -38,6 +38,11 @@ struct commit {
 	 */
 	struct tree *maybe_tree;
 	unsigned int index;
+	/*
+	* wrapped tags or NULL.  If the commit is peeled from tag(s),
+	* then save the wraps, otherwise will be NULL.
+	*/
+	struct object_list *wraps;
 };
 
 extern int save_commit_buffer;
diff --git a/revision.c b/revision.c
index 65e0926d25..aecf493f46 100644
--- a/revision.c
+++ b/revision.c
@@ -416,14 +416,17 @@ static struct commit *handle_commit(struct rev_info *revs,
 	const char *path = entry->path;
 	unsigned int mode = entry->mode;
 	unsigned long flags = object->flags;
-
+	struct object_list *wraps = NULL;
 	/*
 	 * Tag object? Look what it points to..
 	 */
 	while (object->type == OBJ_TAG) {
 		struct tag *tag = (struct tag *) object;
-		if (revs->tag_objects && !(flags & UNINTERESTING))
+		if (revs->tag_objects && !(flags & UNINTERESTING)) {
+			object_list_insert(object, &wraps);
 			add_pending_object(revs, object, tag->tag);
+		}
+
 		object = parse_object(revs->repo, get_tagged_oid(tag));
 		if (!object) {
 			if (revs->ignore_missing_links || (flags & UNINTERESTING))
@@ -449,6 +452,7 @@ static struct commit *handle_commit(struct rev_info *revs,
 	 */
 	if (object->type == OBJ_COMMIT) {
 		struct commit *commit = (struct commit *)object;
+		commit->wraps = wraps;
 
 		if (repo_parse_commit(revs->repo, commit) < 0)
 			die("unable to parse commit %s", name);
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 10/14] object.h: add referred tags in `referred_objects` struct
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (8 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 09/14] commit.h: add wrapped tags in commit struct Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 11/14] packfile-uri: support for excluding tag objects Teng Long
                           ` (4 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 object.c | 1 +
 object.h | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/object.c b/object.c
index 69ba0baf95..b25b2e2919 100644
--- a/object.c
+++ b/object.c
@@ -341,6 +341,7 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 	}
 	referred_objs->commit = referred_commit;
 	referred_objs->trees = NULL;
+	referred_objs->tags = NULL;
 	entry = &objects[nr];
 	entry->item = obj;
 	entry->referred_objects = referred_objs;
diff --git a/object.h b/object.h
index 3785546adf..bd59eaa6d9 100644
--- a/object.h
+++ b/object.h
@@ -54,8 +54,8 @@ struct object_array {
 		unsigned mode;
 		 /*
 		 * referred_objects or NULL.  If non-NULL, it will
-		 * temporary storage the referred commit and trees when
-		 * traversing the specified object. Space for time,
+		 * temporary storage the referred commit, trees and tags
+		 * when traversing the specified object. Space for time,
 		 * reduce related computing costs (such as packfile-uri
 		 * exclusion), clean up when the traversal is over.
 		 */
@@ -68,6 +68,7 @@ struct object_array {
 struct referred_objects{
     struct object *commit;
     struct object_list *trees;
+    struct object_list *tags;
 };
 /*
  * object flag allocation:
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 11/14] packfile-uri: support for excluding tag objects
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (9 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 10/14] object.h: add referred tags in `referred_objects` struct Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 12/14] packfile-uri.txt: " Teng Long
                           ` (3 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

This commit supports the use of `uploadpack.excludeobject` to exclude
tag objects, both lightweight tag and annotated tag are supported:

-If a lightweight tag (such as a commit object) have been configured,
the mechanism of exclusion is the same as the commit object (the commit
object and all objects it contains will be recusively excluded).

-If an annotated tag (created with -a, -s, or -u) have been configured,
the annotated tag and all the objects that it contains will be excluded,

For an example of the annotated tag:

	Create an annotated tag from HEAD:

		git tag -a A -m "tag A description"

	Output the SHA (<tag_oid>) of tag "A" :

		git rev-parse A^{object}

	Dereference <tag_oid>, output the SHA <commit_oid> of commit:

		git rev-parse A^{}

In the above case, when the tag object (<tag_oid>) is configured with
`uploadpack.excludeobject` which means <tag_oid>, the dereference
commit object (<commit_oid>), and all the objects that <commit_oid>
recursively contains (trees, blobs), will be excluded (using a packfile
URI instead).

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 20 ++++++++++++++++++--
 list-objects.c         |  9 ++++++++-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d38b24e375..d5e3f2c229 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1317,9 +1317,11 @@ static int want_object_in_pack(const struct object_id *oid,
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *tag_ex;
 	struct configured_exclusion *commit_ex;
 	struct configured_exclusion *tree_ex;
 	struct configured_exclusion *ex;
+	struct object_list *p;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1355,14 +1357,27 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
+		if (referred_objs && referred_objs->tags) {
+			for (p = referred_objs->tags; p; p = p->next) {
+				tag_ex = oidmap_get(&configured_exclusions, &p->item->oid);
+				if (match_packfile_uri_exclusions(tag_ex))
+					return 0;
+			}
+		}
+
 		if (referred_objs && referred_objs->commit) {
 			commit_ex = oidmap_get(&configured_exclusions, &referred_objs->commit->oid);
 			if (match_packfile_uri_exclusions(commit_ex))
 				return 0;
+			struct commit *commit = (struct commit*) referred_objs->commit;
+			for (p = commit->wraps; p; p = p->next) {
+				tag_ex = oidmap_get(&configured_exclusions, &p->item->oid);
+				if (match_packfile_uri_exclusions(tag_ex))
+					return 0;
+			}
 		}
 
 		if (referred_objs && referred_objs->trees) {
-			struct object_list *p;
 			for (p = referred_objs->trees; p; p = p->next) {
 				tree_ex = oidmap_get(&configured_exclusions, &p->item->oid);
 				if (match_packfile_uri_exclusions(tree_ex))
@@ -3260,7 +3275,8 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, void *show_data, void *carry_data)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
+	struct referred_objects *referred_objs = carry_data;
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, referred_objs);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
diff --git a/list-objects.c b/list-objects.c
index 2e53a01458..52f38c9151 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -367,6 +367,7 @@ static void do_traverse(struct traversal_context *ctx)
 {
 	struct commit *commit;
 	struct strbuf csp; /* callee's scratch pad */
+	struct referred_objects *referred_objs;
 	strbuf_init(&csp, PATH_MAX);
 
 	while ((commit = get_revision(ctx->revs)) != NULL) {
@@ -384,7 +385,13 @@ static void do_traverse(struct traversal_context *ctx)
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
 		}
-		ctx->show_commit(commit, ctx->show_data, NULL);
+		referred_objs = xmalloc(sizeof(struct referred_objects));
+		referred_objs->commit = NULL;
+		referred_objs->trees = NULL;
+		referred_objs->tags = commit->wraps;
+
+		ctx->show_commit(commit, ctx->show_data, referred_objs);
+		free(referred_objs);
 
 		if (ctx->revs->tree_blobs_in_commit_order)
 			/*
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 12/14] packfile-uri.txt: support for excluding tag objects
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (10 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 11/14] packfile-uri: support for excluding tag objects Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 13/14] t5702: add tag exclusion test case Teng Long
                           ` (2 subsequent siblings)
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/technical/packfile-uri.txt | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index c3e4873956..bf5939cb8d 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -39,12 +39,18 @@ server to be configured by one or more entries with the format:
 
     uploadpack.excludeobject=<object-hash> <pack-hash> <uri>
 
-Value <object-hash> is the key of entry, and the object type can be a blob,
-tree, or commit. The exclusion of tree and commit is recursive by default,
-which means that when a tree or commit object is excluded, the object itself
-and all reachable objects of the object will be excluded recursively. Whenever
-the list of objects to be sent is assembled, all such objects are excluded,
-replaced with URIs.
+Value <object-hash> is the key of entry, and the object type can be  blob, tree,
+commit, or tag. When an object is configured with `uploadpack.excludeobject` which
+means that whenever the list of objects to be sent is assembled, the object (also
+include the related objects in some cases, the following will introduce) will be
+excluded, replaced with URIS. The mechanism for exclusion is as follows:
+
+	* blob: exclude blob object.
+	* tree: exclude tree object, blobs that the tree list, and recursive into sub-trees.
+	* commit: exclude commit object, and recursively exclude all the reachable trees
+	  (ditto tree exclusion) and blobs it contains.
+	* tag: exclude tag object itself, and the dereference commit (ditto commit exclusion)
+	  if the tag is annotated.
 
 Configuration compatibility
 -------------
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 13/14] t5702: add tag exclusion test case
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (11 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 12/14] packfile-uri.txt: " Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-08-25  2:21         ` [PATCH v5 14/14] pack-objects.c: introduce `want_exclude_object` function Teng Long
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 t/t5702-protocol-v2.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 5ad52e0cee..09683ebfdc 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -1142,6 +1142,24 @@ test_expect_success 'commit-exclusion: part of packfile response provided as URI
 		http_child/.git/objects/pack/*.idx >filelist &&
 	test_line_count = 4 filelist
 '
+test_expect_success 'tag-exclusion: part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+	git -C "$P" tag -a -m "annotated_tag" tagA &&
+	tagObj=$(git -C "$P" rev-parse tagA) &&
+	configure_exclusion tag "$P" "$tagObj" >h2 &&
+	git init http_child &&
+	GIT_TRACE=1 GIT_TRACE_PACKET=`pwd`/log GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child \
+		-c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch --tags "$HTTPD_URL/smart/http_parent"
+'
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v5 14/14] pack-objects.c: introduce `want_exclude_object` function
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (12 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 13/14] t5702: add tag exclusion test case Teng Long
@ 2021-08-25  2:21         ` Teng Long
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
  14 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-08-25  2:21 UTC (permalink / raw)
  To: gitster, dyroneteng; +Cc: avarab, git, jonathantanmy

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 52 ++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d5e3f2c229..503f8a5746 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1206,6 +1206,21 @@ static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
 	return 0;
 }
 
+static int want_exclude_object(struct object_list *objects)
+{
+	struct object_list *p;
+	struct configured_exclusion *ex;
+
+	if (!objects)
+		return 0;
+	for (p = objects; p; p = p->next) {
+		ex = oidmap_get(&configured_exclusions, &p->item->oid);
+		if (match_packfile_uri_exclusions(ex))
+			return 1;
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1317,11 +1332,8 @@ static int want_object_in_pack(const struct object_id *oid,
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
-	struct configured_exclusion *tag_ex;
 	struct configured_exclusion *commit_ex;
-	struct configured_exclusion *tree_ex;
 	struct configured_exclusion *ex;
-	struct object_list *p;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1357,41 +1369,27 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		if (referred_objs && referred_objs->tags) {
-			for (p = referred_objs->tags; p; p = p->next) {
-				tag_ex = oidmap_get(&configured_exclusions, &p->item->oid);
-				if (match_packfile_uri_exclusions(tag_ex))
+		if (referred_objs) {
+			if (referred_objs->commit) {
+				struct commit *commit = (struct commit *) referred_objs->commit;
+				commit_ex = oidmap_get(&configured_exclusions, &commit->object.oid);
+				if (match_packfile_uri_exclusions(commit_ex))
 					return 0;
-			}
-		}
-
-		if (referred_objs && referred_objs->commit) {
-			commit_ex = oidmap_get(&configured_exclusions, &referred_objs->commit->oid);
-			if (match_packfile_uri_exclusions(commit_ex))
-				return 0;
-			struct commit *commit = (struct commit*) referred_objs->commit;
-			for (p = commit->wraps; p; p = p->next) {
-				tag_ex = oidmap_get(&configured_exclusions, &p->item->oid);
-				if (match_packfile_uri_exclusions(tag_ex))
+				if (want_exclude_object(commit->wraps))
 					return 0;
 			}
-		}
+			if (referred_objs->tags && want_exclude_object(referred_objs->tags))
+				return 0;
 
-		if (referred_objs && referred_objs->trees) {
-			for (p = referred_objs->trees; p; p = p->next) {
-				tree_ex = oidmap_get(&configured_exclusions, &p->item->oid);
-				if (match_packfile_uri_exclusions(tree_ex))
-					return 0;
-			}
+			if (referred_objs->trees && want_exclude_object(referred_objs->trees))
+				return 0;
 		}
-
 		ex = oidmap_get(&configured_exclusions, oid);
 		if (match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
 		}
 	}
-
 	return 1;
 }
 
-- 
2.31.1.456.gec51e24953


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 03/14] packfile-uri: support for excluding commit objects
  2021-08-25  2:21         ` [PATCH v5 03/14] packfile-uri: support for excluding commit objects Teng Long
@ 2021-08-25 23:49           ` Ævar Arnfjörð Bjarmason
  2021-09-02 12:26             ` Teng Long
  2021-08-26 20:56           ` Junio C Hamano
  1 sibling, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-08-25 23:49 UTC (permalink / raw)
  To: Teng Long; +Cc: gitster, git, jonathantanmy


On Wed, Aug 25 2021, Teng Long wrote:

> Currently packfile-uri supports the exclusion of blob objects, but in
> some scenarios, users may wish to exclude more types of objects, such as
> commit and tree objects, not only because packfile itself supports
> storing these object types, but also on the other hand, to make
> configuration items maintainable and simpler.
>
> This commit is used to support the recursive exclusion of a commit
> object, which means that if the exclusion of a commit is configured as
> packfile-uri, the commit itself and all the objects it contains will
> also be recursively excluded. In addition, to support this feature, a
> new configuration  `uploadpack.excludeobject` is introduced.
>
> The reason for bringing a new configuration is for two considerations.
> First, the old configuration supports a single object type (blob), which
> limits the use of this feature. Secondly, the name of the old
> configuration is not abstract enough, this make extension difficult. If
> different object types use different configuration names, the
> configuration items will be bloated and difficult to maintain, so the
> new configuration is more abstract in name and easy to extend.
>
> Although a new configuration has been introduced, the old one is
> still available and compatible with the new configuration. The old
> configuration `uploadpack.blobpackfileuri` only supports excluding
> blobs. The new configuration `uploadpack.excludeobject` not only
> supports excluding blob objects, but also supports excluding commit
> objects, as well as recursively excluding tree objects and blob objects
> they contain.

I was under the impression that with uploadpack.blobpackfileuri we
already supported excluding non-blobs, it was just unfortunately
named. Perhaps I'm conflating that with the protocol payload for
packfile-uri, which I know doesn't only support excluding blobs.

What we didn't support at all was a way to have the server-side
mechanism in git.git recursively exclude anything, which I think is what
you're adding here...

> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
>  builtin/pack-objects.c | 50 +++++++++++++++++++++++++-----------------
>  list-objects.c         | 37 +++++++++++++++++--------------
>  object.c               | 15 ++++++++++---
>  object.h               |  4 +++-
>  revision.c             | 34 ++++++++++++++++++++--------
>  revision.h             |  3 +++
>  upload-pack.c          |  7 ++++++
>  7 files changed, 101 insertions(+), 49 deletions(-)
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 5f9ec3566f..63f3aed70a 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1311,11 +1311,14 @@ static int want_object_in_pack_one(struct packed_git *p,
>  static int want_object_in_pack(const struct object_id *oid,
>  			       int exclude,
>  			       struct packed_git **found_pack,
> -			       off_t *found_offset)
> +			       off_t *found_offset,
> +			       struct object *referred_commit)
>  {
>  	int want;
>  	struct list_head *pos;
>  	struct multi_pack_index *m;
> +	struct configured_exclusion *ex;
> +	struct configured_exclusion *referred_ex;
>  
>  	if (!exclude && local && has_loose_object_nonlocal(oid))
>  		return 0;
> @@ -1351,9 +1354,13 @@ static int want_object_in_pack(const struct object_id *oid,
>  	}
>  
>  	if (uri_protocols.nr) {
> -		struct configured_exclusion *ex =
> -			oidmap_get(&configured_exclusions, oid);
> +		if (referred_commit) {
> +			referred_ex = oidmap_get(&configured_exclusions, &referred_commit->oid);
> +			if (referred_ex && match_packfile_uri_exclusions(referred_ex))
> +				return 0;
> +		}
>  
> +		ex = oidmap_get(&configured_exclusions, oid);
>  		if (ex && match_packfile_uri_exclusions(ex)) {
>  			oidset_insert(&excluded_by_config, oid);
>  			return 0;
> @@ -1393,7 +1400,8 @@ static const char no_closure_warning[] = N_(
>  );
>  
>  static int add_object_entry(const struct object_id *oid, enum object_type type,
> -			    const char *name, int exclude)
> +			    const char *name, int exclude,
> +			    struct object *referred_commit)
>  {
>  	struct packed_git *found_pack = NULL;
>  	off_t found_offset = 0;
> @@ -1403,7 +1411,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
>  	if (have_duplicate_entry(oid, exclude))
>  		return 0;
>  
> -	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
> +	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_commit)) {
>  		/* The pack is missing an object, so it will not have closure */
>  		if (write_bitmap_index) {
>  			if (write_bitmap_index != WRITE_BITMAP_QUIET)
> @@ -1429,7 +1437,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
>  	if (have_duplicate_entry(oid, 0))
>  		return 0;
>  
> -	if (!want_object_in_pack(oid, 0, &pack, &offset))
> +	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
>  		return 0;
>  
>  	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
> @@ -1569,7 +1577,7 @@ static void add_pbase_object(struct tree_desc *tree,
>  		if (name[cmplen] != '/') {
>  			add_object_entry(&entry.oid,
>  					 object_type(entry.mode),
> -					 fullname, 1);
> +					 fullname, 1, NULL);
>  			return;
>  		}
>  		if (S_ISDIR(entry.mode)) {
> @@ -1637,7 +1645,7 @@ static void add_preferred_base_object(const char *name)
>  	cmplen = name_cmp_len(name);
>  	for (it = pbase_tree; it; it = it->next) {
>  		if (cmplen == 0) {
> -			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
> +			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
>  		}
>  		else {
>  			struct tree_desc tree;
> @@ -2839,7 +2847,7 @@ static void add_tag_chain(const struct object_id *oid)
>  			die(_("unable to pack objects reachable from tag %s"),
>  			    oid_to_hex(oid));
>  
> -		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
> +		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
>  
>  		if (tag->tagged->type != OBJ_TAG)
>  			return;
> @@ -2994,7 +3002,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  			pack_idx_opts.flags &= ~WRITE_REV;
>  		return 0;
>  	}
> -	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
> +	if (!strcmp(k, "uploadpack.excludeobject") || !strcmp(k, "uploadpack.blobpackfileuri")) {
>  		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
>  		const char *oid_end, *pack_end;
>  		/*
> @@ -3007,11 +3015,11 @@ static int git_pack_config(const char *k, const char *v, void *cb)
>  		    *oid_end != ' ' ||
>  		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
>  		    *pack_end != ' ')
> -			die(_("value of uploadpack.blobpackfileuri must be "
> +			die(_("value of uploadpack.excludeobject or uploadpack.blobpackfileuri must be "
>  			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
>  		if (oidmap_get(&configured_exclusions, &ex->e.oid))
> -			die(_("object already configured in another "
> -			      "uploadpack.blobpackfileuri (got '%s')"), v);
> +			die(_("object already configured by an earlier "
> +			      "uploadpack.excludeobject or uploadpack.blobpackfileuri (got '%s')"), v);
>  		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
>  		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
>  		ex->uri = xstrdup(pack_end + 1);
> @@ -3040,7 +3048,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
>  		return 0;
>  
>  	ofs = nth_packed_object_offset(p, pos);
> -	if (!want_object_in_pack(oid, 0, &p, &ofs))
> +	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
>  		return 0;
>  
>  	oi.typep = &type;
> @@ -3233,7 +3241,7 @@ static void read_object_list_from_stdin(void)
>  			die(_("expected object ID, got garbage:\n %s"), line);
>  
>  		add_preferred_base_object(p + 1);
> -		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
> +		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
>  	}
>  }
>  
> @@ -3242,7 +3250,7 @@ static void read_object_list_from_stdin(void)
>  
>  static void show_commit(struct commit *commit, void *data)
>  {
> -	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
> +	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
>  	commit->object.flags |= OBJECT_ADDED;
>  
>  	if (write_bitmap_index)
> @@ -3254,8 +3262,9 @@ static void show_commit(struct commit *commit, void *data)
>  
>  static void show_object(struct object *obj, const char *name, void *show_data, void *carry_data)
>  {
> +	struct object *referred_commit = carry_data;
>  	add_preferred_base_object(name);
> -	add_object_entry(&obj->oid, obj->type, name, 0);
> +	add_object_entry(&obj->oid, obj->type, name, 0, referred_commit);
>  	obj->flags |= OBJECT_ADDED;
>  
>  	if (use_delta_islands) {
> @@ -3406,7 +3415,7 @@ static void add_objects_in_unpacked_packs(void)
>  		QSORT(in_pack.array, in_pack.nr, ofscmp);
>  		for (i = 0; i < in_pack.nr; i++) {
>  			struct object *o = in_pack.array[i].object;
> -			add_object_entry(&o->oid, o->type, "", 0);
> +			add_object_entry(&o->oid, o->type, "", 0, NULL);
>  		}
>  	}
>  	free(in_pack.array);
> @@ -3422,7 +3431,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
>  		return 0;
>  	}
>  
> -	add_object_entry(oid, type, "", 0);
> +	add_object_entry(oid, type, "", 0, NULL);
>  	return 0;
>  }
>  
> @@ -3841,7 +3850,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>  			 N_("respect islands during delta compression")),
>  		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
>  				N_("protocol"),
> -				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
> +				N_("exclude any configured uploadpack.excludeobject or "
> +				   "uploadpack.blobpackfileuri with this protocol")),
>  		OPT_END(),
>  	};
>  
> diff --git a/list-objects.c b/list-objects.c
> index 427228a3ba..968d842ceb 100644
> --- a/list-objects.c
> +++ b/list-objects.c
> @@ -24,7 +24,8 @@ struct traversal_context {
>  static void process_blob(struct traversal_context *ctx,
>  			 struct blob *blob,
>  			 struct strbuf *path,
> -			 const char *name)
> +			 const char *name,
> +			 struct object *referred_commit)
>  {
>  	struct object *obj = &blob->object;
>  	size_t pathlen;
> @@ -60,7 +61,7 @@ static void process_blob(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, path->buf, ctx->show_data, NULL);
> +		ctx->show_object(obj, path->buf, ctx->show_data, referred_commit);
>  	strbuf_setlen(path, pathlen);
>  }
>  
> @@ -97,11 +98,13 @@ static void process_gitlink(struct traversal_context *ctx,
>  static void process_tree(struct traversal_context *ctx,
>  			 struct tree *tree,
>  			 struct strbuf *base,
> -			 const char *name);
> +			 const char *name,
> +			 struct object *referred_commit);
>  
>  static void process_tree_contents(struct traversal_context *ctx,
>  				  struct tree *tree,
> -				  struct strbuf *base)
> +				  struct strbuf *base,
> +				  struct object *referred_commit)
>  {
>  	struct tree_desc desc;
>  	struct name_entry entry;
> @@ -129,7 +132,7 @@ static void process_tree_contents(struct traversal_context *ctx,
>  				    entry.path, oid_to_hex(&tree->object.oid));
>  			}
>  			t->object.flags |= NOT_USER_GIVEN;
> -			process_tree(ctx, t, base, entry.path);
> +			process_tree(ctx, t, base, entry.path, referred_commit);
>  		}
>  		else if (S_ISGITLINK(entry.mode))
>  			process_gitlink(ctx, entry.oid.hash,
> @@ -142,7 +145,7 @@ static void process_tree_contents(struct traversal_context *ctx,
>  				    entry.path, oid_to_hex(&tree->object.oid));
>  			}
>  			b->object.flags |= NOT_USER_GIVEN;
> -			process_blob(ctx, b, base, entry.path);
> +			process_blob(ctx, b, base, entry.path, referred_commit);
>  		}
>  	}
>  }
> @@ -150,7 +153,8 @@ static void process_tree_contents(struct traversal_context *ctx,
>  static void process_tree(struct traversal_context *ctx,
>  			 struct tree *tree,
>  			 struct strbuf *base,
> -			 const char *name)
> +			 const char *name,
> +			 struct object *referred_commit)
>  {
>  	struct object *obj = &tree->object;
>  	struct rev_info *revs = ctx->revs;
> @@ -191,14 +195,14 @@ static void process_tree(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
> +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
>  	if (base->len)
>  		strbuf_addch(base, '/');
>  
>  	if (r & LOFR_SKIP_TREE)
>  		trace_printf("Skipping contents of tree %s...\n", base->buf);
>  	else if (!failed_parse)
> -		process_tree_contents(ctx, tree, base);
> +		process_tree_contents(ctx, tree, base, referred_commit);
>  
>  	r = list_objects_filter__filter_object(ctx->revs->repo,
>  					       LOFS_END_TREE, obj,
> @@ -207,7 +211,7 @@ static void process_tree(struct traversal_context *ctx,
>  	if (r & LOFR_MARK_SEEN)
>  		obj->flags |= SEEN;
>  	if (r & LOFR_DO_SHOW)
> -		ctx->show_object(obj, base->buf, ctx->show_data, NULL);
> +		ctx->show_object(obj, base->buf, ctx->show_data, referred_commit);
>  
>  	strbuf_setlen(base, baselen);
>  	free_tree_buffer(tree);
> @@ -314,9 +318,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
>  	}
>  }
>  
> -static void add_pending_tree(struct rev_info *revs, struct tree *tree)
> +static void add_pending_tree(struct rev_info *revs, struct tree *tree, struct object *referred_commit)
>  {
> -	add_pending_object(revs, &tree->object, "");
> +	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
>  }
>  
>  static void traverse_trees_and_blobs(struct traversal_context *ctx,
> @@ -329,23 +333,24 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
>  	for (i = 0; i < ctx->revs->pending.nr; i++) {
>  		struct object_array_entry *pending = ctx->revs->pending.objects + i;
>  		struct object *obj = pending->item;
> +		struct object *referred_commit = pending->referred_commit;
>  		const char *name = pending->name;
>  		const char *path = pending->path;
>  		if (obj->flags & (UNINTERESTING | SEEN))
>  			continue;
>  		if (obj->type == OBJ_TAG) {
>  			obj->flags |= SEEN;
> -			ctx->show_object(obj, name, ctx->show_data, NULL);
> +			ctx->show_object(obj, name, ctx->show_data, referred_commit);
>  			continue;
>  		}
>  		if (!path)
>  			path = "";
>  		if (obj->type == OBJ_TREE) {
> -			process_tree(ctx, (struct tree *)obj, base, path);
> +			process_tree(ctx, (struct tree *)obj, base, path, referred_commit);
>  			continue;
>  		}
>  		if (obj->type == OBJ_BLOB) {
> -			process_blob(ctx, (struct blob *)obj, base, path);
> +			process_blob(ctx, (struct blob *)obj, base, path, referred_commit);
>  			continue;
>  		}
>  		die("unknown pending object %s (%s)",
> @@ -370,7 +375,7 @@ static void do_traverse(struct traversal_context *ctx)
>  		else if (get_commit_tree(commit)) {
>  			struct tree *tree = get_commit_tree(commit);
>  			tree->object.flags |= NOT_USER_GIVEN;
> -			add_pending_tree(ctx->revs, tree);
> +			add_pending_tree(ctx->revs, tree, &commit->object);
>  		} else if (commit->object.parsed) {
>  			die(_("unable to load root tree for commit %s"),
>  			      oid_to_hex(&commit->object.oid));
> diff --git a/object.c b/object.c
> index 14188453c5..6b1ce2fcde 100644
> --- a/object.c
> +++ b/object.c
> @@ -322,9 +322,10 @@ void object_list_free(struct object_list **list)
>   */
>  static char object_array_slopbuf[1];
>  
> -void add_object_array_with_path(struct object *obj, const char *name,
> -				struct object_array *array,
> -				unsigned mode, const char *path)
> +void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
> +						    struct object_array *array,
> +						    unsigned mode, const char *path,
> +						    struct object *referred_commit)
>  {
>  	unsigned nr = array->nr;
>  	unsigned alloc = array->alloc;
> @@ -339,6 +340,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
>  	}
>  	entry = &objects[nr];
>  	entry->item = obj;
> +	entry->referred_commit = referred_commit;
>  	if (!name)
>  		entry->name = NULL;
>  	else if (!*name)
> @@ -354,6 +356,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
>  	array->nr = ++nr;
>  }
>  
> +void add_object_array_with_path(struct object *obj, const char *name,
> +				struct object_array *array,
> +				unsigned mode, const char *path)
> +{
> +	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
> +}
> +
>  void add_object_array(struct object *obj, const char *name, struct object_array *array)
>  {
>  	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
> diff --git a/object.h b/object.h
> index 87a6da47c8..d63819ab91 100644
> --- a/object.h
> +++ b/object.h
> @@ -52,6 +52,7 @@ struct object_array {
>  		char *name;
>  		char *path;
>  		unsigned mode;
> +		struct object *referred_commit;
>  	} *objects;
>  };
>  
> @@ -157,7 +158,8 @@ void object_list_free(struct object_list **list);
>  /* Object array handling .. */
>  void add_object_array(struct object *obj, const char *name, struct object_array *array);
>  void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
> -
> +void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
> +						    unsigned mode, const char *path, struct object *referred_commit);
>  /*
>   * Returns NULL if the array is empty. Otherwise, returns the last object
>   * after removing its entry from the array. Other resources associated
> diff --git a/revision.c b/revision.c
> index 4853c85d0b..65e0926d25 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
>  	commit_stack_clear(&pending);
>  }
>  
> -static void add_pending_object_with_path(struct rev_info *revs,
> -					 struct object *obj,
> -					 const char *name, unsigned mode,
> -					 const char *path)
> +static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
> +							     struct object *obj,
> +							     const char *name, unsigned mode,
> +							     const char *path,
> +							     struct object *referred_commit)
>  {
>  	struct interpret_branch_name_options options = { 0 };
>  	if (!obj)
> @@ -326,20 +327,35 @@ static void add_pending_object_with_path(struct rev_info *revs,
>  		strbuf_release(&buf);
>  		return; /* do not add the commit itself */
>  	}
> -	add_object_array_with_path(obj, name, &revs->pending, mode, path);
> +	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
>  }
>  
> +static void add_pending_object_with_path(struct rev_info *revs,
> +					 struct object *obj,
> +					 const char *name, unsigned mode,
> +					 const char *path)
> +{
> +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
> +}
>  static void add_pending_object_with_mode(struct rev_info *revs,
>  					 struct object *obj,
> -					 const char *name, unsigned mode)
> +					 const char *name, unsigned mode,
> +					 struct object *referred_commit)
> +{
> +	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
> +}
> +
> +void add_pending_object_with_referred_commit(struct rev_info *revs,
> +					     struct object *obj, const char *name,
> +					     struct object *referred_commit)
>  {
> -	add_pending_object_with_path(revs, obj, name, mode, NULL);
> +	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
>  }
>  
>  void add_pending_object(struct rev_info *revs,
>  			struct object *obj, const char *name)
>  {
> -	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
> +	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
>  }
>  
>  void add_head_to_pending(struct rev_info *revs)
> @@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>  		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
>  			diagnose_missing_default(revs->def);
>  		object = get_reference(revs, revs->def, &oid, 0);
> -		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
> +		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
>  	}
>  
>  	/* Did the user ask for any diff output? Run the diff! */
> diff --git a/revision.h b/revision.h
> index a24f72dcd1..f9c9628ed8 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -423,6 +423,9 @@ void show_object_with_name(FILE *, struct object *, const char *);
>   */
>  void add_pending_object(struct rev_info *revs,
>  			struct object *obj, const char *name);
> +void add_pending_object_with_referred_commit(struct rev_info *revs,
> +					     struct object *obj, const char *name,
> +					     struct object *referred_commit);
>  
>  void add_pending_oid(struct rev_info *revs,
>  		     const char *name, const struct object_id *oid,
> diff --git a/upload-pack.c b/upload-pack.c
> index 5c1cd19612..d26fb351a3 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
>  			strbuf_addstr(value, " packfile-uris");
>  			free(str);
>  		}
> +
> +		if (!repo_config_get_string(the_repository,
> +					    "uploadpack.excludeobject",
> +					    &str) && str) {
> +			strbuf_addstr(value, " packfile-uris");
> +			free(str);
> +		}
>  	}
>  
>  	return 1;


^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees
  2021-08-25  2:21         ` [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees Teng Long
@ 2021-08-25 23:52           ` Ævar Arnfjörð Bjarmason
  2021-09-02 11:23             ` Teng Long
  0 siblings, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-08-25 23:52 UTC (permalink / raw)
  To: Teng Long; +Cc: gitster, git, jonathantanmy


On Wed, Aug 25 2021, Teng Long wrote:

>  Client design
>  -------------
> @@ -65,9 +78,6 @@ The protocol design allows some evolution of the server and client without any
>  need for protocol changes, so only a small-scoped design is included here to
>  form the MVP. For example, the following can be done:
>  
> - * On the server, more sophisticated means of excluding objects (e.g. by
> -   specifying a commit to represent that commit and all objects that it
> -   references).
>   * On the client, resumption of clone. If a clone is interrupted, information
>     could be recorded in the repository's config and a "clone-resume" command
>     can resume the clone in progress. (Resumption of subsequent fetches is more
> @@ -78,4 +88,4 @@ There are some possible features that will require a change in protocol:
>  
>   * Additional HTTP headers (e.g. authentication)
>   * Byte range support
> - * Different file formats referenced by URIs (e.g. raw object)
> + * Different file formats referenced by URIs (e.g. raw object)
> \ No newline at end of file

Newline churn?

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup
  2021-08-25  2:21         ` [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup Teng Long
@ 2021-08-25 23:55           ` Ævar Arnfjörð Bjarmason
  2021-09-02 11:37             ` Teng Long
  0 siblings, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-08-25 23:55 UTC (permalink / raw)
  To: Teng Long; +Cc: gitster, git, jonathantanmy


On Wed, Aug 25 2021, Teng Long wrote:

Thanks, much needed cleanup. I have an unsubmitted patch to do pretty
much this, plus some: https://github.com/avar/git/commit/27b3543c6ed

You might find the difference between the two interesting..

> -test_done
> +test_done
> \ No newline at end of file

More newline churn.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 09/14] commit.h: add wrapped tags in commit struct
  2021-08-25  2:21         ` [PATCH v5 09/14] commit.h: add wrapped tags in commit struct Teng Long
@ 2021-08-25 23:58           ` Ævar Arnfjörð Bjarmason
  2021-09-02 12:17             ` Teng Long
  2021-09-02 12:39           ` ZheNing Hu
  1 sibling, 1 reply; 72+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-08-25 23:58 UTC (permalink / raw)
  To: Teng Long; +Cc: gitster, git, jonathantanmy


On Wed, Aug 25 2021, Teng Long wrote:

> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
>  commit.h   | 5 +++++
>  revision.c | 8 ++++++--
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/commit.h b/commit.h
> index df42eb434f..1e99e9ae8a 100644
> --- a/commit.h
> +++ b/commit.h
> @@ -38,6 +38,11 @@ struct commit {
>  	 */
>  	struct tree *maybe_tree;
>  	unsigned int index;
> +	/*
> +	* wrapped tags or NULL.  If the commit is peeled from tag(s),
> +	* then save the wraps, otherwise will be NULL.
> +	*/
> +	struct object_list *wraps;
>  };
>  
>  extern int save_commit_buffer;
> diff --git a/revision.c b/revision.c
> index 65e0926d25..aecf493f46 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -416,14 +416,17 @@ static struct commit *handle_commit(struct rev_info *revs,
>  	const char *path = entry->path;
>  	unsigned int mode = entry->mode;
>  	unsigned long flags = object->flags;
> -
> +	struct object_list *wraps = NULL;
>  	/*
>  	 * Tag object? Look what it points to..
>  	 */
>  	while (object->type == OBJ_TAG) {
>  		struct tag *tag = (struct tag *) object;
> -		if (revs->tag_objects && !(flags & UNINTERESTING))
> +		if (revs->tag_objects && !(flags & UNINTERESTING)) {
> +			object_list_insert(object, &wraps);
>  			add_pending_object(revs, object, tag->tag);
> +		}
> +
>  		object = parse_object(revs->repo, get_tagged_oid(tag));
>  		if (!object) {
>  			if (revs->ignore_missing_links || (flags & UNINTERESTING))
> @@ -449,6 +452,7 @@ static struct commit *handle_commit(struct rev_info *revs,
>  	 */
>  	if (object->type == OBJ_COMMIT) {
>  		struct commit *commit = (struct commit *)object;
> +		commit->wraps = wraps;
>  
>  		if (repo_parse_commit(revs->repo, commit) < 0)
>  			die("unable to parse commit %s", name);

Can't we store this info on the side between these two static functions
somehow, instead of adding this "wraps" to all commit structs?

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function
  2021-08-25  2:21         ` [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function Teng Long
@ 2021-08-26 20:45           ` Junio C Hamano
  2021-09-02 11:08             ` Teng Long
  0 siblings, 1 reply; 72+ messages in thread
From: Junio C Hamano @ 2021-08-26 20:45 UTC (permalink / raw)
  To: Teng Long; +Cc: avarab, git, jonathantanmy

Teng Long <dyroneteng@gmail.com> writes:

> Subject: Re: [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function

Since this lacks <area>: prefix, "git shortlog" readers will have a
hard time guessing which show_object() function this commit is
about.

> During the pack-objects process, "show_object" function will be called
> to find the object and show the process("show_object_fn" in
> "list-object.h"), the function definition contains three parameters:
>
> 	1. struct object *obj(contains object type, flags, and oid).
> 	2. const char *name(the object name).
> 	3. void *show_data(function to show progress info).
>
> This commit adds a new parameter: "void *carry_data", the reason is
> mainly based on scalability and performance considerations when showing
> an object, space for time, avoid costly temporary calculations in the
> "show" phase. For example, carry the ownership relationship between
> blob or tree object and the referred commit to avoid redundant and
> expensive calculations.

The above explains what we want to carry around extra data for
(i.e. compute something in one place, and use it later somewhere
else)

But it does not quite explain why we need another parameter to do
so, which involves changing the function signature of many
functions, instead of making show_data to point at a new structure
type that holds the original data show_data used to carry plus
another single void * member (or the set of members you'd be
carrying into these functions using this new parameter).

I also find "carry_data" a meaningless name for the parameter.  All
in-parameters into functions are used to carry some data into it
after all.  The existing "show_data" at least makes a bit more
sense; it contains data necessary for showing the object in these
code paths.  If the purpose this new thing was introduced is to
cache ownership relationship data, perhaps ownership_cache would be
a more descriptive and understandable name (be it a new parameter to
added to many functions, or a member to the new structure that
replaces show_data).

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 03/14] packfile-uri: support for excluding commit objects
  2021-08-25  2:21         ` [PATCH v5 03/14] packfile-uri: support for excluding commit objects Teng Long
  2021-08-25 23:49           ` Ævar Arnfjörð Bjarmason
@ 2021-08-26 20:56           ` Junio C Hamano
  2021-09-02 12:51             ` Teng Long
  1 sibling, 1 reply; 72+ messages in thread
From: Junio C Hamano @ 2021-08-26 20:56 UTC (permalink / raw)
  To: Teng Long; +Cc: avarab, git, jonathantanmy

Teng Long <dyroneteng@gmail.com> writes:

> This commit is used to support the recursive exclusion of a commit
> object, which means that if the exclusion of a commit is configured as
> packfile-uri, the commit itself and all the objects it contains will
> also be recursively excluded.

Whenever you say "which means", e.g. "We do X, which means we do Y",
think twice to see if you do not even have to say X.  In this
particular sentence, I think you can simplify the description
greatly and you do not even have to use the word "recursive".  

Also, because this "exclusion by a commit" does not work like the
usual "reachability" relationship Git users are familiar with, it
would help to highlight what is special that is done here to the
readers.

Taking the above together, along the lines of ...

    When a commit is specified to be excluded as packfile-uri,
    exclude all trees and blobs contained in its top-level tree, as
    well as the commit itself, but not the ancestors of the commit
    and objects that are reachable by them.

or something like that, perhaps.

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 5f9ec3566f..63f3aed70a 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1311,11 +1311,14 @@ static int want_object_in_pack_one(struct packed_git *p,
>  static int want_object_in_pack(const struct object_id *oid,
>  			       int exclude,
>  			       struct packed_git **found_pack,
> -			       off_t *found_offset)
> +			       off_t *found_offset,
> +			       struct object *referred_commit)

As the caller is limited to pass commit and no other types of
object, the new parameter should be of type "struct commit", no?

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function
  2021-08-26 20:45           ` Junio C Hamano
@ 2021-09-02 11:08             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 11:08 UTC (permalink / raw)
  To: gitster; +Cc: avarab, dyroneteng, git, jonathantanmy

> Since this lacks <area>: prefix, "git shortlog" readers will have a
> hard time guessing which show_object() function this commit is
> about.


Sorry for the late reply.
Agree, it will be fixed in next patch.

> But it does not quite explain why we need another parameter to do
> so, which involves changing the function signature of many
> functions, instead of making show_data to point at a new structure
> type that holds the original data show_data used to carry plus
> another single void * member (or the set of members you'd be
> carrying into these functions using this new parameter).
> 
> I also find "carry_data" a meaningless name for the parameter.  All
> in-parameters into functions are used to carry some data into it
> after all.  The existing "show_data" at least makes a bit more
> sense; it contains data necessary for showing the object in these
> code paths.  If the purpose this new thing was introduced is to
> cache ownership relationship data, perhaps ownership_cache would be
> a more descriptive and understandable name (be it a new parameter to
> added to many functions, or a member to the new structure that
> replaces show_data).

Agree.

I think "show_data to point at a new structure" is a better idea. I will
follow the idea and optimize the related code. By the way, the naming
about "ownership_cache" will be introduced in the next patch too, unless I
can find a better one t(-_-t).

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees
  2021-08-25 23:52           ` Ævar Arnfjörð Bjarmason
@ 2021-09-02 11:23             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 11:23 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, gitster, jonathantanmy

> Newline churn?

Agree.

I have no impression why the NEWLINE was deleted, will be fixed
in the next patch.

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup
  2021-08-25 23:55           ` Ævar Arnfjörð Bjarmason
@ 2021-09-02 11:37             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 11:37 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, gitster, jonathantanmy



> Thanks, much needed cleanup. I have an unsubmitted patch to do pretty
> much this, plus some: https://github.com/avar/git/commit/27b3543c6ed
>
> You might find the difference between the two interesting..

Cool.

I had took a look at the commit. In comparision, I ignored some places that need
to cleanup, I don't know if I got your meaning?

If so, how to deal with this situation better, continue my work or rebase your commit
to the current patchset?

>> -test_done
>> +test_done
>> \ No newline at end of file
>
> More newline churn.

Will be fixed in the next patch.

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 09/14] commit.h: add wrapped tags in commit struct
  2021-08-25 23:58           ` Ævar Arnfjörð Bjarmason
@ 2021-09-02 12:17             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 12:17 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, gitster, jonathantanmy


Ævar Arnfjörð Bjarmason wrote:

> Can't we store this info on the side between these two static functions
> somehow, instead of adding this "wraps" to all commit structs?
>	      

Thanks very much and I have some doubts.

> ... instead of adding this "wraps" to all commit structs?
I think "adding this "wraps" to all commit struct" is an easy but a little
rough indeed. I didn't know if this is okay at the time. So I pushed the patch,
hoping to ask some different opinions.

> Can't we store this info on the side between these two static functions...

Do you mean to use static storage to share the "wraps", or other way? I want to make
sure that I understand your opinion accurately.

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 03/14] packfile-uri: support for excluding commit objects
  2021-08-25 23:49           ` Ævar Arnfjörð Bjarmason
@ 2021-09-02 12:26             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 12:26 UTC (permalink / raw)
  To: avarab; +Cc: dyroneteng, git, gitster, jonathantanmy

Ævar Arnfjörð Bjarmason wrote:

> I was under the impression that with uploadpack.blobpackfileuri we
> already supported excluding non-blobs, it was just unfortunately
> named. Perhaps I'm conflating that with the protocol payload for
> packfile-uri, which I know doesn't only support excluding blobs.
>
> What we didn't support at all was a way to have the server-side
> mechanism in git.git recursively exclude anything, which I think is what
> you're adding here...
>

Agree.

You are absolutely right(ಥ_ಥ ). The description of the commit is inaccurate, I will
fix this problem in the next patch.

Thank you.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 09/14] commit.h: add wrapped tags in commit struct
  2021-08-25  2:21         ` [PATCH v5 09/14] commit.h: add wrapped tags in commit struct Teng Long
  2021-08-25 23:58           ` Ævar Arnfjörð Bjarmason
@ 2021-09-02 12:39           ` ZheNing Hu
  2021-09-02 13:01             ` Teng Long
  1 sibling, 1 reply; 72+ messages in thread
From: ZheNing Hu @ 2021-09-02 12:39 UTC (permalink / raw)
  To: Teng Long
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason, Git List,
	Jonathan Tan

Teng Long <dyroneteng@gmail.com> 于2021年8月25日周三 上午10:24写道:
>
> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
>  commit.h   | 5 +++++
>  revision.c | 8 ++++++--
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/commit.h b/commit.h
> index df42eb434f..1e99e9ae8a 100644
> --- a/commit.h
> +++ b/commit.h
> @@ -38,6 +38,11 @@ struct commit {
>          */
>         struct tree *maybe_tree;
>         unsigned int index;
> +       /*
> +       * wrapped tags or NULL.  If the commit is peeled from tag(s),
> +       * then save the wraps, otherwise will be NULL.
> +       */
> +       struct object_list *wraps;
>  };
>
>  extern int save_commit_buffer;
> diff --git a/revision.c b/revision.c
> index 65e0926d25..aecf493f46 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -416,14 +416,17 @@ static struct commit *handle_commit(struct rev_info *revs,
>         const char *path = entry->path;
>         unsigned int mode = entry->mode;
>         unsigned long flags = object->flags;
> -
> +       struct object_list *wraps = NULL;
>         /*
>          * Tag object? Look what it points to..
>          */
>         while (object->type == OBJ_TAG) {
>                 struct tag *tag = (struct tag *) object;
> -               if (revs->tag_objects && !(flags & UNINTERESTING))
> +               if (revs->tag_objects && !(flags & UNINTERESTING)) {
> +                       object_list_insert(object, &wraps);
>                         add_pending_object(revs, object, tag->tag);
> +               }
> +
>                 object = parse_object(revs->repo, get_tagged_oid(tag));
>                 if (!object) {
>                         if (revs->ignore_missing_links || (flags & UNINTERESTING))
> @@ -449,6 +452,7 @@ static struct commit *handle_commit(struct rev_info *revs,
>          */
>         if (object->type == OBJ_COMMIT) {
>                 struct commit *commit = (struct commit *)object;
> +               commit->wraps = wraps;
>
>                 if (repo_parse_commit(revs->repo, commit) < 0)
>                         die("unable to parse commit %s", name);
> --
> 2.31.1.456.gec51e24953
>

/*
 * The size of this struct matters in full repo walk operations like
 * 'git clone' or 'git gc'. Consider using commit-slab to attach data
 * to a commit instead of adding new fields here.
 */
struct commit {
        struct object object;
        timestamp_t date;
        struct commit_list *parents;

        /*
         * If the commit is loaded from the commit-graph file, then this
         * member may be NULL. Only access it through repo_get_commit_tree()
         * or get_commit_tree_oid().
         */
        struct tree *maybe_tree;
        unsigned int index;
};

According to the instructions above, I wonder if you should use "commit_slab" to
store part of the data related to the commit object instead of
modifying the member
of struct commit itself?

See:
https://github.com/git/git/blob/master/commit-slab.h
https://github.com/git/git/blob/master/commit-slab-impl.h
https://github.com/git/git/blob/master/commit-slab-decl.h
https://lore.kernel.org/git/CAOLTT8Q8BEKCVwPDypW1w66P9_xP7QC0T-CnLqamqAL4haGzwA@mail.gmail.com/

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 03/14] packfile-uri: support for excluding commit objects
  2021-08-26 20:56           ` Junio C Hamano
@ 2021-09-02 12:51             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 12:51 UTC (permalink / raw)
  To: gitster; +Cc: avarab, dyroneteng, git, jonathantanmy


Junio C Hamano wrote:

> Whenever you say "which means", e.g. "We do X, which means we do Y",
> think twice to see if you do not even have to say X.  In this
> particular sentence, I think you can simplify the description
> greatly and you do not even have to use the word "recursive".  

Forgive my English for another accident.

Will be fixed in the next patch.

> Also, because this "exclusion by a commit" does not work like the
> usual "reachability" relationship Git users are familiar with, it
> would help to highlight what is special that is done here to the
> readers.
> 
> Taking the above together, along the lines of ...
> 
>     When a commit is specified to be excluded as packfile-uri,
>     exclude all trees and blobs contained in its top-level tree, as
>     well as the commit itself, but not the ancestors of the commit
>     and objects that are reachable by them.
> 
> or something like that, perhaps.

Agree.

I will use your suggestion in the next patch (may be slightly modified).

In the next patch, I originally planned to introduce the exclusion of the
commit and with all it's ancestor objects. The problems in the current patch
will also be fixed in the next patch.

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH v5 09/14] commit.h: add wrapped tags in commit struct
  2021-09-02 12:39           ` ZheNing Hu
@ 2021-09-02 13:01             ` Teng Long
  0 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-09-02 13:01 UTC (permalink / raw)
  To: adlternative; +Cc: avarab, dyroneteng, git, gitster, jonathantanmy


ZheNing Hu wrote:

> /*
>  * The size of this struct matters in full repo walk operations like
>  * 'git clone' or 'git gc'. Consider using commit-slab to attach data
>  * to a commit instead of adding new fields here.
>  */
> struct commit {
>         struct object object;
>         timestamp_t date;
>         struct commit_list *parents;
> 
>         /*
>          * If the commit is loaded from the commit-graph file, then this
>          * member may be NULL. Only access it through repo_get_commit_tree()
>          * or get_commit_tree_oid().
>          */
>         struct tree *maybe_tree;
>         unsigned int index;
> };
> 
> According to the instructions above, I wonder if you should use "commit_slab" to
> store part of the data related to the commit object instead of
> modifying the member
> of struct commit itself?
> 
> See:
> https://github.com/git/git/blob/master/commit-slab.h
> https://github.com/git/git/blob/master/commit-slab-impl.h
> https://github.com/git/git/blob/master/commit-slab-decl.h
> https://lore.kernel.org/git/CAOLTT8Q8BEKCVwPDypW1w66P9_xP7QC0T-CnLqamqAL4haGzwA@mail.gmail.com/

Awesome!

Maybe it's what I really need now, I will make a try.

Thanks(比心).

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 00/12] packfile-uri: support excluding multiple object types
  2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
                           ` (13 preceding siblings ...)
  2021-08-25  2:21         ` [PATCH v5 14/14] pack-objects.c: introduce `want_exclude_object` function Teng Long
@ 2021-10-19 11:38         ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 01/12] objects.c: introduce `exclude_level` enum Teng Long
                             ` (11 more replies)
  14 siblings, 12 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long


About This Patch:
-----------------
This patch has almost no reuse of previous patch's commits, because this
one redesigned the exclusion for various object types and reoganized the
commits. 

Commit (1): objects.c: introduce `exclude_level` enum

This commit introduce a new enum named `exclude_level` in object.c, it
contains three enumerated values: "ET_SELF", "ET_INCLUDE" and
"ET_REACHABLE". The commit only makes the definitions, without any
implementations.

Commit (2): Introduce function `match_packfile_uri_exclusions`

This one move the codes used for matching the URI protocols from
`want_found_object` to new function `match_packfile_uri_exclusions`. The
purpose is to improve code readability related to the feature and make
preparation for further extension about the matching and exclusion for
multiple object types.

Commit (3): Replace `show_data` with structure `show_info`

There is no feature related codes in this commit. This commit modified
the parameters in function `show_object` by replacing `show_data` with a
new structure `show_info`, this is another way of implementation about
the previous commit[1] in patch v5.

Commit (4): Introduce `uploadpack.excludeobject` configuration

This commit introduce a new and backward-compatible configuration named
`uploadpack.excludeobject`, the diff between old and new: 

	uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>
        uploadpack.excludeobject=<object-hash> <level> <pack-hash> <uri>

The <level> is correspond to the exclusion scope of the given object,
and it's mentioned as `exclude_value` in object.c  by "Commit (1)".

Commit (6,8,10): Implementations for excluding commits, trees and tags

Commit (7,9,11): Tests for excluding excluding commits, trees and tags

Commit (12): Corresponding documentation modifications

bundle-uri And packfile-uri 
----------------------------

Ævar Arnfjörð Bjarmason post a new feature patchset[2] named "bundle-uri",
it supports to let client download the full or incremental bundles
directly without any negotiations with the server (full clone first and
in a MVP progress now). I'm sorry if I misleading the meanings and
please point it out.

In "packfile-uri", different with "bundle-uri", happens in the span of
packing objects during git-upload-pack on the server. This is an
experimental feature, originally designed as a CDN for large BLOB
objects, but it's not yet fully functional. I'm currently working on it
for making it support other types of objects.

I also noticed that some opinions about the two features are mentioned
in the patch[2], so I cc to Stolee in this patch, I hope I did not
disturb you.


[1] https://public-inbox.org/git/xmqqlf4oc4u1.fsf@gitster.g/
[2] https://lore.kernel.org/git/RFC-cover-00.13-0000000000-20210805T150534Z-avarab@gmail.com/

Teng Long (12):
  objects.c: introduce `exclude_level` enum
  Introduce function `match_packfile_uri_exclusions`
  Replace `show_data` with structure `show_info`
  Introduce `uploadpack.excludeobject` configuration
  t5702: test cases for `uploadpack.excludeobject`
  packfile-uri: support for excluding commits
  t5702: test cases for excluding commits
  packfile-uri: support for excluding trees
  t5702: test cases for excluding trees
  packfile-uri: support for excluding tags
  t5702: test cases for excluding tags
  packfile-uri.txt: support multiple object types

 Documentation/technical/packfile-uri.txt |  74 +-
 builtin/describe.c                       |   9 +-
 builtin/pack-objects.c                   | 236 +++++--
 builtin/rev-list.c                       |  11 +-
 bundle.c                                 |   5 +-
 list-objects.c                           |  74 +-
 list-objects.h                           |  11 +-
 object.c                                 |  21 +-
 object.h                                 |  16 +-
 pack-bitmap.c                            |  16 +-
 reachable.c                              |  11 +-
 revision.c                               |  47 +-
 revision.h                               |  12 +
 shallow.c                                |   4 +-
 t/t5702-protocol-v2.sh                   | 856 +++++++++++++++++++++--
 upload-pack.c                            |   7 +
 16 files changed, 1253 insertions(+), 157 deletions(-)

Range-diff against v5:
 1:  3a885678c9 =  1:  3a885678c9 objects.c: introduce `exclude_level` enum
 2:  36426b4d9f =  2:  36426b4d9f Introduce function `match_packfile_uri_exclusions`
 3:  dced036f89 =  3:  dced036f89 Replace `show_data` with structure `show_info`
 4:  b1d779b26a =  4:  b1d779b26a Introduce `uploadpack.excludeobject` configuration
 5:  f643db3c71 =  5:  f643db3c71 t5702: test cases for `uploadpack.excludeobject`
 6:  c29efeac21 =  6:  c29efeac21 packfile-uri: support for excluding commits
 7:  ca72efd22e =  7:  ca72efd22e t5702: test cases for excluding commits
 8:  c7a885ebec =  8:  c7a885ebec packfile-uri: support for excluding trees
 9:  5fc79a9a32 =  9:  5fc79a9a32 t5702: test cases for excluding trees
10:  16c41c40a2 = 10:  16c41c40a2 packfile-uri: support for excluding tags
11:  171ece533b = 11:  171ece533b t5702: test cases for excluding tags
12:  79fc2c23cf = 12:  79fc2c23cf packfile-uri.txt: support multiple object types
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 01/12] objects.c: introduce `exclude_level` enum
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 02/12] Introduce function `match_packfile_uri_exclusions` Teng Long
                             ` (10 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

Currently packfile-uri supports the exclusion of blob objects, but
in some scenarios, users may wish to exclude more types of objects,
such as commits, trees and tags, the difference with blob is they are
more complicated. In addition to the meaning of a single object itself,
it may also represent a collection of the objects that it includes
(trees and blob in a root-tree) or reaches (ancestors of a commit).
The exclusion range is designed by an enum named `exclude_level` in
"pack-objects.c" that enumerate three values, "ET_SELF", "ET_INCLUDE"
and "ET_REACHABLE".

Here are some explanations for their differences:

- Scene 1: "ET_SELF" for excluding object itself.

Reason to support the "ET_SELF" is because the definition of
exclusion-level should be better to be consistent, no matter what
the object type is. Excluding a single object itself has meaning for
blobs, because blob is the smallest granularity among object types, and
indeed, sometimes there are some frequently used big-size blobs in
repository.

If you want to exclude more ownership or reachable objects, it can be
considered to use "ET_INCLUDE" or "ET_REACHABLE".

- Scene 2: "ET_INCLUDE" for excluding object itself and objects it
contains.

When a commit is specified to be excluded as packfile-uri,
more offen, it's hoped to exclude the trees and blobs contained in
its top-level tree, as well as the commit itself, but not the
ancestors of the commit. This applies to scenarios where we want to
exclude a specified non-blob object that includes some big-size
objects.

Commit, tag and tree are suitable for this scenario. When a tag
is specified, it will exclude the dereference commit, and all trees and
blobs contained in its top-tree, as well as the tag itself if it's not a
lightweight one.

- Scene 3: "ET_REACHABLE" for excluding object itself, all the objects
it contains, and its ancestors.

For further exclusion range, the ancestors are needed to excluded
together, for example, the clone scenes.

This commit only defines the `exclude_level` enum type. The implementations
of non-blob object types will be added in subsequent commits.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6d13cd3e1a..73b92a0c90 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -127,9 +127,52 @@ enum missing_action {
 };
 static enum missing_action arg_missing_action;
 static show_object_fn fn_show_object;
+/*
+ * exclude_level defines how to make the exclusion when the object matches
+ * a packfile-uri (uploadpack.excludeobject or uploadpack.blobpackfileuri)
+ * configuration.
+ *
+ * - ET_SELF;
+ *   This type means only the object itself will be excluded, and all other
+ *   objects it includes or reachable will not. For example, if object type is:
+ *   	- BLOB:  The blob object will be excluded
+ *   	- TREE:  The tree object will be excluded, the sub-trees and blobs it
+ *   	  includes will not be excluded.
+ *   	- COMMIT: The commit object will be excluded, all the trees and blobs
+ *   	  that be included in its top-level tree will not be excluded.
+ *   	- TAG: TAG object will be excluded, the referrenced commit object will
+ *   	  not be excluded.
+ * - ET_INCLUDE;
+ *   This type means that not only the object itself will be excluded, but
+ *   also the objects it includes. For example, if object type is:
+ *   	- BLOB:  Same with 'ET_SELF'
+ *   	- TREE:  The tree object, and also the sub-trees and blobs that
+ *   	  the object includes will be excluded.
+ *   	- COMMIT: The commit object, and also all the trees and blobs
+ *   	  contained in its top-level tree will be excluded.
+ *   	- TAG: The TAG object will be excluded, and also the referrenced
+ *   	  commit will be excluded (the referrenced commit exclusion will
+ *   	  treat as a 'ET_INCLUDE' way).
+ * - ET_REACHABLE;
+ *   This type means that not only the object and all the objects it includes
+ *   will be excluded, but also the reachable objects. For exmple, if object
+ *   type is:
+ *   	- BLOB:  Same with 'ET_INCLUDE'
+ *   	- TREE:  Same with 'ET_INCLUDE'
+ *   	- COMMIT: The Objects in the case of 'ET_INCLUDE' will be excluded,
+ *   	  and also the ancestors of the commit will be excluded.
+ *   	- TAG: The Objects in the case of 'ET_INCLUDE' will be excluded, and
+ *   	  also the ancestors of the referrenced commit will be excluded.
+ */
 
+enum exclude_level {
+    ET_SELF,
+    ET_INCLUDE,
+    ET_REACHABLE,
+};
 struct configured_exclusion {
 	struct oidmap_entry e;
+	int level;
 	char *pack_hash_hex;
 	char *uri;
 };
@@ -3003,6 +3046,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
 			die(_("object already configured in another "
 			      "uploadpack.blobpackfileuri (got '%s')"), v);
+		ex->level = ET_SELF;
 		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
 		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
 		ex->uri = xstrdup(pack_end + 1);
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 02/12] Introduce function `match_packfile_uri_exclusions`
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
  2021-10-19 11:38           ` [PATCH v6 01/12] objects.c: introduce `exclude_level` enum Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 03/12] Replace `show_data` with structure `show_info` Teng Long
                             ` (9 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

The matching codes now placed in function `want_object_in_pack`, move it
to a new function `match_packfile_uri_exclusions` to prevent subsequent
modifications caused by its continuing expansion.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 73b92a0c90..17053dc85a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1231,6 +1231,24 @@ static int have_duplicate_entry(const struct object_id *oid,
 	return 1;
 }
 
+static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
+{
+	int i;
+	const char *p;
+
+	if (ex) {
+		for (i = 0; i < uri_protocols.nr; i++) {
+			if (skip_prefix(ex->uri,
+					uri_protocols.items[i].string,
+					&p) &&
+			    *p == ':')
+				return 1;
+
+		}
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1378,19 +1396,10 @@ static int want_object_in_pack(const struct object_id *oid,
 	if (uri_protocols.nr) {
 		struct configured_exclusion *ex =
 			oidmap_get(&configured_exclusions, oid);
-		int i;
-		const char *p;
 
-		if (ex) {
-			for (i = 0; i < uri_protocols.nr; i++) {
-				if (skip_prefix(ex->uri,
-						uri_protocols.items[i].string,
-						&p) &&
-				    *p == ':') {
-					oidset_insert(&excluded_by_config, oid);
-					return 0;
-				}
-			}
+		if (ex && match_packfile_uri_exclusions(ex)) {
+			oidset_insert(&excluded_by_config, oid);
+			return 0;
 		}
 	}
 
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 03/12] Replace `show_data` with structure `show_info`
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
  2021-10-19 11:38           ` [PATCH v6 01/12] objects.c: introduce `exclude_level` enum Teng Long
  2021-10-19 11:38           ` [PATCH v6 02/12] Introduce function `match_packfile_uri_exclusions` Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 04/12] Introduce `uploadpack.excludeobject` configuration Teng Long
                             ` (8 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

During the pack-objects process, "show_object" function will be called
to find and show the object the function contains three parameters:

	1. struct object *obj
	2. const char *name
	3. void *show_data

This commit replace "show_data" with with a new structure `show_info`,
it has two members:

	1. void *show_data
        2. void *show_cache

The `show_data` in `show_info `is the same purpose with the original
one, is the callback when showing the objects.

The `show_cache` is the cache that ownership the relationship data.
It stores around the extra data for showing the objects, for example,
carry the ownership relationship between blob or tree objects and the
referred commit to avoid redundant and expensive calculations.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/describe.c     |  9 +++++----
 builtin/pack-objects.c | 23 +++++++++++++----------
 builtin/rev-list.c     | 11 ++++++-----
 bundle.c               |  5 +++--
 list-objects.c         | 29 +++++++++++++++++++++++------
 list-objects.h         | 11 ++++++++---
 pack-bitmap.c          | 16 +++++++++-------
 reachable.c            | 11 +++++++----
 shallow.c              |  4 ++--
 9 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 40482d8e9f..a6a9e64fb0 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -479,15 +479,16 @@ struct process_commit_data {
 	struct rev_info *revs;
 };
 
-static void process_commit(struct commit *commit, void *data)
+static void process_commit(struct commit *commit, struct show_info *info)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = info->show_data;
 	pcd->current_commit = commit->object.oid;
 }
 
-static void process_object(struct object *obj, const char *path, void *data)
+static void process_object(struct object *obj, const char *path,
+			   struct show_info *info)
 {
-	struct process_commit_data *pcd = data;
+	struct process_commit_data *pcd = info->show_data;
 
 	if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) {
 		reset_revision_walk();
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 17053dc85a..a41a0a3ea7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3106,13 +3106,13 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 	return 0;
 }
 
-static void show_commit_pack_hint(struct commit *commit, void *_data)
+static void show_commit_pack_hint(struct commit *commit, struct show_info *info)
 {
 	/* nothing to do; commits don't have a namehash */
 }
 
 static void show_object_pack_hint(struct object *object, const char *name,
-				  void *_data)
+				  struct show_info *info)
 {
 	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
 	if (!oe)
@@ -3284,7 +3284,7 @@ static void read_object_list_from_stdin(void)
 /* Remember to update object flag allocation in object.h */
 #define OBJECT_ADDED (1u<<20)
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, struct show_info *info)
 {
 	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
 	commit->object.flags |= OBJECT_ADDED;
@@ -3296,7 +3296,8 @@ static void show_commit(struct commit *commit, void *data)
 		propagate_island_marks(commit);
 }
 
-static void show_object(struct object *obj, const char *name, void *data)
+static void show_object(struct object *obj, const char *name,
+			struct show_info *info)
 {
 	add_preferred_base_object(name);
 	add_object_entry(&obj->oid, obj->type, name, 0);
@@ -3318,7 +3319,8 @@ static void show_object(struct object *obj, const char *name, void *data)
 	}
 }
 
-static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_any(struct object *obj, const char *name,
+				      struct show_info *info)
 {
 	assert(arg_missing_action == MA_ALLOW_ANY);
 
@@ -3329,10 +3331,11 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
 	if (!has_object(the_repository, &obj->oid, 0))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, info);
 }
 
-static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
+static void show_object__ma_allow_promisor(struct object *obj, const char *name,
+					   struct show_info *info)
 {
 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
 
@@ -3343,7 +3346,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
 	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
 		return;
 
-	show_object(obj, name, data);
+	show_object(obj, name, info);
 }
 
 static int option_parse_missing_action(const struct option *opt,
@@ -3591,12 +3594,12 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 
 static void record_recent_object(struct object *obj,
 				 const char *name,
-				 void *data)
+				 struct show_info *info)
 {
 	oid_array_append(&recent_objects, &obj->oid);
 }
 
-static void record_recent_commit(struct commit *commit, void *data)
+static void record_recent_commit(struct commit *commit, struct show_info *info)
 {
 	oid_array_append(&recent_objects, &commit->object.oid);
 }
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index b4d8ea0a35..116d8d39f1 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -94,9 +94,9 @@ static off_t get_object_disk_usage(struct object *obj)
 }
 
 static void finish_commit(struct commit *commit);
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, struct show_info *s_info)
 {
-	struct rev_list_info *info = data;
+	struct rev_list_info *info = s_info->show_data;
 	struct rev_info *revs = info->revs;
 
 	display_progress(progress, ++progress_counter);
@@ -266,12 +266,13 @@ static int finish_object(struct object *obj, const char *name, void *cb_data)
 	return 0;
 }
 
-static void show_object(struct object *obj, const char *name, void *cb_data)
+static void show_object(struct object *obj, const char *name,
+			struct show_info *s_info)
 {
-	struct rev_list_info *info = cb_data;
+	struct rev_list_info *info = s_info->show_data;
 	struct rev_info *revs = info->revs;
 
-	if (finish_object(obj, name, cb_data))
+	if (finish_object(obj, name, info))
 		return;
 	display_progress(progress, ++progress_counter);
 	if (show_disk_usage)
diff --git a/bundle.c b/bundle.c
index 693d619551..d4c71ee2b4 100644
--- a/bundle.c
+++ b/bundle.c
@@ -437,9 +437,9 @@ struct bundle_prerequisites_info {
 	int fd;
 };
 
-static void write_bundle_prerequisites(struct commit *commit, void *data)
+static void write_bundle_prerequisites(struct commit *commit, struct show_info *info)
 {
-	struct bundle_prerequisites_info *bpi = data;
+	struct bundle_prerequisites_info *bpi = info->show_data;
 	struct object *object;
 	struct pretty_print_context ctx = { 0 };
 	struct strbuf buf = STRBUF_INIT;
@@ -530,6 +530,7 @@ int create_bundle(struct repository *r, const char *path,
 		die("revision walk setup failed");
 	bpi.fd = bundle_fd;
 	bpi.pending = &revs_copy.pending;
+
 	traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
 	object_array_remove_duplicates(&revs_copy.pending);
 
diff --git a/list-objects.c b/list-objects.c
index e19589baa0..bffce67dd8 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -29,6 +29,10 @@ static void process_blob(struct traversal_context *ctx,
 	struct object *obj = &blob->object;
 	size_t pathlen;
 	enum list_objects_filter_result r;
+	struct show_info show_info;
+
+	show_info.show_data = ctx->show_data;
+	show_info.show_cache = NULL;
 
 	if (!ctx->revs->blob_objects)
 		return;
@@ -60,7 +64,7 @@ static void process_blob(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, path->buf, ctx->show_data);
+		ctx->show_object(obj, path->buf, &show_info);
 	strbuf_setlen(path, pathlen);
 }
 
@@ -154,10 +158,14 @@ static void process_tree(struct traversal_context *ctx,
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
+	struct show_info show_info;
 	int baselen = base->len;
 	enum list_objects_filter_result r;
 	int failed_parse;
 
+	show_info.show_data = ctx->show_data;
+	show_info.show_cache = NULL;
+
 	if (!revs->tree_objects)
 		return;
 	if (!obj)
@@ -191,7 +199,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
+		ctx->show_object(obj, base->buf, &show_info);
 	if (base->len)
 		strbuf_addch(base, '/');
 
@@ -207,8 +215,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
-		ctx->show_object(obj, base->buf, ctx->show_data);
-
+		ctx->show_object(obj, base->buf, &show_info);
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
 }
@@ -322,8 +329,12 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree)
 static void traverse_trees_and_blobs(struct traversal_context *ctx,
 				     struct strbuf *base)
 {
+	struct show_info show_info;
 	int i;
 
+	show_info.show_data = ctx->show_data;
+	show_info.show_cache = NULL;
+
 	assert(base->len == 0);
 
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
@@ -335,7 +346,7 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 			continue;
 		if (obj->type == OBJ_TAG) {
 			obj->flags |= SEEN;
-			ctx->show_object(obj, name, ctx->show_data);
+			ctx->show_object(obj, name, &show_info);
 			continue;
 		}
 		if (!path)
@@ -358,8 +369,13 @@ static void do_traverse(struct traversal_context *ctx)
 {
 	struct commit *commit;
 	struct strbuf csp; /* callee's scratch pad */
+	struct show_info show_info;
 	strbuf_init(&csp, PATH_MAX);
 
+
+	show_info.show_data = ctx->show_data;
+	show_info.show_cache = NULL;
+
 	while ((commit = get_revision(ctx->revs)) != NULL) {
 		/*
 		 * an uninteresting boundary commit may not have its tree
@@ -375,7 +391,8 @@ static void do_traverse(struct traversal_context *ctx)
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
 		}
-		ctx->show_commit(commit, ctx->show_data);
+
+		ctx->show_commit(commit, &show_info);
 
 		if (ctx->revs->tree_blobs_in_commit_order)
 			/*
diff --git a/list-objects.h b/list-objects.h
index a952680e46..c7b61e9e10 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -5,9 +5,14 @@ struct commit;
 struct object;
 struct rev_info;
 
-typedef void (*show_commit_fn)(struct commit *, void *);
-typedef void (*show_object_fn)(struct object *, const char *, void *);
-void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
+struct show_info {
+    void *show_data; /* the data necessary for showing the object */
+    void *show_cache; /* the cache ownership relationship data for showing the object */
+};
+
+typedef void (*show_commit_fn)(struct commit *, struct show_info *);
+typedef void (*show_object_fn)(struct object *, const char *, struct show_info *);
+void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *show_data);
 
 typedef void (*show_edge_fn)(struct commit *);
 void mark_edges_uninteresting(struct rev_info *revs,
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 3ed15431cd..0dcfa5b50e 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -459,9 +459,10 @@ struct bitmap_show_data {
 	struct bitmap *base;
 };
 
-static void show_object(struct object *object, const char *name, void *data_)
+static void show_object(struct object *object, const char *name,
+			struct show_info *info)
 {
-	struct bitmap_show_data *data = data_;
+	struct bitmap_show_data *data = info->show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
@@ -473,7 +474,7 @@ static void show_object(struct object *object, const char *name, void *data_)
 	bitmap_set(data->base, bitmap_pos);
 }
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, struct show_info *info)
 {
 }
 
@@ -628,6 +629,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
 		show_data.bitmap_git = bitmap_git;
 		show_data.base = base;
 
+
 		traverse_commit_list_filtered(filter, revs,
 					      show_commit, show_object,
 					      &show_data, NULL);
@@ -1268,9 +1270,9 @@ struct bitmap_test_data {
 };
 
 static void test_show_object(struct object *object, const char *name,
-			     void *data)
+			     struct show_info *info)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = info->show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
@@ -1281,9 +1283,9 @@ static void test_show_object(struct object *object, const char *name,
 	display_progress(tdata->prg, ++tdata->seen);
 }
 
-static void test_show_commit(struct commit *commit, void *data)
+static void test_show_commit(struct commit *commit, struct show_info *info)
 {
-	struct bitmap_test_data *tdata = data;
+	struct bitmap_test_data *tdata = info->show_data;
 	int bitmap_pos;
 
 	bitmap_pos = bitmap_position(tdata->bitmap_git,
diff --git a/reachable.c b/reachable.c
index 77a60c70a5..4f78954c62 100644
--- a/reachable.c
+++ b/reachable.c
@@ -47,14 +47,15 @@ static int add_one_ref(const char *path, const struct object_id *oid,
  * The traversal will have already marked us as SEEN, so we
  * only need to handle any progress reporting here.
  */
-static void mark_object(struct object *obj, const char *name, void *data)
+static void mark_object(struct object *obj, const char *name,
+			struct show_info *info)
 {
-	update_progress(data);
+	update_progress(info->show_data);
 }
 
-static void mark_commit(struct commit *c, void *data)
+static void mark_commit(struct commit *c, struct show_info *info)
 {
-	mark_object(&c->object, NULL, data);
+	mark_object(&c->object, NULL, info);
 }
 
 struct recent_data {
@@ -230,6 +231,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 		return;
 	}
 
+
 	/*
 	 * Set up the revision walk - this will move all commits
 	 * from the pending list to the commit walking list.
@@ -244,6 +246,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 			die("unable to mark recent objects");
 		if (prepare_revision_walk(revs))
 			die("revision walk setup failed");
+
 		traverse_commit_list(revs, mark_commit, mark_object, &cp);
 	}
 
diff --git a/shallow.c b/shallow.c
index 9ed18eb884..ab1e49eba4 100644
--- a/shallow.c
+++ b/shallow.c
@@ -185,9 +185,9 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
 	return result;
 }
 
-static void show_commit(struct commit *commit, void *data)
+static void show_commit(struct commit *commit, struct show_info *info)
 {
-	commit_list_insert(commit, data);
+	commit_list_insert(commit, info->show_data);
 }
 
 /*
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 04/12] Introduce `uploadpack.excludeobject` configuration
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (2 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 03/12] Replace `show_data` with structure `show_info` Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 05/12] t5702: test cases for `uploadpack.excludeobject` Teng Long
                             ` (7 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

Background:

The `uploadpack.blobpackfileuri` is made for the "packfile-uri" feature,
the feature is similar to a CDN cache, it supports the client to download
 pack file by a URI directly, without or reducing the server load when make
packing and transporting.

Reasons:

First reason, `uploadpack.blobpackfileuri` supports to exclude a single
object, but actually the object type can be not only a blob, so the name
is inaccurate currently.

Secondly, the name of the old configuration is not abstract enough, this
make the furthur extension difficult. If do not change its name, to let
different object types use different configuration names, the
configuration items will be bloated and difficult to maintain, so the
new configuration is more abstract in name.

Configuation format diff:

	old: uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>
	new: uploadpack.excludeobject=<object-hash> <level> <pack-hash> <uri>

The new configuration `uploadpack.excludeobject` not only supports to
exclude a single object itself, but also to exclude related objects with
it at once, the scope of exclusion is determined by the object type and
the specified `<level>` value in the entry.

-Compatibility:

Although a new configuration has been introduced, the old one is
available to use and compatible with the new configuration.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 33 ++++++++++++++++++++++++++++++++-
 upload-pack.c          |  7 +++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index a41a0a3ea7..75461483c0 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3061,6 +3061,36 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		ex->uri = xstrdup(pack_end + 1);
 		oidmap_put(&configured_exclusions, ex);
 	}
+	if (!strcmp(k, "uploadpack.excludeobject")) {
+		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
+		const char *oid_end, *pack_end, *type_end;
+		struct object_id pack_hash;
+		char type[2];
+		int level;
+
+		if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
+		    *oid_end != ' ' ||
+		    !strlcpy(type, oid_end + 1, sizeof(type)) ||
+		    parse_oid_hex(oid_end + 3, &pack_hash, &pack_end) ||
+		    *pack_end != ' ')
+			die(_("value of uploadpack.excludeobject must be "
+			      "of the form '<object-hash> <level> <pack-hash> <uri>' (got '%s')"), v);
+		if (oidmap_get(&configured_exclusions, &ex->e.oid))
+			die(_("object already configured by an earlier "
+			      "uploadpack.excludeobject (got '%s')"), v);
+
+		level = atoi(type);
+		if (level < ET_SELF || level > ET_REACHABLE) {
+			die(_("value of <level> must be 0 or 1 or 2 (got '%s')"), v);
+		}
+		ex->level = level;
+		type_end = oid_end + 2;
+		ex->pack_hash_hex = xcalloc(1, pack_end - type_end);
+		memcpy(ex->pack_hash_hex, type_end + 1, pack_end - type_end - 1);
+		ex->uri = xstrdup(pack_end + 1);
+		oidmap_put(&configured_exclusions, ex);
+	}
+
 	return git_default_config(k, v, cb);
 }
 
@@ -3887,7 +3917,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("respect islands during delta compression")),
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
-				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+				N_("exclude any configured uploadpack.excludeobject or "
+					    "uploadpack.blobpackfileuri with this protocol")),
 		OPT_END(),
 	};
 
diff --git a/upload-pack.c b/upload-pack.c
index 5c1cd19612..d26fb351a3 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1751,6 +1751,13 @@ int upload_pack_advertise(struct repository *r,
 			strbuf_addstr(value, " packfile-uris");
 			free(str);
 		}
+
+		if (!repo_config_get_string(the_repository,
+					    "uploadpack.excludeobject",
+					    &str) && str) {
+			strbuf_addstr(value, " packfile-uris");
+			free(str);
+		}
 	}
 
 	return 1;
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 05/12] t5702: test cases for `uploadpack.excludeobject`
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (3 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 04/12] Introduce `uploadpack.excludeobject` configuration Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 06/12] packfile-uri: support for excluding commits Teng Long
                             ` (6 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

This commit expends the function `configure_exclusion` to support new
excluding type: commit, tree and tag.

Signed-off-by: Teng Long <tenglong@alibaba-inc.com>
---
 t/t5702-protocol-v2.sh | 146 +++++++++++++++++++++++++++--------------
 1 file changed, 98 insertions(+), 48 deletions(-)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 2e1243ca40..ccd3678311 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -824,17 +824,63 @@ test_expect_success 'when server does not send "ready", expect FLUSH' '
 '
 
 configure_exclusion () {
-	git -C "$1" hash-object "$2" >objh &&
-	git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
-	git -C "$1" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-	cat objh
+	objt="$1"
+	P="$2"
+	oid="$3"
+	version="$4"
+	excluding_type="$5"
+
+	oldc="uploadpack.blobpackfileuri"
+	newc="uploadpack.excludeobject"
+	configkey=""
+
+	if test "$version" = "old"
+	then
+		configkey="$oldc"
+	else
+		configkey="$newc"
+	fi
+
+	if test "$objt" = "blob"
+	then
+		excluding_type="0"
+		git -C "$P" hash-object "$oid" >objh &&
+		git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
+		if test "$version" = "old"
+		then
+			git -C "$P" config --add \
+            			"$configkey" \
+            			"$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack"
+        else
+        	git -C "$P" config --add \
+						"$configkey" \
+						"$(cat objh) $excluding_type $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack"
+		fi &&
+		cat objh
+	elif test "$objt" = "commit" || test "$objt" = "tree" || test "$objt" = "tag"
+	then
+		echo "$oid" >objh &&
+		if test "$excluding_type" = "0"
+		then
+			git -C "$P" pack-objects  "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+		else
+			git -C "$P" pack-objects --revs "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh
+		fi &&
+
+		git -C "$P" config --add \
+        			"$configkey" \
+        			"$(cat objh) $excluding_type $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
+		cat objh
+	else
+		echo "unsupported object type in configure_exclusion (got $objt)"
+	fi
 }
 
-test_expect_success 'part of packfile response provided as URI' '
+part_of_packfile_response_verify() {
+
+	config="$1" &&
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
@@ -843,10 +889,10 @@ test_expect_success 'part of packfile response provided as URI' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
-	configure_exclusion "$P" other-blob >h2 &&
+	configure_exclusion blob "$P" my-blob "$config" >h &&
+	configure_exclusion blob "$P" other-blob "$config" >h2 &&
 
 	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
 	git -c protocol.version=2 \
@@ -879,20 +925,22 @@ test_expect_success 'part of packfile response provided as URI' '
 	ls http_child/.git/objects/pack/*.pack \
 	    http_child/.git/objects/pack/*.idx >filelist &&
 	test_line_count = 6 filelist
-'
+}
+
+blobpackfileuri_fetch () {
+	config="$1"
 
-test_expect_success 'packfile URIs with fetch instead of clone' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
+	test_commit -C "$P" A &&
 
-	configure_exclusion "$P" my-blob >h &&
+	configure_exclusion blob "$P" my-blob $config >h &&
 
 	git init http_child &&
 
@@ -900,12 +948,28 @@ test_expect_success 'packfile URIs with fetch instead of clone' '
 	git -C http_child -c protocol.version=2 \
 		-c fetch.uriprotocols=http,https \
 		fetch "$HTTPD_URL/smart/http_parent"
+}
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): part of packfile response provided as URI' '
+	rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" http_child log &&
+	part_of_packfile_response_verify old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): part of packfile response provided as URI' '
+	part_of_packfile_response_verify new
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.blobpackfileuri): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch old
+'
+
+test_expect_success 'blob-exclusion (using uploadpack.excludeobject): packfile URIs with fetch instead of clone' '
+	blobpackfileuri_fetch new
 '
 
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
-
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
@@ -913,9 +977,8 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" add my-blob &&
 	echo other-blob >"$P/other-blob" &&
 	git -C "$P" add other-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 	# Configure a URL for other-blob. Just reuse the hash of the object as
 	# the hash of the packfile, since the hash does not matter for this
 	# test as long as it is not the hash of the pack, and it is of the
@@ -923,9 +986,8 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	git -C "$P" hash-object other-blob >objh &&
 	git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh &&
 	git -C "$P" config --add \
-		"uploadpack.blobpackfileuri" \
-		"$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
-
+		"uploadpack.excludeobject" \
+		"$(cat objh) 0 $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" &&
 	test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \
 		git -c protocol.version=2 \
 		-c fetch.uriprotocols=http,https \
@@ -935,17 +997,14 @@ test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
-
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
-
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
 		-c fetch.uriprotocols=http,https \
@@ -959,8 +1018,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child log &&
-
+	test_when_finished "rm -rf \"$P\" http_child log" &&
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
@@ -976,10 +1034,8 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 	echo my-blob >"$P/my-blob" &&
 	git -C "$P" add my-blob &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" my-blob >h &&
-
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" my-blob >h &&
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
 		-c fetch.uriprotocols=http,https \
@@ -989,8 +1045,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
 
 test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child &&
-
+	test_when_finished "rm -rf \"$P\" http_child" &&
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
@@ -999,9 +1054,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
 	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" .gitmodules >h &&
-
+	configure_exclusion blob "$P" .gitmodules >h &&
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	git -c protocol.version=2 -c transfer.fsckobjects=1 \
 		-c fetch.uriprotocols=http,https \
@@ -1015,8 +1068,7 @@ test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmo
 
 test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
-	rm -rf "$P" http_child err &&
-
+	test_when_finished "rm -rf \"$P\" http_child err" &&
 	git init "$P" &&
 	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
 
@@ -1024,10 +1076,8 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 	echo "path = include/foo" >>"$P/.gitmodules" &&
 	echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
 	git -C "$P" add .gitmodules &&
-	git -C "$P" commit -m x &&
-
-	configure_exclusion "$P" .gitmodules >h &&
-
+	test_commit -C "$P" A &&
+	configure_exclusion blob "$P" .gitmodules >h &&
 	sane_unset GIT_TEST_SIDEBAND_ALL &&
 	test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
 		-c fetch.uriprotocols=http,https \
@@ -1038,4 +1088,4 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodul
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
 
-test_done
+test_done
\ No newline at end of file
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 06/12] packfile-uri: support for excluding commits
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (4 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 05/12] t5702: test cases for `uploadpack.excludeobject` Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 07/12] t5702: test cases " Teng Long
                             ` (5 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

This commit work on the exlusion of commit objects, and the
Its excluding `level` can be configured as "ET_SELF", "ET_INCLUDE"
or "ET_REACHABLE".

Exclusion scope on different level:

1. When a commit is specified to be excluded with level "ET_SELF", only the
commit object itself will be excluded.

2. When it's specified to be excluded with level "ET_INCLUDE", exclude
all trees and blobs contained in its top-level tree, as well as the
commit itself.

3. When it is specified with level "ET_REACHABLE", exclude its
ancestors, as well as the objects need to be excluded under the level
"ET_INCLUDED".

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 83 +++++++++++++++++++++++++++++++++---------
 list-objects.c         | 37 +++++++++++--------
 object.c               | 19 ++++++++--
 object.h               | 14 ++++++-
 revision.c             | 34 ++++++++++++-----
 revision.h             |  3 ++
 6 files changed, 144 insertions(+), 46 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 75461483c0..e7b27ef443 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -36,6 +36,7 @@
 #include "trace2.h"
 #include "shallow.h"
 #include "promisor-remote.h"
+#include "commit-reach.h"
 
 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 #define SIZE(obj) oe_size(&to_pack, obj)
@@ -1354,11 +1355,14 @@ static int want_object_in_pack_one(struct packed_git *p,
 static int want_object_in_pack(const struct object_id *oid,
 			       int exclude,
 			       struct packed_git **found_pack,
-			       off_t *found_offset)
+			       off_t *found_offset,
+			       struct referred_objects *referred_objs)
 {
 	int want;
 	struct list_head *pos;
 	struct multi_pack_index *m;
+	struct configured_exclusion *ex;
+	struct configured_exclusion *commit_ex;
 
 	if (!exclude && local && has_loose_object_nonlocal(oid))
 		return 0;
@@ -1394,9 +1398,16 @@ static int want_object_in_pack(const struct object_id *oid,
 	}
 
 	if (uri_protocols.nr) {
-		struct configured_exclusion *ex =
-			oidmap_get(&configured_exclusions, oid);
+		if (referred_objs) {
+			struct commit *commit = referred_objs->commit;
+			if (commit) {
+				commit_ex = oidmap_get(&configured_exclusions, &commit->object.oid);
+				if (match_packfile_uri_exclusions(commit_ex) && commit_ex->level > ET_SELF)
+					return 0;
+			}
+		}
 
+		ex = oidmap_get(&configured_exclusions, oid);
 		if (ex && match_packfile_uri_exclusions(ex)) {
 			oidset_insert(&excluded_by_config, oid);
 			return 0;
@@ -1436,7 +1447,8 @@ static const char no_closure_warning[] = N_(
 );
 
 static int add_object_entry(const struct object_id *oid, enum object_type type,
-			    const char *name, int exclude)
+			    const char *name, int exclude,
+			    struct referred_objects *referred_objs)
 {
 	struct packed_git *found_pack = NULL;
 	off_t found_offset = 0;
@@ -1446,7 +1458,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 	if (have_duplicate_entry(oid, exclude))
 		return 0;
 
-	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
+	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset, referred_objs)) {
 		/* The pack is missing an object, so it will not have closure */
 		if (write_bitmap_index) {
 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
@@ -1472,7 +1484,7 @@ static int add_object_entry_from_bitmap(const struct object_id *oid,
 	if (have_duplicate_entry(oid, 0))
 		return 0;
 
-	if (!want_object_in_pack(oid, 0, &pack, &offset))
+	if (!want_object_in_pack(oid, 0, &pack, &offset, NULL))
 		return 0;
 
 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
@@ -1612,7 +1624,7 @@ static void add_pbase_object(struct tree_desc *tree,
 		if (name[cmplen] != '/') {
 			add_object_entry(&entry.oid,
 					 object_type(entry.mode),
-					 fullname, 1);
+					 fullname, 1, NULL);
 			return;
 		}
 		if (S_ISDIR(entry.mode)) {
@@ -1680,7 +1692,7 @@ static void add_preferred_base_object(const char *name)
 	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
+			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1, NULL);
 		}
 		else {
 			struct tree_desc tree;
@@ -2882,7 +2894,7 @@ static void add_tag_chain(const struct object_id *oid)
 			die(_("unable to pack objects reachable from tag %s"),
 			    oid_to_hex(oid));
 
-		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
+		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0, NULL);
 
 		if (tag->tagged->type != OBJ_TAG)
 			return;
@@ -3050,8 +3062,6 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		    *oid_end != ' ' ||
 		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
 		    *pack_end != ' ')
-			die(_("value of uploadpack.blobpackfileuri must be "
-			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
 		if (oidmap_get(&configured_exclusions, &ex->e.oid))
 			die(_("object already configured in another "
 			      "uploadpack.blobpackfileuri (got '%s')"), v);
@@ -3114,7 +3124,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	ofs = nth_packed_object_offset(p, pos);
-	if (!want_object_in_pack(oid, 0, &p, &ofs))
+	if (!want_object_in_pack(oid, 0, &p, &ofs, NULL))
 		return 0;
 
 	oi.typep = &type;
@@ -3307,7 +3317,7 @@ static void read_object_list_from_stdin(void)
 			die(_("expected object ID, got garbage:\n %s"), line);
 
 		add_preferred_base_object(p + 1);
-		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
+		add_object_entry(&oid, OBJ_NONE, p + 1, 0, NULL);
 	}
 }
 
@@ -3316,7 +3326,7 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, struct show_info *info)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
@@ -3329,8 +3339,9 @@ static void show_commit(struct commit *commit, struct show_info *info)
 static void show_object(struct object *obj, const char *name,
 			struct show_info *info)
 {
+	struct referred_objects *referred_objs = info->show_cache;
 	add_preferred_base_object(name);
-	add_object_entry(&obj->oid, obj->type, name, 0);
+	add_object_entry(&obj->oid, obj->type, name, 0, referred_objs);
 	obj->flags |= OBJECT_ADDED;
 
 	if (use_delta_islands) {
@@ -3483,7 +3494,7 @@ static void add_objects_in_unpacked_packs(void)
 		QSORT(in_pack.array, in_pack.nr, ofscmp);
 		for (i = 0; i < in_pack.nr; i++) {
 			struct object *o = in_pack.array[i].object;
-			add_object_entry(&o->oid, o->type, "", 0);
+			add_object_entry(&o->oid, o->type, "", 0, NULL);
 		}
 	}
 	free(in_pack.array);
@@ -3499,7 +3510,7 @@ static int add_loose_object(const struct object_id *oid, const char *path,
 		return 0;
 	}
 
-	add_object_entry(oid, type, "", 0);
+	add_object_entry(oid, type, "", 0, NULL);
 	return 0;
 }
 
@@ -3665,6 +3676,42 @@ static void mark_bitmap_preferred_tips(void)
 	}
 }
 
+static void reuse_exclusion_packfile(struct rev_info *revs)
+{
+	struct commit *commit;
+	struct oidmap_iter iter;
+	struct configured_exclusion *ex;
+	struct object_id ex_oid;
+	struct commit *ex_commit;
+	struct commit_list *list = revs->commits;
+	struct commit_list *newlist = NULL;
+	struct commit_list **p = &newlist;
+
+	if (revs->limited)
+		return;
+	while (list) {
+		commit = pop_commit(&list);
+		if (commit_list_contains(commit, newlist)) {
+			continue;
+		}
+		p = commit_list_append(commit, p);
+		oidmap_iter_init(&configured_exclusions, &iter);
+		while ((ex = oidmap_iter_next(&iter)) && ex->level == ET_REACHABLE) {
+			ex_oid = ex->e.oid;
+			ex_commit = lookup_commit_reference(the_repository, &ex_oid);
+			if (!ex_commit)
+				die("Not a valid commit name %s", oid_to_hex(&ex_oid));
+			if (!in_merge_bases(ex_commit, commit))
+				continue;
+			oidset_insert(&excluded_by_config, &ex_oid);
+			ex_commit->object.flags |= UNINTERESTING;
+			p = commit_list_append(ex_commit, p);
+			break;
+		}
+	}
+	revs->commits = newlist;
+}
+
 static void get_object_list(int ac, const char **av)
 {
 	struct rev_info revs;
@@ -3726,6 +3773,8 @@ static void get_object_list(int ac, const char **av)
 		die(_("revision walk setup failed"));
 	mark_edges_uninteresting(&revs, show_edge, sparse);
 
+	reuse_exclusion_packfile(&revs);
+
 	if (!fn_show_object)
 		fn_show_object = show_object;
 	traverse_commit_list_filtered(&filter_options, &revs,
diff --git a/list-objects.c b/list-objects.c
index bffce67dd8..b32213ecf1 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -24,7 +24,8 @@ struct traversal_context {
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
-			 const char *name)
+			 const char *name,
+			 struct referred_objects *referred_obj)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
@@ -32,7 +33,7 @@ static void process_blob(struct traversal_context *ctx,
 	struct show_info show_info;
 
 	show_info.show_data = ctx->show_data;
-	show_info.show_cache = NULL;
+	show_info.show_cache = referred_obj;
 
 	if (!ctx->revs->blob_objects)
 		return;
@@ -101,11 +102,13 @@ static void process_gitlink(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name);
+			 const char *name,
+			 struct referred_objects *referred_objs);
 
 static void process_tree_contents(struct traversal_context *ctx,
 				  struct tree *tree,
-				  struct strbuf *base)
+				  struct strbuf *base,
+				  struct referred_objects *referred_objs)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -133,7 +136,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path);
+			process_tree(ctx, t, base, entry.path, referred_objs);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -146,7 +149,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path);
+			process_blob(ctx, b, base, entry.path, referred_objs);
 		}
 	}
 }
@@ -154,7 +157,8 @@ static void process_tree_contents(struct traversal_context *ctx,
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
-			 const char *name)
+			 const char *name,
+			 struct referred_objects *referred_objs)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
@@ -163,9 +167,6 @@ static void process_tree(struct traversal_context *ctx,
 	enum list_objects_filter_result r;
 	int failed_parse;
 
-	show_info.show_data = ctx->show_data;
-	show_info.show_cache = NULL;
-
 	if (!revs->tree_objects)
 		return;
 	if (!obj)
@@ -196,6 +197,9 @@ static void process_tree(struct traversal_context *ctx,
 					       LOFS_BEGIN_TREE, obj,
 					       base->buf, &base->buf[baselen],
 					       ctx->filter);
+	show_info.show_cache = referred_objs;
+	show_info.show_data = ctx->show_data;
+
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
@@ -206,7 +210,7 @@ static void process_tree(struct traversal_context *ctx,
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
-		process_tree_contents(ctx, tree, base);
+		process_tree_contents(ctx, tree, base, referred_objs);
 
 	r = list_objects_filter__filter_object(ctx->revs->repo,
 					       LOFS_END_TREE, obj,
@@ -321,9 +325,9 @@ void mark_edges_uninteresting(struct rev_info *revs,
 	}
 }
 
-static void add_pending_tree(struct rev_info *revs, struct tree *tree)
+static void add_pending_tree(struct rev_info *revs, struct tree *tree, struct commit *referred_commit)
 {
-	add_pending_object(revs, &tree->object, "");
+	add_pending_object_with_referred_commit(revs, &tree->object, "", referred_commit);
 }
 
 static void traverse_trees_and_blobs(struct traversal_context *ctx,
@@ -340,6 +344,7 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
+		struct referred_objects *referred_objs = pending->referred_objects;
 		const char *name = pending->name;
 		const char *path = pending->path;
 		if (obj->flags & (UNINTERESTING | SEEN))
@@ -352,11 +357,11 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
-			process_tree(ctx, (struct tree *)obj, base, path);
+			process_tree(ctx, (struct tree *)obj, base, path, referred_objs);
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
-			process_blob(ctx, (struct blob *)obj, base, path);
+			process_blob(ctx, (struct blob *)obj, base, path, referred_objs);
 			continue;
 		}
 		die("unknown pending object %s (%s)",
@@ -386,7 +391,7 @@ static void do_traverse(struct traversal_context *ctx)
 		else if (get_commit_tree(commit)) {
 			struct tree *tree = get_commit_tree(commit);
 			tree->object.flags |= NOT_USER_GIVEN;
-			add_pending_tree(ctx->revs, tree);
+			add_pending_tree(ctx->revs, tree, commit);
 		} else if (commit->object.parsed) {
 			die(_("unable to load root tree for commit %s"),
 			      oid_to_hex(&commit->object.oid));
diff --git a/object.c b/object.c
index 14188453c5..f86b52c4d6 100644
--- a/object.c
+++ b/object.c
@@ -322,14 +322,18 @@ void object_list_free(struct object_list **list)
  */
 static char object_array_slopbuf[1];
 
-void add_object_array_with_path(struct object *obj, const char *name,
-				struct object_array *array,
-				unsigned mode, const char *path)
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name,
+						    struct object_array *array,
+						    unsigned mode, const char *path,
+						    struct commit *referred_commit)
 {
 	unsigned nr = array->nr;
 	unsigned alloc = array->alloc;
 	struct object_array_entry *objects = array->objects;
 	struct object_array_entry *entry;
+	struct referred_objects *referred_objs;
+	referred_objs = xmalloc(sizeof(struct referred_objects));
+	referred_objs->commit = referred_commit;
 
 	if (nr >= alloc) {
 		alloc = (alloc + 32) * 2;
@@ -339,6 +343,7 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	}
 	entry = &objects[nr];
 	entry->item = obj;
+	entry->referred_objects = referred_objs;
 	if (!name)
 		entry->name = NULL;
 	else if (!*name)
@@ -354,6 +359,13 @@ void add_object_array_with_path(struct object *obj, const char *name,
 	array->nr = ++nr;
 }
 
+void add_object_array_with_path(struct object *obj, const char *name,
+				struct object_array *array,
+				unsigned mode, const char *path)
+{
+	add_object_array_with_path_and_referred_commit(obj, name, array, mode, path, NULL);
+}
+
 void add_object_array(struct object *obj, const char *name, struct object_array *array)
 {
 	add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
@@ -368,6 +380,7 @@ static void object_array_release_entry(struct object_array_entry *ent)
 	if (ent->name != object_array_slopbuf)
 		free(ent->name);
 	free(ent->path);
+	free(ent->referred_objects);
 }
 
 struct object *object_array_pop(struct object_array *array)
diff --git a/object.h b/object.h
index 87a6da47c8..4db0ecc3f2 100644
--- a/object.h
+++ b/object.h
@@ -52,9 +52,20 @@ struct object_array {
 		char *name;
 		char *path;
 		unsigned mode;
+		/*
+		* referred_objects or NULL.  If non-NULL, it will
+		* temporary storage the referred objects when
+		* traversing the specified object. Space for time,
+		* reduce related computing costs (such as packfile-uri
+		* exclusion), clean up when the traversal is over.
+		*/
+		struct referred_objects *referred_objects;
 	} *objects;
 };
 
+struct referred_objects{
+    struct commit *commit;
+};
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
 /*
@@ -157,7 +168,8 @@ void object_list_free(struct object_list **list);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
-
+void add_object_array_with_path_and_referred_commit(struct object *obj, const char *name, struct object_array *array,
+						    unsigned mode, const char *path, struct commit *referred_commit);
 /*
  * Returns NULL if the array is empty. Otherwise, returns the last object
  * after removing its entry from the array. Other resources associated
diff --git a/revision.c b/revision.c
index 4853c85d0b..89a8b311ea 100644
--- a/revision.c
+++ b/revision.c
@@ -304,10 +304,11 @@ void mark_parents_uninteresting(struct commit *commit)
 	commit_stack_clear(&pending);
 }
 
-static void add_pending_object_with_path(struct rev_info *revs,
-					 struct object *obj,
-					 const char *name, unsigned mode,
-					 const char *path)
+static void add_pending_object_with_path_and_referred_commit(struct rev_info *revs,
+							     struct object *obj,
+							     const char *name, unsigned mode,
+							     const char *path,
+							     struct commit *referred_commit)
 {
 	struct interpret_branch_name_options options = { 0 };
 	if (!obj)
@@ -326,20 +327,35 @@ static void add_pending_object_with_path(struct rev_info *revs,
 		strbuf_release(&buf);
 		return; /* do not add the commit itself */
 	}
-	add_object_array_with_path(obj, name, &revs->pending, mode, path);
+	add_object_array_with_path_and_referred_commit(obj, name, &revs->pending, mode, path, referred_commit);
 }
 
+static void add_pending_object_with_path(struct rev_info *revs,
+					 struct object *obj,
+					 const char *name, unsigned mode,
+					 const char *path)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, path, NULL);
+}
 static void add_pending_object_with_mode(struct rev_info *revs,
 					 struct object *obj,
-					 const char *name, unsigned mode)
+					 const char *name, unsigned mode,
+					 struct commit *referred_commit)
+{
+	add_pending_object_with_path_and_referred_commit(revs, obj, name, mode, NULL, referred_commit);
+}
+
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct commit *referred_commit)
 {
-	add_pending_object_with_path(revs, obj, name, mode, NULL);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, referred_commit);
 }
 
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name)
 {
-	add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
+	add_pending_object_with_mode(revs, obj, name, S_IFINVALID, NULL);
 }
 
 void add_head_to_pending(struct rev_info *revs)
@@ -2817,7 +2833,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 		if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
 			diagnose_missing_default(revs->def);
 		object = get_reference(revs, revs->def, &oid, 0);
-		add_pending_object_with_mode(revs, object, revs->def, oc.mode);
+		add_pending_object_with_mode(revs, object, revs->def, oc.mode, NULL);
 	}
 
 	/* Did the user ask for any diff output? Run the diff! */
diff --git a/revision.h b/revision.h
index a24f72dcd1..b2e0c0b9b7 100644
--- a/revision.h
+++ b/revision.h
@@ -423,6 +423,9 @@ void show_object_with_name(FILE *, struct object *, const char *);
  */
 void add_pending_object(struct rev_info *revs,
 			struct object *obj, const char *name);
+void add_pending_object_with_referred_commit(struct rev_info *revs,
+					     struct object *obj, const char *name,
+					     struct commit *referred_commit);
 
 void add_pending_oid(struct rev_info *revs,
 		     const char *name, const struct object_id *oid,
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 07/12] t5702: test cases for excluding commits
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (5 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 06/12] packfile-uri: support for excluding commits Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 08/12] packfile-uri: support for excluding trees Teng Long
                             ` (4 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

Signed-off-by: Teng Long <tenglong@alibaba-inc.com>
---
 t/t5702-protocol-v2.sh | 300 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 300 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index ccd3678311..6e323253f7 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -967,6 +967,306 @@ test_expect_success 'blob-exclusion (using uploadpack.excludeobject): packfile U
 	blobpackfileuri_fetch new
 '
 
+test_expect_success 'commit-exclusion(excluding_type=ET_SELF): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="0" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/my-tree/sub-tree &&
+	echo sub-blob >"$P"/my-tree/sub-tree/sub-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD my-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	subblobh=$(git -C "$P" hash-object my-tree/sub-tree/sub-blob) &&
+	configure_exclusion commit "$P" "$commith" new "$excluding_type" >h &&
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child
+'
+
+test_expect_success 'commit-exclusion(excluding_type=ET_INCLUDE): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="1" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/my-tree/sub-tree &&
+	echo sub-blob >"$P"/my-tree/sub-tree/sub-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD my-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	subblobh=$(git -C "$P" hash-object my-tree/sub-tree/sub-blob) &&
+	configure_exclusion commit "$P" "$commith" new "$excluding_type" >h &&
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 7 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi &&
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	# Ensure that there are exactly 2 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
+'
+
+test_expect_success 'commit-exclusion(excluding_type=ET_REACHABLE):  hitten a full packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="2" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+ 	commith=$(git -C "$P" rev-parse A) &&
+  	roottreeh=$(git -C "$P" rev-parse A:) &&
+ 	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+ 	ah=$(git -C "$P" hash-object A.t) &&
+ 	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	configure_exclusion commit "$P" "$commith" new "$excluding_type" >h &&
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 5 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		elif test_line_count = 0 out.objectlist
+		then
+				>emptypackfound
+		fi
+	done &&
+	test -f emptypackfound &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	# Ensure that there are exactly 2 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
+'
+
+test_expect_success 'commit-exclusion(excluding_type=ET_REACHABLE): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="2" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+	mkdir "$P"/other-tree  &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	git -C "$P" add other-tree &&
+	test_commit -C "$P" B &&
+    ah=$(git -C "$P" hash-object A.t) &&
+    bh=$(git -C "$P" hash-object B.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+	rm -rf "$P"/my-tree "$P"/other-tree "$P"/A.t "$P"/B.t &&
+	mkdir "$P"/another-tree  &&
+	echo another-blob >"$P"/another-tree/another-blob &&
+	git -C "$P" add . &&
+	test_commit -C "$P" C &&
+ 	commitAh=$(git -C "$P" rev-parse A) &&
+ 	commitBh=$(git -C "$P" rev-parse B) &&
+ 	commitCh=$(git -C "$P" rev-parse C) &&
+ 	roottreeAh=$(git -C "$P" rev-parse A:) &&
+ 	roottreeBh=$(git -C "$P" rev-parse B:) &&
+ 	roottreeCh=$(git -C "$P" rev-parse C:) &&
+    mytreeh=$(git -C "$P" ls-tree A my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+    othertreeh=$(git -C "$P" ls-tree B other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+    anothertreeh=$(git -C "$P" ls-tree C another-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+    ch=$(git -C "$P" hash-object C.t) &&
+	anotherblobh=$(git -C "$P" hash-object another-tree/another-blob) &&
+	configure_exclusion commit "$P" "$commitBh" new "$excluding_type" >h &&
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 5 out.objectlist
+		then
+			if grep $commitCh out
+			then
+				>commitChfound
+			fi &&
+			if grep $roottreeCh out
+			then
+				>roottreeChfound
+			fi &&
+			if grep $anothertreeh out
+			then
+				>anothertreehfound
+			fi &&
+			if grep $anotherblobh out
+			then
+				>anotherblobhfound
+			fi &&
+			if grep $ch out
+			then
+				>chfound
+			fi
+		elif test_line_count = 10 out.objectlist
+		then
+			if grep $commitAh out
+			then
+				>commitAhfound
+			fi &&
+			if grep $commitBh out
+			then
+				>commitBhfound
+			fi &&
+			if grep $roottreeAh out
+			then
+				>roottreeAhfound
+			fi &&
+			if grep $roottreeBh out
+			then
+				>roottreeBhfound
+			fi &&
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $othertreeh out
+			then
+				>othertreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi &&
+			if grep $otherblobh out
+			then
+				>otherblobhfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $bh out
+			then
+				>bhfound
+			fi
+		fi
+	done &&
+	test -f commitChfound &&
+	test -f roottreeChfound &&
+	test -f anothertreehfound &&
+	test -f anotherblobhfound &&
+	test -f chfound &&
+	test -f commitAhfound &&
+	test -f commitBhfound &&
+	test -f roottreeAhfound &&
+	test -f roottreeBhfound &&
+	test -f mytreehfound &&
+	test -f othertreehfound &&
+	test -f myblobhfound &&
+	test -f otherblobhfound &&
+	test -f ahfound &&
+	test -f bhfound &&
+	# Ensure that there are exactly 2 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_when_finished "rm -rf \"$P\" http_child log" &&
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 08/12] packfile-uri: support for excluding trees
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (6 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 07/12] t5702: test cases " Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 09/12] t5702: test cases " Teng Long
                             ` (3 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

This commit introduce the exclusion of tree objects. The exclusion range
is designed by an enum named `exclude_level` in "pack-objects.c" that
enumerate three values, "ET_SELF", "ET_INCLUDE" and "ET_REACHABLE".

Exclusion scope on different level:

1. When a tree is specified to be excluded with level "ET_SELF",
only the tree object itself will be excluded.

2. When a tree specified to be excluded with level "ET_INCLUDE",
exclude the tree itself, as well as all the trees and blobs it contains.

3. When it is specified with level "ET_REACHABLE", the excluding scope
is as same as level "ET_INCLUDE", because tree do not have any
ancestors.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c | 18 ++++++++++++++++++
 list-objects.c         |  8 ++++++--
 object.c               |  1 +
 object.h               |  1 +
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e7b27ef443..6713e734fb 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1250,6 +1250,21 @@ static int match_packfile_uri_exclusions(struct configured_exclusion *ex)
 	return 0;
 }
 
+static int want_exclude_object(struct object_list *objects)
+{
+	struct object_list *p;
+	struct configured_exclusion *ex;
+
+	if (!objects)
+		return 0;
+	for (p = objects; p; p = p->next) {
+		ex = oidmap_get(&configured_exclusions, &p->item->oid);
+		if (match_packfile_uri_exclusions(ex) && ex->level > ET_SELF)
+			return 1;
+	}
+	return 0;
+}
+
 static int want_found_object(const struct object_id *oid, int exclude,
 			     struct packed_git *p)
 {
@@ -1400,11 +1415,14 @@ static int want_object_in_pack(const struct object_id *oid,
 	if (uri_protocols.nr) {
 		if (referred_objs) {
 			struct commit *commit = referred_objs->commit;
+			struct object_list *trees = referred_objs->trees;
 			if (commit) {
 				commit_ex = oidmap_get(&configured_exclusions, &commit->object.oid);
 				if (match_packfile_uri_exclusions(commit_ex) && commit_ex->level > ET_SELF)
 					return 0;
 			}
+			if (want_exclude_object(trees))
+				return 0;
 		}
 
 		ex = oidmap_get(&configured_exclusions, oid);
diff --git a/list-objects.c b/list-objects.c
index b32213ecf1..40292e2cc8 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -114,8 +114,11 @@ static void process_tree_contents(struct traversal_context *ctx,
 	struct name_entry entry;
 	enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ?
 		all_entries_interesting : entry_not_interesting;
+	struct referred_objects *referred_buf;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
+	referred_buf = xmemdupz(referred_objs, sizeof(struct referred_objects));
+	object_list_insert(&tree->object, &referred_buf->trees);
 
 	while (tree_entry(&desc, &entry)) {
 		if (match != all_entries_interesting) {
@@ -136,7 +139,7 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			t->object.flags |= NOT_USER_GIVEN;
-			process_tree(ctx, t, base, entry.path, referred_objs);
+			process_tree(ctx, t, base, entry.path, referred_buf);
 		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid.hash,
@@ -149,9 +152,10 @@ static void process_tree_contents(struct traversal_context *ctx,
 				    entry.path, oid_to_hex(&tree->object.oid));
 			}
 			b->object.flags |= NOT_USER_GIVEN;
-			process_blob(ctx, b, base, entry.path, referred_objs);
+			process_blob(ctx, b, base, entry.path, referred_buf);
 		}
 	}
+	free(referred_buf);
 }
 
 static void process_tree(struct traversal_context *ctx,
diff --git a/object.c b/object.c
index f86b52c4d6..895068cbc2 100644
--- a/object.c
+++ b/object.c
@@ -334,6 +334,7 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 	struct referred_objects *referred_objs;
 	referred_objs = xmalloc(sizeof(struct referred_objects));
 	referred_objs->commit = referred_commit;
+	referred_objs->trees = NULL;
 
 	if (nr >= alloc) {
 		alloc = (alloc + 32) * 2;
diff --git a/object.h b/object.h
index 4db0ecc3f2..618d674249 100644
--- a/object.h
+++ b/object.h
@@ -65,6 +65,7 @@ struct object_array {
 
 struct referred_objects{
     struct commit *commit;
+    struct object_list *trees;
 };
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 09/12] t5702: test cases for excluding trees
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (7 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 08/12] packfile-uri: support for excluding trees Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 10/12] packfile-uri: support for excluding tags Teng Long
                             ` (2 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

Signed-off-by: Teng Long <tenglong@alibaba-inc.com>
---
 t/t5702-protocol-v2.sh | 213 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 213 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 6e323253f7..762a9c2505 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -1267,6 +1267,219 @@ test_expect_success 'commit-exclusion(excluding_type=ET_REACHABLE): part of pack
 	test_line_count = 4 filelist
 '
 
+test_expect_success 'tree-exclusion(excluding_type=ET_SELF): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="0" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	# Dir struct
+	# 	.
+	#     |-- A.t
+	#     |-- my-tree
+	#     |   `-- my-blob
+	#     `-- other-tree
+	#         |-- other-blob
+	#         `-- sub-tree
+	#             `-- sub-blob
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/other-tree &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	mkdir "$P"/other-tree/sub-tree &&
+	echo sub-blob >"$P"/other-tree/sub-tree/sub-blob &&
+	git -C "$P" add other-tree &&
+ 	test_commit -C "$P" A &&
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	othertreeh=$(git -C "$P" ls-tree HEAD other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD other-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+	subblobh=$(git -C "$P" hash-object other-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion tree "$P" "$mytreeh" new 0 >h &&
+	configure_exclusion tree "$P" "$othertreeh" new 0 >h2 &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+# 	Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 1 out.objectlist
+		then
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $othertreeh out
+			then
+				>othertreehfound
+			fi
+		elif test_line_count = 7 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi &&
+			if grep $otherblobh out
+			then
+				>otherblobhfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f othertreehfound &&
+	test -f otherblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 6 filelist
+'
+
+test_expect_success 'tree-exclusion(excluding_type=ET_INCLUDE), part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	test_when_finished "rm -rf \"$P\" http_child log *found" &&
+	excluding_type="1" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true"  &&
+	# Dir struct
+	# 	.
+	#     |-- A.t
+	#     |-- my-tree
+	#     |   `-- my-blob
+	#     `-- other-tree
+	#         |-- other-blob
+	#         `-- sub-tree
+	#             `-- sub-blob
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	mkdir "$P"/other-tree &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	mkdir "$P"/other-tree/sub-tree &&
+	echo sub-blob >"$P"/other-tree/sub-tree/sub-blob &&
+	git -C "$P" add other-tree &&
+ 	test_commit -C "$P" A &&
+
+ 	commith=$(git -C "$P" rev-parse A) &&
+ 	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	othertreeh=$(git -C "$P" ls-tree HEAD other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	subtreeh=$(git -C "$P" ls-tree HEAD other-tree/sub-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+	subblobh=$(git -C "$P" hash-object other-tree/sub-tree/sub-blob) &&
+
+	configure_exclusion tree "$P" "$mytreeh" new $excluding_type >h &&
+	configure_exclusion tree "$P" "$othertreeh" new $excluding_type >h2 &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 3 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi
+		elif test_line_count = 2 out.objectlist
+		then
+			if grep $mytreeh out
+			then
+				>mytreehfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		elif test_line_count = 4 out.objectlist
+		then
+			if grep $othertreeh out
+			then
+				>othertreehfound
+			fi &&
+			if grep $otherblobh out
+			then
+				>otherblobhfound
+			fi &&
+			if grep $subtreeh out
+			then
+				>subtreehfound
+			fi &&
+			if grep $subblobh out
+			then
+				>subblobhfound
+			fi
+		fi
+	done &&
+	test -f mytreehfound &&
+	test -f myblobhfound &&
+	test -f othertreehfound &&
+	test -f otherblobhfound &&
+	test -f subtreehfound &&
+	test -f subblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 6 filelist
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_when_finished "rm -rf \"$P\" http_child log" &&
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 10/12] packfile-uri: support for excluding tags
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (8 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 09/12] t5702: test cases " Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 11/12] t5702: test cases " Teng Long
  2021-10-19 11:38           ` [PATCH v6 12/12] packfile-uri.txt: support multiple object types Teng Long
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

From: Teng Long <dyroneteng@gmail.com>

This commit brings the tags exclusion feature of packfile-uri. The
excluding level of tag is supported with "ET_SELF", "ET_INCLUDE"
and "ET_REACHABLE".

Exclusion scope on different level:

1. When a tag is specified to be excluded with level "ET_SELF",
only the tag object itself will be excluded.

2. When it's specified to be excluded with level "ET_INCLUDE",
exclude the referenced commit, and all trees and blobs contained in its
top-level. If it's a annotated tag, the tag object will be excluded
too.

3. When it is specified with level "ET_REACHABLE", exclude the ancestors
of the referenced commit, as well as the objects need to be excluded
under the level "ET_INCLUDED".

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 builtin/pack-objects.c |  6 +++++-
 list-objects.c         | 20 ++++++++++++++++----
 object.c               |  1 +
 object.h               |  1 +
 revision.c             | 13 ++++++++++++-
 revision.h             |  9 +++++++++
 6 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6713e734fb..073c3815a1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1416,6 +1416,9 @@ static int want_object_in_pack(const struct object_id *oid,
 		if (referred_objs) {
 			struct commit *commit = referred_objs->commit;
 			struct object_list *trees = referred_objs->trees;
+			struct object_list *tags = referred_objs->tags;
+			if (want_exclude_object(tags))
+				return 0;
 			if (commit) {
 				commit_ex = oidmap_get(&configured_exclusions, &commit->object.oid);
 				if (match_packfile_uri_exclusions(commit_ex) && commit_ex->level > ET_SELF)
@@ -3344,7 +3347,8 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, struct show_info *info)
 {
-	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, NULL);
+	struct referred_objects *referred_objs = info->show_cache;
+	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0, referred_objs);
 	commit->object.flags |= OBJECT_ADDED;
 
 	if (write_bitmap_index)
diff --git a/list-objects.c b/list-objects.c
index 40292e2cc8..2e241e8707 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -348,9 +348,10 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 	for (i = 0; i < ctx->revs->pending.nr; i++) {
 		struct object_array_entry *pending = ctx->revs->pending.objects + i;
 		struct object *obj = pending->item;
-		struct referred_objects *referred_objs = pending->referred_objects;
 		const char *name = pending->name;
 		const char *path = pending->path;
+		struct referred_objects *referred_objs = pending->referred_objects;
+		struct commit_wraps_entry *cw_entry = NULL;
 		if (obj->flags & (UNINTERESTING | SEEN))
 			continue;
 		if (obj->type == OBJ_TAG) {
@@ -358,6 +359,10 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx,
 			ctx->show_object(obj, name, &show_info);
 			continue;
 		}
+		if (referred_objs->commit)
+			cw_entry = oidmap_get(ctx->revs->commit_wraps, &referred_objs->commit->object.oid);
+		if (cw_entry)
+			referred_objs->tags = cw_entry->wraps;
 		if (!path)
 			path = "";
 		if (obj->type == OBJ_TREE) {
@@ -378,12 +383,14 @@ static void do_traverse(struct traversal_context *ctx)
 {
 	struct commit *commit;
 	struct strbuf csp; /* callee's scratch pad */
-	struct show_info show_info;
+	struct commit_wraps_entry *entry;
+	struct oidmap *commit_wraps = ctx->revs->commit_wraps;
+	struct referred_objects referred_objs = { NULL, NULL, NULL };
+	struct show_info show_info = { ctx->show_data , NULL };
 	strbuf_init(&csp, PATH_MAX);
 
-
 	show_info.show_data = ctx->show_data;
-	show_info.show_cache = NULL;
+	show_info.show_cache = &referred_objs;
 
 	while ((commit = get_revision(ctx->revs)) != NULL) {
 		/*
@@ -401,6 +408,11 @@ static void do_traverse(struct traversal_context *ctx)
 			      oid_to_hex(&commit->object.oid));
 		}
 
+		if (commit_wraps) {
+			entry = oidmap_get(commit_wraps, &commit->object.oid);
+			referred_objs.tags = entry ? entry->wraps : NULL;
+		}
+
 		ctx->show_commit(commit, &show_info);
 
 		if (ctx->revs->tree_blobs_in_commit_order)
diff --git a/object.c b/object.c
index 895068cbc2..167cc87ec9 100644
--- a/object.c
+++ b/object.c
@@ -335,6 +335,7 @@ void add_object_array_with_path_and_referred_commit(struct object *obj, const ch
 	referred_objs = xmalloc(sizeof(struct referred_objects));
 	referred_objs->commit = referred_commit;
 	referred_objs->trees = NULL;
+	referred_objs->tags = NULL;
 
 	if (nr >= alloc) {
 		alloc = (alloc + 32) * 2;
diff --git a/object.h b/object.h
index 618d674249..42a2178531 100644
--- a/object.h
+++ b/object.h
@@ -66,6 +66,7 @@ struct object_array {
 struct referred_objects{
     struct commit *commit;
     struct object_list *trees;
+    struct object_list *tags;
 };
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
diff --git a/revision.c b/revision.c
index 89a8b311ea..d16f9bbbb9 100644
--- a/revision.c
+++ b/revision.c
@@ -416,14 +416,17 @@ static struct commit *handle_commit(struct rev_info *revs,
 	const char *path = entry->path;
 	unsigned int mode = entry->mode;
 	unsigned long flags = object->flags;
+	struct object_list *wraps = NULL;
 
 	/*
 	 * Tag object? Look what it points to..
 	 */
 	while (object->type == OBJ_TAG) {
 		struct tag *tag = (struct tag *) object;
-		if (revs->tag_objects && !(flags & UNINTERESTING))
+		if (revs->tag_objects && !(flags & UNINTERESTING)) {
 			add_pending_object(revs, object, tag->tag);
+			object_list_insert(object, &wraps);
+		}
 		object = parse_object(revs->repo, get_tagged_oid(tag));
 		if (!object) {
 			if (revs->ignore_missing_links || (flags & UNINTERESTING))
@@ -449,6 +452,14 @@ static struct commit *handle_commit(struct rev_info *revs,
 	 */
 	if (object->type == OBJ_COMMIT) {
 		struct commit *commit = (struct commit *)object;
+		struct oidmap *commit_wraps = malloc(sizeof(struct oidmap));
+		struct commit_wraps_entry *cw_entry = xmalloc(sizeof(struct commit_wraps_entry));
+
+		oidmap_init(commit_wraps, 0);
+		cw_entry->e.oid = object->oid;
+		cw_entry->wraps = wraps;
+		oidmap_put(commit_wraps, cw_entry);
+		revs->commit_wraps = commit_wraps;
 
 		if (repo_parse_commit(revs->repo, commit) < 0)
 			die("unable to parse commit %s", name);
diff --git a/revision.h b/revision.h
index b2e0c0b9b7..a633ea2174 100644
--- a/revision.h
+++ b/revision.h
@@ -8,6 +8,7 @@
 #include "pretty.h"
 #include "diff.h"
 #include "commit-slab-decl.h"
+#include "oidmap.h"
 
 /**
  * The revision walking API offers functions to build a list of revisions
@@ -64,6 +65,11 @@ struct bloom_key;
 struct bloom_filter_settings;
 define_shared_commit_slab(revision_sources, char *);
 
+struct commit_wraps_entry {
+    struct oidmap_entry e;
+    struct object_list *wraps;
+};
+
 struct rev_cmdline_info {
 	unsigned int nr;
 	unsigned int alloc;
@@ -321,6 +327,9 @@ struct rev_info {
 
 	/* misc. flags related to '--no-kept-objects' */
 	unsigned keep_pack_cache_flags;
+
+	/* The commit_wraps caches the referred wrapped objects(such as tags) of a commit */
+	struct oidmap *commit_wraps;
 };
 
 int ref_excluded(struct string_list *, const char *path);
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 11/12] t5702: test cases for excluding tags
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (9 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 10/12] packfile-uri: support for excluding tags Teng Long
@ 2021-10-19 11:38           ` Teng Long
  2021-10-19 11:38           ` [PATCH v6 12/12] packfile-uri.txt: support multiple object types Teng Long
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

Signed-off-by: Teng Long <tenglong@alibaba-inc.com>
---
 t/t5702-protocol-v2.sh | 197 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)

diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 762a9c2505..f91ae62fd8 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -1480,6 +1480,203 @@ test_expect_success 'tree-exclusion(excluding_type=ET_INCLUDE), part of packfile
 	test_line_count = 6 filelist
 '
 
+test_expect_success 'tag-exclusion(excluding_type=ET_SELF): part of packfile response provided as URI' '
+  	test_when_finished "rm -rf \"$P\" http_child log" &&
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+	git -C "$P" tag -a -m "annotated_tag" tagA &&
+
+	tagh=$(git -C "$P" rev-parse tagA) &&
+	commith=$(git -C "$P" rev-parse A) &&
+	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-blob) &&
+
+	configure_exclusion tag "$P" "$tagh" new 0 >h2 &&
+	git init http_child &&
+	GIT_TRACE=1 GIT_TRACE_PACKET=`pwd`/log GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child \
+		-c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch --tags "$HTTPD_URL/smart/http_parent" &&
+
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 1 out.objectlist
+		then
+			if grep $tagh out
+			then
+				>taghfound
+			fi
+
+		elif test_line_count = 4 out.objectlist
+		then
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		fi
+	done &&
+	test -f myblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	test -f taghfound &&
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
+'
+
+test_expect_success 'tag-exclusion(excluding_type=ET_INCLUDE): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+	echo my-blob >"$P/my-blob" &&
+	git -C "$P" add my-blob &&
+	test_commit -C "$P" A &&
+	git -C "$P" tag -a -m "annotated_tag" tagA &&
+
+	tagh=$(git -C "$P" rev-parse tagA) &&
+	commith=$(git -C "$P" rev-parse A) &&
+	roottreeh=$(git -C "$P" rev-parse A:) &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	myblobh=$(git -C "$P" hash-object my-blob) &&
+
+	configure_exclusion tag "$P" "$tagh" new 1 >h2 &&
+
+	git init http_child &&
+	GIT_TRACE=1 GIT_TRACE_PACKET=`pwd`/log GIT_TEST_SIDEBAND_ALL=1 \
+	git -C http_child \
+		-c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		fetch --tags "$HTTPD_URL/smart/http_parent" &&
+
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+	for idx in http_child/.git/objects/pack/*.idx
+	do
+		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+		{
+			grep "^[0-9a-f]\{16,\} " out || :
+		} >out.objectlist &&
+		if test_line_count = 5 out.objectlist
+		then
+			if grep $tagh out
+			then
+				>taghfound
+			fi &&
+			if grep $commith out
+			then
+				>commithfound
+			fi &&
+			if grep $roottreeh out
+			then
+				>roottreehfound
+			fi &&
+			if grep $ah out
+			then
+				>ahfound
+			fi &&
+			if grep $myblobh out
+			then
+				>myblobhfound
+			fi
+		fi
+	done &&
+	test -f myblobhfound &&
+	test -f commithfound &&
+	test -f roottreehfound &&
+	test -f ahfound &&
+	test -f taghfound &&
+	# Ensure that there are exactly 3 packfiles with associated .idx
+	ls http_child/.git/objects/pack/*.pack \
+		http_child/.git/objects/pack/*.idx >filelist &&
+	test_line_count = 4 filelist
+'
+
+test_expect_success 'tag-exclusion(excluding_type=ET_REACHABLE): part of packfile response provided as URI' '
+	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+ 	test_when_finished "rm -rf \"$P\" http_child log" &&
+	git init "$P" &&
+	git -C "$P" config "uploadpack.allowsidebandall" "true" &&
+
+
+	mkdir "$P"/my-tree  &&
+	echo my-blob >"$P"/my-tree/my-blob &&
+	git -C "$P" add my-tree &&
+	test_commit -C "$P" A &&
+
+	mkdir "$P"/other-tree &&
+	echo other-blob >"$P"/other-tree/other-blob &&
+	git -C "$P" add other-tree &&
+	test_commit -C "$P" B &&
+
+	git -C "$P" tag -a -m "tag X" tagX &&
+
+	tagh=$(git -C "$P" rev-parse tagX) &&
+ 	commitAh=$(git -C "$P" rev-parse A) &&
+ 	commitBh=$(git -C "$P" rev-parse B) &&
+ 	roottreeAh=$(git -C "$P" rev-parse A:) &&
+	roottreeBh=$(git -C "$P" rev-parse B:) &&
+	mytreeh=$(git -C "$P" ls-tree HEAD my-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	othertreeh=$(git -C "$P" ls-tree HEAD other-tree | sed -ne "s/.*\($OID_REGEX\).*/\1/p") &&
+	ah=$(git -C "$P" hash-object A.t) &&
+	bh=$(git -C "$P" hash-object B.t) &&
+	myblobh=$(git -C "$P" hash-object my-tree/my-blob) &&
+	otherblobh=$(git -C "$P" hash-object other-tree/other-blob) &&
+
+	configure_exclusion tag "$P" "$tagh" new 2 >h &&
+
+	GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \
+	git -c protocol.version=2 \
+		-c fetch.uriprotocols=http,https \
+		clone "$HTTPD_URL/smart/http_parent" http_child &&
+
+	# Ensure that my-tree and other-tree and theirs complementary set are in separate packfiles.
+    	for idx in http_child/.git/objects/pack/*.idx
+    	do
+    		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
+    		{
+    			grep "^[0-9a-f]\{16,\} " out || :
+    		} >out.objectlist &&
+    		if test_line_count = 11 out.objectlist
+    		then
+    				>fullpackfound
+    		elif test_line_count = 0 out.objectlist
+    		then
+					>emptypackfound
+    		fi
+    	done &&
+    	test -f fullpackfound &&
+    	test -f emptypackfound &&
+    	# Ensure that there are exactly 3 packfiles with associated .idx
+    	ls http_child/.git/objects/pack/*.pack \
+    		http_child/.git/objects/pack/*.idx >filelist &&
+    	test_line_count = 4 filelist
+'
+
 test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
 	P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
 	test_when_finished "rm -rf \"$P\" http_child log" &&
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH v6 12/12] packfile-uri.txt: support multiple object types
  2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
                             ` (10 preceding siblings ...)
  2021-10-19 11:38           ` [PATCH v6 11/12] t5702: test cases " Teng Long
@ 2021-10-19 11:38           ` Teng Long
  11 siblings, 0 replies; 72+ messages in thread
From: Teng Long @ 2021-10-19 11:38 UTC (permalink / raw)
  To: git
  Cc: gitster, avarab, jonathantanmy, bagasdotme, adlternative, stolee,
	Teng Long

Signed-off-by: Teng Long <tenglong@alibaba-inc.com>
---
 Documentation/technical/packfile-uri.txt | 74 ++++++++++++++++++++----
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..0e6d7c8dda 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -34,14 +34,69 @@ having it advertise `packfile-uris`, tolerating the client sending
 include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
-This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+This is the implementation: a feature, marked experimental, that allows
+the server to be configured by one or more entries with the format:
+
+    uploadpack.excludeobject=<object-hash> <level> <pack-hash> <uri>
+
+Value `<object-hash>` is the key of entry, and the object type can be
+blob, tree, commit, or tag. Value of entry has three parts,
+`<pack-hash>` is used to identify the packfile which contains the given
+`<object-hash>` object, and `<uri>` is the URI to download the packfile by
+client. For example, When a blob is configured with `uploadpack.excludeobject`
+that means whenever the blob to be send is assembled, the object will
+be excluded.
+
+In addition to excluding a single object like blob, sometimes it's
+hoped to exclude not only the object itself, but also all the related
+objects with it, like all the objects a tree contains or the ancestors
+that a commit can reach. In these cases, the `<level>` is designed to
+distinguish the scope of exclusion, it supports three levels:
+
+- Level 0: Excluding a single object itself, without any objects that
+  have a relationship with it. 
+
+- Level 1: Excluding object itself, and objects it contains.
+
+- Level 2: Excluding object itself, the objects it contains, and the
+  ancestors it can reach.
+
+If `<level>` is configured as 0, only the object itself will be
+excluded, no matter what the object type is. It is a common scenario
+for large size blobs, but it does much not sense for other object types
+(e.g. download a singe commit without downloading the blobs and tree
+in it).
+
+If `<level>` is configured as 1, not only the single object but also all
+the objects in it will be excluded. This applies to scenarios where
+it's wanted to exclude a specified non-blob object that includes some
+lage size objects.
+
+- If <object-hash> is a blob, the result is the same as level 0, because blob
+contains nothing just itself.
+
+- If <object-hash> is a tree, the tree itself, and all blobs and trees
+  in it will be excluded.
+
+- If <object-hash> is a commit, the commit itself, the referenced
+  root-tree, and all blobs and trees in the root-tree will be excluded.
+
+- If <object-hash> is a tag, the tag itself, the dereferenced commit
+  and all trees and blobs contained in its root-tree will be excluded.
+
+If `<level>` is configured as 2, not only the objects in the scope of
+level 1 , but also the reachable ancestors will be excluded if
+`<object-hash>` is commit or tag.
+
+Configuration compatibility
+---------------------------
+
+The old configuration of packfile-uri:
+
+    uploadpack.blobPackfileUri=<object-hash> <pack-hash> <uri>
+
+For the old configuration is compatible with the new one, but it only
+supports the exclusion of blob objects.
 
 Client design
 -------------
@@ -65,9 +120,6 @@ The protocol design allows some evolution of the server and client without any
 need for protocol changes, so only a small-scoped design is included here to
 form the MVP. For example, the following can be done:
 
- * On the server, more sophisticated means of excluding objects (e.g. by
-   specifying a commit to represent that commit and all objects that it
-   references).
  * On the client, resumption of clone. If a clone is interrupted, information
    could be recorded in the repository's config and a "clone-resume" command
    can resume the clone in progress. (Resumption of subsequent fetches is more
-- 
2.31.1.453.g945ddc3a74.dirty


^ permalink raw reply	[flat|nested] 72+ messages in thread

end of thread, other threads:[~2021-10-19 11:40 UTC | newest]

Thread overview: 72+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-07  2:11 [PATCH] Packfile-uris support excluding commit objects Teng Long
2021-05-10 11:14 ` Ævar Arnfjörð Bjarmason
2021-05-18  8:49 ` [PATCH v2 0/3] packfile-uris: commit objects exclusion Teng Long
2021-05-18  8:49   ` [PATCH v2 1/3] packfile-uris: support for excluding commit object Teng Long
2021-05-19  4:28     ` Junio C Hamano
2021-05-20  4:46     ` Junio C Hamano
2021-05-18  8:49   ` [PATCH v2 2/3] packfile-uris.txt: " Teng Long
2021-05-18  8:49   ` [PATCH v2 3/3] t5702: excluding commits with packfile-uris Teng Long
2021-07-26  9:46   ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Teng Long
2021-07-26  9:46     ` [PATCH v3 1/3] packfile-uris: support for excluding commit objects Teng Long
2021-07-26 18:15       ` Junio C Hamano
2021-07-26 19:45         ` Felipe Contreras
2021-08-11  1:44         ` Teng Long
2021-07-26  9:46     ` [PATCH v3 2/3] t5702: " Teng Long
2021-07-26 15:03       ` Ævar Arnfjörð Bjarmason
2021-08-11  1:46         ` [PATCH v3 1/3] packfile-uris: " Teng Long
2021-07-26  9:46     ` [PATCH v3 3/3] packfile-uri.txt: " Teng Long
2021-07-26 20:52       ` Junio C Hamano
2021-08-11  1:47         ` Teng Long
2021-07-26 12:34     ` [PATCH v3 0/3] packfile-uris: commit objects exclusio Ævar Arnfjörð Bjarmason
2021-08-11  1:48       ` Teng Long
2021-08-11  7:45     ` [PATCH v4 0/7] packfile-uris: commits and trees exclusion Teng Long
2021-08-11  7:45       ` [PATCH v4 1/7] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
2021-08-11  7:45       ` [PATCH v4 2/7] Add new parameter "carry_data" for "show_object" function Teng Long
2021-08-11  7:45       ` [PATCH v4 3/7] packfile-uri: support for excluding commit objects Teng Long
2021-08-11  7:45       ` [PATCH v4 4/7] packfile-uri: support for excluding tree objects Teng Long
2021-08-11  7:45       ` [PATCH v4 5/7] packfile-uri.txt: support for excluding commits and trees Teng Long
2021-08-11  9:59         ` Bagas Sanjaya
2021-08-11  7:45       ` [PATCH v4 6/7] t5702: replace with "test_when_finished" for cleanup Teng Long
2021-08-11  7:45       ` [PATCH v4 7/7] t5702: support for excluding commit objects Teng Long
2021-08-25  2:21       ` [PATCH v5 00/14] packfile-uris: commits, trees and tags exclusion Teng Long
2021-08-25  2:21         ` [PATCH v5 01/14] pack-objects.c: introduce new method `match_packfile_uri_exclusions` Teng Long
2021-08-25  2:21         ` [PATCH v5 02/14] Add new parameter "carry_data" for "show_object" function Teng Long
2021-08-26 20:45           ` Junio C Hamano
2021-09-02 11:08             ` Teng Long
2021-08-25  2:21         ` [PATCH v5 03/14] packfile-uri: support for excluding commit objects Teng Long
2021-08-25 23:49           ` Ævar Arnfjörð Bjarmason
2021-09-02 12:26             ` Teng Long
2021-08-26 20:56           ` Junio C Hamano
2021-09-02 12:51             ` Teng Long
2021-08-25  2:21         ` [PATCH v5 04/14] packfile-uri: support for excluding tree objects Teng Long
2021-08-25  2:21         ` [PATCH v5 05/14] packfile-uri.txt: support for excluding commits and trees Teng Long
2021-08-25 23:52           ` Ævar Arnfjörð Bjarmason
2021-09-02 11:23             ` Teng Long
2021-08-25  2:21         ` [PATCH v5 06/14] t5702: replace with "test_when_finished" for cleanup Teng Long
2021-08-25 23:55           ` Ævar Arnfjörð Bjarmason
2021-09-02 11:37             ` Teng Long
2021-08-25  2:21         ` [PATCH v5 07/14] t5702: support for excluding commit objects Teng Long
2021-08-25  2:21         ` [PATCH v5 08/14] Add new parameter "carry_data" for "show_commit function Teng Long
2021-08-25  2:21         ` [PATCH v5 09/14] commit.h: add wrapped tags in commit struct Teng Long
2021-08-25 23:58           ` Ævar Arnfjörð Bjarmason
2021-09-02 12:17             ` Teng Long
2021-09-02 12:39           ` ZheNing Hu
2021-09-02 13:01             ` Teng Long
2021-08-25  2:21         ` [PATCH v5 10/14] object.h: add referred tags in `referred_objects` struct Teng Long
2021-08-25  2:21         ` [PATCH v5 11/14] packfile-uri: support for excluding tag objects Teng Long
2021-08-25  2:21         ` [PATCH v5 12/14] packfile-uri.txt: " Teng Long
2021-08-25  2:21         ` [PATCH v5 13/14] t5702: add tag exclusion test case Teng Long
2021-08-25  2:21         ` [PATCH v5 14/14] pack-objects.c: introduce `want_exclude_object` function Teng Long
2021-10-19 11:38         ` [PATCH v6 00/12] packfile-uri: support excluding multiple object types Teng Long
2021-10-19 11:38           ` [PATCH v6 01/12] objects.c: introduce `exclude_level` enum Teng Long
2021-10-19 11:38           ` [PATCH v6 02/12] Introduce function `match_packfile_uri_exclusions` Teng Long
2021-10-19 11:38           ` [PATCH v6 03/12] Replace `show_data` with structure `show_info` Teng Long
2021-10-19 11:38           ` [PATCH v6 04/12] Introduce `uploadpack.excludeobject` configuration Teng Long
2021-10-19 11:38           ` [PATCH v6 05/12] t5702: test cases for `uploadpack.excludeobject` Teng Long
2021-10-19 11:38           ` [PATCH v6 06/12] packfile-uri: support for excluding commits Teng Long
2021-10-19 11:38           ` [PATCH v6 07/12] t5702: test cases " Teng Long
2021-10-19 11:38           ` [PATCH v6 08/12] packfile-uri: support for excluding trees Teng Long
2021-10-19 11:38           ` [PATCH v6 09/12] t5702: test cases " Teng Long
2021-10-19 11:38           ` [PATCH v6 10/12] packfile-uri: support for excluding tags Teng Long
2021-10-19 11:38           ` [PATCH v6 11/12] t5702: test cases " Teng Long
2021-10-19 11:38           ` [PATCH v6 12/12] packfile-uri.txt: support multiple object types Teng Long

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).